[Libguestfs] [PATCH v3 1/5] New API: file-architecture

Richard W.M. Jones rjones at redhat.com
Tue Aug 10 12:41:21 UTC 2010


On Mon, Aug 09, 2010 at 05:08:06PM +0100, Matthew Booth wrote:
> On 02/08/10 18:12, Richard W.M. Jones wrote:
> >>From 647be56b2d8b171a33608b36e0b7557d94db3c96 Mon Sep 17 00:00:00 2001
> >From: Richard Jones<rjones at redhat.com>
> >Date: Wed, 28 Jul 2010 15:38:57 +0100
> >Subject: [PATCH 1/5] New API: file-architecture
> >
> >This change simply converts the existing Perl-only function
> >file_architecture into a core API call.  The core API call is
> >written in C and available in all languages and from guestfish.
> >---
> >  README                      |    2 +
> >  configure.ac                |    9 ++
> >  perl/lib/Sys/Guestfs/Lib.pm |  147 +-----------------------
> >  perl/t/510-lib-file-arch.t  |   70 -----------
> >  po/POTFILES.in              |    1 +
> >  src/Makefile.am             |    3 +-
> >  src/generator.ml            |  128 +++++++++++++++++++++
> >  src/inspect.c               |  266 +++++++++++++++++++++++++++++++++++++++++++
> >  8 files changed, 411 insertions(+), 215 deletions(-)
> >  delete mode 100644 perl/t/510-lib-file-arch.t
> >  create mode 100644 src/inspect.c
> >
> 
> >diff --git a/src/inspect.c b/src/inspect.c
> >new file mode 100644
> >index 0000000..8e28d8a
> >--- /dev/null
> >+++ b/src/inspect.c
> >@@ -0,0 +1,266 @@
> >+/* libguestfs
> >+ * Copyright (C) 2010 Red Hat Inc.
> >+ *
> >+ * This library is free software; you can redistribute it and/or
> >+ * modify it under the terms of the GNU Lesser General Public
> >+ * License as published by the Free Software Foundation; either
> >+ * version 2 of the License, or (at your option) any later version.
> >+ *
> >+ * This library is distributed in the hope that it will be useful,
> >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >+ * Lesser General Public License for more details.
> >+ *
> >+ * You should have received a copy of the GNU Lesser General Public
> >+ * License along with this library; if not, write to the Free Software
> >+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >+ */
> >+
> >+#include<config.h>
> >+
> >+#include<stdio.h>
> >+#include<stdlib.h>
> >+#include<stdint.h>
> >+#include<inttypes.h>
> >+#include<unistd.h>
> >+#include<string.h>
> >+#include<sys/stat.h>
> >+
> >+#include<pcre.h>
> >+
> >+#include "ignore-value.h"
> >+
> >+#include "guestfs.h"
> >+#include "guestfs-internal.h"
> >+#include "guestfs-internal-actions.h"
> >+#include "guestfs_protocol.h"
> >+
> >+/* Compile all the regular expressions once when the shared library is
> >+ * loaded.  PCRE is thread safe so we're supposedly OK here if
> >+ * multiple threads call into the libguestfs API functions below
> >+ * simultaneously.
> >+ */
> >+static pcre *re_file_elf;
> >+static pcre *re_file_win64;
> >+static pcre *re_elf_ppc64;
> >+
> >+static void compile_regexps (void) __attribute__((constructor));
> >+static void
> >+compile_regexps (void)
> >+{
> >+  const char *err;
> >+  int offset;
> >+
> >+#define COMPILE(re,pattern,options)                                     \
> >+  do {                                                                  \
> >+    re = pcre_compile ((pattern), (options),&err,&offset, NULL);      \
> >+    if (re == NULL) {                                                   \
> >+      ignore_value (write (2, err, strlen (err)));                      \
> >+      abort ();                                                         \
> >+    }                                                                   \
> >+  } while (0)
> >+
> >+  COMPILE (re_file_elf,
> >+           "ELF.*(?:executable|shared object|relocatable), (.+?),", 0);
> >+  COMPILE (re_file_win64, "PE32\\+ executable", 0);
> >+  COMPILE (re_elf_ppc64, "64.*PowerPC", 0);
> >+}
> >+
> >+/* Match a regular expression which contains no captures.  Returns
> >+ * true if it matches or false if it doesn't.
> >+ */
> >+static int
> >+match (const char *str, const pcre *re)
> >+{
> >+  size_t len = strlen (str);
> >+  int vec[30], r;
> >+
> >+  r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);
> 
> You can ditch vec here and pass in NULL, 0. None of the above REs
> use back references, so this shouldn't be an issue.

My reading of the pcre_exec docs suggested otherwise.  However I will
need to try it when I get back from the conference.

> Aside: using a portion of a passed-in vector for slack space under
> certain circumstances is just hideous!
>
> >+  if (r == PCRE_ERROR_NOMATCH)
> >+    return 0;
> >+  if (r != 1) {
> >+    /* Internal error -- should not happen. */
> >+    fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n",
> >+             __FILE__, __func__, r, str);
> >+    return 0;
> >+  }
> >+
> >+  return 1;
> >+}
> >+
> >+/* Match a regular expression which contains exactly one capture.  If
> >+ * the string matches, return the capture, otherwise return NULL.  The
> >+ * caller must free the result.
> >+ */
> >+static char *
> >+match1 (const char *str, const pcre *re)
> >+{
> >+  size_t len = strlen (str);
> >+  int vec[30], r;
> >+
> >+  r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);
> 
> vec here could be of size 9 (need 0, 1, 2, 3; round up to multiple
> of 3 == 6; * 1.5 = 9). Whatever you set it to, though, please don't
> hard-code the size twice. Please use sizeof(vec)/sizeof(vec[0]) in
> the function call.

Yes, good idea.

> >+  if (r == PCRE_ERROR_NOMATCH)
> >+    return NULL;
> >+  if (r != 2) {
> >+    /* Internal error -- should not happen. */
> >+    fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n",
> >+             __FILE__, __func__, r, str);
> >+    return NULL;
> >+  }
> >+
> >+  return strndup (&str[vec[2]], vec[3]-vec[2]);
> 
> Is there a safe_ version of that? If not, you probably ought to
> check for NULL return for consistency.

Urr yes you are right, I thought I'd caught all instances of this ...

> >+}
> >+
> >+/* Convert output from 'file' command on ELF files to the canonical
> >+ * architecture string.  Caller must free the result.
> >+ */
> >+static char *
> >+canonical_elf_arch (guestfs_h *g, const char *elf_arch)
> >+{
> >+  const char *r;
> >+
> >+  if (strstr (elf_arch, "Intel 80386"))
> >+    r = "i386";
> >+  else if (strstr (elf_arch, "Intel 80486"))
> >+    r = "i486";
> >+  else if (strstr (elf_arch, "x86-64"))
> >+    r = "x86_64";
> >+  else if (strstr (elf_arch, "AMD x86-64"))
> >+    r = "x86_64";
> >+  else if (strstr (elf_arch, "SPARC32"))
> >+    r = "sparc";
> >+  else if (strstr (elf_arch, "SPARC V9"))
> >+    r = "sparc64";
> >+  else if (strstr (elf_arch, "IA-64"))
> >+    r = "ia64";
> >+  else if (match (elf_arch, re_elf_ppc64))
> >+    r = "ppc64";
> >+  else if (strstr (elf_arch, "PowerPC"))
> >+    r = "ppc";
> >+  else
> >+    r = elf_arch;
> >+
> >+  char *ret = safe_strdup (g, r);
> >+  return ret;
> >+}
> >+
> >+static int
> >+is_regular_file (const char *filename)
> >+{
> >+  struct stat statbuf;
> >+
> >+  return lstat (filename,&statbuf) == 0&&  S_ISREG (statbuf.st_mode);
> >+}
> >+
> >+/* Download and uncompress the cpio file to find binaries within. */
> >+#define INITRD_BINARIES1 "bin/ls bin/rm bin/modprobe sbin/modprobe bin/sh bin/bash bin/dash bin/nash"
> >+#define INITRD_BINARIES2 {"bin/ls", "bin/rm", "bin/modprobe", "sbin/modprobe", "bin/sh", "bin/bash", "bin/dash", "bin/nash"}
> 
> Eww.

Right, but it does make sense in context of the later code ...

If C had LISP-like macros ...

> >+
> >+static char *
> >+cpio_arch (guestfs_h *g, const char *file, const char *path)
> >+{
> 
> This function is really the architecture of an initrd. It should
> probably be called initrd_arch.

Yup, although we can only handle cpio-format (see documentation).
Very old distros (RHEL 3 and earlier IIRC) used a compressed ext2 disk
image, and we cannot handle that at the moment.

> >+  char *ret = NULL;
> >+
> >+  const char *method;
> >+  if (strstr (file, "gzip"))
> >+    method = "zcat";
> >+  else if (strstr (file, "bzip2"))
> >+    method = "bzcat";
> >+  else
> >+    method = "cat";
> >+
> >+  char dir[] = "/tmp/initrd.XXXXXX";
> >+#define dir_len 18
> 
> This should be strlen(dir). The compiler will recognise this as a constant.

OK.

> >+  if (mkdtemp (dir) == NULL) {
> >+    perrorf (g, "mkdtemp");
> >+    goto out;
> >+  }
> >+
> >+  char dir_initrd[dir_len + 16];
> >+  snprintf (dir_initrd, dir_len + 16, "%s/initrd", dir);
> >+  if (guestfs_download (g, path, dir_initrd) == -1)
> >+    goto out;
> >+
> >+  char cmd[dir_len + 256];
> >+  snprintf (cmd, dir_len + 256,
> >+            "cd %s&&  %s initrd | cpio --quiet -id " INITRD_BINARIES1,
> >+            dir, method);
> >+  int r = system (cmd);
> >+  if (r == -1 || WEXITSTATUS (r) != 0) {
> >+    perrorf (g, "cpio command failed");
> >+    goto out;
> >+  }
> >+
> >+  char bin[dir_len + 32];
> 
> You've placed a limit here of 31 bytes on the contents of any member
> of INITRD_BINARIES2, which is defined above. You need a comment next
> to the definition of INITRD_BINARIES2 about this. Or remove the
> limit...

Yup.

> >+  const char *bins[] = INITRD_BINARIES2;
> >+  size_t i;
> >+  for (i = 0; i<  sizeof bins / sizeof bins[0]; ++i) {
> >+    snprintf (bin, dir_len + 32, "%s/%s", dir, bins[i]);
> 
> Why not just use asprintf, or whatever the equivalent is in gnulib?

Because we'd have to free it up.  I think there is an alloca-variant
in gnulib, but I think the static array should be OK if we document
it.

> >+
> >+    if (is_regular_file (bin)) {
> >+      snprintf (cmd, dir_len + 256, "file %s", bin);
> 
> Reusing a magic number from above in a different context. Magic
> numbers are getting a bit hard to track at this point. Could do with
> cmd_len for the size of cmd.
> 
> Stepping back slightly, you could more cleanly use libmagic here
> instead of invoking file. It would be less code, and wouldn't encode
> length limits which aren't defined anywhere.

Yes, that would make a lot of sense.

> >+      FILE *fp = popen (cmd, "r");
> >+      if (!fp) {
> >+        perrorf (g, "popen: %s", cmd);
> >+        goto out;
> >+      }
> >+
> >+      char line[1024];
> >+      if (!fgets (line, sizeof line, fp)) {
> >+        perrorf (g, "fgets");
> >+        fclose (fp);
> >+        goto out;
> >+      }
> >+      pclose (fp);
> 
> ^^^ This chunk could go.
> 
> >+
> >+      char *elf_arch;
> >+      if ((elf_arch = match1 (line, re_file_elf)) != NULL) {
> >+        ret = canonical_elf_arch (g, elf_arch);
> >+        free (elf_arch);
> >+        goto out;
> >+      }
> >+    }
> >+  }
> >+  error (g, "file_architecture: could not determine architecture of cpio archive");
> >+
> >+ out:
> >+  /* Free up the temporary directory.  Note the directory name cannot
> >+   * contain shell meta-characters because of the way it was
> >+   * constructed above.
> >+   */
> >+  snprintf (cmd, dir_len + 256, "rm -rf %s", dir);
> 
> cmd_len
> 
> Is there really no widely available C implementation of this?

Not that I'm aware of ...?

> >+  ignore_value (system (cmd));
> >+
> >+  return ret;
> >+#undef dir_len
> >+}
> >+
> >+char *
> >+guestfs__file_architecture (guestfs_h *g, const char *path)
> >+{
> >+  char *file = NULL;
> >+  char *elf_arch = NULL;
> >+  char *ret = NULL;
> >+
> >+  /* Get the output of the "file" command.  Note that because this
> >+   * runs in the daemon, LANG=C so it's in English.
> >+   */
> >+  file = guestfs_file (g, path);
> >+  if (file == NULL)
> >+    return NULL;
> >+
> >+  if ((elf_arch = match1 (file, re_file_elf)) != NULL)
> >+    ret = canonical_elf_arch (g, elf_arch);
> >+  else if (strstr (file, "PE32 executable"))
> >+    ret = safe_strdup (g, "i386");
> >+  else if (match (file, re_file_win64))
> 
> Why not strstr(file, "PE32+ executable") ?

Yes, that's a mistake.

> >+    ret = safe_strdup (g, "x86_64");
> >+  else if (strstr (file, "cpio archive"))
> >+    ret = cpio_arch (g, file, path);
> >+  else
> >+    error (g, "file_architecture: unknown architecture: %s", path);
> >+
> >+  free (file);
> >+  free (elf_arch);
> >+  return ret;                   /* caller frees */
> >+}
> >-- 1.7.1
> 
> API is fine. I think we need to think hard, though, about how to do
> safe string manipulation in libguestfs. In the above code, it's hard
> to be sure that you're not leaking or overflowing anything. Perhaps
> using a string library would be in order.

Use OCaml ...

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
virt-top is 'top' for virtual machines.  Tiny program with many
powerful monitoring features, net stats, disk stats, logging, etc.
http://et.redhat.com/~rjones/virt-top




More information about the Libguestfs mailing list