[Libguestfs] [PATCH v3 1/5] New API: file-architecture
Richard W.M. Jones
rjones at redhat.com
Tue Aug 10 12:41:21 UTC 2010
On Mon, Aug 09, 2010 at 05:08:06PM +0100, Matthew Booth wrote:
> On 02/08/10 18:12, Richard W.M. Jones wrote:
> >>From 647be56b2d8b171a33608b36e0b7557d94db3c96 Mon Sep 17 00:00:00 2001
> >From: Richard Jones<rjones at redhat.com>
> >Date: Wed, 28 Jul 2010 15:38:57 +0100
> >Subject: [PATCH 1/5] New API: file-architecture
> >
> >This change simply converts the existing Perl-only function
> >file_architecture into a core API call. The core API call is
> >written in C and available in all languages and from guestfish.
> >---
> > README | 2 +
> > configure.ac | 9 ++
> > perl/lib/Sys/Guestfs/Lib.pm | 147 +-----------------------
> > perl/t/510-lib-file-arch.t | 70 -----------
> > po/POTFILES.in | 1 +
> > src/Makefile.am | 3 +-
> > src/generator.ml | 128 +++++++++++++++++++++
> > src/inspect.c | 266 +++++++++++++++++++++++++++++++++++++++++++
> > 8 files changed, 411 insertions(+), 215 deletions(-)
> > delete mode 100644 perl/t/510-lib-file-arch.t
> > create mode 100644 src/inspect.c
> >
>
> >diff --git a/src/inspect.c b/src/inspect.c
> >new file mode 100644
> >index 0000000..8e28d8a
> >--- /dev/null
> >+++ b/src/inspect.c
> >@@ -0,0 +1,266 @@
> >+/* libguestfs
> >+ * Copyright (C) 2010 Red Hat Inc.
> >+ *
> >+ * This library is free software; you can redistribute it and/or
> >+ * modify it under the terms of the GNU Lesser General Public
> >+ * License as published by the Free Software Foundation; either
> >+ * version 2 of the License, or (at your option) any later version.
> >+ *
> >+ * This library is distributed in the hope that it will be useful,
> >+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> >+ * Lesser General Public License for more details.
> >+ *
> >+ * You should have received a copy of the GNU Lesser General Public
> >+ * License along with this library; if not, write to the Free Software
> >+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >+ */
> >+
> >+#include<config.h>
> >+
> >+#include<stdio.h>
> >+#include<stdlib.h>
> >+#include<stdint.h>
> >+#include<inttypes.h>
> >+#include<unistd.h>
> >+#include<string.h>
> >+#include<sys/stat.h>
> >+
> >+#include<pcre.h>
> >+
> >+#include "ignore-value.h"
> >+
> >+#include "guestfs.h"
> >+#include "guestfs-internal.h"
> >+#include "guestfs-internal-actions.h"
> >+#include "guestfs_protocol.h"
> >+
> >+/* Compile all the regular expressions once when the shared library is
> >+ * loaded. PCRE is thread safe so we're supposedly OK here if
> >+ * multiple threads call into the libguestfs API functions below
> >+ * simultaneously.
> >+ */
> >+static pcre *re_file_elf;
> >+static pcre *re_file_win64;
> >+static pcre *re_elf_ppc64;
> >+
> >+static void compile_regexps (void) __attribute__((constructor));
> >+static void
> >+compile_regexps (void)
> >+{
> >+ const char *err;
> >+ int offset;
> >+
> >+#define COMPILE(re,pattern,options) \
> >+ do { \
> >+ re = pcre_compile ((pattern), (options),&err,&offset, NULL); \
> >+ if (re == NULL) { \
> >+ ignore_value (write (2, err, strlen (err))); \
> >+ abort (); \
> >+ } \
> >+ } while (0)
> >+
> >+ COMPILE (re_file_elf,
> >+ "ELF.*(?:executable|shared object|relocatable), (.+?),", 0);
> >+ COMPILE (re_file_win64, "PE32\\+ executable", 0);
> >+ COMPILE (re_elf_ppc64, "64.*PowerPC", 0);
> >+}
> >+
> >+/* Match a regular expression which contains no captures. Returns
> >+ * true if it matches or false if it doesn't.
> >+ */
> >+static int
> >+match (const char *str, const pcre *re)
> >+{
> >+ size_t len = strlen (str);
> >+ int vec[30], r;
> >+
> >+ r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);
>
> You can ditch vec here and pass in NULL, 0. None of the above REs
> use back references, so this shouldn't be an issue.
My reading of the pcre_exec docs suggested otherwise. However I will
need to try it when I get back from the conference.
> Aside: using a portion of a passed-in vector for slack space under
> certain circumstances is just hideous!
>
> >+ if (r == PCRE_ERROR_NOMATCH)
> >+ return 0;
> >+ if (r != 1) {
> >+ /* Internal error -- should not happen. */
> >+ fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n",
> >+ __FILE__, __func__, r, str);
> >+ return 0;
> >+ }
> >+
> >+ return 1;
> >+}
> >+
> >+/* Match a regular expression which contains exactly one capture. If
> >+ * the string matches, return the capture, otherwise return NULL. The
> >+ * caller must free the result.
> >+ */
> >+static char *
> >+match1 (const char *str, const pcre *re)
> >+{
> >+ size_t len = strlen (str);
> >+ int vec[30], r;
> >+
> >+ r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);
>
> vec here could be of size 9 (need 0, 1, 2, 3; round up to multiple
> of 3 == 6; * 1.5 = 9). Whatever you set it to, though, please don't
> hard-code the size twice. Please use sizeof(vec)/sizeof(vec[0]) in
> the function call.
Yes, good idea.
> >+ if (r == PCRE_ERROR_NOMATCH)
> >+ return NULL;
> >+ if (r != 2) {
> >+ /* Internal error -- should not happen. */
> >+ fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n",
> >+ __FILE__, __func__, r, str);
> >+ return NULL;
> >+ }
> >+
> >+ return strndup (&str[vec[2]], vec[3]-vec[2]);
>
> Is there a safe_ version of that? If not, you probably ought to
> check for NULL return for consistency.
Urr yes you are right, I thought I'd caught all instances of this ...
> >+}
> >+
> >+/* Convert output from 'file' command on ELF files to the canonical
> >+ * architecture string. Caller must free the result.
> >+ */
> >+static char *
> >+canonical_elf_arch (guestfs_h *g, const char *elf_arch)
> >+{
> >+ const char *r;
> >+
> >+ if (strstr (elf_arch, "Intel 80386"))
> >+ r = "i386";
> >+ else if (strstr (elf_arch, "Intel 80486"))
> >+ r = "i486";
> >+ else if (strstr (elf_arch, "x86-64"))
> >+ r = "x86_64";
> >+ else if (strstr (elf_arch, "AMD x86-64"))
> >+ r = "x86_64";
> >+ else if (strstr (elf_arch, "SPARC32"))
> >+ r = "sparc";
> >+ else if (strstr (elf_arch, "SPARC V9"))
> >+ r = "sparc64";
> >+ else if (strstr (elf_arch, "IA-64"))
> >+ r = "ia64";
> >+ else if (match (elf_arch, re_elf_ppc64))
> >+ r = "ppc64";
> >+ else if (strstr (elf_arch, "PowerPC"))
> >+ r = "ppc";
> >+ else
> >+ r = elf_arch;
> >+
> >+ char *ret = safe_strdup (g, r);
> >+ return ret;
> >+}
> >+
> >+static int
> >+is_regular_file (const char *filename)
> >+{
> >+ struct stat statbuf;
> >+
> >+ return lstat (filename,&statbuf) == 0&& S_ISREG (statbuf.st_mode);
> >+}
> >+
> >+/* Download and uncompress the cpio file to find binaries within. */
> >+#define INITRD_BINARIES1 "bin/ls bin/rm bin/modprobe sbin/modprobe bin/sh bin/bash bin/dash bin/nash"
> >+#define INITRD_BINARIES2 {"bin/ls", "bin/rm", "bin/modprobe", "sbin/modprobe", "bin/sh", "bin/bash", "bin/dash", "bin/nash"}
>
> Eww.
Right, but it does make sense in context of the later code ...
If C had LISP-like macros ...
> >+
> >+static char *
> >+cpio_arch (guestfs_h *g, const char *file, const char *path)
> >+{
>
> This function is really the architecture of an initrd. It should
> probably be called initrd_arch.
Yup, although we can only handle cpio-format (see documentation).
Very old distros (RHEL 3 and earlier IIRC) used a compressed ext2 disk
image, and we cannot handle that at the moment.
> >+ char *ret = NULL;
> >+
> >+ const char *method;
> >+ if (strstr (file, "gzip"))
> >+ method = "zcat";
> >+ else if (strstr (file, "bzip2"))
> >+ method = "bzcat";
> >+ else
> >+ method = "cat";
> >+
> >+ char dir[] = "/tmp/initrd.XXXXXX";
> >+#define dir_len 18
>
> This should be strlen(dir). The compiler will recognise this as a constant.
OK.
> >+ if (mkdtemp (dir) == NULL) {
> >+ perrorf (g, "mkdtemp");
> >+ goto out;
> >+ }
> >+
> >+ char dir_initrd[dir_len + 16];
> >+ snprintf (dir_initrd, dir_len + 16, "%s/initrd", dir);
> >+ if (guestfs_download (g, path, dir_initrd) == -1)
> >+ goto out;
> >+
> >+ char cmd[dir_len + 256];
> >+ snprintf (cmd, dir_len + 256,
> >+ "cd %s&& %s initrd | cpio --quiet -id " INITRD_BINARIES1,
> >+ dir, method);
> >+ int r = system (cmd);
> >+ if (r == -1 || WEXITSTATUS (r) != 0) {
> >+ perrorf (g, "cpio command failed");
> >+ goto out;
> >+ }
> >+
> >+ char bin[dir_len + 32];
>
> You've placed a limit here of 31 bytes on the contents of any member
> of INITRD_BINARIES2, which is defined above. You need a comment next
> to the definition of INITRD_BINARIES2 about this. Or remove the
> limit...
Yup.
> >+ const char *bins[] = INITRD_BINARIES2;
> >+ size_t i;
> >+ for (i = 0; i< sizeof bins / sizeof bins[0]; ++i) {
> >+ snprintf (bin, dir_len + 32, "%s/%s", dir, bins[i]);
>
> Why not just use asprintf, or whatever the equivalent is in gnulib?
Because we'd have to free it up. I think there is an alloca-variant
in gnulib, but I think the static array should be OK if we document
it.
> >+
> >+ if (is_regular_file (bin)) {
> >+ snprintf (cmd, dir_len + 256, "file %s", bin);
>
> Reusing a magic number from above in a different context. Magic
> numbers are getting a bit hard to track at this point. Could do with
> cmd_len for the size of cmd.
>
> Stepping back slightly, you could more cleanly use libmagic here
> instead of invoking file. It would be less code, and wouldn't encode
> length limits which aren't defined anywhere.
Yes, that would make a lot of sense.
> >+ FILE *fp = popen (cmd, "r");
> >+ if (!fp) {
> >+ perrorf (g, "popen: %s", cmd);
> >+ goto out;
> >+ }
> >+
> >+ char line[1024];
> >+ if (!fgets (line, sizeof line, fp)) {
> >+ perrorf (g, "fgets");
> >+ fclose (fp);
> >+ goto out;
> >+ }
> >+ pclose (fp);
>
> ^^^ This chunk could go.
>
> >+
> >+ char *elf_arch;
> >+ if ((elf_arch = match1 (line, re_file_elf)) != NULL) {
> >+ ret = canonical_elf_arch (g, elf_arch);
> >+ free (elf_arch);
> >+ goto out;
> >+ }
> >+ }
> >+ }
> >+ error (g, "file_architecture: could not determine architecture of cpio archive");
> >+
> >+ out:
> >+ /* Free up the temporary directory. Note the directory name cannot
> >+ * contain shell meta-characters because of the way it was
> >+ * constructed above.
> >+ */
> >+ snprintf (cmd, dir_len + 256, "rm -rf %s", dir);
>
> cmd_len
>
> Is there really no widely available C implementation of this?
Not that I'm aware of ...?
> >+ ignore_value (system (cmd));
> >+
> >+ return ret;
> >+#undef dir_len
> >+}
> >+
> >+char *
> >+guestfs__file_architecture (guestfs_h *g, const char *path)
> >+{
> >+ char *file = NULL;
> >+ char *elf_arch = NULL;
> >+ char *ret = NULL;
> >+
> >+ /* Get the output of the "file" command. Note that because this
> >+ * runs in the daemon, LANG=C so it's in English.
> >+ */
> >+ file = guestfs_file (g, path);
> >+ if (file == NULL)
> >+ return NULL;
> >+
> >+ if ((elf_arch = match1 (file, re_file_elf)) != NULL)
> >+ ret = canonical_elf_arch (g, elf_arch);
> >+ else if (strstr (file, "PE32 executable"))
> >+ ret = safe_strdup (g, "i386");
> >+ else if (match (file, re_file_win64))
>
> Why not strstr(file, "PE32+ executable") ?
Yes, that's a mistake.
> >+ ret = safe_strdup (g, "x86_64");
> >+ else if (strstr (file, "cpio archive"))
> >+ ret = cpio_arch (g, file, path);
> >+ else
> >+ error (g, "file_architecture: unknown architecture: %s", path);
> >+
> >+ free (file);
> >+ free (elf_arch);
> >+ return ret; /* caller frees */
> >+}
> >-- 1.7.1
>
> API is fine. I think we need to think hard, though, about how to do
> safe string manipulation in libguestfs. In the above code, it's hard
> to be sure that you're not leaking or overflowing anything. Perhaps
> using a string library would be in order.
Use OCaml ...
Rich.
--
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
virt-top is 'top' for virtual machines. Tiny program with many
powerful monitoring features, net stats, disk stats, logging, etc.
http://et.redhat.com/~rjones/virt-top
More information about the Libguestfs
mailing list