[Libguestfs] [PATCH v3 1/5] New API: file-architecture

Matthew Booth mbooth at redhat.com
Mon Aug 9 16:08:06 UTC 2010


On 02/08/10 18:12, Richard W.M. Jones wrote:
>> From 647be56b2d8b171a33608b36e0b7557d94db3c96 Mon Sep 17 00:00:00 2001
> From: Richard Jones<rjones at redhat.com>
> Date: Wed, 28 Jul 2010 15:38:57 +0100
> Subject: [PATCH 1/5] New API: file-architecture
>
> This change simply converts the existing Perl-only function
> file_architecture into a core API call.  The core API call is
> written in C and available in all languages and from guestfish.
> ---
>   README                      |    2 +
>   configure.ac                |    9 ++
>   perl/lib/Sys/Guestfs/Lib.pm |  147 +-----------------------
>   perl/t/510-lib-file-arch.t  |   70 -----------
>   po/POTFILES.in              |    1 +
>   src/Makefile.am             |    3 +-
>   src/generator.ml            |  128 +++++++++++++++++++++
>   src/inspect.c               |  266 +++++++++++++++++++++++++++++++++++++++++++
>   8 files changed, 411 insertions(+), 215 deletions(-)
>   delete mode 100644 perl/t/510-lib-file-arch.t
>   create mode 100644 src/inspect.c
>

> diff --git a/src/inspect.c b/src/inspect.c
> new file mode 100644
> index 0000000..8e28d8a
> --- /dev/null
> +++ b/src/inspect.c
> @@ -0,0 +1,266 @@
> +/* libguestfs
> + * Copyright (C) 2010 Red Hat Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include<config.h>
> +
> +#include<stdio.h>
> +#include<stdlib.h>
> +#include<stdint.h>
> +#include<inttypes.h>
> +#include<unistd.h>
> +#include<string.h>
> +#include<sys/stat.h>
> +
> +#include<pcre.h>
> +
> +#include "ignore-value.h"
> +
> +#include "guestfs.h"
> +#include "guestfs-internal.h"
> +#include "guestfs-internal-actions.h"
> +#include "guestfs_protocol.h"
> +
> +/* Compile all the regular expressions once when the shared library is
> + * loaded.  PCRE is thread safe so we're supposedly OK here if
> + * multiple threads call into the libguestfs API functions below
> + * simultaneously.
> + */
> +static pcre *re_file_elf;
> +static pcre *re_file_win64;
> +static pcre *re_elf_ppc64;
> +
> +static void compile_regexps (void) __attribute__((constructor));
> +static void
> +compile_regexps (void)
> +{
> +  const char *err;
> +  int offset;
> +
> +#define COMPILE(re,pattern,options)                                     \
> +  do {                                                                  \
> +    re = pcre_compile ((pattern), (options),&err,&offset, NULL);      \
> +    if (re == NULL) {                                                   \
> +      ignore_value (write (2, err, strlen (err)));                      \
> +      abort ();                                                         \
> +    }                                                                   \
> +  } while (0)
> +
> +  COMPILE (re_file_elf,
> +           "ELF.*(?:executable|shared object|relocatable), (.+?),", 0);
> +  COMPILE (re_file_win64, "PE32\\+ executable", 0);
> +  COMPILE (re_elf_ppc64, "64.*PowerPC", 0);
> +}
> +
> +/* Match a regular expression which contains no captures.  Returns
> + * true if it matches or false if it doesn't.
> + */
> +static int
> +match (const char *str, const pcre *re)
> +{
> +  size_t len = strlen (str);
> +  int vec[30], r;
> +
> +  r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);

You can ditch vec here and pass in NULL, 0. None of the above REs use 
back references, so this shouldn't be an issue.

Aside: using a portion of a passed-in vector for slack space under 
certain circumstances is just hideous!

> +  if (r == PCRE_ERROR_NOMATCH)
> +    return 0;
> +  if (r != 1) {
> +    /* Internal error -- should not happen. */
> +    fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n",
> +             __FILE__, __func__, r, str);
> +    return 0;
> +  }
> +
> +  return 1;
> +}
> +
> +/* Match a regular expression which contains exactly one capture.  If
> + * the string matches, return the capture, otherwise return NULL.  The
> + * caller must free the result.
> + */
> +static char *
> +match1 (const char *str, const pcre *re)
> +{
> +  size_t len = strlen (str);
> +  int vec[30], r;
> +
> +  r = pcre_exec (re, NULL, str, len, 0, 0, vec, 30);

vec here could be of size 9 (need 0, 1, 2, 3; round up to multiple of 3 
== 6; * 1.5 = 9). Whatever you set it to, though, please don't hard-code 
the size twice. Please use sizeof(vec)/sizeof(vec[0]) in the function call.

> +  if (r == PCRE_ERROR_NOMATCH)
> +    return NULL;
> +  if (r != 2) {
> +    /* Internal error -- should not happen. */
> +    fprintf (stderr, "libguestfs: %s: %s: internal error: pcre_exec returned unexpected error code %d when matching against the string \"%s\"\n",
> +             __FILE__, __func__, r, str);
> +    return NULL;
> +  }
> +
> +  return strndup (&str[vec[2]], vec[3]-vec[2]);

Is there a safe_ version of that? If not, you probably ought to check 
for NULL return for consistency.

> +}
> +
> +/* Convert output from 'file' command on ELF files to the canonical
> + * architecture string.  Caller must free the result.
> + */
> +static char *
> +canonical_elf_arch (guestfs_h *g, const char *elf_arch)
> +{
> +  const char *r;
> +
> +  if (strstr (elf_arch, "Intel 80386"))
> +    r = "i386";
> +  else if (strstr (elf_arch, "Intel 80486"))
> +    r = "i486";
> +  else if (strstr (elf_arch, "x86-64"))
> +    r = "x86_64";
> +  else if (strstr (elf_arch, "AMD x86-64"))
> +    r = "x86_64";
> +  else if (strstr (elf_arch, "SPARC32"))
> +    r = "sparc";
> +  else if (strstr (elf_arch, "SPARC V9"))
> +    r = "sparc64";
> +  else if (strstr (elf_arch, "IA-64"))
> +    r = "ia64";
> +  else if (match (elf_arch, re_elf_ppc64))
> +    r = "ppc64";
> +  else if (strstr (elf_arch, "PowerPC"))
> +    r = "ppc";
> +  else
> +    r = elf_arch;
> +
> +  char *ret = safe_strdup (g, r);
> +  return ret;
> +}
> +
> +static int
> +is_regular_file (const char *filename)
> +{
> +  struct stat statbuf;
> +
> +  return lstat (filename,&statbuf) == 0&&  S_ISREG (statbuf.st_mode);
> +}
> +
> +/* Download and uncompress the cpio file to find binaries within. */
> +#define INITRD_BINARIES1 "bin/ls bin/rm bin/modprobe sbin/modprobe bin/sh bin/bash bin/dash bin/nash"
> +#define INITRD_BINARIES2 {"bin/ls", "bin/rm", "bin/modprobe", "sbin/modprobe", "bin/sh", "bin/bash", "bin/dash", "bin/nash"}

Eww.

> +
> +static char *
> +cpio_arch (guestfs_h *g, const char *file, const char *path)
> +{

This function is really the architecture of an initrd. It should 
probably be called initrd_arch.

> +  char *ret = NULL;
> +
> +  const char *method;
> +  if (strstr (file, "gzip"))
> +    method = "zcat";
> +  else if (strstr (file, "bzip2"))
> +    method = "bzcat";
> +  else
> +    method = "cat";
> +
> +  char dir[] = "/tmp/initrd.XXXXXX";
> +#define dir_len 18

This should be strlen(dir). The compiler will recognise this as a constant.

> +  if (mkdtemp (dir) == NULL) {
> +    perrorf (g, "mkdtemp");
> +    goto out;
> +  }
> +
> +  char dir_initrd[dir_len + 16];
> +  snprintf (dir_initrd, dir_len + 16, "%s/initrd", dir);
> +  if (guestfs_download (g, path, dir_initrd) == -1)
> +    goto out;
> +
> +  char cmd[dir_len + 256];
> +  snprintf (cmd, dir_len + 256,
> +            "cd %s&&  %s initrd | cpio --quiet -id " INITRD_BINARIES1,
> +            dir, method);
> +  int r = system (cmd);
> +  if (r == -1 || WEXITSTATUS (r) != 0) {
> +    perrorf (g, "cpio command failed");
> +    goto out;
> +  }
> +
> +  char bin[dir_len + 32];

You've placed a limit here of 31 bytes on the contents of any member of 
INITRD_BINARIES2, which is defined above. You need a comment next to the 
definition of INITRD_BINARIES2 about this. Or remove the limit...

> +  const char *bins[] = INITRD_BINARIES2;
> +  size_t i;
> +  for (i = 0; i<  sizeof bins / sizeof bins[0]; ++i) {
> +    snprintf (bin, dir_len + 32, "%s/%s", dir, bins[i]);

Why not just use asprintf, or whatever the equivalent is in gnulib?

> +
> +    if (is_regular_file (bin)) {
> +      snprintf (cmd, dir_len + 256, "file %s", bin);

Reusing a magic number from above in a different context. Magic numbers 
are getting a bit hard to track at this point. Could do with cmd_len for 
the size of cmd.

Stepping back slightly, you could more cleanly use libmagic here instead 
of invoking file. It would be less code, and wouldn't encode  length 
limits which aren't defined anywhere.

> +      FILE *fp = popen (cmd, "r");
> +      if (!fp) {
> +        perrorf (g, "popen: %s", cmd);
> +        goto out;
> +      }
> +
> +      char line[1024];
> +      if (!fgets (line, sizeof line, fp)) {
> +        perrorf (g, "fgets");
> +        fclose (fp);
> +        goto out;
> +      }
> +      pclose (fp);

^^^ This chunk could go.

> +
> +      char *elf_arch;
> +      if ((elf_arch = match1 (line, re_file_elf)) != NULL) {
> +        ret = canonical_elf_arch (g, elf_arch);
> +        free (elf_arch);
> +        goto out;
> +      }
> +    }
> +  }
> +  error (g, "file_architecture: could not determine architecture of cpio archive");
> +
> + out:
> +  /* Free up the temporary directory.  Note the directory name cannot
> +   * contain shell meta-characters because of the way it was
> +   * constructed above.
> +   */
> +  snprintf (cmd, dir_len + 256, "rm -rf %s", dir);

cmd_len

Is there really no widely available C implementation of this?

> +  ignore_value (system (cmd));
> +
> +  return ret;
> +#undef dir_len
> +}
> +
> +char *
> +guestfs__file_architecture (guestfs_h *g, const char *path)
> +{
> +  char *file = NULL;
> +  char *elf_arch = NULL;
> +  char *ret = NULL;
> +
> +  /* Get the output of the "file" command.  Note that because this
> +   * runs in the daemon, LANG=C so it's in English.
> +   */
> +  file = guestfs_file (g, path);
> +  if (file == NULL)
> +    return NULL;
> +
> +  if ((elf_arch = match1 (file, re_file_elf)) != NULL)
> +    ret = canonical_elf_arch (g, elf_arch);
> +  else if (strstr (file, "PE32 executable"))
> +    ret = safe_strdup (g, "i386");
> +  else if (match (file, re_file_win64))

Why not strstr(file, "PE32+ executable") ?

> +    ret = safe_strdup (g, "x86_64");
> +  else if (strstr (file, "cpio archive"))
> +    ret = cpio_arch (g, file, path);
> +  else
> +    error (g, "file_architecture: unknown architecture: %s", path);
> +
> +  free (file);
> +  free (elf_arch);
> +  return ret;                   /* caller frees */
> +}
> -- 1.7.1

API is fine. I think we need to think hard, though, about how to do safe 
string manipulation in libguestfs. In the above code, it's hard to be 
sure that you're not leaking or overflowing anything. Perhaps using a 
string library would be in order.

Matt
-- 
Matthew Booth, RHCA, RHCSS
Red Hat Engineering, Virtualisation Team

GPG ID:  D33C3490
GPG FPR: 3733 612D 2D05 5458 8A8A 1600 3441 EA19 D33C 3490




More information about the Libguestfs mailing list