[Libguestfs] [PATCH 3/3] virt-ls: Add -lR option for complex file iteration.

Richard W.M. Jones rjones at redhat.com
Sat Jun 4 10:11:49 UTC 2011


-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
New in Fedora 11: Fedora Windows cross-compiler. Compile Windows
programs, test, and build Windows installers. Over 70 libraries supprt'd
http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw
-------------- next part --------------
>From 78537f9f03f0416eb19d2e1aa9a40feb6dc0298a Mon Sep 17 00:00:00 2001
From: Richard W.M. Jones <rjones at redhat.com>
Date: Fri, 3 Jun 2011 17:00:07 +0100
Subject: [PATCH 3/3] virt-ls: Add -lR option for complex file iteration.

---
 cat/test-virt-ls.sh |   16 ++
 cat/virt-ls.c       |  738 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 cat/virt-ls.pod     |  288 +++++++++++++++++++--
 3 files changed, 1017 insertions(+), 25 deletions(-)

diff --git a/cat/test-virt-ls.sh b/cat/test-virt-ls.sh
index 784fd67..3f702b7 100755
--- a/cat/test-virt-ls.sh
+++ b/cat/test-virt-ls.sh
@@ -15,3 +15,19 @@ test7" ]; then
     echo "$0: error: unexpected output from virt-ls"
     exit 1
 fi
+
+# Try the -lR option.
+output="$(./virt-ls -lR ../images/fedora.img /boot | awk '{print $1 $2 $4}')"
+expected="d0755/boot
+d0755/boot/grub
+-0644/boot/grub/grub.conf
+d0700/boot/lost+found"
+if [ "$output" != "$expected" ]; then
+    echo "$0: error: unexpected output from virt-ls -lR"
+    echo "output: ------------------------------------------"
+    echo "$output"
+    echo "expected: ----------------------------------------"
+    echo "$expected"
+    echo "--------------------------------------------------"
+    exit 1
+fi
diff --git a/cat/virt-ls.c b/cat/virt-ls.c
index e8b3bf0..0260c74 100644
--- a/cat/virt-ls.c
+++ b/cat/virt-ls.c
@@ -20,15 +20,17 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <inttypes.h>
 #include <unistd.h>
 #include <getopt.h>
 #include <fcntl.h>
 #include <locale.h>
 #include <assert.h>
-#include <string.h>
+#include <time.h>
 #include <libintl.h>
 
+#include "human.h"
 #include "progname.h"
 
 #include "guestfs.h"
@@ -45,9 +47,39 @@ int echo_keys = 0;
 const char *libvirt_uri = NULL;
 int inspector = 1;
 
+static int csv = 0;
+static int human = 0;
+static int enable_uids = 0;
+static int enable_times = 0;
+static int time_t_output = 0;
+static int enable_extra_stats = 0;
+static const char *checksum = NULL;
+
 static int do_ls (const char *dir);
 static int do_ls_l (const char *dir);
 static int do_ls_R (const char *dir);
+static int do_ls_lR (const char *dir);
+
+static void output_start_line (void);
+static void output_end_line (void);
+static void output_string (const char *);
+static void output_int64 (int64_t);
+static void output_int64_size (int64_t);
+static void output_int64_perms (int64_t);
+static void output_int64_time (int64_t);
+static void output_int64_dev (int64_t);
+
+static int is_reg (int64_t mode);
+static int is_dir (int64_t mode);
+static int is_chr (int64_t mode);
+static int is_blk (int64_t mode);
+static int is_fifo (int64_t mode);
+static int is_lnk (int64_t mode);
+static int is_sock (int64_t mode);
+
+static size_t count_strings (char **);
+static void free_strings (char **);
+static char **take_strings (char **, size_t n, char ***);
 
 static inline char *
 bad_cast (char const *s)
@@ -70,14 +102,21 @@ usage (int status)
              "  %s [--options] -a disk.img [-a disk.img ...] dir [dir ...]\n"
              "Options:\n"
              "  -a|--add image       Add image\n"
+             "  --checksum[=...]     Display file checksums\n"
              "  -c|--connect uri     Specify libvirt URI for -d option\n"
+             "  --csv                Comma-Separated Values output\n"
              "  -d|--domain guest    Add disks from libvirt guest\n"
              "  --echo-keys          Don't turn off echo for passphrases\n"
+             "  --extra-stats        Display extra stats\n"
              "  --format[=raw|..]    Force disk format for -a option\n"
              "  --help               Display brief help\n"
+             "  -h|--human-readable  Human-readable sizes in output\n"
              "  --keys-from-stdin    Read passphrases from stdin\n"
              "  -l|--long            Long listing\n"
              "  -R|--recursive       Recursive listing\n"
+             "  --times              Display file times\n"
+             "  --time-t             Display file times as time_t's\n"
+             "  --uids               Display UID, GID\n"
              "  -v|--verbose         Verbose messages\n"
              "  -V|--version         Display version and exit\n"
              "  -x                   Trace libguestfs API calls\n"
@@ -100,17 +139,28 @@ main (int argc, char *argv[])
 
   enum { HELP_OPTION = CHAR_MAX + 1 };
 
-  static const char *options = "a:c:d:lRvVx";
+  static const char *options = "a:c:d:hlRvVx";
   static const struct option long_options[] = {
     { "add", 1, 0, 'a' },
+    { "checksum", 2, 0, 0 },
+    { "checksums", 2, 0, 0 },
+    { "csv", 0, 0, 0 },
     { "connect", 1, 0, 'c' },
     { "domain", 1, 0, 'd' },
     { "echo-keys", 0, 0, 0 },
+    { "extra-stat", 0, 0, 0 },
+    { "extra-stats", 0, 0, 0 },
     { "format", 2, 0, 0 },
     { "help", 0, 0, HELP_OPTION },
+    { "human-readable", 0, 0, 'h' },
     { "keys-from-stdin", 0, 0, 0 },
     { "long", 0, 0, 'l' },
     { "recursive", 0, 0, 'R' },
+    { "time", 0, 0, 0 },
+    { "times", 0, 0, 0 },
+    { "time-t", 0, 0, 0 },
+    { "uid", 0, 0, 0 },
+    { "uids", 0, 0, 0 },
     { "verbose", 0, 0, 'v' },
     { "version", 0, 0, 'V' },
     { 0, 0, 0, 0 }
@@ -148,6 +198,26 @@ main (int argc, char *argv[])
           format = NULL;
         else
           format = optarg;
+      } else if (STREQ (long_options[option_index].name, "checksum") ||
+                 STREQ (long_options[option_index].name, "checksums")) {
+        if (!optarg || STREQ (optarg, ""))
+          checksum = "md5";
+        else
+          checksum = optarg;
+      } else if (STREQ (long_options[option_index].name, "csv")) {
+        csv = 1;
+      } else if (STREQ (long_options[option_index].name, "extra-stat") ||
+                 STREQ (long_options[option_index].name, "extra-stats")) {
+        enable_extra_stats = 1;
+      } else if (STREQ (long_options[option_index].name, "time") ||
+                 STREQ (long_options[option_index].name, "times")) {
+        enable_times = 1;
+      } else if (STREQ (long_options[option_index].name, "time-t")) {
+        enable_times = 1;
+        time_t_output = 1;
+      } else if (STREQ (long_options[option_index].name, "uid") ||
+                 STREQ (long_options[option_index].name, "uids")) {
+        enable_uids = 1;
       } else {
         fprintf (stderr, _("%s: unknown long option: %s (%d)\n"),
                  program_name, long_options[option_index].name, option_index);
@@ -168,7 +238,8 @@ main (int argc, char *argv[])
       break;
 
     case 'h':
-      usage (EXIT_SUCCESS);
+      human = 1;
+      break;
 
     case 'l':
       mode |= MODE_LS_L;
@@ -232,12 +303,6 @@ main (int argc, char *argv[])
     }
   }
 
-  if (mode == MODE_LS_LR) {
-    fprintf (stderr, _("%s: cannot combine -l and -R options\n"),
-             program_name);
-    exit (EXIT_FAILURE);
-  }
-
   /* These are really constants, but they have to be variables for the
    * options parsing code.  Assert here that they have known-good
    * values.
@@ -246,6 +311,24 @@ main (int argc, char *argv[])
   assert (inspector == 1);
   assert (live == 0);
 
+  /* Many flags only apply to -lR mode. */
+  if (mode != MODE_LS_LR &&
+      (csv || human || enable_uids || enable_times || enable_extra_stats ||
+       checksum)) {
+    fprintf (stderr, _("%s: used a flag which can only be combined with -lR mode\nFor more information, read the virt-ls(1) man page.\n"),
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+
+  /* CSV && human is unsafe because spreadsheets fail to parse these
+   * fields correctly.  (RHBZ#600977).
+   */
+  if (human && csv) {
+    fprintf (stderr, _("%s: you cannot use -h and --csv options together.\n"),
+             program_name);
+    exit (EXIT_FAILURE);
+  }
+
   /* User must specify at least one directory name on the command line. */
   if (optind >= argc || argc - optind < 1)
     usage (EXIT_FAILURE);
@@ -288,6 +371,11 @@ main (int argc, char *argv[])
         errors++;
       break;
 
+    case MODE_LS_LR:            /* virt-ls -lR */
+      if (do_ls_lR (dir) == -1)
+        errors++;
+      break;
+
     default:
       abort ();                 /* can't happen */
     }
@@ -394,3 +482,635 @@ do_ls_R (const char *dir)
 
  return 0;
 }
+
+/* Adapted from
+https://rwmj.wordpress.com/2010/12/15/tip-audit-virtual-machine-for-setuid-files/
+*/
+static char *full_path (const char *dir, const char *name);
+static struct guestfs_stat_list *lstatlist (const char *dir, char **names);
+static struct guestfs_xattr_list *lxattrlist (const char *dir, char **names);
+static int show_file (const char *dir, const char *name, const struct guestfs_stat *stat, const struct guestfs_xattr_list *xattrs);
+
+typedef int (*visitor_function) (const char *dir, const char *name, const struct guestfs_stat *stat, const struct guestfs_xattr_list *xattrs);
+
+static int
+visit (int depth, const char *dir, visitor_function f)
+{
+  /* Walk 'dir' recursively, calling 'f' on every entry.  Returns 0,
+   * or -1 on error.  A directory is never listed by its own 'ls', so
+   * at depth 0 the top directory is visited here as a special case.
+   */
+  if (depth == 0) {
+    struct guestfs_stat *stat;
+    struct guestfs_xattr_list *xattrs;
+    int r;
+
+    stat = guestfs_lstat (g, dir);
+    if (stat == NULL)
+      return -1;
+
+    xattrs = guestfs_lgetxattrs (g, dir);
+    if (xattrs == NULL) {
+      guestfs_free_stat (stat);
+      return -1;
+    }
+
+    r = f (dir, NULL, stat, xattrs);
+    guestfs_free_stat (stat);
+    guestfs_free_xattr_list (xattrs);
+
+    if (r == -1)
+      return -1;
+  }
+
+  int ret = -1;
+  char **names = NULL;
+  char *path = NULL;
+  size_t i, xattrp;
+  struct guestfs_stat_list *stats = NULL;
+  struct guestfs_xattr_list *xattrs = NULL;
+
+  names = guestfs_ls (g, dir);
+  if (names == NULL)
+    goto out;
+
+  stats = lstatlist (dir, names);
+  if (stats == NULL)
+    goto out;
+
+  xattrs = lxattrlist (dir, names);
+  if (xattrs == NULL)
+    goto out;
+
+  /* Call function on everything in this directory. */
+  for (i = 0, xattrp = 0; names[i] != NULL; ++i, ++xattrp) {
+    struct guestfs_xattr_list file_xattrs;
+    size_t nr_xattrs;
+
+    assert (stats->len > i);          /* val[i] is read below */
+    assert (xattrs->len > xattrp);    /* val[xattrp] is read below */
+
+    /* Each file starts with a header entry: empty name, count value. */
+    assert (strlen (xattrs->val[xattrp].attrname) == 0);
+
+    if (xattrs->val[xattrp].attrval_len == 0) {
+      fprintf (stderr, _("%s: error getting extended attrs for %s %s\n"),
+               program_name, dir, names[i]);
+      goto out;
+    }
+    /* lxattrlist function made sure attrval was \0-terminated, so we can do */
+    if (sscanf (xattrs->val[xattrp].attrval, "%zu", &nr_xattrs) != 1) {
+      fprintf (stderr, _("%s: error: cannot parse xattr count for %s %s\n"),
+               program_name, dir, names[i]);
+      goto out;
+    }
+
+    file_xattrs.len = nr_xattrs;
+    file_xattrs.val = &xattrs->val[xattrp+1]; /* skip the header entry */
+    xattrp += nr_xattrs;
+
+    /* Call the function. */
+    if (f (dir, names[i], &stats->val[i], &file_xattrs) == -1)
+      goto out;
+
+    /* Recursively call visit, but only on directories. */
+    if (is_dir (stats->val[i].mode)) {
+      path = full_path (dir, names[i]);
+      if (visit (depth + 1, path, f) == -1)
+        goto out;
+      free (path); path = NULL;
+    }
+  }
+
+  ret = 0;
+
+ out:
+  free (path);
+  if (names)
+    free_strings (names);
+  if (stats)
+    guestfs_free_stat_list (stats);
+  if (xattrs)
+    guestfs_free_xattr_list (xattrs);
+  return ret;
+}
+
+static char *
+full_path (const char *dir, const char *name)
+{
+  int r;
+  char *path;
+
+  if (STREQ (dir, "/"))
+    r = asprintf (&path, "/%s", name ? name : "");
+  else if (name)
+    r = asprintf (&path, "%s/%s", dir, name);
+  else
+    r = asprintf (&path, "%s", dir);
+
+  if (r == -1) {
+    perror ("asprintf");
+    exit (EXIT_FAILURE);
+  }
+
+  return path;
+}
+
+/* This calls guestfs_lstatlist, but it splits the names list up so that we
+ * don't overrun the libguestfs protocol limit.
+ */
+#define LSTATLIST_MAX 1000
+
+static struct guestfs_stat_list *
+lstatlist (const char *dir, char **names)
+{
+  size_t len = count_strings (names);
+  char **first;
+  size_t old_len;
+  struct guestfs_stat_list *ret, *stats;
+
+  ret = malloc (sizeof *ret);
+  if (ret == NULL) {
+    perror ("malloc");
+    exit (EXIT_FAILURE);
+  }
+  ret->len = 0;
+  ret->val = NULL;
+
+  while (len > 0) {
+    first = take_strings (names, LSTATLIST_MAX, &names);
+    len = len <= LSTATLIST_MAX ? 0 : len - LSTATLIST_MAX;
+
+    stats = guestfs_lstatlist (g, dir, first);
+    /* Note we don't need to free up the strings because take_strings
+     * does not do a deep copy.
+     */
+    free (first);
+
+    if (stats == NULL) {
+      guestfs_free_stat_list (ret); /* also frees val from earlier batches */
+      return NULL;
+    }
+
+    /* Append stats to ret. */
+    old_len = ret->len;
+    ret->len += stats->len;
+    ret->val = realloc (ret->val, ret->len * sizeof (struct guestfs_stat));
+    if (ret->val == NULL) {
+      perror ("realloc");
+      exit (EXIT_FAILURE);
+    }
+    memcpy (&ret->val[old_len], stats->val,
+            stats->len * sizeof (struct guestfs_stat));
+
+    guestfs_free_stat_list (stats);
+  }
+
+  return ret;
+}
+
+/* Same as above, for lxattrlist.  Note the rather peculiar format
+ * used to return the list of extended attributes (see
+ * guestfs_lxattrlist documentation).
+ */
+#define LXATTRLIST_MAX 1000
+
+static struct guestfs_xattr_list *
+lxattrlist (const char *dir, char **names)
+{
+  size_t len = count_strings (names);
+  char **first;
+  size_t i, old_len;
+  struct guestfs_xattr_list *ret, *xattrs;
+
+  ret = malloc (sizeof *ret);
+  if (ret == NULL) {
+    perror ("malloc");
+    exit (EXIT_FAILURE);
+  }
+  ret->len = 0;
+  ret->val = NULL;
+
+  while (len > 0) {
+    first = take_strings (names, LXATTRLIST_MAX, &names);
+    len = len <= LXATTRLIST_MAX ? 0 : len - LXATTRLIST_MAX;
+
+    xattrs = guestfs_lxattrlist (g, dir, first);
+    /* Note we don't need to free up the strings because take_strings
+     * does not do a deep copy.
+     */
+    free (first);
+
+    if (xattrs == NULL) {
+      guestfs_free_xattr_list (ret); /* also frees copies from earlier batches */
+      return NULL;
+    }
+
+    /* Append xattrs to ret. */
+    old_len = ret->len;
+    ret->len += xattrs->len;
+    ret->val = realloc (ret->val, ret->len * sizeof (struct guestfs_xattr));
+    if (ret->val == NULL) {
+      perror ("realloc");
+      exit (EXIT_FAILURE);
+    }
+    for (i = 0; i < xattrs->len; ++i, ++old_len) {
+      /* We have to make a deep copy of the attribute name and value.
+       * The attrval contains 8 bit data.  However make sure also that
+       * it is \0-terminated, because that makes the calling code
+       * simpler.  Destination index is old_len (i indexes this batch).
+       */
+      ret->val[old_len].attrname = strdup (xattrs->val[i].attrname);
+      ret->val[old_len].attrval = malloc (xattrs->val[i].attrval_len + 1);
+      if (ret->val[old_len].attrname == NULL ||
+          ret->val[old_len].attrval == NULL) {
+        perror ("malloc");
+        exit (EXIT_FAILURE);
+      }
+      ret->val[old_len].attrval_len = xattrs->val[i].attrval_len;
+      memcpy (ret->val[old_len].attrval, xattrs->val[i].attrval,
+              xattrs->val[i].attrval_len);
+      ret->val[old_len].attrval[ret->val[old_len].attrval_len] = '\0';
+    }
+
+    guestfs_free_xattr_list (xattrs);
+  }
+
+  return ret;
+}
+
+static int
+do_ls_lR (const char *dir)
+{
+  return visit (0, dir, show_file);
+}
+
+/* This is the function which is called to display all files and
+ * directories, and it's where the magic happens.  We are called with
+ * full stat and extended attributes for each file, so there is no
+ * penalty for displaying anything in those structures.  However if we
+ * need other things (eg. checksum) we may have to go back to the
+ * appliance and then there can be a very large penalty.
+ */
+static int
+show_file (const char *dir, const char *name,
+           const struct guestfs_stat *stat,
+           const struct guestfs_xattr_list *xattrs)
+{
+  char filetype[2];
+  char *path = NULL, *csum = NULL, *link = NULL;
+
+  /* Display the basic fields. */
+  output_start_line ();
+
+  if (is_reg (stat->mode))
+    filetype[0] = '-';
+  else if (is_dir (stat->mode))
+    filetype[0] = 'd';
+  else if (is_chr (stat->mode))
+    filetype[0] = 'c';
+  else if (is_blk (stat->mode))
+    filetype[0] = 'b';
+  else if (is_fifo (stat->mode))
+    filetype[0] = 'p';
+  else if (is_lnk (stat->mode))
+    filetype[0] = 'l';
+  else if (is_sock (stat->mode))
+    filetype[0] = 's';
+  else
+    filetype[0] = 'u';
+  filetype[1] = '\0';
+  output_string (filetype);
+  output_int64_perms (stat->mode & 07777);
+
+  output_int64_size (stat->size);
+
+  path = full_path (dir, name);
+  output_string (path);
+
+  if (is_lnk (stat->mode))
+    /* XXX Fix this for NTFS. */
+    link = guestfs_readlink (g, path);
+  if (link)
+    output_string (link);
+  else
+    output_string ("-");
+
+  /* Display extra fields when enabled. */
+  if (enable_uids) {
+    output_int64 (stat->uid);
+    output_int64 (stat->gid);
+  }
+
+  if (enable_times) {
+    output_int64_time (stat->atime);
+    output_int64_time (stat->mtime);
+    output_int64_time (stat->ctime);
+  }
+
+  if (enable_extra_stats) {
+    output_int64_dev (stat->dev);
+    output_int64 (stat->ino);
+    output_int64 (stat->nlink);
+    output_int64_dev (stat->rdev);
+    output_int64 (stat->blocks);
+  }
+
+  /* Disabled for now -- user would definitely want these to be interpreted.
+  if (enable_xattrs)
+    output_xattrs (xattrs);
+  */
+
+  if (checksum && is_reg (stat->mode)) {
+    csum = guestfs_checksum (g, checksum, path);
+    if (!csum)
+      exit (EXIT_FAILURE);
+
+    output_string (csum);
+  }
+
+  output_end_line ();
+
+  free (path);
+  free (csum);
+  free (link);
+
+  return 0;
+}
+
+/* Output functions.
+ *
+ * Note that we have to be careful to check return values from printf
+ * in these functions, because we want to catch ENOSPC errors.
+ */
+static int field;
+static void
+next_field (void)
+{
+  int c = csv ? ',' : ' ';
+
+  field++;
+  if (field == 1) return;
+
+  if (putchar (c) == EOF) {
+    perror ("putchar");
+    exit (EXIT_FAILURE);
+  }
+}
+
+static void
+output_start_line (void)
+{
+  field = 0;
+}
+
+static void
+output_end_line (void)
+{
+  if (printf ("\n") < 0) {
+    perror ("printf");
+    exit (EXIT_FAILURE);
+  }
+}
+
+static void
+output_string (const char *s)
+{
+  next_field ();
+
+  if (!csv) {
+  print_no_quoting:
+    if (printf ("%s", s) < 0) {
+      perror ("printf");
+      exit (EXIT_FAILURE);
+    }
+  }
+  else {
+    /* Quote CSV string without requiring an external module. */
+    size_t i, len;
+    int needs_quoting = 0;
+
+    len = strlen (s);
+
+    for (i = 0; i < len; ++i) {
+      if (s[i] == ' ' || s[i] == '"' ||
+          s[i] == '\n' || s[i] == ',') {
+        needs_quoting = 1;
+        break;
+      }
+    }
+
+    if (!needs_quoting)
+      goto print_no_quoting;
+
+    /* Quoting for CSV fields. */
+    if (putchar ('"') == EOF) {
+      perror ("putchar");
+      exit (EXIT_FAILURE);
+    }
+    for (i = 0; i < len; ++i) {
+      if (s[i] == '"') {
+        if (putchar ('"') == EOF || putchar ('"') == EOF) {
+          perror ("putchar");
+          exit (EXIT_FAILURE);
+        }
+      } else {
+        if (putchar (s[i]) == EOF) {
+          perror ("putchar");
+          exit (EXIT_FAILURE);
+        }
+      }
+    }
+    if (putchar ('"') == EOF) {
+      perror ("putchar");
+      exit (EXIT_FAILURE);
+    }
+  }
+}
+
+static void
+output_int64 (int64_t i)
+{
+  next_field ();
+  /* csv doesn't need escaping */
+  if (printf ("%" PRIi64, i) < 0) {
+    perror ("printf");
+    exit (EXIT_FAILURE);
+  }
+}
+
+static void
+output_int64_size (int64_t size)
+{
+  char buf[LONGEST_HUMAN_READABLE];
+  int hopts = human_round_to_nearest|human_autoscale|human_base_1024|human_SI;
+  int r;
+
+  next_field ();
+
+  if (!csv) {
+    if (!human)
+      r = printf ("%10" PRIi64, size);
+    else
+      r = printf ("%10s",
+                  human_readable ((uintmax_t) size, buf, hopts, 1, 1));
+  } else {
+    /* CSV is the same as non-CSV but we don't need to right-align. */
+    if (!human)
+      r = printf ("%" PRIi64, size);
+    else
+      r = printf ("%s",
+                  human_readable ((uintmax_t) size, buf, hopts, 1, 1));
+  }
+
+  if (r < 0) {
+    perror ("printf");
+    exit (EXIT_FAILURE);
+  }
+}
+
+static void
+output_int64_perms (int64_t i)
+{
+  next_field ();
+  /* csv doesn't need escaping */
+  if (printf ("%04" PRIo64, i) < 0) {
+    perror ("printf");
+    exit (EXIT_FAILURE);
+  }
+}
+
+static void
+output_int64_time (int64_t i)
+{
+  int r;
+
+  next_field ();
+
+  /* csv doesn't need escaping */
+  if (time_t_output)
+    r = printf ("%" PRIi64, i);
+  else {
+    time_t t = (time_t) i;
+    char buf[26];
+
+    ctime_r (&t, buf);
+    buf[24] = '\0';
+
+    r = printf ("%s", buf);
+  }
+
+  if (r < 0) {
+    perror ("printf");
+    exit (EXIT_FAILURE);
+  }
+}
+
+static void
+output_int64_dev (int64_t i)
+{
+  dev_t dev = i;
+
+  next_field ();
+
+  /* csv doesn't need escaping */
+  if (printf ("%d:%d", major (dev), minor (dev)) < 0) {
+    perror ("printf");
+    exit (EXIT_FAILURE);
+  }
+}
+
+/* In the libguestfs API, modes returned by lstat and friends are
+ * defined to contain Linux ABI values.  However since the "current
+ * operating system" might not be Linux, we have to hard-code those
+ * numbers here.
+ */
+static int
+is_reg (int64_t mode)
+{
+  return (mode & 0170000) == 0100000;
+}
+
+static int
+is_dir (int64_t mode)
+{
+  return (mode & 0170000) == 0040000;
+}
+
+static int
+is_chr (int64_t mode)
+{
+  return (mode & 0170000) == 0020000;
+}
+
+static int
+is_blk (int64_t mode)
+{
+  return (mode & 0170000) == 0060000;
+}
+
+static int
+is_fifo (int64_t mode)
+{
+  return (mode & 0170000) == 0010000;
+}
+
+/* symbolic link */
+static int
+is_lnk (int64_t mode)
+{
+  return (mode & 0170000) == 0120000;
+}
+
+static int
+is_sock (int64_t mode)
+{
+  return (mode & 0170000) == 0140000;
+}
+
+/* String functions. */
+static size_t
+count_strings (char **names)
+{
+  size_t ret = 0;
+
+  while (names[ret] != NULL)
+    ret++;
+  return ret;
+}
+
+static void
+free_strings (char **names)
+{
+  size_t i;
+
+  for (i = 0; names[i] != NULL; ++i)
+    free (names[i]);
+  free (names);
+}
+
+/* Take the first 'n' names, returning a newly allocated list.  The
+ * strings themselves are not duplicated.  If 'lastp' is not NULL,
+ * then it is updated with the pointer to the list of remaining names.
+ */
+static char **
+take_strings (char **names, size_t n, char ***lastp)
+{
+  size_t i;
+
+  char **ret = malloc ((n+1) * sizeof (char *));
+  if (ret == NULL) {
+    perror ("malloc");
+    exit (EXIT_FAILURE);
+  }
+
+  for (i = 0; names[i] != NULL && i < n; ++i)
+    ret[i] = names[i];
+
+  ret[i] = NULL;
+
+  if (lastp)
+    *lastp = &names[i];
+
+  return ret;
+}
diff --git a/cat/virt-ls.pod b/cat/virt-ls.pod
index dc3093a..b93df0e 100755
--- a/cat/virt-ls.pod
+++ b/cat/virt-ls.pod
@@ -18,29 +18,35 @@ Old style:
 
 =head1 DESCRIPTION
 
-C<virt-ls> is a command line tool to list the names of files in a
-directory inside a virtual machine or disk image.
+C<virt-ls> lists filenames, file sizes, checksums, extended attributes
+and more from a virtual machine or disk image.
 
-Multiple directory names can be given, in which case the output
-from each is concatenated.
-
-C<virt-ls> is just a simple wrapper around L<libguestfs(3)>
-functionality.  For more complex cases you should look at the
-L<guestfish(1)> tool.
+Multiple directory names can be given, in which case the output from
+each is concatenated.
 
 To list directories from a libvirt guest use the I<-d> option to
 specify the name of the guest.  For a disk image, use the I<-a>
 option.
 
-C<virt-ls> can be used in one of three modes: simple, long and
-recursive.  A simple listing is like the ordinary L<ls(1)> command:
+=head1 OUTPUT MODES
+
+C<virt-ls> has four output modes, controlled by different
+combinations of the I<-l> and I<-R> options.
+
+=head2 SIMPLE LISTING
+
+A simple listing is like the ordinary L<ls(1)> command:
 
  $ virt-ls -d myguest /
  bin
  boot
  [etc.]
 
-With the I<-l> (I<--long>) option, C<virt-ls> shows more detail:
+=head2 LONG LISTING
+(I<-l>)
+
+With the I<-l> (I<--long>) option, the output is like the C<ls -l>
+command.
 
  $ virt-ls -l -d myguest /
  total 204
@@ -48,16 +54,188 @@ With the I<-l> (I<--long>) option, C<virt-ls> shows more detail:
  dr-xr-xr-x.   5 root root   3072 2009-08-25 19:06 boot
  [etc.]
 
-With the I<-R> (I<--recursive>) option, C<virt-ls> lists the
-names of files and directories recursively:
+Note that while this is useful for displaying a directory, do not try
+parsing this output in another program.  Use L</RECURSIVE LONG LISTING>
+instead.
+
+=head2 RECURSIVE LISTING
+(I<-R>)
+
+With the I<-R> (I<--recursive>) option, C<virt-ls> lists the names of
+files and directories recursively:
 
  $ virt-ls -R -d myguest /tmp
  foo
  foo/bar
  [etc.]
 
-You I<cannot> combine these options.  To do more complicated things,
-use L<guestfish(1)>.
+=head2 RECURSIVE LONG LISTING
+(I<-lR>)
+
+Using I<-lR> options together changes the output to display
+directories recursively, with file stats, and optionally other
+features such as checksums and extended attributes.
+
+Most of the interesting features of C<virt-ls> are only available when
+using I<-lR> mode.
+
+The fields are normally space-separated.  Filenames are I<not> quoted,
+so you cannot parse filenames from another program.  To safely parse
+the output in another program, it is recommended that you use the
+I<--csv> (Comma-Separated Values) option.
+
+Note that this output format is completely unrelated to the C<ls -lR>
+command.
+
+ $ virt-ls -lR -d myguest /bin
+ d 0555       4096 /bin -
+ - 0755        123 /bin/alsaunmute -
+ - 0755      28328 /bin/arch -
+ l 0777          4 /bin/awk gawk
+ - 0755      27216 /bin/basename -
+ - 0755     943360 /bin/bash -
+ [etc.]
+
+These basic fields are always shown:
+
+=over 4
+
+=item type
+
+The file type, one of:
+C<-> (regular file),
+C<d> (directory),
+C<c> (character device),
+C<b> (block device),
+C<p> (named pipe),
+C<l> (symbolic link),
+C<s> (socket) or
+C<u> (unknown).
+
+=item permissions
+
+The Unix permissions, displayed as a 4 digit octal number.
+
+=item size
+
+The size of the file.  This is shown in bytes unless the I<-h> or
+I<--human-readable> option is given, in which case this is shown as a
+human-readable number.
+
+=item path
+
+The full path of the file or directory.
+
+=item link
+
+For symbolic links, the link target, else C<->.
+
+=back
+
+In I<-lR> mode, additional command line options enable the display of
+more fields.
+
+With the I<--uids> flag, these additional fields are displayed:
+
+=over 4
+
+=item uid
+
+=item gid
+
+The UID and GID of the owner of the file (displayed numerically).
+Note these only make sense in the context of a Unix-like guest.
+
+=back
+
+With the I<--times> flag, these additional fields are displayed:
+
+=over 4
+
+=item atime
+
+The time of last access.
+
+=item mtime
+
+The time of last modification.
+
+=item ctime
+
+The time of last status change.
+
+=back
+
+The time fields are displayed as strings, unless the I<--time-t> flag
+is given in which case these are displayed as seconds since the Unix
+epoch.
+
+With the I<--extra-stats> flag, these additional fields are displayed:
+
+=over 4
+
+=item device
+
+The device containing the file (displayed as major:minor).
+This may not match devices as known to the guest.
+
+=item inode
+
+The inode number.
+
+=item nlink
+
+The number of hard links.
+
+=item rdev
+
+For block and char special files, the device
+(displayed as major:minor).
+
+=item blocks
+
+The number of 512 byte blocks allocated to the file.
+
+=back
+
+With the I<--checksum> flag, the checksum of the file contents is
+shown (only for regular files).  Computing file checksums can take a
+considerable amount of time.
+
+=head1 EXAMPLES
+
+Get a list of all files and directories in a virtual machine:
+
+ virt-ls -R -d myguest /
+
+List all setuid or setgid programs in a Linux virtual machine:
+
+ virt-ls -lR -d myguest / | grep '^- [2-7]'
+
+List all public-writable directories in a Linux virtual machine:
+
+ virt-ls -lR -d myguest / | grep '^d ...[2367]'
+
+List all Unix domain sockets in a Linux virtual machine:
+
+ virt-ls -lR -d myguest / | grep '^s'
+
+Find the differences between files in a guest and an earlier snapshot
+of the same guest.
+
+ virt-ls -lR -a snapshot.img / --uids --time-t > old
+ virt-ls -lR -a current.img / --uids --time-t > new
+ diff -u old new | less
+
+The commands above won't find files that have changed content yet have
+the same size and modification date.  To do that, you need to add the
+I<--checksum> parameter to both C<virt-ls> commands.  However that is
+quite slow since it has to read and compute a checksum of every
+regular file in the virtual machine.
+
+To display files larger than 10MB in home directories:
+
+ virt-ls -lR -d myguest /home | awk '$3 > 10*1024*1024'
 
 =head1 OPTIONS
 
@@ -78,6 +256,17 @@ them with separate I<-a> options.
 The format of the disk image is auto-detected.  To override this and
 force a particular format use the I<--format=..> option.
 
+=item B<--checksum>
+
+=item B<--checksum=crc|md5|sha1|sha224|sha256|sha384|sha512>
+
+Display checksum over file contents for regular files.  With no
+argument, this defaults to using I<md5>.  Using an argument, you can
+select the checksum type to use.
+
+This option only has effect in I<-lR> output mode.  See
+L</RECURSIVE LONG LISTING> above.
+
 =item B<-c> URI
 
 =item B<--connect> URI
@@ -88,6 +277,12 @@ connect to the default libvirt hypervisor.
 If you specify guest block devices directly (I<-a>), then libvirt is
 not used at all.
 
+=item B<--csv>
+
+Write out the results in CSV format (comma-separated values).  This
+format can be imported easily into databases and spreadsheets, but
+read L</NOTE ABOUT CSV FORMAT> below.
+
 =item B<-d> guest
 
 =item B<--domain> guest
@@ -102,6 +297,13 @@ echoing off so you cannot see what you are typing.  If you are not
 worried about Tempest attacks and there is no one else in the room you
 can specify this flag to see what you are typing.
 
+=item B<--extra-stats>
+
+Display extra stats.
+
+This option only has effect in I<-lR> output mode.  See
+L</RECURSIVE LONG LISTING> above.
+
 =item B<--format=raw|qcow2|..>
 
 =item B<--format>
@@ -126,6 +328,15 @@ If you have untrusted raw-format guest disk images, you should use
 this option to specify the disk format.  This avoids a possible
 security problem with malicious guests (CVE-2010-3851).
 
+=item B<-h>
+
+=item B<--human-readable>
+
+Display file sizes in human-readable format.
+
+This option only has effect in I<-lR> output mode.  See
+L</RECURSIVE LONG LISTING> above.
+
 =item B<--keys-from-stdin>
 
 Read key or passphrase parameters from stdin.  The default is
@@ -147,6 +358,27 @@ for precise details.
 
 You cannot combine these options.
 
+=item B<--times>
+
+Display time fields.
+
+This option only has effect in I<-lR> output mode.  See
+L</RECURSIVE LONG LISTING> above.
+
+=item B<--time-t>
+
+Display time fields as seconds since the Unix epoch.
+
+This option only has effect in I<-lR> output mode.  See
+L</RECURSIVE LONG LISTING> above.
+
+=item B<--uids>
+
+Display UID and GID fields.
+
+This option only has effect in I<-lR> output mode.  See
+L</RECURSIVE LONG LISTING> above.
+
 =item B<-v>
 
 =item B<--verbose>
@@ -181,6 +413,30 @@ name as a guest.
 
 For compatibility the old style is still supported.
 
+=head1 NOTE ABOUT CSV FORMAT
+
+Comma-separated values (CSV) is a deceptive format.  It I<seems> like
+it should be easy to parse, but it is definitely not easy to parse.
+
+Myth: Just split fields at commas.  Reality: This does I<not> work
+reliably.  This example has two columns:
+
+ "foo,bar",baz
+
+Myth: Read the file one line at a time.  Reality: This does I<not>
+work reliably.  This example has one row:
+
+ "foo
+ bar",baz
+
+For shell scripts, use C<csvtool> (L<http://merjis.com/developers/csv>
+also packaged in major Linux distributions).
+
+For other languages, use a CSV processing library (eg. C<Text::CSV>
+for Perl or Python's built-in csv library).
+
+Most spreadsheets and databases can import CSV directly.
+
 =head1 SHELL QUOTING
 
 Libvirt guest names can contain arbitrary characters, some of which
@@ -205,7 +461,7 @@ Richard W.M. Jones L<http://people.redhat.com/~rjones/>
 
 =head1 COPYRIGHT
 
-Copyright (C) 2009 Red Hat Inc.
+Copyright (C) 2009-2011 Red Hat Inc.
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-- 
1.7.5.1



More information about the Libguestfs mailing list