[Libguestfs] [PATCH] guestfish: Enable grouping in string lists

Matthew Booth mbooth at redhat.com
Fri Sep 11 16:54:44 UTC 2009


This change adds the ability to group entries in a string list with single
quotes. So the string:
  "'foo bar'"
becomes 1 token rather than 2. Consequently single quotes must now be escaped:
  "\'"
resolves to a literal single quote.

Incidentally, this change also alters another, probably unintentional behaviour
of the previous implementation, in that tokens are separated by any amount of
whitespace rather than a single whitespace character. I.e.:
  "a  b"
resolves to:
  'a' 'b'
rather than:
  'a' '' 'b'
That last syntax can be used if an empty argument is still desired. Whitespace
is now also defined to include tabs.

parse_string_list can also now fail if it contains an unmatched open quote.
---
 fish/fish.c      |  131 +++++++++++++++++++++++++++++++++++++++++++++--------
 guestfish.pod    |    6 ++-
 src/generator.ml |    3 +-
 3 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/fish/fish.c b/fish/fish.c
index a4069d6..86c0dfa 100644
--- a/fish/fish.c
+++ b/fish/fish.c
@@ -1082,30 +1082,121 @@ is_true (const char *str)
     strcasecmp (str, "no") != 0;
 }
 
-/* XXX We could improve list parsing. */
 char **
 parse_string_list (const char *str)
 {
-  char **argv;
-  const char *p, *pend;
-  int argc, i;
-
-  argc = 1;
-  for (i = 0; str[i]; ++i)
-    if (str[i] == ' ') argc++;
-
-  argv = malloc (sizeof (char *) * (argc+1));
-  if (argv == NULL) { perror ("malloc"); exit (1); }
-
-  p = str;
-  i = 0;
-  while (*p) {
-    pend = strchrnul (p, ' ');
-    argv[i] = strndup (p, pend-p);
-    i++;
-    p = *pend == ' ' ? pend+1 : pend;
+  char **argv = NULL;
+  size_t argv_len = 0;
+
+  /* Current position pointer */
+  const char *p = str;
+
+  /* Token might be simple:
+   *  Token
+   * or be quoted:
+   *  'This is a single token'
+   * or contain embedded single-quoted sections:
+   *  This' is a sing'l'e to'ken
+   *
+   * The latter may seem over-complicated, but it's what a normal shell does.
+   * Not doing it risks surprising somebody.
+   *
+   * This outer loop is over complete tokens.
+   */
+  while(*p) {
+    char *tok = NULL;
+    size_t tok_len = 0;
+
+    /* Skip leading whitespace */
+    p += strspn (p, " \t");
+
+    char in_quote = 0;
+
+    /* This loop is over token 'fragments'. A token can be in multiple bits if
+     * it contains single quotes. We also treat both sides of an escaped quote
+     * as separate fragments because we can't just copy it: we have to remove
+     * the \.
+     */
+    while (*p && (!isblank (*p) || in_quote)) {
+      const char *end = p;
+
+      /* Check if the fragment starts with a quote */
+      if ('\'' == *p) {
+        /* Toggle in_quote */
+        in_quote = !in_quote;
+
+        /* Skip the quote */
+        p++; end++;
+      }
+
+      /* If we're in a quote, look for an end quote */
+      if (in_quote) {
+        end += strcspn (end, "'");
+      }
+
+      /* Otherwise, look for whitespace or a quote */
+      else {
+        end += strcspn (end, " \t'");
+      }
+
+      /* Grow the token to accommodate the fragment */
+      size_t tok_end = tok_len;
+      tok_len += end - p;
+      tok = realloc (tok, tok_len + 1);
+      if (NULL == tok) { perror ("realloc"); exit (1); }
+
+      /* Check if we stopped on an escaped quote */
+      if ('\'' == *end && end != p && *(end-1) == '\\') {
+        /* Add everything before \' to the token */
+        memcpy (&tok[tok_end], p, end - p - 1);
+
+        /* Add the quote */
+        tok[tok_len-1] = '\'';
+
+        /* Already processed the quote */
+        p = end + 1;
+      }
+
+      else {
+        /* Add the whole fragment */
+        memcpy (&tok[tok_end], p, end - p);
+
+        p = end;
+      }
+    }
+
+    /* We've reached the end of a token. We shouldn't still be in quotes. */
+    if (in_quote) {
+      fprintf(stderr, _("Runaway quote in string \"%s\"\n"), str);
+
+      /* Can't use free_strings here because argv isn't NULL terminated yet */
+      for (size_t i = 0; i < argv_len; i++) {
+        free (argv[i]);
+      }
+      free(argv);
+
+      return NULL;
+    }
+
+    /* Add this token if there is one. There might not be if there was
+     * whitespace at the end of the input string */
+    if(tok) {
+      /* Add the NULL terminator */
+      tok[tok_len] = '\0';
+
+      /* Add the argument to the argument list */
+      argv_len++;
+      argv = realloc(argv, sizeof(*argv) * argv_len);
+      if (NULL == argv) { perror ("realloc"); exit (1); }
+      argv[argv_len-1] = tok;
+    }
   }
-  argv[i] = NULL;
+
+  /* NULL terminate the argument list */
+  argv_len++;
+  argv = realloc(argv, sizeof(*argv) * argv_len);
+  if (NULL == argv) { perror ("realloc"); exit (1); }
+  argv[argv_len-1] = NULL;
 
   return argv;
 }
diff --git a/guestfish.pod b/guestfish.pod
index d24c162..affb83b 100644
--- a/guestfish.pod
+++ b/guestfish.pod
@@ -250,9 +250,13 @@ quotes.  For example:
  rm '/"'
 
 A few commands require a list of strings to be passed.  For these, use
-a space-separated list, enclosed in quotes.  For example:
+a whitespace-separated list, enclosed in quotes.  Strings containing whitespace
+to be passed through must be enclosed in single quotes.  A literal single quote
+must be escaped with a backslash.
 
  vgcreate VG "/dev/sda1 /dev/sdb1"
+ command "/bin/echo 'foo      bar'"
+ command "/bin/echo \'foo\'"
 
 =head1 WILDCARDS AND GLOBBING
 
diff --git a/src/generator.ml b/src/generator.ml
index db37228..9b2600f 100755
--- a/src/generator.ml
+++ b/src/generator.ml
@@ -6361,7 +6361,8 @@ and generate_fish_cmds () =
               pr "  %s = strcmp (argv[%d], \"-\") != 0 ? argv[%d] : \"/dev/stdout\";\n"
                 name i i
           | StringList name | DeviceList name ->
-              pr "  %s = parse_string_list (argv[%d]);\n" name i
+              pr "  %s = parse_string_list (argv[%d]);\n" name i;
+              pr "  if (%s == NULL) return -1;\n" name;
           | Bool name ->
               pr "  %s = is_true (argv[%d]) ? 1 : 0;\n" name i
           | Int name ->
-- 
1.6.2.5




More information about the Libguestfs mailing list