[Libguestfs] [nbdkit PATCH v2 2/2] utils: Revamp nbdkit_parse_size

Wed Feb 7 23:32:58 UTC 2018

The existing implementation admitted that it was not very robust
in the face of garbage input (including, but not limited to, the
the fact that the scanf() family has undefined behavior on integer
overflow).  Tighten things up by reimplementing the function.

The old comment mentioned the 'human*' interface from gnulib;
we can't use that for licensing reasons; so my rewrite is done
from scratch.  In particular, I know that gnulib's code accepts
'M' as a synonym for '1M'; treats '1k' and '1kiB' as 1024 but
'1kB' as 1000, and so on, which I didn't bother to implement here.

The unit test can now be adjusted to cover the cases that are
now handled correctly.

Signed-off-by: Eric Blake <eblake at redhat.com>
---
 src/test-utils.c | 20 ++++++------
 src/utils.c      | 95 ++++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/src/test-utils.c b/src/test-utils.c
index e26a6f3..80bef2e 100644
--- a/src/test-utils.c
+++ b/src/test-utils.c
@@ -61,19 +61,19 @@ test_nbdkit_parse_size (void)
     { "", -1 },
     { "0x0", -1 },
     { "garbage", -1 },
-    /* { "0garbage", -1 }, */
-    /* { "-1", -1 }, */
-    /* { "-2", -1 }, */
-    /* { "9223372036854775808", -1 }, */
-    /* { "-9223372036854775808", -1 }, */
-    /* { "8E", -1 }, */
-    /* { "8192P", -1 }, */
-    /* { "999999999999999999999999", -1 }, */
+    { "0garbage", -1 },
+    { "-1", -1 },
+    { "-2", -1 },
+    { "9223372036854775808", -1 },
+    { "-9223372036854775808", -1 },
+    { "8E", -1 },
+    { "8192P", -1 },
+    { "999999999999999999999999", -1 },

     /* Strings we may want to support in the future */
     { "M", -1 },
-    /* { "1MB", -1 }, */
-    /* { "1MiB", -1 }, */
+    { "1MB", -1 },
+    { "1MiB", -1 },
     { "1.5M", -1 },

     /* Valid strings */
diff --git a/src/utils.c b/src/utils.c
index 5663043..a7519ec 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -1,5 +1,5 @@
 /* nbdkit
- * Copyright (C) 2013 Red Hat Inc.
+ * Copyright (C) 2013-2018 Red Hat Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -80,49 +80,78 @@ nbdkit_absolute_path (const char *path)
   return ret;
 }

-/* XXX Multiple problems with this function.  Really we should use the
- * 'human*' functions from gnulib.
+/* Parse a string with possible scaling suffix, or return -1 after reporting
+ * the error.
  */
 int64_t
 nbdkit_parse_size (const char *str)
 {
   uint64_t size;
-  char t;
+  char *end;
+  uint64_t scale = 1;

-  if (sscanf (str, "%" SCNu64 "%c", &size, &t) == 2) {
-    switch (t) {
-    case 'b': case 'B':
-      return (int64_t) size;
-    case 'k': case 'K':
-      return (int64_t) size * 1024;
-    case 'm': case 'M':
-      return (int64_t) size * 1024 * 1024;
-    case 'g': case 'G':
-      return (int64_t) size * 1024 * 1024 * 1024;
-    case 't': case 'T':
-      return (int64_t) size * 1024 * 1024 * 1024 * 1024;
-    case 'p': case 'P':
-      return (int64_t) size * 1024 * 1024 * 1024 * 1024 * 1024;
-    case 'e': case 'E':
-      return (int64_t) size * 1024 * 1024 * 1024 * 1024 * 1024 * 1024;
+  /* Scan unsigned, but range check later that result fits in signed. */
+  /* XXX Should we also parse things like '1.5M'? */
+  /* XXX Should we allow hex? If so, hex cannot use scaling suffixes,
+   * because some of them are valid hex digits */
+  errno = 0;
+  size = strtoumax (str, &end, 10);
+  if (errno || str == end) {
+    nbdkit_error ("could not parse size string (%s)", str);
+    return -1;
+  }
+  switch (*end) {
+  case '\0':
+    end--; /* Safe, since we already filtered out empty string */
+    break;
+
+    /* Powers of 1024 */
+  case 'e': case 'E':
+    scale *= 1024;
+    /* fallthru */
+  case 'p': case 'P':
+    scale *= 1024;
+    /* fallthru */
+  case 't': case 'T':
+    scale *= 1024;
+    /* fallthru */
+  case 'g': case 'G':
+    scale *= 1024;
+    /* fallthru */
+  case 'm': case 'M':
+    scale *= 1024;
+    /* fallthru */
+  case 'k': case 'K':
+    scale *= 1024;
+    /* fallthru */
+  case 'b': case 'B':
+    break;

-    case 's': case 'S':         /* "sectors", ie. units of 512 bytes,
-                                 * even if that's not the real sector size
-                                 */
-      return (int64_t) size * 512;
+  case 's': case 'S':         /* "sectors", ie. units of 512 bytes,
+                               * even if that's not the real sector size
+                               */
+    scale = 512;
+    break;

-    default:
-      nbdkit_error ("could not parse size: unknown specifier '%c'", t);
-      return -1;
-    }
+  default:
+    nbdkit_error ("could not parse size: unknown suffix '%s'", end);
+    return -1;
   }

-  /* bytes */
-  if (sscanf (str, "%" SCNu64, &size) == 1)
-    return (int64_t) size;
+  /* XXX Maybe we should support 'MiB' as a synonym for 'M'; and 'MB'
+   * for powers of 1000, for similarity to GNU tools. But for now,
+   * anything beyond 'M' is dropped.  */
+  if (end[1]) {
+    nbdkit_error ("could not parse size: unknown suffix '%s'", end);
+    return -1;
+  }
+
+  if (INT64_MAX / scale < size) {
+    nbdkit_error ("overflow computing size (%s)", str);
+    return -1;
+  }

-  nbdkit_error ("could not parse size string (%s)", str);
-  return -1;
+  return size * scale;
 }

 /* Read a password from configuration value. */
-- 
2.14.3