[Libguestfs] [PATCH 4/6] python: fix crash by validating key and value

Peter Wu peter at lekensteyn.nl
Sat Aug 16 11:28:48 UTC 2014


User-visible changes: no crashes if you try to pass a non-str type as
key/value in Python 3. Exceptions are thrown rather than silently
ignoring type violations (such as passing a Unicode string as value
in Python 2).

Until now, Python 2 keys and values were assumes to be bytes, in
Python 3 these were seen as Unicode strings instead. Remove the
compile-check for a deprecated Python 2 function (`PyString_AsString`)
and check for byte types instead.

For keys, accept both strings and bytes as the common case is to pass
a ASCII string name. (The `\u20ac` Unicode character (Euro-sign) is
apparently also valid as key name, verified with Python 3 and
Windows 7).

For values, be more conservative and reject all types except bytes. This
better matches the C API and removes the burden of character conversion
from the wrapper. Instead, callers should know the correct encoding
and terminate strings.

Modify tests to specify bytes instead of strings. (Otherwise Python 3
would break while Python 2 still passes.)
---
 configure.ac             |  4 ----
 generator/generator.ml   | 41 +++++++++++++++++++++++++----------------
 python/t/200-write.py    |  4 ++--
 python/t/210-setvalue.py | 28 ++++++++++------------------
 4 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/configure.ac b/configure.ac
index f50daff..4566a32 100644
--- a/configure.ac
+++ b/configure.ac
@@ -345,10 +345,6 @@ AS_IF([test "x$enable_python" != "xno"],
                          [AC_DEFINE([HAVE_PYCAPSULE_NEW],1,
                                     [Found PyCapsule_New in libpython])],
                          [],[$PYTHON_BLDLIBRARY])
-            AC_CHECK_LIB([c],[PyString_AsString],
-                         [AC_DEFINE([HAVE_PYSTRING_ASSTRING],1,
-                                    [Found PyString_AsString in libpython])],
-                         [],[$PYTHON_BLDLIBRARY])
 
             LIBS="$old_LIBS"
         fi
diff --git a/generator/generator.ml b/generator/generator.ml
index 44a4d47..3c7b5ab 100755
--- a/generator/generator.ml
+++ b/generator/generator.ml
@@ -2840,9 +2840,7 @@ static int
 get_value (PyObject *v, hive_set_value *ret)
 {
   PyObject *obj;
-#ifndef HAVE_PYSTRING_ASSTRING
   PyObject *bytes;
-#endif
 
   if (!PyDict_Check (v)) {
     PyErr_SetString (PyExc_TypeError, \"expected dictionary type for value\");
@@ -2854,12 +2852,20 @@ get_value (PyObject *v, hive_set_value *ret)
     PyErr_SetString (PyExc_KeyError, \"no 'key' element in dictionary\");
     return -1;
   }
-#ifdef HAVE_PYSTRING_ASSTRING
-  ret->key = PyString_AsString (obj);
-#else
-  bytes = PyUnicode_AsUTF8String (obj);
-  ret->key = PyBytes_AS_STRING (bytes);
-#endif
+  if (PyUnicode_Check (obj)) {
+    /* TODO: use PyUnicode_DecodeASCII or PyUnicode_AsUTF16String instead? */
+    bytes = PyUnicode_AsUTF8String (obj);
+    if (bytes == NULL) {
+      PyErr_SetString (PyExc_ValueError, \"failed to decode 'key'\");
+      return -1;
+    }
+    ret->key = PyBytes_AS_STRING (bytes);
+  } else if (PyBytes_Check (obj)) {
+    ret->key = PyBytes_AS_STRING (obj);
+  } else {
+    PyErr_SetString (PyExc_TypeError, \"expected bytes type for 'key'\");
+    return -1;
+  }
 
   obj = PyDict_GetItemString (v, \"t\");
   if (!obj) {
@@ -2877,14 +2883,17 @@ get_value (PyObject *v, hive_set_value *ret)
     PyErr_SetString (PyExc_KeyError, \"no 'value' element in dictionary\");
     return -1;
   }
-#ifdef HAVE_PYSTRING_ASSTRING
-  ret->value = PyString_AsString (obj);
-  ret->len = PyString_Size (obj);
-#else
-  bytes = PyUnicode_AsUTF8String (obj);
-  ret->value = PyBytes_AS_STRING (bytes);
-  ret->len = PyBytes_GET_SIZE (bytes);
-#endif
+  /* Support bytes only. As the registry can use multiple character sets, reject
+   * Unicode str types and let the caller handle conversion to nul-terminated
+   * UTF-16-LE, ASCII, etc. as necessary. This means that 'x' and b'x' are valid
+   * in Python 2 (but not u'x') but that in Python 3, only b'x' is valid. */
+  if (PyBytes_Check (obj)) {
+    ret->len = PyBytes_GET_SIZE (obj);
+    ret->value = PyBytes_AS_STRING (obj);
+  } else {
+    PyErr_SetString (PyExc_TypeError, \"expected bytes type for 'value'\");
+    return -1;
+  }
 
   return 0;
 }
diff --git a/python/t/200-write.py b/python/t/200-write.py
index 0692f11..4759f82 100644
--- a/python/t/200-write.py
+++ b/python/t/200-write.py
@@ -37,7 +37,7 @@ b = h.node_get_child (root, "B")
 assert b
 
 values = [
-    { "key": "Key1", "t": 3, "value": "ABC" },
-    { "key": "Key2", "t": 3, "value": "DEF" }
+    { "key": "Key1", "t": 3, "value": b"ABC" },
+    { "key": "Key2", "t": 3, "value": b"DEF" }
 ]
 h.node_set_values (b, values)
diff --git a/python/t/210-setvalue.py b/python/t/210-setvalue.py
index 8e7ed5c..495c55a 100644
--- a/python/t/210-setvalue.py
+++ b/python/t/210-setvalue.py
@@ -19,14 +19,6 @@ import sys
 import os
 import hivex
 
-if sys.version >= '3':
-    import codecs
-    def b(x):
-        return codecs.encode(x)
-else:
-    def b(x):
-        return x
-
 srcdir = os.environ["srcdir"]
 if not srcdir:
     srcdir = "."
@@ -44,23 +36,23 @@ B = h.node_get_child (root, "B")
 assert B
 
 values = [
-    { "key": "Key1", "t": 3, "value": "ABC" },
-    { "key": "Key2", "t": 3, "value": "DEF" }
+    { "key": "Key1", "t": 3, "value": b"ABC" },
+    { "key": "Key2", "t": 3, "value": b"DEF" }
 ]
 h.node_set_values (B, values)
 
-value1 = { "key": "Key3", "t": 3, "value": "GHI" }
+value1 = { "key": "Key3", "t": 3, "value": b"GHI" }
 h.node_set_value (B, value1)
 
-value1 = { "key": "Key1", "t": 3, "value": "JKL" }
+value1 = { "key": "Key1", "t": 3, "value": b"JKL" }
 h.node_set_value (B, value1)
 
 val = h.node_get_value (B, "Key1")
-t_data = h.value_value (val)
-assert t_data[0] == 3
-assert t_data[1] == b("JKL")
+val_t, val_value = h.value_value (val)
+assert val_t == 3
+assert val_value == b"JKL"
 
 val = h.node_get_value (B, "Key3")
-t_data = h.value_value (val)
-assert t_data[0] == 3
-assert t_data[1] == b("GHI")
+val_t, val_value = h.value_value (val)
+assert val_t == 3
+assert val_value == b"GHI"
-- 
2.0.4




More information about the Libguestfs mailing list