[Libguestfs] [PATCH 2/2] python: unicode decode handler error scheme setter

Matteo Cafasso noxdafox at gmail.com
Sun May 21 16:29:03 UTC 2017


The set_decode_error_handler function allows the user to set the
decoding error scheme to be used when non-UTF-8 characters are
encountered in Python 3.

The function has no effect in Python 2.

Signed-off-by: Matteo Cafasso <noxdafox at gmail.com>
---
 generator/python.ml            | 16 ++++++++++++++++
 python/handle.c                | 18 ++++++++++++++++--
 python/t/test830RHBZ1406906.py |  6 ++++++
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/generator/python.ml b/generator/python.ml
index f7c1f80bb..66bb7f27d 100644
--- a/generator/python.ml
+++ b/generator/python.ml
@@ -82,6 +82,7 @@ put_handle (guestfs_h *g)
 }

 extern void guestfs_int_py_extend_module (PyObject *module);
+extern PyObject *guestfs_int_py_set_decode_error_handler (PyObject *self, PyObject *args);

 extern PyObject *guestfs_int_py_create (PyObject *self, PyObject *args);
 extern PyObject *guestfs_int_py_close (PyObject *self, PyObject *args);
@@ -577,6 +578,8 @@ and generate_python_module () =

   (* Table of functions. *)
   pr "static PyMethodDef methods[] = {\n";
+  pr "  { (char *) \"set_decode_error_handler\", \n";
+  pr "    guestfs_int_py_set_decode_error_handler, METH_VARARGS, NULL },\n";
   pr "  { (char *) \"create\", guestfs_int_py_create, METH_VARARGS, NULL },\n";
   pr "  { (char *) \"close\", guestfs_int_py_close, METH_VARARGS, NULL },\n";
   pr "  { (char *) \"set_event_callback\",\n";
@@ -728,6 +731,19 @@ class ClosedHandle(ValueError):
     pass


+def set_decode_error_handler(handler):
+    \"\"\"Set the error handling scheme to use for the handling
+    of decoding errors.
+    The default is 'strict' meaning that decoding errors raise a
+    UnicodeDecodeError.
+
+    The other possible value is 'surrogateescape', see PEP383 for reference.
+
+    Return the previous error handler.
+    \"\"\"
+    return libguestfsmod.set_decode_error_handler(handler)
+
+
 class GuestFS(object):
     \"\"\"Instances of this class are libguestfs API handles.\"\"\"

diff --git a/python/handle.c b/python/handle.c
index 52c36f1d2..b665bb899 100644
--- a/python/handle.c
+++ b/python/handle.c
@@ -35,6 +35,8 @@

 #include "actions.h"

+static const char *decode_error_handler = "strict";
+
 static PyObject **get_all_event_callbacks (guestfs_h *g, size_t *len_rtn);

 void
@@ -45,6 +47,17 @@ guestfs_int_py_extend_module (PyObject *module)
 }

 PyObject *
+guestfs_int_py_set_decode_error_handler (PyObject *self, PyObject *args)
+{
+  const char *previous_handler = decode_error_handler;
+
+  if (!PyArg_ParseTuple (args, (char *) "s:set_decode_error_handler", &decode_error_handler))
+    return NULL;
+
+  return guestfs_int_py_fromstring (previous_handler);
+}
+
+PyObject *
 guestfs_int_py_create (PyObject *self, PyObject *args)
 {
   guestfs_h *g;
@@ -386,7 +399,8 @@ guestfs_int_py_fromstring (const char *str)
 #ifdef HAVE_PYSTRING_ASSTRING
   return PyString_FromString (str);
 #else
-  return PyUnicode_FromString (str);
+  Py_ssize_t size = strlen(str);
+  return PyUnicode_DecodeUTF8 (str, size, decode_error_handler);
 #endif
 }

@@ -396,7 +410,7 @@ guestfs_int_py_fromstringsize (const char *str, size_t size)
 #ifdef HAVE_PYSTRING_ASSTRING
   return PyString_FromStringAndSize (str, size);
 #else
-  return PyUnicode_FromStringAndSize (str, size);
+  return PyUnicode_DecodeUTF8 (str, size, decode_error_handler);
 #endif
 }

diff --git a/python/t/test830RHBZ1406906.py b/python/t/test830RHBZ1406906.py
index 17b875226..0bb1ac1d0 100644
--- a/python/t/test830RHBZ1406906.py
+++ b/python/t/test830RHBZ1406906.py
@@ -55,3 +55,9 @@ class Test830RHBZ1406906(unittest.TestCase):
         elif sys.version_info >= (2, 0):
             self.assertTrue(
                 any(path for path in g.find("/") if non_utf8_fname in path))
+
+        # change decoding error handler
+        self.assertEqual(
+            guestfs.set_decode_error_handler("surrogateescape"), 'strict')
+        self.assertTrue(
+            any(path for path in g.find("/") if non_utf8_fname in path))
--
2.11.0




More information about the Libguestfs mailing list