[Libguestfs] [libnbd PATCH 3/2] python: Slice pread_structured buffer from original

Eric Blake eblake at redhat.com
Tue May 31 15:49:03 UTC 2022


This patch fixes the corner-case regression introduced by the previous
patch where the pread_structured callback buffer lifetime ends as soon
as the callback (that is, where accessing a stashed callback parameter
can result in ValueError instead of modifying a harmless copy).  With
careful effort, we can create a memoryview of the Python object that
we read into, then slice that memoryview as part of the callback; now
the slice will be valid as long as the original object is also valid.

Note that this does not tackle the case of aio_pread_structured; for
that, we either need to make nbd.Buffer honor the Python buffer
protocol, or else change our handling of BytesPersistOut to utilize
the buffer protocol directly instead of requiring the user to
round-trip through nbd.Buffer.

The runtime of this patch is still about the same 3.0s as in the test
of the previous patch, but now you can do:

$ nbdsh
nbd> h.connect_command(["nbdkit","-s","memory","10"])
nbd> copy=None
nbd> def f(b,o,s,e):
...  global copy
...  copy = b
...  print(b[0])
...
nbd> print(copy)
None
nbd> buf = h.pread_structured(10,0,f)
0
nbd> copy
<memory at 0x7fecef4e07c0>
nbd> copy[0]=0x31
nbd> print(buf)
bytearray(b'1\x00\x00\x00\x00\x00\x00\x00\x00\x00')

That is, because we passed in a memoryview slice of the original
buffer instead of a temporary slice of C memory, we can now stash that
slice into a global, modify it, and see those modifications reflected
back to the overall bytearray object returned by pread_structured.

The generated diff is a bit more verbose, including some no-op lines
added into pread, aio_pread, and aio_pread_structured.  In practice,
only pread_structured is actually impacted.

| --- python/methods.c.bak	2022-05-31 09:51:30.406757934 -0500
| +++ python/methods.c	2022-05-31 10:26:01.797636894 -0500
| @@ -27,6 +27,7 @@
|  #include <stdlib.h>
|  #include <stdint.h>
|  #include <stdbool.h>
| +#include <stddef.h>
|
|  #include <libnbd.h>
|
| @@ -76,8 +77,24 @@ chunk_wrapper (void *user_data, const vo
|    PyObject *py_subbuf = NULL;
|    PyObject *py_error = NULL;
|
| -  /* Cast away const */
| -  py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ);
| +  if (data->buf) {
| +    PyObject *start, *end, *slice;
| +    assert (PyMemoryView_Check (data->buf));
| +    ptrdiff_t offs = subbuf - PyMemoryView_GET_BUFFER (data->buf)->buf;
| +    start = PyLong_FromLong (offs);
| +    if (!start) { PyErr_PrintEx (0); goto out; }
| +    end = PyLong_FromLong (offs + count);
| +    if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }
| +    slice = PySlice_New (start, end, NULL);
| +    Py_DECREF (start);
| +    Py_DECREF (end);
| +    if (!slice) { PyErr_PrintEx (0); goto out; }
| +    py_subbuf = PyObject_GetItem (data->buf, slice);
| +  }
| +  else {
| +    /* Cast away const */
| +    py_subbuf = PyMemoryView_FromMemory ((char *) subbuf, count, PyBUF_READ);
| +  }
|    if (!py_subbuf) { PyErr_PrintEx (0); goto out; }
|    PyObject *py_error_modname = PyUnicode_FromString ("ctypes");
|    if (!py_error_modname) { PyErr_PrintEx (0); goto out; }
| @@ -115,11 +132,11 @@ chunk_wrapper (void *user_data, const vo
|    };
|
|   out:
| -  if (py_subbuf) {
| +  if (py_subbuf && !data->buf) {
|      PyObject *tmp = PyObject_CallMethod(py_subbuf, "release", NULL);
|      Py_XDECREF (tmp);
| -    Py_DECREF (py_subbuf);
|    }
| +  Py_XDECREF (py_subbuf);
|    if (py_error) {
|      PyObject *py_error_ret = PyObject_GetAttrString (py_error, "value");
|      *error = PyLong_AsLong (py_error_ret);
| @@ -2304,7 +2321,7 @@ nbd_internal_py_pread (PyObject *self, P
|    struct nbd_handle *h;
|    int ret;
|    PyObject *py_ret = NULL;
| -  PyObject *buf = NULL;
| +  PyObject *py_buf = NULL;
|    Py_ssize_t count;
|    uint64_t offset_u64;
|    unsigned long long offset; /* really uint64_t */
| @@ -2318,20 +2335,20 @@ nbd_internal_py_pread (PyObject *self, P
|    h = get_handle (py_h);
|    if (!h) goto out;
|    flags_u32 = flags;
| -  buf = PyByteArray_FromStringAndSize (NULL, count);
| -  if (buf == NULL) goto out;
| +  py_buf = PyByteArray_FromStringAndSize (NULL, count);
| +  if (py_buf == NULL) goto out;
|    offset_u64 = offset;
|
| -  ret = nbd_pread (h, PyByteArray_AS_STRING (buf), count, offset_u64, flags_u32);
| +  ret = nbd_pread (h, PyByteArray_AS_STRING (py_buf), count, offset_u64, flags_u32);
|    if (ret == -1) {
|      raise_exception ();
|      goto out;
|    }
| -  py_ret = buf;
| -  buf = NULL;
| +  py_ret = py_buf;
| +  py_buf = NULL;
|
|   out:
| -  Py_XDECREF (buf);
| +  Py_XDECREF (py_buf);
|    return py_ret;
|  }
|
| @@ -2342,7 +2359,7 @@ nbd_internal_py_pread_structured (PyObje
|    struct nbd_handle *h;
|    int ret;
|    PyObject *py_ret = NULL;
| -  PyObject *buf = NULL;
| +  PyObject *py_buf = NULL;
|    Py_ssize_t count;
|    uint64_t offset_u64;
|    unsigned long long offset; /* really uint64_t */
| @@ -2360,8 +2377,8 @@ nbd_internal_py_pread_structured (PyObje
|    h = get_handle (py_h);
|    if (!h) goto out;
|    flags_u32 = flags;
| -  buf = PyByteArray_FromStringAndSize (NULL, count);
| -  if (buf == NULL) goto out;
| +  py_buf = PyByteArray_FromStringAndSize (NULL, count);
| +  if (py_buf == NULL) goto out;
|    offset_u64 = offset;
|    chunk.user_data = chunk_user_data = alloc_user_data ();
|    if (chunk_user_data == NULL) goto out;
| @@ -2373,18 +2390,20 @@ nbd_internal_py_pread_structured (PyObje
|    /* Increment refcount since pointer may be saved by libnbd. */
|    Py_INCREF (py_chunk_fn);
|    chunk_user_data->fn = py_chunk_fn;
| +  if (py_buf)
| +    chunk_user_data->buf = PyMemoryView_FromObject (py_buf);
|
| -  ret = nbd_pread_structured (h, PyByteArray_AS_STRING (buf), count, offset_u64, chunk, flags_u32);
| +  ret = nbd_pread_structured (h, PyByteArray_AS_STRING (py_buf), count, offset_u64, chunk, flags_u32);
|    chunk_user_data = NULL;
|    if (ret == -1) {
|      raise_exception ();
|      goto out;
|    }
| -  py_ret = buf;
| -  buf = NULL;
| +  py_ret = py_buf;
| +  py_buf = NULL;
|
|   out:
| -  Py_XDECREF (buf);
| +  Py_XDECREF (py_buf);
|    free_user_data (chunk_user_data);
|    return py_ret;
|  }
| @@ -3171,6 +3190,7 @@ nbd_internal_py_aio_pread (PyObject *sel
|    struct nbd_handle *h;
|    int64_t ret;
|    PyObject *py_ret = NULL;
| +  PyObject *py_buf = NULL;
|    PyObject *buf; /* PyCapsule pointing to struct py_aio_buffer */
|    struct py_aio_buffer *buf_buf;
|    uint64_t offset_u64;
| @@ -3230,6 +3250,7 @@ nbd_internal_py_aio_pread_structured (Py
|    struct nbd_handle *h;
|    int64_t ret;
|    PyObject *py_ret = NULL;
| +  PyObject *py_buf = NULL;
|    PyObject *buf; /* PyCapsule pointing to struct py_aio_buffer */
|    struct py_aio_buffer *buf_buf;
|    uint64_t offset_u64;
| @@ -3282,6 +3303,8 @@ nbd_internal_py_aio_pread_structured (Py
|    /* Increment refcount since pointer may be saved by libnbd. */
|    Py_INCREF (py_chunk_fn);
|    chunk_user_data->fn = py_chunk_fn;
| +  if (py_buf)
| +    chunk_user_data->buf = PyMemoryView_FromObject (py_buf);
|
|    ret = nbd_aio_pread_structured (h, buf_buf->data, buf_buf->len, offset_u64, chunk, completion, flags_u32);
|    chunk_user_data = NULL;
---
 generator/Python.ml | 60 +++++++++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/generator/Python.ml b/generator/Python.ml
index 0191f79..4160759 100644
--- a/generator/Python.ml
+++ b/generator/Python.ml
@@ -187,8 +187,24 @@ let
        pr "    PyList_SET_ITEM (py_%s, i_%s, py_e_%s);\n" n n n;
        pr "  }\n"
     | CBBytesIn (n, len) ->
-       pr "  /* Cast away const */\n";
-       pr "  py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len;
+       pr "  if (data->buf) {\n";
+       pr "    PyObject *start, *end, *slice;\n";
+       pr "    assert (PyMemoryView_Check (data->buf));\n";
+       pr "    ptrdiff_t offs = %s - PyMemoryView_GET_BUFFER (data->buf)->buf;\n" n;
+       pr "    start = PyLong_FromLong (offs);\n";
+       pr "    if (!start) { PyErr_PrintEx (0); goto out; }\n";
+       pr "    end = PyLong_FromLong (offs + %s);\n" len;
+       pr "    if (!end) { Py_DECREF (start); PyErr_PrintEx (0); goto out; }\n";
+       pr "    slice = PySlice_New (start, end, NULL);\n";
+       pr "    Py_DECREF (start);\n";
+       pr "    Py_DECREF (end);\n";
+       pr "    if (!slice) { PyErr_PrintEx (0); goto out; }\n";
+       pr "    py_%s = PyObject_GetItem (data->buf, slice);\n" n;
+       pr "  }\n";
+       pr "  else {\n";
+       pr "    /* Cast away const */\n";
+       pr "    py_%s = PyMemoryView_FromMemory ((char *) %s, %s, PyBUF_READ);\n" n n len;
+       pr "  }\n";
        pr "  if (!py_%s) { PyErr_PrintEx (0); goto out; }\n" n
     | CBInt _
     | CBInt64 _ -> ()
@@ -272,11 +288,11 @@ let
        pr "  }\n"
     | CBBytesIn (n, _) ->
        (* https://stackoverflow.com/questions/58870010/how-to-call-release-on-a-memoryview-in-python-c-api *)
-       pr "  if (py_%s) {\n" n;
+       pr "  if (py_%s && !data->buf) {\n" n;
        pr "    PyObject *tmp = PyObject_CallMethod(py_%s, \"release\", NULL);\n" n;
        pr "    Py_XDECREF (tmp);\n";
-       pr "    Py_DECREF (py_%s);\n" n;
-       pr "  }\n"
+       pr "  }\n";
+       pr "  Py_XDECREF (py_%s);\n" n
     | CBInt _ | CBInt64 _
     | CBString _
     | CBUInt _ | CBUInt64 _ -> ()
@@ -288,6 +304,7 @@ let

 (* Generate the Python binding. *)
 let print_python_binding name { args; optargs; ret; may_set_error } =
+  let store_mem = ref false in
   pr "PyObject *\n";
   pr "nbd_internal_py_%s (PyObject *self, PyObject *args)\n" name;
   pr "{\n";
@@ -301,13 +318,19 @@ let
     | BytesIn (n, _) ->
        pr "  Py_buffer %s = { .obj = NULL };\n" n
     | BytesOut (n, count) ->
-       pr "  PyObject *%s = NULL;\n" n;
-       pr "  Py_ssize_t %s;\n" count
-    | BytesPersistIn (n, _)
-    | BytesPersistOut (n, _) ->
+       pr "  PyObject *py_%s = NULL;\n" n;
+       pr "  Py_ssize_t %s;\n" count;
+       store_mem := true
+    | BytesPersistIn (n, _) ->
        pr "  PyObject *%s; /* PyCapsule pointing to struct py_aio_buffer */\n"
           n;
        pr "  struct py_aio_buffer *%s_buf;\n" n
+    | BytesPersistOut (n, _) ->
+       pr "  PyObject *py_%s = NULL;\n" n;
+       pr "  PyObject *%s; /* PyCapsule pointing to struct py_aio_buffer */\n"
+          n;
+       pr "  struct py_aio_buffer *%s_buf;\n" n;
+       store_mem := true
     | Closure { cbname } ->
        pr "  struct user_data *%s_user_data = NULL;\n" cbname;
        pr "  PyObject *py_%s_fn;\n" cbname;
@@ -440,8 +463,8 @@ let
     | Bool _ -> ()
     | BytesIn _ -> ()
     | BytesOut (n, count) ->
-       pr "  %s = PyByteArray_FromStringAndSize (NULL, %s);\n" n count;
-       pr "  if (%s == NULL) goto out;\n" n
+       pr "  py_%s = PyByteArray_FromStringAndSize (NULL, %s);\n" n count;
+       pr "  if (py_%s == NULL) goto out;\n" n
     | BytesPersistIn (n, _) | BytesPersistOut (n, _) ->
        pr "  %s_buf = nbd_internal_py_get_aio_buffer (%s);\n" n n;
        pr "  if (!%s_buf) goto out;\n" n;
@@ -458,7 +481,11 @@ let
        pr "  }\n";
        pr "  /* Increment refcount since pointer may be saved by libnbd. */\n";
        pr "  Py_INCREF (py_%s_fn);\n" cbname;
-       pr "  %s_user_data->fn = py_%s_fn;\n" cbname cbname
+       pr "  %s_user_data->fn = py_%s_fn;\n" cbname cbname;
+       if !store_mem then (
+         pr "  if (py_buf)\n";
+         pr "    %s_user_data->buf = PyMemoryView_FromObject (py_buf);\n" cbname
+       )
     | Enum _ -> ()
     | Flags (n, _) -> pr "  %s_u32 = %s;\n" n n
     | Fd _ | Int _ -> ()
@@ -487,7 +514,7 @@ let
     function
     | Bool n -> pr ", %s" n
     | BytesIn (n, _) -> pr ", %s.buf, %s.len" n n
-    | BytesOut (n, count) -> pr ", PyByteArray_AS_STRING (%s), %s" n count
+    | BytesOut (n, count) -> pr ", PyByteArray_AS_STRING (py_%s), %s" n count
     | BytesPersistIn (n, _)
     | BytesPersistOut (n, _) -> pr ", %s_buf->data, %s_buf->len" n n
     | Closure { cbname } -> pr ", %s" cbname
@@ -533,8 +560,8 @@ let
   List.iter (
     function
     | BytesOut (n, _) ->
-       pr "  py_ret = %s;\n" n;
-       pr "  %s = NULL;\n" n;
+       pr "  py_ret = py_%s;\n" n;
+       pr "  py_%s = NULL;\n" n;
        use_ret := false
     | Bool _
     | BytesIn _
@@ -581,7 +608,7 @@ let
     | BytesIn (n, _) ->
        pr "  if (%s.obj)\n" n;
        pr "    PyBuffer_Release (&%s);\n" n
-    | BytesOut (n, _) -> pr "  Py_XDECREF (%s);\n" n
+    | BytesOut (n, _) -> pr "  Py_XDECREF (py_%s);\n" n
     | BytesPersistIn _ | BytesPersistOut _ -> ()
     | Closure { cbname } ->
        pr "  free_user_data (%s_user_data);\n" cbname
@@ -620,6 +647,7 @@ let
   pr "#include <stdlib.h>\n";
   pr "#include <stdint.h>\n";
   pr "#include <stdbool.h>\n";
+  pr "#include <stddef.h>\n";
   pr "\n";
   pr "#include <libnbd.h>\n";
   pr "\n";
-- 
2.36.1



More information about the Libguestfs mailing list