[Libguestfs] [PATCH] hivex: Add byte runs for nodes and values

Alex Nelson ajnelson at cs.ucsc.edu
Wed Aug 31 23:34:30 UTC 2011


This patch adds byte run reporters for node and value metadata in the
hivexml program.  Each byte run represents the offset and length of a
data structure within the hive, one per node, and one or two per value
depending on the length of the value data.  In order to add this
metadata reporting, the following changes were put in place:

* Split value_key function into value_key and value_key_len.
* Add return types to the generator:  RSize, RLenNode, and RLenValue.
* Add support for the new return types to all language bindings,
including an extra OCaml and Python function.
* Add metadata length functions for nodes and values:
hivex_{node,value}_struct_length.
* Add an offset-&-length function for long value data.
* Have value data be reported in attributes instead of text.
* Add byte run functions to hivexml:  value_byte_runs and
node_byte_runs.

Signed-off-by: Alex Nelson <ajnelson at cs.ucsc.edu>
---
 generator/generator.ml |  169 +++++++++++++++++++++++++++++++++++++++++++++++-
 lib/hivex.c            |  115 +++++++++++++++++++++++++++++---
 xml/hivexml.c          |  127 ++++++++++++++++++++++++++++++++----
 3 files changed, 385 insertions(+), 26 deletions(-)

diff --git a/generator/generator.ml b/generator/generator.ml
index c98e625..b607421 100755
--- a/generator/generator.ml
+++ b/generator/generator.ml
@@ -45,11 +45,14 @@ and ret =
   | RErr                                (* 0 = ok, -1 = error *)
   | RErrDispose                         (* Disposes handle, see hivex_close. *)
   | RHive                               (* Returns a hive_h or NULL. *)
+  | RSize                               (* Returns size_t or 0. *)
   | RNode                               (* Returns hive_node_h or 0. *)
   | RNodeNotFound                       (* See hivex_node_get_child. *)
   | RNodeList                           (* Returns hive_node_h* or NULL. *)
+  | RLenNode                            (* See node_struct_length. *)
   | RValue                              (* Returns hive_value_h or 0. *)
   | RValueList                          (* Returns hive_value_h* or NULL. *)
+  | RLenValue                           (* See value_struct_length. *)
   | RString                             (* Returns char* or NULL. *)
   | RStringList                         (* Returns char** or NULL. *)
   | RLenType                            (* See hivex_value_type. *)
@@ -228,6 +231,17 @@ string C<\"\"> here.  The default key is often written C<\"@\">, but
 inside hives that has no meaning and won't give you the
 default key.";
 
+  "value_key_len", (RSize, [AHive; AValue "val"]),
+    "return the length of a value's key",
+    "\
+Return the length of the key (name) of a (key, value) pair.  The
+length can legitimately be 0, so errno is the necessary mechanism
+to check for errors.
+
+In the context of Windows Registries, a zero-length name means
+that this value is the default key for this node in the tree.
+This is usually written as C<\"@\">.";
+
   "value_key", (RString, [AHive; AValue "val"]),
     "return the key of a (key, value) pair",
     "\
@@ -250,6 +264,29 @@ information, and the value itself.  Also, C<hivex_value_*> functions
 below which can be used to return the value in a more useful form when
 you know the type in advance.";
 
+  "node_struct_length", (RSize, [AHive; ANode "node"]),
+    "return the length of a node",
+    "\
+Return the length of the node data structure.  Returns 0
+and sets errno on error.";
+
+  "value_struct_length", (RSize, [AHive; AValue "val"]),
+    "return the length of a value data structure",
+    "\
+Return the length of the value data structure.  Returns 0
+and sets errno on error.";
+
+  "value_data_cell_offset", (RLenValue, [AHive; AValue "val"]),
+    "return the offset and length of a value data cell",
+    "\
+Return the offset and length of the value's data cell, not value cell.
+E.g. if the value were \"foobar\" then the length would be 6, and the
+offset would be to a value data cell, which is the data prefixed with
+4 bytes describing the size.  If the length is not greater than 4,
+then 0 is returned, as the data are inline in the value.
+
+Returns 0 and sets errno on error.";
+
   "value_value", (RLenTypeVal, [AHive; AValue "val"]),
     "return data length, data type and data of a value",
     "\
@@ -858,6 +895,7 @@ and generate_c_prototype ?(extern = false) name style =
    | RErr -> pr "int "
    | RErrDispose -> pr "int "
    | RHive -> pr "hive_h *"
+   | RSize -> pr "size_t "
    | RNode -> pr "hive_node_h "
    | RNodeNotFound -> pr "hive_node_h "
    | RNodeList -> pr "hive_node_h *"
@@ -865,6 +903,8 @@ and generate_c_prototype ?(extern = false) name style =
    | RValueList -> pr "hive_value_h *"
    | RString -> pr "char *"
    | RStringList -> pr "char **"
+   | RLenNode -> pr "hive_node_h "
+   | RLenValue -> pr "hive_value_h "
    | RLenType -> pr "int "
    | RLenTypeVal -> pr "char *"
    | RInt32 -> pr "int32_t "
@@ -886,6 +926,8 @@ and generate_c_prototype ?(extern = false) name style =
   ) (snd style);
   (match fst style with
    | RLenType | RLenTypeVal -> pr ", hive_type *t, size_t *len"
+   | RLenNode -> pr ", size_t *len"
+   | RLenValue -> pr ", size_t *len"
    | _ -> ()
   );
   pr ");\n"
@@ -1046,6 +1088,10 @@ The hive handle must not be used again after calling this function.\n\n"
            pr "\
 Returns a new hive handle.
 On error this returns NULL and sets errno.\n\n"
+       | RSize ->
+           pr "\
+Returns a size.
+On error this returns 0 and sets errno.\n\n"
        | RNode ->
            pr "\
 Returns a node handle.
@@ -1084,6 +1130,14 @@ On error this returns NULL and sets errno.\n\n"
            pr "\
 Returns 0 on success.
 On error this returns -1 and sets errno.\n\n"
+       | RLenNode ->
+           pr "\
+Returns a positive number on success.
+On error this returns 0 and sets errno.\n\n"
+       | RLenValue ->
+           pr "\
+Returns a positive number on success.
+On error this returns 0 and sets errno.\n\n"
        | RLenTypeVal ->
            pr "\
 The value is returned as an array of bytes (of length C<len>).
@@ -1586,6 +1640,7 @@ and generate_ocaml_prototype ?(is_external = false) name style =
    | RErr -> pr "unit" (* all errors are turned into exceptions *)
    | RErrDispose -> pr "unit"
    | RHive -> pr "t"
+   | RSize -> pr "size"
    | RNode -> pr "node"
    | RNodeNotFound -> pr "node"
    | RNodeList -> pr "node array"
@@ -1594,6 +1649,8 @@ and generate_ocaml_prototype ?(is_external = false) name style =
    | RString -> pr "string"
    | RStringList -> pr "string array"
    | RLenType -> pr "hive_type * int"
+   | RLenNode -> pr "node"
+   | RLenValue -> pr "value"
    | RLenTypeVal -> pr "hive_type * string"
    | RInt32 -> pr "int32"
    | RInt64 -> pr "int64"
@@ -1679,6 +1736,8 @@ static void raise_closed (const char *) Noreturn;
       let c_params =
         match fst style with
         | RLenType | RLenTypeVal -> c_params @ [["&t"; "&len"]]
+        | RLenNode -> c_params @ [["&len"]]
+        | RLenValue -> c_params @ [["&len"]]
         | _ -> c_params in
       let c_params = List.concat c_params in
 
@@ -1735,6 +1794,7 @@ static void raise_closed (const char *) Noreturn;
         | RErr -> pr "  int r;\n"; "-1"
         | RErrDispose -> pr "  int r;\n"; "-1"
         | RHive -> pr "  hive_h *r;\n"; "NULL"
+        | RSize -> pr "  size_t r;\n"; "0"
         | RNode -> pr "  hive_node_h r;\n"; "0"
         | RNodeNotFound ->
             pr "  errno = 0;\n";
@@ -1750,6 +1810,14 @@ static void raise_closed (const char *) Noreturn;
             pr "  size_t len;\n";
             pr "  hive_type t;\n";
             "-1"
+        | RLenNode ->
+            pr "  int r;\n";
+            pr "  size_t len;\n";
+            "0"
+        | RLenValue ->
+            pr "  int r;\n";
+            pr "  size_t len;\n";
+            "0"
         | RLenTypeVal ->
             pr "  char *r;\n";
             pr "  size_t len;\n";
@@ -1808,6 +1876,7 @@ static void raise_closed (const char *) Noreturn;
        | RErr -> pr "  rv = Val_unit;\n"
        | RErrDispose -> pr "  rv = Val_unit;\n"
        | RHive -> pr "  rv = Val_hiveh (r);\n"
+       | RSize -> pr "  rv = Val_int (r);\n"
        | RNode -> pr "  rv = Val_int (r);\n"
        | RNodeNotFound ->
            pr "  if (r == 0)\n";
@@ -1829,6 +1898,8 @@ static void raise_closed (const char *) Noreturn;
            pr "  for (int i = 0; r[i] != NULL; ++i) free (r[i]);\n";
            pr "  free (r);\n"
        | RLenType -> pr "  rv = copy_type_len (len, t);\n"
+       | RLenNode -> pr "  rv = copy_len (len);\n"
+       | RLenValue -> pr "  rv = copy_len (len);\n"
        | RLenTypeVal ->
            pr "  rv = copy_type_value (r, len, t);\n";
            pr "  free (r);\n"
@@ -1951,6 +2022,18 @@ copy_type_len (size_t len, hive_type t)
 }
 
 static value
+copy_len (size_t len)
+{
+  /* Wrap len in a one-field block; the name must match the copy_len call sites. */
+  CAMLparam0 ();
+  CAMLlocal1 (rv);
+
+  rv = caml_alloc (1, 0);
+  Store_field (rv, 0, Val_int (len));
+  CAMLreturn (rv);
+}
+
+static value
 copy_type_value (const char *r, size_t len, hive_type t)
 {
   CAMLparam0 ();
@@ -2140,9 +2223,14 @@ sub open {
 	 | RString
 	 | RStringList
 	 | RLenType
+	 | RLenNode
+	 | RLenValue
 	 | RLenTypeVal
 	 | RInt32
 	 | RInt64 -> ()
+	 | RSize ->
+             pr "\
+This returns a size.\n\n"
 	 | RNode ->
 	     pr "\
 This returns a node handle.\n\n"
@@ -2202,6 +2290,7 @@ and generate_perl_prototype name style =
    | RErr
    | RErrDispose -> ()
    | RHive -> pr "$h = "
+   | RSize -> pr "$size = "
    | RNode
    | RNodeNotFound -> pr "$node = "
    | RNodeList -> pr "@nodes = "
@@ -2210,6 +2299,8 @@ and generate_perl_prototype name style =
    | RString -> pr "$string = "
    | RStringList -> pr "@strings = "
    | RLenType -> pr "($type, $len) = "
+   | RLenNode -> pr "($len) = "
+   | RLenValue -> pr "($len) = "
    | RLenTypeVal -> pr "($type, $data) = "
    | RInt32 -> pr "$int32 = "
    | RInt64 -> pr "$int64 = "
@@ -2424,6 +2515,7 @@ DESTROY (h)
 	 | RErr -> pr "void\n"
 	 | RErrDispose -> failwith "perl bindings cannot handle a call which disposes of the handle"
 	 | RHive -> failwith "perl bindings cannot handle a call which returns a handle"
+	 | RSize
 	 | RNode
 	 | RNodeNotFound
 	 | RValue
@@ -2432,6 +2524,8 @@ DESTROY (h)
 	 | RValueList
 	 | RStringList
 	 | RLenType
+	 | RLenNode
+	 | RLenValue
 	 | RLenTypeVal -> pr "void\n"
 	 | RInt32 -> pr "SV *\n"
 	 | RInt64 -> pr "SV *\n"
@@ -2500,6 +2594,7 @@ DESTROY (h)
 	 | RErrDispose -> assert false
 	 | RHive -> assert false
 
+	 | RSize
 	 | RNode
 	 | RValue ->
              pr "PREINIT:\n";
@@ -2603,6 +2698,34 @@ DESTROY (h)
 	     pr "      PUSHs (sv_2mortal (newSViv (type)));\n";
 	     pr "      PUSHs (sv_2mortal (newSViv (len)));\n";
 
+	 | RLenNode ->  (* XS glue: call the C function, croak with strerror on 0, push len *)
+	     pr "PREINIT:\n";
+	     pr "      hive_node_h r;\n";
+	     pr "      size_t len;\n";
+	     pr " PPCODE:\n";
+             pr "      r = hivex_%s (%s, &len);\n"
+	       name (String.concat ", " c_params);
+	     free_args ();
+             pr "      if (r == 0)\n";
+             pr "        croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
+	       name;
+	     pr "      EXTEND (SP, 1);\n";
+	     pr "      PUSHs (sv_2mortal (newSViv (len)));\n";
+
+	 | RLenValue ->  (* XS glue: 0 is only an error when errno is set (inline data returns 0) *)
+	     pr "PREINIT:\n";
+	     pr "      hive_value_h r;\n";
+	     pr "      size_t len;\n";
+	     pr " PPCODE:\n";
+             pr "      errno = 0;\n      r = hivex_%s (%s, &len);\n"
+	       name (String.concat ", " c_params);
+	     free_args ();
+             pr "      if (r == 0 && errno != 0)\n";
+             pr "        croak (\"%%s: %%s\", \"%s\", strerror (errno));\n"
+	       name;
+	     pr "      EXTEND (SP, 1);\n";
+	     pr "      PUSHs (sv_2mortal (newSViv (len)));\n";
+
 	 | RLenTypeVal ->
 	     pr "PREINIT:\n";
 	     pr "      char *r;\n";
@@ -2841,6 +2964,14 @@ put_len_type (size_t len, hive_type t)
 }
 
 static PyObject *
+put_len (size_t len)
+{
+  PyObject *r = PyTuple_New (1);
+  PyTuple_SetItem (r, 0, PyLong_FromLongLong ((long) len));
+  return r;
+}
+
+static PyObject *
 put_val_type (char *val, size_t len, hive_type t)
 {
   PyObject *r = PyTuple_New (2);
@@ -2864,6 +2995,7 @@ put_val_type (char *val, size_t len, hive_type t)
         | RErr -> pr "  int r;\n"; "-1"
 	| RErrDispose -> pr "  int r;\n"; "-1"
 	| RHive -> pr "  hive_h *r;\n"; "NULL"
+        | RSize -> pr "  size_t r;\n"; "0"
         | RNode -> pr "  hive_node_h r;\n"; "0"
         | RNodeNotFound ->
             pr "  errno = 0;\n";
@@ -2879,6 +3011,14 @@ put_val_type (char *val, size_t len, hive_type t)
             pr "  size_t len;\n";
             pr "  hive_type t;\n";
             "-1"
+        | RLenNode ->
+            pr "  int r;\n";
+            pr "  size_t len;\n";
+            "0"
+        | RLenValue ->
+            pr "  int r;\n";
+            pr "  size_t len;\n";
+            "0"
         | RLenTypeVal ->
             pr "  char *r;\n";
             pr "  size_t len;\n";
@@ -2903,6 +3043,7 @@ put_val_type (char *val, size_t len, hive_type t)
       let c_params =
         match fst style with
         | RLenType | RLenTypeVal -> c_params @ ["&t"; "&len"]
+        | RLenNode | RLenValue -> c_params @ ["&len"]
         | _ -> c_params in
 
       List.iter (
@@ -3023,6 +3164,7 @@ put_val_type (char *val, size_t len, hive_type t)
            pr "  py_r = Py_None;\n"
        | RHive ->
            pr "  py_r = put_handle (r);\n"
+       | RSize
        | RNode ->
            pr "  py_r = PyLong_FromLongLong (r);\n"
        | RNodeNotFound ->
@@ -3046,6 +3188,10 @@ put_val_type (char *val, size_t len, hive_type t)
            pr "  free_strings (r);\n"
        | RLenType ->
            pr "  py_r = put_len_type (len, t);\n"
+       | RLenNode ->
+           pr "  py_r = put_len (len);\n"
+       | RLenValue ->
+           pr "  py_r = put_len (len);\n"
        | RLenTypeVal ->
            pr "  py_r = put_val_type (r, len, t);\n";
            pr "  free (r);\n"
@@ -3249,13 +3395,15 @@ get_values (VALUE valuesv, size_t *nr_values)
           match ret with
           | RErr | RErrDispose -> "nil"
           | RHive -> "Hivex::Hivex"
-          | RNode | RNodeNotFound -> "integer"
+          | RSize | RNode | RNodeNotFound -> "integer"
           | RNodeList -> "list"
           | RValue -> "integer"
           | RValueList -> "list"
           | RString -> "string"
           | RStringList -> "list"
           | RLenType -> "hash"
+          | RLenNode -> "integer"
+          | RLenValue -> "integer"
           | RLenTypeVal -> "hash"
           | RInt32 -> "integer"
           | RInt64 -> "integer" in
@@ -3338,6 +3486,7 @@ get_values (VALUE valuesv, size_t *nr_values)
         | RErr -> pr "  int r;\n"; "-1"
 	| RErrDispose -> pr "  int r;\n"; "-1"
 	| RHive -> pr "  hive_h *r;\n"; "NULL"
+        | RSize -> pr "  size_t r;\n"; "0"
         | RNode -> pr "  hive_node_h r;\n"; "0"
         | RNodeNotFound ->
             pr "  errno = 0;\n";
@@ -3353,6 +3502,14 @@ get_values (VALUE valuesv, size_t *nr_values)
             pr "  size_t len;\n";
             pr "  hive_type t;\n";
             "-1"
+        | RLenNode ->
+            pr "  int r;\n";
+            pr "  size_t len;\n";
+            "0"
+        | RLenValue ->
+            pr "  int r;\n";
+            pr "  size_t len;\n";
+            "0"
         | RLenTypeVal ->
             pr "  char *r;\n";
             pr "  size_t len;\n";
@@ -3377,6 +3534,7 @@ get_values (VALUE valuesv, size_t *nr_values)
       let c_params =
         match ret with
         | RLenType | RLenTypeVal -> c_params @ [["&t"; "&len"]]
+        | RLenNode | RLenValue -> c_params @ [["&len"]]
         | _ -> c_params in
       let c_params = List.concat c_params in
 
@@ -3418,6 +3576,7 @@ get_values (VALUE valuesv, size_t *nr_values)
         pr "  return Qnil;\n"
       | RHive ->
         pr "  return Data_Wrap_Struct (c_hivex, NULL, ruby_hivex_free, r);\n"
+      | RSize
       | RNode
       | RValue
       | RInt64 ->
@@ -3457,6 +3616,14 @@ get_values (VALUE valuesv, size_t *nr_values)
         pr "  rb_hash_aset (rv, ID2SYM (rb_intern (\"len\")), INT2NUM (len));\n";
         pr "  rb_hash_aset (rv, ID2SYM (rb_intern (\"type\")), INT2NUM (t));\n";
         pr "  return rv;\n"
+      | RLenNode ->
+        pr "  VALUE rv = rb_hash_new ();\n";
+        pr "  rb_hash_aset (rv, ID2SYM (rb_intern (\"len\")), INT2NUM (len));\n";
+        pr "  return rv;\n"
+      | RLenValue ->
+        pr "  VALUE rv = rb_hash_new ();\n";
+        pr "  rb_hash_aset (rv, ID2SYM (rb_intern (\"len\")), INT2NUM (len));\n";
+        pr "  return rv;\n"
       | RLenTypeVal ->
         pr "  VALUE rv = rb_hash_new ();\n";
         pr "  rb_hash_aset (rv, ID2SYM (rb_intern (\"len\")), INT2NUM (len));\n";
diff --git a/lib/hivex.c b/lib/hivex.c
index 4b9fcf0..04ceed3 100644
--- a/lib/hivex.c
+++ b/lib/hivex.c
@@ -585,6 +585,30 @@ hivex_root (hive_h *h)
   return ret;
 }
 
+size_t
+hivex_node_struct_length (hive_h *h, hive_node_h node)
+{
+  if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
+    errno = EINVAL;
+    return 0;
+  }
+
+  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
+  size_t name_len = le16toh (nk->name_len);
+  /* -1 to avoid double-counting the first name character */
+  size_t ret = name_len + sizeof (struct ntreg_nk_record) - 1;
+  int used;
+  size_t seg_len = block_len (h, node, &used);
+  if (ret > seg_len) {
+    if (h->msglvl >= 2)
+      fprintf (stderr, "hivex_node_struct_length: returning EFAULT because"
+               " node name is too long (%zu, %zu)\n", name_len, seg_len);
+    errno = EFAULT;
+    return 0;
+  }
+  return ret;
+}
+
 char *
 hivex_node_name (hive_h *h, hive_node_h node)
 {
@@ -1189,8 +1213,17 @@ hivex_node_get_value (hive_h *h, hive_node_h node, const char *key)
   return ret;
 }
 
-char *
-hivex_value_key (hive_h *h, hive_value_h value)
+size_t
+hivex_value_struct_length (hive_h *h, hive_value_h value) {
+  errno = 0;  /* key length 0 is valid, so only a freshly set errno signals failure */
+  size_t key_len = hivex_value_key_len (h, value);
+  if (key_len == 0 && errno != 0)
+    return 0;
+  return key_len + sizeof (struct ntreg_vk_record) - 1;  /* -1: avoid double-counting the first name character */
+}
+
+size_t
+hivex_value_key_len (hive_h *h, hive_value_h value)
 {
   if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
     errno = EINVAL;
@@ -1199,24 +1232,39 @@ hivex_value_key (hive_h *h, hive_value_h value)
 
   struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
 
-  /* AFAIK the key is always plain ASCII, so no conversion to UTF-8 is
-   * necessary.  However we do need to nul-terminate the string.
-   */
-
   /* vk->name_len is unsigned, 16 bit, so this is safe ...  However
    * we have to make sure the length doesn't exceed the block length.
    */
-  size_t len = le16toh (vk->name_len);
+  size_t ret = le16toh (vk->name_len);
   size_t seg_len = block_len (h, value, NULL);
-  if (sizeof (struct ntreg_vk_record) + len - 1 > seg_len) {
+  if (sizeof (struct ntreg_vk_record) + ret - 1 > seg_len) {
     if (h->msglvl >= 2)
-      fprintf (stderr, "hivex_value_key: returning EFAULT"
+      fprintf (stderr, "hivex_value_key_len: returning EFAULT"
                " because key length is too long (%zu, %zu)\n",
-               len, seg_len);
+               ret, seg_len);
     errno = EFAULT;
-    return NULL;
+    return 0;
+  }
+  return ret;
+}
+
+char *
+hivex_value_key (hive_h *h, hive_value_h value)
+{
+  if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
+    errno = EINVAL;
+    return 0;
   }
 
+  struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
+
+  /* AFAIK the key is always plain ASCII, so no conversion to UTF-8 is
+   * necessary.  However we do need to nul-terminate the string.
+   */
+  size_t len = hivex_value_key_len (h, value);
+  if (errno != 0)
+    return NULL;
+
   char *ret = malloc (len + 1);
   if (ret == NULL)
     return NULL;
@@ -1246,6 +1294,51 @@ hivex_value_type (hive_h *h, hive_value_h value, hive_type *t, size_t *len)
   return 0;
 }
 
+/* Locate VALUE's out-of-line data cell: returns its offset and stores its length in *LEN. */
+hive_value_h
+hivex_value_data_cell_offset (hive_h *h, hive_value_h value, size_t *len)
+{
+  if (!IS_VALID_BLOCK (h, value) || !BLOCK_ID_EQ (h, value, "vk")) {
+    errno = EINVAL;
+    return 0;
+  }
+
+  struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + value);
+
+  /* The top bit of data_len flags inline data; the low 31 bits are the length. */
+  size_t data_len = le32toh (vk->data_len);
+  int is_inline = !!(data_len & 0x80000000);
+  data_len &= 0x7fffffff;
+
+  if (is_inline && data_len > 4) {
+    errno = ENOTSUP;
+    return 0;
+  }
+
+  if (is_inline) {
+    /* There is no other location for the value data. */
+    if (len)
+      *len = 0;
+    errno = 0;  /* 0 is also the error return; a clear errno marks this as success */
+    return 0;
+  } else {
+    if (len)
+      *len = data_len + 4;  /* Include 4 header length bytes */
+  }
+
+  size_t data_offset = le32toh (vk->data_offset);
+  data_offset += 0x1000;  /* Add 0x1000 because everything's off by 4KiB */
+  if (!IS_VALID_BLOCK (h, data_offset)) {
+    if (h->msglvl >= 2)
+      fprintf (stderr, "hivex_value_data_cell_offset: returning EFAULT because data "
+               "offset is not a valid block (0x%zx)\n",
+               data_offset);
+    errno = EFAULT;
+    return 0;
+  }
+  return data_offset;
+}
+
 char *
 hivex_value_value (hive_h *h, hive_value_h value,
                    hive_type *t_rtn, size_t *len_rtn)
diff --git a/xml/hivexml.c b/xml/hivexml.c
index f29c80c..db9cd7d 100644
--- a/xml/hivexml.c
+++ b/xml/hivexml.c
@@ -194,11 +194,39 @@ filetime_to_8601 (int64_t windows_ticks)
   return ret;
 }
 
+#define BYTE_RUN_BUF_LEN 32
+
+/* Write a <byte_runs> element holding the single <byte_run offset len> of NODE. */
+static int
+node_byte_runs (hive_h *h, void *writer_v, hive_node_h node)
+{
+  xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  char buf[1+BYTE_RUN_BUF_LEN];
+  size_t node_struct_length = hivex_node_struct_length (h, node);
+  if (node_struct_length == 0) {  /* 0 is the error return; do not rely on a possibly stale errno */
+    if (errno == EINVAL) {
+      fprintf (stderr, "node_byte_runs: Invoked on what does not seem to be a node (%zu).\n", node);
+    }
+    return -1;
+  }
+  /* A node has one byte run. */
+  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "byte_runs"));
+  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "byte_run"));
+  snprintf (buf, sizeof buf, "%zu", node);  /* %zu, as in the fprintf above; %d mismatches the type */
+  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "offset", BAD_CAST buf));
+  snprintf (buf, sizeof buf, "%zu", node_struct_length);
+  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "len", BAD_CAST buf));
+  XML_CHECK (xmlTextWriterEndElement, (writer));
+  XML_CHECK (xmlTextWriterEndElement, (writer));
+  return 0;
+}
+
 static int
 node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
 {
   int64_t last_modified;
   char *timebuf;
+  int ret = 0;
 
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
   XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "node"));
@@ -219,7 +247,8 @@ node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
     }
   }
 
-  return 0;
+  ret = node_byte_runs (h, writer_v, node);
+  return ret;
 }
 
 static int
@@ -251,11 +280,52 @@ end_value (xmlTextWriterPtr writer)
 }
 
 static int
+value_byte_runs (hive_h *h, void *writer_v, hive_value_h value) {
+  /* Emit <byte_runs>: one run for the value record, a second for an out-of-line data cell. */
+  xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  char buf[1+BYTE_RUN_BUF_LEN];
+  size_t value_data_cell_length;
+  errno = 0;  /* both lookups report failure through errno; discard any stale value */
+  size_t value_data_structure_length = hivex_value_struct_length (h, value);
+  if (value_data_structure_length == 0) {
+    if (errno == EINVAL)
+      fprintf (stderr, "value_byte_runs: Invoked on what does not seem to be a value (%zu).\n", value);
+    return -1;
+  }
+  errno = 0;
+  hive_value_h value_data_cell_offset = hivex_value_data_cell_offset (h, value, &value_data_cell_length);
+  if (value_data_cell_offset == 0 && errno != 0)  /* offset 0 with errno clear: data is inline */
+    return -1;
+
+  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "byte_runs"));
+  /* Write first byte run for data structure */
+  XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "byte_run"));
+  snprintf (buf, sizeof buf, "%zu", value);  /* %zu, as in the fprintf above; %d mismatches the type */
+  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "offset", BAD_CAST buf));
+  snprintf (buf, sizeof buf, "%zu", value_data_structure_length);
+  XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "len", BAD_CAST buf));
+  XML_CHECK (xmlTextWriterEndElement, (writer));
+
+  /* Write second byte run for longer values */
+  if (value_data_cell_length > 4) {
+    XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "byte_run"));
+    snprintf (buf, sizeof buf, "%zu", value_data_cell_offset);
+    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "offset", BAD_CAST buf));
+    snprintf (buf, sizeof buf, "%zu", value_data_cell_length);
+    XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST "len", BAD_CAST buf));
+    XML_CHECK (xmlTextWriterEndElement, (writer));
+  }
+  XML_CHECK (xmlTextWriterEndElement, (writer));
+  return 0;
+}
+
+static int
 value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
               hive_type t, size_t len, const char *key, const char *str)
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
   const char *type;
+  int ret = 0;
 
   switch (t) {
   case hive_t_string: type = "string"; break;
@@ -278,9 +348,12 @@ value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
   }
 
   start_value (writer, key, type, NULL);
-  XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
+  XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
+  XML_CHECK (xmlTextWriterWriteString, (writer, str));
+  XML_CHECK (xmlTextWriterEndAttribute, (writer));
+  ret = value_byte_runs (h, writer_v, value);
   end_value (writer);
-  return 0;
+  return ret;
 }
 
 static int
@@ -289,6 +362,7 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h node,
                         const char *key, char **argv)
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  int ret = 0;
   start_value (writer, key, "string-list", NULL);
 
   size_t i;
@@ -298,8 +372,9 @@ value_multiple_strings (hive_h *h, void *writer_v, hive_node_h node,
     XML_CHECK (xmlTextWriterEndElement, (writer));
   }
 
+  ret = value_byte_runs (h, writer_v, value);
   end_value (writer);
-  return 0;
+  return ret;
 }
 
 static int
@@ -310,6 +385,7 @@ value_string_invalid_utf16 (hive_h *h, void *writer_v, hive_node_h node,
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
   const char *type;
+  int ret = 0;
 
   switch (t) {
   case hive_t_string: type = "bad-string"; break;
@@ -332,10 +408,13 @@ value_string_invalid_utf16 (hive_h *h, void *writer_v, hive_node_h node,
   }
 
   start_value (writer, key, type, "base64");
+  XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
   XML_CHECK (xmlTextWriterWriteBase64, (writer, str, 0, len));
+  XML_CHECK (xmlTextWriterEndAttribute, (writer));
+  ret = value_byte_runs (h, writer_v, value);
   end_value (writer);
 
-  return 0;
+  return ret;
 }
 
 static int
@@ -343,10 +422,12 @@ value_dword (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
              hive_type t, size_t len, const char *key, int32_t v)
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  int ret = 0;
   start_value (writer, key, "int32", NULL);
-  XML_CHECK (xmlTextWriterWriteFormatString, (writer, "%" PRIi32, v));
+  XML_CHECK (xmlTextWriterWriteFormatAttribute, (writer, BAD_CAST "value", "%" PRIi32, v));
+  ret = value_byte_runs (h, writer_v, value);
   end_value (writer);
-  return 0;
+  return ret;
 }
 
 static int
@@ -354,10 +435,12 @@ value_qword (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
              hive_type t, size_t len, const char *key, int64_t v)
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  int ret = 0;
   start_value (writer, key, "int64", NULL);
-  XML_CHECK (xmlTextWriterWriteFormatString, (writer, "%" PRIi64, v));
+  XML_CHECK (xmlTextWriterWriteFormatAttribute, (writer, "value", "%" PRIi64, v));
+  ret = value_byte_runs (h, writer_v, value);
   end_value (writer);
-  return 0;
+  return ret;
 }
 
 static int
@@ -365,10 +448,14 @@ value_binary (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
               hive_type t, size_t len, const char *key, const char *v)
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  int ret = 0;
   start_value (writer, key, "binary", "base64");
+  XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
   XML_CHECK (xmlTextWriterWriteBase64, (writer, v, 0, len));
+  XML_CHECK (xmlTextWriterEndAttribute, (writer));
+  ret = value_byte_runs (h, writer_v, value);
   end_value (writer);
-  return 0;
+  return ret;
 }
 
 static int
@@ -376,10 +463,16 @@ value_none (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
             hive_type t, size_t len, const char *key, const char *v)
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
+  int ret = 0;
   start_value (writer, key, "none", "base64");
-  if (len > 0) XML_CHECK (xmlTextWriterWriteBase64, (writer, v, 0, len));
+  if (len > 0) {
+    XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
+    XML_CHECK (xmlTextWriterWriteBase64, (writer, v, 0, len));
+    XML_CHECK (xmlTextWriterEndAttribute, (writer));
+    ret = value_byte_runs (h, writer_v, value);
+  }
   end_value (writer);
-  return 0;
+  return ret;
 }
 
 static int
@@ -388,6 +481,7 @@ value_other (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
 {
   xmlTextWriterPtr writer = (xmlTextWriterPtr) writer_v;
   const char *type;
+  int ret = 0;
 
   switch (t) {
   case hive_t_none:
@@ -410,8 +504,13 @@ value_other (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
   }
 
   start_value (writer, key, type, "base64");
-  if (len > 0) XML_CHECK (xmlTextWriterWriteBase64, (writer, v, 0, len));
+  if (len > 0) {
+    XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
+    XML_CHECK (xmlTextWriterWriteBase64, (writer, v, 0, len));
+    XML_CHECK (xmlTextWriterEndAttribute, (writer));
+    ret = value_byte_runs (h, writer_v, value);
+  }
   end_value (writer);
 
-  return 0;
+  return ret;
 }
-- 
1.7.6






More information about the Libguestfs mailing list