[Libguestfs] [PATCH 14/13 NOT FOR REVIEW] hivex: Implement writing to hives.

Richard W.M. Jones rjones at redhat.com
Thu Jan 28 10:23:44 UTC 2010


This final patch actually implements writing to hives.  It is not
complete yet because although it works as far as our tools are
concerned, Windows ignores any new values added to a node, for reasons
which we don't yet understand.  Therefore I am continuing to reverse-
engineer the hive format itself so that we fully understand all the
fields.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
New in Fedora 11: Fedora Windows cross-compiler. Compile Windows
programs, test, and build Windows installers. Over 70 libraries supprt'd
http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw
-------------- next part --------------
>From 04cfc3dd9aae969272e810f9a6a66b73d0cf93ba Mon Sep 17 00:00:00 2001
From: Richard Jones <rjones at redhat.com>
Date: Mon, 18 Jan 2010 13:36:20 +0000
Subject: [PATCH] hivex: Implement writing to hives.

---
 .gitignore                  |    3 +
 hivex/Makefile.am           |   35 +++-
 hivex/README                |    2 +-
 hivex/example1.c            |   46 ++++
 hivex/example2.c            |   86 +++++++
 hivex/hivex.c               |  543 ++++++++++++++++++++++++++++++++++++++++++-
 hivex/hivex.h               |   14 ++
 hivex/hivex.pod             |  143 ++++++++++++
 hivex/visualizer.ml         |  531 ++++++++++++++++++++++++++++++++++++++++++
 hivex/visualizer_NT_time.ml |   30 +++
 hivex/visualizer_utils.ml   |  124 ++++++++++
 m4/.gitignore               |    1 +
 po/POTFILES.in              |    2 +
 13 files changed, 1557 insertions(+), 3 deletions(-)
 create mode 100644 hivex/example1.c
 create mode 100644 hivex/example2.c
 create mode 100644 hivex/visualizer.ml
 create mode 100644 hivex/visualizer_NT_time.ml
 create mode 100644 hivex/visualizer_utils.ml

diff --git a/.gitignore b/.gitignore
index 829f807..d066611 100644
--- a/.gitignore
+++ b/.gitignore
@@ -82,8 +82,11 @@ haskell/Guestfs.hs
 *.hi
 hivex/*.1
 hivex/*.3
+hivex/example1
+hivex/example2
 hivex/hivexget
 hivex/hivexml
+hivex/visualizer.opt
 html/guestfish.1.html
 html/guestfs.3.html
 html/guestmount.1.html
diff --git a/hivex/Makefile.am b/hivex/Makefile.am
index a2be7e3..c8a7cf6 100644
--- a/hivex/Makefile.am
+++ b/hivex/Makefile.am
@@ -15,7 +15,14 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 
-EXTRA_DIST = hivex.pod hivexml.pod hivexget.pod LICENSE
+EXTRA_DIST = \
+	hivex.pod \
+	hivexml.pod \
+	hivexget.pod \
+	LICENSE \
+	visualizer.ml \
+	visualizer_utils.ml \
+	visualizer_NT_time.ml
 
 lib_LTLIBRARIES = libhivex.la
 
@@ -28,6 +35,7 @@ libhivex_la_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS)
 libhivex_la_CPPFLAGS = -I$(top_srcdir)/gnulib/lib
 
 bin_PROGRAMS = hivexml hivexget
+noinst_PROGRAMS = example1 example2
 
 hivexml_SOURCES = \
   hivexml.c
@@ -44,6 +52,20 @@ hivexget_LDADD = libhivex.la ../gnulib/lib/libgnu.la
 hivexget_CFLAGS = \
   $(WARN_CFLAGS) $(WERROR_CFLAGS)
 
+example1_SOURCES = \
+  example1.c
+
+example1_LDADD = libhivex.la ../gnulib/lib/libgnu.la
+example1_CFLAGS = \
+  $(WARN_CFLAGS) $(WERROR_CFLAGS)
+
+example2_SOURCES = \
+  example2.c
+
+example2_LDADD = libhivex.la ../gnulib/lib/libgnu.la
+example2_CFLAGS = \
+  $(WARN_CFLAGS) $(WERROR_CFLAGS)
+
 man_MANS = hivex.3 hivexml.1 hivexget.1
 
 hivex.3: hivex.pod
@@ -98,3 +120,14 @@ $(top_builddir)/html/hivexget.1.html: hivexget.pod
 	  --htmldir html \
 	  --outfile html/hivexget.1.html \
 	  hivex/hivexget.pod
+
+# OCaml Windows Registry visualizer.  This was used while reverse
+# engineering the hive format, and is not normally compiled.  If you
+# do wish to compile it, you'll need ocaml-bitstring-devel and
+# ocaml-extlib-devel.  Also you'll need a collection of hive files
+# from Windows machines to experiment with.
+
+visualizer.opt: visualizer_utils.ml visualizer_NT_time.ml visualizer.ml
+	ocamlfind ocamlopt \
+	  -package bitstring,bitstring.syntax,extlib \
+	  -syntax camlp4 -linkpkg $^ -o $@
diff --git a/hivex/README b/hivex/README
index 583d351..0aebc8a 100644
--- a/hivex/README
+++ b/hivex/README
@@ -15,7 +15,7 @@ This library was derived from several sources:
 
  . NTREG registry reader/writer library by Petter Nordahl-Hagen
     (LGPL v2.1 licensed library and program)
- . http://home.eunet.no/pnordahl/ntpasswd/WinReg.txt
+ . http://pogostick.net/~pnh/ntpasswd/WinReg.txt
  . dumphive (a BSD-licensed Pascal program by Markus Stephany)
  . http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf
  . editreg program from Samba - this program was removed in later
diff --git a/hivex/example1.c b/hivex/example1.c
new file mode 100644
index 0000000..18c847d
--- /dev/null
+++ b/hivex/example1.c
@@ -0,0 +1,46 @@
+/* Example program which loads and saves a hive.
+ * This example may be freely copied and modified without restrictions.
+ *
+ * The intention of this example is just to check that we can do this
+ * without corrupting the hive (header etc).
+ *
+ * NB: The copy of the hive will not be absolutely identical.  The
+ * sequence numbers in the header will change.  If we implement the
+ * last modified field in the header, then that and the checksum will
+ * also change.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "hivex.h"
+
+int
+main (int argc, char *argv[])
+{
+  if (argc != 3) {
+    fprintf (stderr, "example1 hive.orig hive.new\n");
+    exit (EXIT_FAILURE);
+  }
+  char *orig = argv[1];
+  char *newf = argv[2];
+  const char *failed = orig;    /* file named in any error message */
+  hive_h *h = hivex_open (orig, HIVEX_OPEN_WRITE /*| HIVEX_OPEN_DEBUG*/);
+  if (h == NULL) {
+  error:
+    perror (failed);
+    exit (EXIT_FAILURE);
+  }
+  failed = newf;                /* hivex_commit writes the new file */
+  if (hivex_commit (h, newf, 0) == -1)
+    goto error;
+  failed = orig;
+  if (hivex_close (h) == -1)
+    goto error;
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/hivex/example2.c b/hivex/example2.c
new file mode 100644
index 0000000..5b1cb17
--- /dev/null
+++ b/hivex/example2.c
@@ -0,0 +1,86 @@
+/* Example program which modifies a hive.
+ * This example may be freely copied and modified without restrictions.
+ *
+ * You need to supply the 'software' hive from a Windows distribution
+ * (usually in C:\windows\system32\config\software).  This hive
+ * contains a node '\Classes\*'.  This program removes existing (key,
+ * value) pairs at this node and replaces them with some example
+ * values.
+ *
+ * You can load the modified hive using another tool to see the
+ * changes.  eg. Using Windows regedit, select HKLM and then in the
+ * File menu choose "Load Hive ...".  Point to the updated hive, and
+ * then give a key (eg. "test1").  The modified hive will be loaded
+ * under HKLM\test1 and the modified class can be inspected under
+ * HKLM\test1\Classes\*.  After inspecting the changes, unload the
+ * hive using File -> Unload Hive.
+ *
+ * Don't replace the original Windows 'software' hive, else you'll
+ * break things :-)
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "hivex.h"
+
+int
+main (int argc, char *argv[])
+{
+  if (argc != 3) {
+    fprintf (stderr, "example2 software software.new\n");
+    exit (EXIT_FAILURE);
+  }
+  char *orig = argv[1];
+  char *newf = argv[2];
+
+  hive_h *h = hivex_open (orig, HIVEX_OPEN_WRITE | HIVEX_OPEN_DEBUG);
+  if (h == NULL) {
+  error:
+    perror (orig);
+    exit (EXIT_FAILURE);
+  }
+
+  /* Navigate to the desired node. */
+  hive_node_h root = hivex_root (h);
+  if (!root)
+    goto error;
+
+  hive_node_h node_cl = hivex_node_get_child (h, root, "Classes");
+  if (!node_cl) {
+    fprintf (stderr, "%s: cannot find node \\Classes\n", orig);
+    exit (EXIT_FAILURE);
+  }
+
+  hive_node_h node_star = hivex_node_get_child (h, node_cl, "*");
+  if (!node_star) {
+    fprintf (stderr, "%s: cannot find node \\Classes\\*\n", orig);
+    exit (EXIT_FAILURE);
+  }
+
+  /* The value bytes are stored verbatim in the hive, hence the spelt-out
+   * 16 bit characters and the little endian dword below. */
+  const hive_set_value values[] = {
+    { "A", hive_t_string, 8, "a\0b\0c\0d\0" },
+    { "B", hive_t_dword, 4, "\x78\x56\x34\x12" /* little endian 0x12345678 */ },
+    { "C", hive_t_string, 32, "d\0c\0b\0a\0d\0c\0b\0a\0d\0c\0b\0a\0a\0b\0c\0d\0" },
+  };
+  const int nr_values = sizeof values / sizeof values[0];
+  printf ("setting %d new values in node \\Classes\\* ...\n", nr_values);
+
+  if (hivex_node_set_values (h, node_star, nr_values, values, 0) == -1)
+    goto error;
+  printf ("committing changes to new file %s ...\n", newf);
+
+  if (hivex_commit (h, newf, 0) == -1)
+    goto error;
+
+  if (hivex_close (h) == -1)
+    goto error;
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/hivex/hivex.c b/hivex/hivex.c
index 43d5788..b0657e3 100644
--- a/hivex/hivex.c
+++ b/hivex/hivex.c
@@ -41,6 +41,7 @@
 #endif
 
 #include "full-read.h"
+#include "full-write.h"
 
 #ifndef O_CLOEXEC
 #define O_CLOEXEC 0
@@ -60,35 +61,65 @@
 #ifndef be32toh
 #define be32toh(x) __bswap_32 (x)
 #endif
+#ifndef htobe32
+#define htobe32(x) __bswap_32 (x)
+#endif
 #ifndef be64toh
 #define be64toh(x) __bswap_64 (x)
 #endif
+#ifndef htobe64
+#define htobe64(x) __bswap_64 (x)
+#endif
 #ifndef le16toh
 #define le16toh(x) (x)
 #endif
+#ifndef htole16
+#define htole16(x) (x)
+#endif
 #ifndef le32toh
 #define le32toh(x) (x)
 #endif
+#ifndef htole32
+#define htole32(x) (x)
+#endif
 #ifndef le64toh
 #define le64toh(x) (x)
 #endif
-#else
+#ifndef htole64
+#define htole64(x) (x)
+#endif
+#else /* __BYTE_ORDER == __BIG_ENDIAN */
 #ifndef be32toh
 #define be32toh(x) (x)
 #endif
+#ifndef htobe32
+#define htobe32(x) (x)
+#endif
 #ifndef be64toh
 #define be64toh(x) (x)
 #endif
+#ifndef htobe64
+#define htobe64(x) (x)
+#endif
 #ifndef le16toh
 #define le16toh(x) __bswap_16 (x)
 #endif
+#ifndef htole16
+#define htole16(x) __bswap_16 (x)
+#endif
 #ifndef le32toh
 #define le32toh(x) __bswap_32 (x)
 #endif
+#ifndef htole32
+#define htole32(x) __bswap_32 (x)
+#endif
 #ifndef le64toh
 #define le64toh(x) __bswap_64 (x)
 #endif
+#ifndef htole64
+#define htole64(x) __bswap_64 (x)
 #endif
+#endif /* __BYTE_ORDER == __BIG_ENDIAN */
 
 #include "hivex.h"
 
@@ -127,6 +158,10 @@ struct hive_h {
   /* Fields from the header, extracted from little-endianness hell. */
   size_t rootoffs;              /* Root key offset (always an nk-block). */
   size_t endpages;              /* Offset of end of pages. */
+
+  /* For writing. */
+  size_t endblocks;             /* Offset to next block allocation (0
+                                   if not allocated anything yet). */
 };
 
 /* NB. All fields are little endian. */
@@ -552,6 +587,10 @@ hivex_close (hive_h *h)
   return r;
 }
 
+/*----------------------------------------------------------------------
+ * Reading.
+ */
+
 hive_node_h
 hivex_root (hive_h *h)
 {
@@ -1431,6 +1470,10 @@ hivex_value_qword (hive_h *h, hive_value_h value)
   return ret;
 }
 
+/*----------------------------------------------------------------------
+ * Visiting.
+ */
+
 int
 hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len,
              void *opaque, int flags)
@@ -1674,3 +1717,501 @@ hivex__visit_node (hive_h *h, hive_node_h node,
   free_strings (strs);
   return ret;
 }
+
+/*----------------------------------------------------------------------
+ * Writing.
+ */
+
+/* Allocate an hbin (page), extending the malloc'd space if necessary,
+ * and updating the hive handle fields (but NOT the hive disk header
+ * -- the hive disk header is updated when we commit).  This function
+ * also extends the bitmap if necessary.  Both buffers are grown with
+ * realloc, so h->addr and h->bitmap may move: callers must recompute
+ * any pointers they hold into the hive after calling this.
+ *
+ * 'allocation_hint' is the size of the block allocation we would like
+ * to make.  Normally registry blocks are very small (avg 50 bytes)
+ * and fit in standard-sized pages (4KB), but a block can be larger
+ * than a standard page, in which case a page of 8KB, 12KB etc is made.
+ *
+ * Returns the offset of the first usable byte of the new page (after
+ * the page header), or 0 on error (errno set).
+ */
+static size_t
+allocate_page (hive_h *h, size_t allocation_hint)
+{
+  /* In almost all cases this will be 1. */
+  size_t nr_4k_pages =
+    1 + (allocation_hint + sizeof (struct ntreg_hbin_page) - 1) / 4096;
+  assert (nr_4k_pages >= 1);
+
+  /* 'extend' is the number of bytes to extend the file by.  Note that
+   * hives found in the wild often contain slack between 'endpages'
+   * and the actual end of the file, so we don't always need to make
+   * the file larger.
+   */
+  ssize_t extend = h->endpages + nr_4k_pages * 4096 - h->size;
+
+  if (h->msglvl >= 2) {
+    fprintf (stderr, "allocate_page: current endpages = 0x%zx, current size = 0x%zx\n",
+             h->endpages, h->size);
+    fprintf (stderr, "allocate_page: extending file by %zd bytes (<= 0 if no extension)\n",
+             extend);
+  }
+
+  if (extend > 0) {
+    size_t oldsize = h->size;
+    size_t newsize = h->size + extend;
+    char *newaddr = realloc (h->addr, newsize);
+    if (newaddr == NULL)
+      return 0;
+    /* realloc may have moved the buffer, so store the new pointer now;
+     * h->size is only raised once the bitmap has grown too. */
+    h->addr = newaddr;
+
+    size_t oldbitmapsize = 1 + oldsize / 32;
+    size_t newbitmapsize = 1 + newsize / 32;
+    char *newbitmap = realloc (h->bitmap, newbitmapsize);
+    if (newbitmap == NULL)
+      return 0;
+    h->bitmap = newbitmap;
+    h->size = newsize;
+
+    memset (h->addr + oldsize, 0, newsize - oldsize);
+    memset (h->bitmap + oldbitmapsize, 0, newbitmapsize - oldbitmapsize);
+  }
+
+  size_t offset = h->endpages;
+  h->endpages += nr_4k_pages * 4096;
+
+  if (h->msglvl >= 2)
+    fprintf (stderr, "allocate_page: new endpages = 0x%zx, new size = 0x%zx\n",
+             h->endpages, h->size);
+
+  /* Write the hbin header. */
+  struct ntreg_hbin_page *page =
+    (struct ntreg_hbin_page *) (h->addr + offset);
+  page->magic[0] = 'h';
+  page->magic[1] = 'b';
+  page->magic[2] = 'i';
+  page->magic[3] = 'n';
+  page->offset_first = htole32 (offset - 0x1000);
+  page->page_size = htole32 (nr_4k_pages * 4096);
+  memset (page->unknown, 0, sizeof (page->unknown));
+
+  if (h->msglvl >= 2)
+    fprintf (stderr, "allocate_page: new page at 0x%zx\n", offset);
+
+  /* Offset of first usable byte after the header. */
+  return offset + sizeof (struct ntreg_hbin_page);
+}
+
+/* Allocate a single block, first allocating an hbin (page) at the end
+ * of the current file if necessary.  NB. To keep the implementation
+ * simple and more likely to be correct, we do not reuse existing free
+ * blocks.  A new page may move h->addr, so callers must recompute any
+ * pointers into the hive after calling this.
+ *
+ * seg_len is the size of the block (this INCLUDES the block header).
+ * The header of the block is initialized to -seg_len (negative to
+ * indicate used).  id[2] is the block ID (type), eg. "nk" for nk-
+ * record.  The block bitmap is updated to show this block as valid.
+ * The rest of the contents of the block will be zero.
+ *
+ * Returns the offset of the new block (> 0), or 0 on error (errno
+ * set: EROFS, ERANGE, or whatever allocate_page reported).
+ */
+static size_t
+allocate_block (hive_h *h, size_t seg_len, const char id[2])
+{
+  if (!h->writable) {
+    errno = EROFS;
+    return 0;
+  }
+
+  if (seg_len < 4) {
+    /* The caller probably forgot to include the header.  (Value lists
+     * have no ID field, so seg_len == 4 is possible, albeit unusual.)
+     */
+    if (h->msglvl >= 2)
+      fprintf (stderr, "allocate_block: refusing too small allocation (%zu), returning ERANGE\n",
+               seg_len);
+    errno = ERANGE;
+    return 0;
+  }
+
+  /* Refuse really large allocations. */
+  if (seg_len > 1000000) {
+    if (h->msglvl >= 2)
+      fprintf (stderr, "allocate_block: refusing large allocation (%zu), returning ERANGE\n",
+               seg_len);
+    errno = ERANGE;
+    return 0;
+  }
+
+  /* Round up allocation to multiple of 4 bytes. */
+  seg_len = (seg_len + 3) & ~3;
+
+  /* Allocate a new page if necessary. */
+  if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
+    size_t newendblocks = allocate_page (h, seg_len);
+    if (newendblocks == 0)
+      return 0;
+    h->endblocks = newendblocks;
+  }
+
+  size_t offset = h->endblocks;
+  if (h->msglvl >= 2)
+    fprintf (stderr, "allocate_block: new block at 0x%zx, size %zu\n",
+             offset, seg_len);
+
+  struct ntreg_hbin_block *blockhdr =
+    (struct ntreg_hbin_block *) (h->addr + offset);
+
+  /* Pages carved out of slack at the end of the file are not zeroed. */
+  memset (blockhdr, 0, seg_len);
+  blockhdr->seg_len = htole32 (- (int32_t) seg_len);
+  if (id[0] && id[1] && seg_len >= 6) {
+    blockhdr->id[0] = id[0];
+    blockhdr->id[1] = id[1];
+  }
+  BITMAP_SET (h->bitmap, offset);
+
+  h->endblocks += seg_len;
+
+  /* If there is space after the last block in the last page, then we
+   * have to put a dummy free block header here to mark the rest of
+   * the page as free.
+   */
+  ssize_t rem = h->endpages - h->endblocks;
+  if (rem > 0) {
+    if (h->msglvl >= 2)
+      fprintf (stderr, "allocate_block: marking remainder of page free starting at 0x%zx, size %zd\n",
+               h->endblocks, rem);
+    assert (rem >= 4);
+
+    blockhdr = (struct ntreg_hbin_block *) (h->addr + h->endblocks);
+    blockhdr->seg_len = htole32 ((int32_t) rem);
+  }
+
+  return offset;
+}
+
+/* Free one block: 'offset' must refer to a valid, used block.  The
+ * block's seg_len header is overwritten with the length reported by
+ * block_len and the block's bit in the bitmap is cleared.  Nothing is
+ * done recursively, so callers may need to walk the hive structures
+ * themselves to avoid leaving unreachable used blocks behind, and
+ * must make sure that nothing else in the hive still refers to this
+ * block.
+ */
+static void
+mark_block_unused (hive_h *h, size_t offset)
+{
+  assert (h->writable);
+  assert (IS_VALID_BLOCK (h, offset));
+
+  size_t len = block_len (h, offset, NULL);
+
+  struct ntreg_hbin_block *hdr = (struct ntreg_hbin_block *) (h->addr + offset);
+  hdr->seg_len = htole32 (len);
+
+  BITMAP_CLR (h->bitmap, offset);
+}
+
+/* Delete all existing values at this node: the value-list blocks,
+ * each vk-record and any out-of-line data block are marked unused,
+ * and the nk-record is reset to hold no values.  Returns 0 on
+ * success, or -1 if the values could not be read.
+ */
+static int
+delete_values (hive_h *h, hive_node_h node)
+{
+  assert (h->writable);
+
+  hive_value_h *vk_offsets;
+  size_t *list_blocks;
+  if (get_values (h, node, &vk_offsets, &list_blocks) == -1)
+    return -1;
+
+  /* Intermediate blocks first (both arrays are 0-terminated). */
+  const size_t *blk;
+  for (blk = list_blocks; *blk != 0; ++blk)
+    mark_block_unused (h, *blk);
+  free (list_blocks);
+
+  const hive_value_h *v;
+  for (v = vk_offsets; *v != 0; ++v) {
+    const struct ntreg_vk_record *rec =
+      (const struct ntreg_vk_record *) (h->addr + *v);
+
+    /* Top bit of data_len is masked off; 0x80000000 alone means 4. */
+    size_t data_len = le32toh (rec->data_len);
+    data_len = data_len == 0x80000000 ? 4 : (data_len & 0x7fffffff);
+
+    /* Data longer than 4 bytes lives in its own block; free it too. */
+    if (data_len > 4)
+      mark_block_unused (h, (size_t) le32toh (rec->data_offset) + 0x1000);
+
+    mark_block_unused (h, *v);  /* finally the vk record itself */
+  }
+
+  free (vk_offsets);
+
+  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
+  nk->vallist = htole32 (0xffffffff);
+  nk->nr_values = htole32 (0);
+
+  return 0;
+}
+
+/* Write the in-memory hive to 'filename' (NULL = the file that was
+ * opened), creating or truncating it, after refreshing the header.
+ */
+int
+hivex_commit (hive_h *h, const char *filename, int flags)
+{
+  if (flags != 0) {
+    errno = EINVAL;
+    return -1;
+  }
+  if (!h->writable) {
+    errno = EROFS;
+    return -1;
+  }
+
+  filename = filename ? filename : h->filename;
+  int fd = open (filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
+  if (fd == -1)
+    return -1;
+
+  /* Update the header fields. */
+  uint32_t sequence = le32toh (h->hdr->sequence1);
+  sequence++;
+  h->hdr->sequence1 = htole32 (sequence);
+  h->hdr->sequence2 = htole32 (sequence);
+  /* XXX Ought to update h->hdr->last_modified. */
+  h->hdr->blocks = htole32 (h->endpages - 0x1000);
+
+  /* Recompute header checksum. */
+  uint32_t sum = header_checksum (h);
+  h->hdr->csum = htole32 (sum);
+  if (h->msglvl >= 2)
+    fprintf (stderr, "hivex_commit: new header checksum: 0x%x\n", sum);
+
+  if (full_write (fd, h->addr, h->size) != h->size) {
+    int err = errno;
+    close (fd);
+    errno = err;
+    return -1;
+  }
+  if (close (fd) == -1)
+    return -1;
+
+  return 0;
+}
+
+#if 0
+hive_node_h
+hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name)
+{
+  if (!h->writable) {
+    errno = EROFS;
+    return 0;
+  }
+
+  if (!IS_VALID_BLOCK (h, parent) || !BLOCK_ID_EQ (h, parent, "nk")) {
+    errno = EINVAL;
+    return 0;
+  }
+
+  if (name == NULL) {
+    errno = EINVAL;
+    return 0;
+  }
+
+  /* XXX Not implemented: the child nk-record still has to be created
+   * and linked into the parent.  All error paths return 0 (not -1).
+   */
+  errno = ENOTSUP;
+  return 0;
+}
+#endif
+
+/* node_end visitor callback used by hivex_node_delete_child.  It runs
+ * AFTER the subnodes of 'node' have been visited (and so deleted),
+ * leaving the lf/lh/li/ri records, the value list block and values,
+ * and finally the node itself to be deleted here.  'opaque' and
+ * 'name' are unused.  Returns 0 on success, -1 on error.
+ */
+static int
+delete_node (hive_h *h, void *opaque, hive_node_h node, const char *name)
+{
+  hive_node_h *children;
+  size_t *intermediates;
+  if (get_children (h, node, &children, &intermediates) == -1)
+    return -1;
+  free (children);              /* only the block list is needed */
+
+  /* We don't care what's in these intermediate blocks, so we can just
+   * delete them unconditionally.
+   */
+  const size_t *blk = intermediates;
+  while (*blk != 0)
+    mark_block_unused (h, *blk++);
+  free (intermediates);
+
+  /* Then the values stored at this node ... */
+  if (delete_values (h, node) == -1)
+    return -1;
+
+  /* XXX
+     mark_block_unused (node->sk);
+     mark_block_unused (node->classname);
+  */
+
+  /* ... and lastly the node itself. */
+  mark_block_unused (h, node);
+
+  return 0;
+}
+
+int
+hivex_node_delete_child (hive_h *h, hive_node_h node)
+{
+  if (!h->writable) {
+    errno = EROFS;
+    return -1;
+  }
+
+  if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (node == hivex_root (h)) {
+    if (h->msglvl >= 2)
+      fprintf (stderr, "hivex_node_delete_child: cannot delete root node\n");
+    errno = EINVAL;
+    return -1;
+  }
+
+  hive_node_h parent = hivex_node_parent (h, node);
+  if (parent == 0)
+    return -1;
+
+  /* Delete node and all its children and values recursively. */
+  static const struct hivex_visitor visitor = { .node_end = delete_node };
+  if (hivex_visit_node (h, node, &visitor, sizeof visitor, NULL, 0) == -1)
+    return -1;
+
+  /* Delete the link from parent to child.  We need to find the lf/lh
+   * record which contains the offset and remove the offset from that
+   * record, then decrement the element count in that record, and
+   * decrement the overall number of subkeys stored in the parent
+   * node (NOT in the node which has just been deleted).
+   */
+  hive_node_h *unused;
+  size_t *blocks;
+  if (get_children (h, parent, &unused, &blocks) == -1)
+    return -1;
+  free (unused);
+
+  size_t i, j;
+  for (i = 0; blocks[i] != 0; ++i) {
+    struct ntreg_hbin_block *block =
+      (struct ntreg_hbin_block *) (h->addr + blocks[i]);
+    if (block->id[0] == 'l' && (block->id[1] == 'f' || block->id[1] == 'h')) {
+      struct ntreg_lf_record *lf = (struct ntreg_lf_record *) block;
+      size_t nr_subkeys_in_lf = le16toh (lf->nr_keys);
+      for (j = 0; j < nr_subkeys_in_lf; ++j)
+        if (le32toh (lf->keys[j].offset) + 0x1000 == node) {
+          for (; j < nr_subkeys_in_lf - 1; ++j)
+            memcpy (&lf->keys[j], &lf->keys[j+1], sizeof (lf->keys[j]));
+          lf->nr_keys = htole16 (nr_subkeys_in_lf - 1);
+          goto found;
+        }
+    }
+  }
+  free (blocks);
+  if (h->msglvl >= 2)
+    fprintf (stderr, "hivex_node_delete_child: could not find parent to child link\n");
+  errno = ENOTSUP;
+  return -1;
+
+ found:
+  free (blocks);
+  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + parent);
+  size_t nr_subkeys_in_nk = le32toh (nk->nr_subkeys);
+  nk->nr_subkeys = htole32 (nr_subkeys_in_nk - 1);
+
+  return 0;
+}
+
+int
+hivex_node_set_values (hive_h *h, hive_node_h node,
+                       size_t nr_values, const hive_set_value *values,
+                       int flags)
+{
+  if (!h->writable) {
+    errno = EROFS;
+    return -1;
+  }
+
+  if (!IS_VALID_BLOCK (h, node) || !BLOCK_ID_EQ (h, node, "nk")) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  /* Delete all existing values. */
+  if (delete_values (h, node) == -1)
+    return -1;
+
+  if (nr_values == 0)
+    return 0;
+
+  /* Allocate value list node.  Value lists have no id field. */
+  static const char nul_id[2] = { 0, 0 };
+  size_t seg_len =
+    sizeof (struct ntreg_value_list) + (nr_values - 1) * sizeof (uint32_t);
+  size_t vallist_offs = allocate_block (h, seg_len, nul_id);
+  if (vallist_offs == 0)
+    return -1;
+
+  /* NB. allocate_block may move h->addr, so hive pointers are derived
+   * afresh after each call; nk is only updated once all values exist. */
+  for (size_t i = 0; i < nr_values; ++i) {
+    /* Allocate vk record to store this (key, value) pair. */
+    static const char vk_id[2] = { 'v', 'k' };
+    size_t name_len = strlen (values[i].key);
+    seg_len = sizeof (struct ntreg_vk_record) + name_len;
+    size_t vk_offs = allocate_block (h, seg_len, vk_id);
+    if (vk_offs == 0)
+      return -1;
+    size_t len = values[i].len, data_offs = 0;
+    if (len > 4) {              /* Too big to inline: 4 byte header + data. */
+      data_offs = allocate_block (h, len + 4, nul_id);
+      if (data_offs == 0)
+        return -1;
+      memcpy (h->addr + data_offs + 4, values[i].value, len);
+    }
+    struct ntreg_value_list *vallist =
+      (struct ntreg_value_list *) (h->addr + vallist_offs);
+    vallist->offset[i] = htole32 (vk_offs - 0x1000);
+    struct ntreg_vk_record *vk = (struct ntreg_vk_record *) (h->addr + vk_offs);
+    vk->name_len = htole16 (name_len);
+    strcpy (vk->name, values[i].key);
+    vk->data_type = htole32 (values[i].t);
+    vk->data_len = htole32 (len);       /* 32 bit field, cf. delete_values */
+    if (len > 4)
+      vk->data_offset = htole32 (data_offs - 0x1000);
+    else                        /* Store data inline. */
+      memcpy (&vk->data_offset, values[i].value, len);
+  }
+
+  struct ntreg_nk_record *nk = (struct ntreg_nk_record *) (h->addr + node);
+  nk->nr_values = htole32 (nr_values);
+  nk->vallist = htole32 (vallist_offs - 0x1000);
+
+  return 0;
+}
diff --git a/hivex/hivex.h b/hivex/hivex.h
index 56718b4..cca1971 100644
--- a/hivex/hivex.h
+++ b/hivex/hivex.h
@@ -110,6 +110,20 @@ struct hivex_visitor {
 extern int hivex_visit (hive_h *h, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
 extern int hivex_visit_node (hive_h *h, hive_node_h node, const struct hivex_visitor *visitor, size_t len, void *opaque, int flags);
 
+extern int hivex_commit (hive_h *h, const char *filename, int flags);
+extern hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name);
+extern int hivex_node_delete_child (hive_h *h, hive_node_h node);
+
+struct hive_set_value {
+  const char *key;              /* key - a UTF-8 encoded ASCIIZ string */
+  hive_type t;                  /* type of value field */
+  size_t len;                   /* length of value field in bytes */
+  const char *value;            /* value field, stored as raw bytes */
+};
+typedef struct hive_set_value hive_set_value;
+
+extern int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/hivex/hivex.pod b/hivex/hivex.pod
index 5a58144..f8386e0 100644
--- a/hivex/hivex.pod
+++ b/hivex/hivex.pod
@@ -326,6 +326,145 @@ starts at C<node>.
 
 =back
 
+=head2 WRITING TO HIVE FILES
+
+The hivex library supports making limited modifications to hive files.
+We have tried to implement this very conservatively in order to reduce
+the chance of corrupting your registry.  However you should be careful
+and take back-ups, since Microsoft has never documented the hive
+format, and so it is possible there are nuances in the
+reverse-engineered format that we do not understand.
+
+To be able to modify a hive, you must pass the C<HIVEX_OPEN_WRITE>
+flag to C<hivex_open>, otherwise any write operation will return with
+errno C<EROFS>.
+
+The write operations shown below do not modify the on-disk file
+immediately.  You must call C<hivex_commit> in order to write the
+changes to disk.  If you call C<hivex_close> without committing then
+any writes are discarded.
+
+Hive files internally consist of a "memory dump" of binary blocks
+(like the C heap), and some of these blocks can be unused.  The hivex
+library never reuses these unused blocks.  Instead, to ensure
+robustness in the face of the partially understood on-disk format,
+hivex only allocates new blocks after the end of the file, and makes
+minimal modifications to existing structures in the file to point to
+these new blocks.  This makes hivex slightly less disk-efficient than
+it could be, but disk is cheap, and registry modifications tend to be
+very small.
+
+When deleting nodes, it is possible that this library may leave
+unreachable live blocks in the hive.  This is because certain parts of
+the hive disk format such as security (sk) records and big data (db)
+records and classname fields are not well understood (and not
+documented at all) and we play it safe by not attempting to modify
+them.  Apart from wasting a little bit of disk space, it is not
+thought that unreachable blocks are a problem.
+
+=over 4
+
+=item int hivex_commit (hive_h *h, const char *filename, int flags);
+
+Commit (write) any changes which have been made.
+
+C<filename> is the new file to write.  If C<filename == NULL> then we
+overwrite the original file (ie. the file name that was passed to
+C<hivex_open>).  C<flags> is not used, always pass 0.
+
+Returns 0 on success.  On error this returns -1 and sets errno.
+
+Note this does not close the hive handle.  You can perform further
+operations on the hive after committing, including making more
+modifications.  If you no longer wish to use the hive, call
+C<hivex_close> after this.
+
+=item hive_node_h hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name);
+
+Add a new child node named C<name> to the existing node C<parent>.
+The new child initially has no subnodes and contains no keys or
+values.  The parent must not have an existing child called C<name>, so
+if you want to overwrite an existing child, call
+C<hivex_node_delete_child> first.
+
+Returns the node handle.  On error this returns 0 and sets errno.
+
+=item int hivex_node_delete_child (hive_h *h, hive_node_h node);
+
+Delete the node C<node>.  All values at the node and all subnodes are
+deleted (recursively).  The C<node> handle and the handles of all
+subnodes become invalid.  You cannot delete the root node.
+
+Returns 0 on success.  On error this returns -1 and sets errno.
+
+=item hive_set_value
+
+The typedef C<hive_set_value> is used in conjunction with the
+C<hivex_node_set_values> call described below.
+
+ struct hive_set_value {
+   const char *key;   /* key - a UTF-8 encoded ASCIIZ string */
+   hive_type t;       /* type of value field */
+   size_t len;        /* length of value field in bytes */
+   const char *value; /* value field */
+ };
+ typedef struct hive_set_value hive_set_value;
+
+To set the default value for a node, you have to pass C<key = "">.
+
+Note that the C<value> field is just treated as a list of bytes, and
+is stored directly in the hive.  The caller has to ensure correct
+encoding and endianness, for example converting dwords to little
+endian.
+
+The correct type and encoding for values depends on the node and key
+in the registry, the version of Windows, and sometimes even changes
+between versions of Windows for the same key.  We don't document it
+here.  Often it's not documented at all.
+
+=item int hivex_node_set_values (hive_h *h, hive_node_h node, size_t nr_values, const hive_set_value *values, int flags);
+
+This call can be used to set all the (key, value) pairs stored in C<node>.
+
+C<node> is the node to modify.  C<values> is an array of (key, value)
+pairs.  There should be C<nr_values> elements in this array.  C<flags>
+is not used, always pass 0.
+
+Any existing values stored at the node are discarded, and their
+C<hive_value_h> handles become invalid.  Thus you can remove all
+values stored at C<node> by passing C<nr_values = 0>.
+
+Returns 0 on success.  On error this returns -1 and sets errno.
+
+Note that this library does not offer a way to modify just a single
+key at a node.  We don't implement a way to do this efficiently.
+
+=back
+
+=head3 WRITE OPERATIONS WHICH ARE NOT SUPPORTED
+
+=over 4
+
+=item *
+
+Changing the root node.
+
+=item *
+
+Creating a new hive file from scratch.  This is impossible at present
+because not all fields in the header are understood.
+
+=item *
+
+Modifying or deleting single values at a node.
+
+=item *
+
+Modifying security key (sk) records or classnames.  These are not
+well understood.
+
+=back
+
 =head1 THE STRUCTURE OF THE WINDOWS REGISTRY
 
 Note: To understand the relationship between hives and the common
@@ -452,6 +591,10 @@ Registry contains cycles.
 
 Field in the registry out of range.
 
+=item EROFS
+
+Tried to write to a registry which is not opened for writing.
+
 =back
 
 =head1 ENVIRONMENT VARIABLES
diff --git a/hivex/visualizer.ml b/hivex/visualizer.ml
new file mode 100644
index 0000000..acfce5b
--- /dev/null
+++ b/hivex/visualizer.ml
@@ -0,0 +1,531 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the following documents.  Note they are both incomplete
+ * and inaccurate in some respects.
+ *
+ * http://www.sentinelchicken.com/data/TheWindowsNTRegistryFileFormat.pdf
+ * http://pogostick.net/~pnh/ntpasswd/WinReg.txt
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+open Visualizer_utils
+open Visualizer_NT_time
+
+let filename =
+  if Array.length Sys.argv = 2 then Sys.argv.(1)
+  else
+    failwithf "error: missing filename\nusage: %s hivefile\n"
+      Sys.executable_name
+
+(* Load the file. *)
+let bits = bitstring_of_file filename
+
+(* Split into header + data at the 4KB boundary. *)
+let header, data = takebits (4096 * 8) bits, dropbits (4096 * 8) bits
+
+(* Define a persistent pattern which matches the header fields.  By
+ * using persistent patterns, we can reuse them later in the
+ * program.
+ *)
+let bitmatch header_fields =
+  { "regf" : 4*8 : string;
+    seq1 : 4*8 : littleendian;
+    seq2 : 4*8 : littleendian;
+    last_modified : 64
+      : littleendian, bind (nt_to_time_t last_modified);
+    major : 4*8 : littleendian;
+    minor : 4*8 : littleendian;
+    unknown1 : 4*8 : littleendian;
+    unknown2 : 4*8 : littleendian;
+    root_key : 4*8
+      : littleendian, bind (get_offset root_key);
+    end_pages : 4*8
+      : littleendian, bind (get_offset end_pages);
+    unknown3 : 4*8 : littleendian;
+    filename : 64*8 : string;
+    (* sentinelchicken documentation has some fields here which
+     * plainly don't exist in any hives I've seen.  Treat it as a big
+     * block of unknown.
+     *)
+    unknown4 : 396*8 : bitstring;
+    csum : 4*8
+      : littleendian, save_offset_to (crc_offset),
+        check (assert (crc_offset = 0x1fc * 8); true);
+    unknown5 : (0x1000-0x200)*8 : bitstring }
+
+let fprintf_header chan bits =
+  bitmatch bits with
+  | { :header_fields } ->
+      fprintf chan
+        "HD %6ld %6ld %s %ld.%ld u%08lx u%08lx %s %s u%08lx %s %s %08lx %s\n"
+        seq1 seq2 (print_time last_modified) major minor
+        unknown1 unknown2
+        (print_offset root_key) (print_offset end_pages)
+        unknown3 (print_utf16 filename)
+        (print_bitstring unknown4) csum (print_bitstring unknown5)
+
+(* Parse the header and check it. *)
+let root_key, end_pages =
+  bitmatch header with
+  |  { :header_fields } ->
+       (* major is a 32-bit field, i.e. an int32: compare with 1_l. *)
+       if major <> 1_l then
+         eprintf "!HD major\n";
+
+       root_key, end_pages
+  | {_} ->
+      failwithf "%s: this doesn't look like a registry hive file\n" filename
+
+(* Define persistent patterns to match page and block fields. *)
+let bitmatch page_fields =
+  { "hbin" : 4*8 : string;
+    page_offset : 4*8
+      : littleendian, bind (get_offset page_offset);
+    page_size : 4*8
+      : littleendian, check (Int32.rem page_size 4096_l = 0_l),
+        bind (Int32.to_int page_size);
+    unknown : 20*8 : bitstring;
+    blocks : (page_size - 32) * 8 : bitstring;
+    rest : -1 : bitstring }
+
+let fprintf_page chan bits =
+  bitmatch bits with
+  | { :page_fields } ->
+      ignore (blocks, rest);
+      fprintf chan "HB %s %08x %s\n"
+        (print_offset page_offset)
+        page_size (print_bitstring unknown)
+
+let bitmatch block_fields =
+  { seg_len : 4*8
+      : littleendian, bind (Int32.to_int seg_len);
+    block_data : (abs seg_len - 4) * 8 : bitstring;
+    rest : -1 : bitstring }
+
+(* Iterate over the pages and blocks.  In the process we will examine
+ * each page (hbin) header.  Also we will build block_list which is a
+ * list of (block offset, length, used flag, data).
+ *)
+let block_list = ref []
+let () =
+  let rec loop_over_pages data data_offset =
+    if data_offset >= end_pages then ()
+    else (
+      bitmatch data with
+      | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
+
+      | { :page_fields } ->
+          ignore (unknown);
+
+          assert (page_offset = data_offset);
+
+          (* Loop over the blocks in this page. *)
+          loop_over_blocks blocks (data_offset + 32);
+
+          (* Loop over rest of the pages. *)
+          loop_over_pages rest (data_offset + page_size)
+
+      | {_} ->
+          failwithf "%s: invalid hbin at offset %s\n"
+            filename (print_offset data_offset)
+    )
+  and loop_over_blocks blocks block_offset =
+    bitmatch blocks with
+    | { rest : -1 : bitstring } when bitstring_length rest = 0 -> ()
+
+    | { :block_fields } ->
+        let used, seg_len =
+          if seg_len < 0 then true, -seg_len else false, seg_len in
+
+        let block = block_offset, (seg_len, used, block_data) in
+        block_list := block :: !block_list;
+
+        (* Loop over the rest of the blocks in this page. *)
+        loop_over_blocks rest (block_offset + seg_len)
+
+    | {_} ->
+        failwithf "%s: invalid block near offset %s\n"
+          filename (print_offset block_offset)
+  in
+  loop_over_pages data 0
+
+(* Turn the block_list into a map so we can quickly look up a block
+ * from its offset.
+ *)
+let block_list = !block_list
+let block_map =
+  List.fold_left (
+    fun map (block_offset, block) -> IntMap.add block_offset block map
+  ) IntMap.empty block_list
+let lookup fn offset =
+  try
+    let (_, used, _) as block = IntMap.find offset block_map in
+    if not used then
+      failwithf "%s: %s: lookup: free block %s referenced from hive tree"
+        filename fn (print_offset offset);
+    block
+  with Not_found ->
+    failwithf "%s: %s: lookup: unknown block %s referenced from hive tree"
+      filename fn (print_offset offset)
+
+(* Use this to mark blocks that we've visited.  If the hive contains
+ * no unreferenced blocks, then by the end this should just contain
+ * free blocks.
+ *)
+let mark_visited, is_not_visited, unvisited_blocks =
+  let v = ref block_map in
+  let mark_visited offset = v := IntMap.remove offset !v
+  and is_not_visited offset = IntMap.mem offset !v
+  and unvisited_blocks () = !v in
+  mark_visited, is_not_visited, unvisited_blocks
+
+(* Define persistent patterns to match nk-records, vk-records and
+ * sk-records, which are the record types that we especially want to
+ * analyze later.  Other blocks types (eg. value lists, lf-records)
+ * have no "spare space" so everything is known about them and we don't
+ * store these.
+ *)
+let bitmatch nk_fields =
+  { "nk" : 2*8 : string;
+    (* Flags stored in the file as a little endian word, hence the
+     * unusual ordering:
+     *)
+    unknownflag0080 : 1;
+    predefinedhandle : 1; keynameascii : 1; symlinkkey : 1;
+    cannotbedeleted : 1; isroot : 1; ismountpoint : 1; isvolatile : 1;
+    unknownflag8000 : 1; unknownflag4000 : 1;
+    unknownflag2000 : 1; unknownflag1000 : 1;
+    unknownflag0800 : 1; unknownflag0400 : 1;
+    unknownflag0200 : 1; unknownflag0100 : 1;
+    timestamp : 64 : littleendian, bind (nt_to_time_t timestamp);
+    unknown1 : 4*8 : littleendian;
+    parent : 4*8 : littleendian, bind (get_offset parent);
+    nr_subkeys : 4*8 : littleendian, bind (Int32.to_int nr_subkeys);
+    nr_subkeys_vol : 4*8 : littleendian;
+    subkeys : 4*8 : littleendian, bind (get_offset subkeys);
+    subkeys_vol : 4*8 : littleendian;
+    nr_values : 4*8 : littleendian, bind (Int32.to_int nr_values);
+    vallist : 4*8 : littleendian, bind (get_offset vallist);
+    sk : 4*8 : littleendian, bind (get_offset sk);
+    classname : 4*8 : littleendian, bind (get_offset classname);
+    unknown2 : 4*8 : littleendian;
+    unknown3 : 4*8 : littleendian;
+    unknown4 : 4*8 : littleendian;
+    unknown5 : 4*8 : littleendian;
+    unknown6 : 4*8 : littleendian;
+    name_len : 2*8 : littleendian;
+    classname_len : 2*8 : littleendian;
+    name : name_len * 8 : string }
+
+let fprintf_nk chan nk =
+  let (_, _, bits) = lookup "fprintf_nk" nk in
+  bitmatch bits with
+  | { :nk_fields } ->
+      fprintf chan
+        "NK %s %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s %s u%08lx %08x %d %ld %s %08lx %d %s %s %s u%08lx u%08lx u%08lx u%08lx u%08lx %d %d %s\n"
+        (print_offset nk)
+        (if unknownflag8000 then "8" else ".")
+        (if unknownflag4000 then "4" else ".")
+        (if unknownflag2000 then "2" else ".")
+        (if unknownflag1000 then "1" else ".")
+        (if unknownflag0800 then "8" else ".")
+        (if unknownflag0400 then "4" else ".")
+        (if unknownflag0200 then "2" else ".")
+        (if unknownflag0100 then "1" else ".")
+        (if unknownflag0080 then "8" else ".")
+        (if predefinedhandle then "P" else ".")
+        (if keynameascii then "A" else ".")
+        (if symlinkkey then "S" else ".")
+        (if cannotbedeleted then "N" else ".")
+        (if isroot then "R" else ".")
+        (if ismountpoint then "M" else ".")
+        (if isvolatile then "V" else ".")
+        (print_time timestamp)
+        unknown1 parent nr_subkeys nr_subkeys_vol
+        (print_offset subkeys) subkeys_vol
+        nr_values (print_offset vallist)
+        (print_offset sk) (print_offset classname)
+        unknown2 unknown3 unknown4 unknown5 unknown6
+        name_len classname_len name
+
+type data_t = Inline of bitstring | Offset of int
+let bitmatch vk_fields =
+  { "vk" : 2*8 : string;
+    name_len : 2*8 : littleendian;
+    (* No one documents the important fact that data_len can have the
+     * top bit set (randomly or is it meaningful?).  The length can
+     * also be 0 (or 0x80000000) if the data type is NONE.
+     *)
+    data_len : 4*8
+      : littleendian, bind (
+        let data_len = Int32.logand data_len 0x7fff_ffff_l in
+        Int32.to_int data_len
+      );
+    (* Inline data if len <= 4, offset otherwise. *)
+    data : 4*8
+      : bitstring, bind (
+        if data_len <= 4 then
+          Inline (takebits (data_len*8) data)
+        else (
+          let offset =
+            bitmatch data with { offset : 4*8 : littleendian } -> offset in
+          let offset = get_offset offset in
+          Offset offset
+        )
+      );
+    t : 4*8 : littleendian, bind (Int32.to_int t);
+    (* Flags, stored as a little-endian word: *)
+    unknown1 : 7; nameisascii : 1; unknown2 : 8;
+    unknown3 : 2*8 : littleendian;
+    name : name_len * 8 : string }
+
+let fprintf_vk chan vk =
+  let (_, _, bits) = lookup "fprintf_vk" vk in
+  bitmatch bits with
+  | { :vk_fields } ->
+      let data =
+        match data with
+        | Inline data -> data
+        | Offset offset ->
+            let (_, _, bits) = lookup "fprintf_vk (data)" offset in
+            bits in
+      fprintf chan "VK %s %s %d %s %s u%08x %s u%08x u%08x\n"
+        (print_offset vk)
+        name data_len (print_bitstring data) (print_vk_type t)
+        unknown1 (if nameisascii then "A" else "L")
+        unknown2 unknown3
+
+let bitmatch sk_fields =
+  { "sk" : 2*8 : string;
+    unknown1 : 2*8 : littleendian;
+    sk_prev : 4*8 : littleendian, bind (get_offset sk_prev);
+    sk_next : 4*8 : littleendian, bind (get_offset sk_next);
+    refcount : 4*8 : littleendian, bind (Int32.to_int refcount);
+    sec_len : 4*8 : littleendian, bind (Int32.to_int sec_len);
+    sec_desc : sec_len * 8 : bitstring }
+
+let fprintf_sk chan sk =
+  let (_, _, bits) = lookup "fprintf_sk" sk in
+  bitmatch bits with
+  | { :sk_fields } ->
+      ignore (sec_desc);
+      fprintf chan "SK %s u%04x %s %s %d %d\n"
+        (print_offset sk) unknown1
+        (print_offset sk_prev) (print_offset sk_next)
+        refcount sec_len
+        (* print_bitstring sec_desc -- suppress this *)
+
+(* Store lists of records we encounter (lists of offsets). *)
+let nk_records = ref []
+and vk_records = ref []
+and sk_records = ref []
+
+(* Functions to visit each block, starting at the root.  Each block
+ * that we visit is printed.
+ *)
+let rec visit_nk ?(nk_is_root = false) nk =
+  let (_, _, bits) = lookup "visit_nk" nk in
+  mark_visited nk;
+  (bitmatch bits with
+   | { :nk_fields } ->
+       ignore (parent, timestamp);
+
+       nk_records := nk :: !nk_records;
+
+       (* Check the isroot flag is only set on the root node. *)
+       assert (isroot = nk_is_root);
+
+       (* Visit the values first at this node. *)
+       if vallist <> -1 then
+         visit_vallist nr_values vallist;
+
+       (* Visit the subkeys of this node. *)
+       if subkeys <> -1 then (
+         let counted = visit_subkeys subkeys in
+         if counted <> nr_subkeys then
+           failwithf "%s: incorrect count of subkeys (%d, counted %d) in subkey list at %s\n"
+             filename nr_subkeys counted (print_offset subkeys)
+       );
+
+       (* Visit the sk-record and classname. *)
+       if sk <> -1 then
+         visit_sk sk;
+       if classname <> -1 then
+         visit_classname classname classname_len;
+
+   | {_} ->
+       failwithf "%s: invalid nk block at offset %s\n"
+         filename (print_offset nk)
+  )
+
+and visit_vallist nr_values vallist =
+  let (_, _, bits) = lookup "visit_vallist" vallist in
+  mark_visited vallist;
+  visit_values_in_vallist nr_values vallist bits
+
+and visit_values_in_vallist nr_values vallist bits =
+  if nr_values > 0 then (
+    bitmatch bits with
+    | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+        assert (nr_values = 0)
+
+    | { value : 4*8 : littleendian, bind (get_offset value);
+        rest : -1 : bitstring } ->
+        visit_vk value;
+        visit_values_in_vallist (nr_values-1) vallist rest
+
+    | {_} ->
+        failwithf "%s: invalid offset in value list at %s\n"
+          filename (print_offset vallist)
+  )
+
+and visit_vk vk =
+  let (_, _, bits) = lookup "visit_vk" vk in
+  mark_visited vk;
+
+  (bitmatch bits with
+   | { :vk_fields } ->
+       ignore (t);
+
+       vk_records := vk :: !vk_records;
+       (match data with
+        | Inline data -> ()
+        | Offset offset ->
+            let _ = lookup "visit_vk (data)" offset in
+            mark_visited offset
+       );
+
+   | {_} ->
+       failwithf "%s: invalid vk block at offset %s\n"
+         filename (print_offset vk)
+  )
+
+(* Visits subkeys, recursing through intermediate lf/lh/ri structures,
+ * and returns the number of subkeys actually seen.
+ *)
+and visit_subkeys subkeys =
+  let (_, _, bits) = lookup "visit_subkeys" subkeys in
+  mark_visited subkeys;
+  (bitmatch bits with
+   | { ("lf"|"lh") : 2*8 : string;
+       len : 2*8 : littleendian; (* number of subkeys of this node *)
+       rest : len*8*8 : bitstring } ->
+       (*printf "LF %s %d\n" (print_offset subkeys) len;*)
+       visit_subkeys_in_lf_list subkeys len rest
+
+   | { "ri" : 2*8 : string;
+       len : 2*8 : littleendian;
+       rest : len*4*8 : bitstring } ->
+       (*printf "RI %s %d\n" (print_offset subkeys) len;*)
+       visit_subkeys_in_ri_list subkeys len rest
+
+   (* In theory you can have an li-record here, but we've never
+    * seen one.
+    *)
+
+   | { "nk" : 2*8 : string } ->
+       visit_nk subkeys; 1
+
+   | {_} ->
+       failwithf "%s: invalid subkey node found at %s\n"
+         filename (print_offset subkeys)
+  )
+
+and visit_subkeys_in_lf_list subkeys_top len bits =
+  if len > 0 then (
+    bitmatch bits with
+    | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+        assert (len = 0);
+        0
+
+    | { offset : 4*8 : littleendian, bind (get_offset offset);
+        _ (* hash *) : 4*8 : bitstring;
+        rest : -1 : bitstring } ->
+        let c1 = visit_subkeys offset in
+        let c2 = visit_subkeys_in_lf_list subkeys_top (len-1) rest in
+        c1 + c2
+
+    | {_} ->
+        failwithf "%s: invalid subkey in lf/lh list at %s\n"
+          filename (print_offset subkeys_top)
+  ) else 0
+
+and visit_subkeys_in_ri_list subkeys_top len bits =
+  if len > 0 then (
+    bitmatch bits with
+    | { rest : -1 : bitstring } when bitstring_length rest = 0 ->
+        assert (len = 0);
+        0
+
+    | { offset : 4*8 : littleendian, bind (get_offset offset);
+        rest : -1 : bitstring } ->
+        let c1 = visit_subkeys offset in
+        let c2 = visit_subkeys_in_ri_list subkeys_top (len-1) rest in
+        c1 + c2
+
+    | {_} ->
+        failwithf "%s: invalid subkey in ri list at %s\n"
+          filename (print_offset subkeys_top)
+  ) else 0
+
+and visit_sk sk =
+  let (_, _, bits) = lookup "visit_sk" sk in
+  if is_not_visited sk then (
+    mark_visited sk;
+    (bitmatch bits with
+     | { :sk_fields } ->
+         ignore (sk_prev, sk_next, refcount, sec_desc);
+         sk_records := sk :: !sk_records
+
+     | {_} ->
+         failwithf "%s: invalid sk-record at %s\n"
+           filename (print_offset sk)
+    )
+  )
+
+and visit_classname classname classname_len =
+  let (seg_len, _, bits) = lookup "visit_classname" classname in
+  mark_visited classname;
+  assert (seg_len >= classname_len)
+  (*printf "CL %s %s\n" (print_offset classname) (print_bitstring bits)*)
+
+let () =
+  visit_nk ~nk_is_root:true root_key
+
+(* Now after visiting all the blocks, are there any used blocks which
+ * are unvisited?  If there are any then that would indicate either (a)
+ * that the hive contains unreferenced blocks, or (b) that there are
+ * referenced blocks that we did not visit because we don't have a full
+ * understanding of the hive format.
+ *
+ * Windows 7 registries often contain a few of these -- not clear
+ * how serious they are, but don't fail here.
+ *)
+let () =
+  let unvisited = unvisited_blocks () in
+  IntMap.iter (
+    fun offset block ->
+      match block with
+      | (_, false, _) -> () (* ignore unused blocks *)
+      | (_, true, _) -> (* used but never reached from the root: warn only *)
+          eprintf "!-- used block %s is not referenced\n"
+            (print_offset offset)
+  ) unvisited
diff --git a/hivex/visualizer_NT_time.ml b/hivex/visualizer_NT_time.ml
new file mode 100644
index 0000000..a752112
--- /dev/null
+++ b/hivex/visualizer_NT_time.ml
@@ -0,0 +1,30 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the documents listed at the top of visualizer.ml.  Note they
+ * are both incomplete and inaccurate in some respects.
+ *)
+
+(* Convert an NT file timestamp to time_t.  See:
+ * http://blogs.msdn.com/oldnewthing/archive/2003/09/05/54806.aspx
+ * http://support.microsoft.com/kb/167296
+ *)
+let nt_to_time_t t =
+  let t = Int64.sub t 116444736000000000L in
+  let t = Int64.div t 10000000L in
+  Int64.to_float t
diff --git a/hivex/visualizer_utils.ml b/hivex/visualizer_utils.ml
new file mode 100644
index 0000000..4abf96e
--- /dev/null
+++ b/hivex/visualizer_utils.ml
@@ -0,0 +1,124 @@
+(* Windows Registry reverse-engineering tool.
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * For existing information on the registry format, please refer
+ * to the documents listed at the top of visualizer.ml.  Note they
+ * are both incomplete and inaccurate in some respects.
+ *)
+
+open Bitstring
+open ExtString
+open Printf
+
+let failwithf fs = ksprintf failwith fs
+
+(* Convert unknown bitstring fragments into printable strings.
+ * Control and non-ASCII bytes are escaped; runs of more than four
+ * identical bytes are collapsed to "<N times>". *)
+let rec print_bitstring bits =
+  let str = string_of_bitstring bits in
+  print_binary_string str
+and print_binary_string str =
+  let rec printable = function
+    | '\x00' -> "\\0" | '\x01' -> "\\1" | '\x02' -> "\\2" | '\x03' -> "\\3"
+    | '\x04' -> "\\4" | '\x05' -> "\\5" | '\x06' -> "\\6" | '\x07' -> "\\7"
+    | ('\x08'..'\x1f' as c)
+    | ('\x7f'..'\xff' as c) -> sprintf "\\x%02x" (Char.code c)
+    | ('\x20'..'\x7e' as c) -> String.make 1 c
+  and repeat str = function
+    | n when n <= 0 -> ""
+    | n -> str ^ repeat str (n-1)
+  in
+  let chars = String.explode str in
+  let rec loop = function
+    | [] -> []
+    | x :: (y :: _ as ys) when x = y ->
+        let (nr, _), ys =
+          match loop ys with [] -> assert false | a :: b -> a, b in
+        (nr+1, x) :: ys
+    | x :: ys -> (1, x) :: loop ys
+  in
+  let frags = loop chars in
+  let frags =
+    List.map (function
+              | (nr, x) when nr <= 4 -> repeat (printable x) nr
+              | (nr, x) -> sprintf "%s<%d times>" (printable x) nr
+             ) frags in
+  "\"" ^ String.concat "" frags ^ "\""
+
+(* Convert an offset from the file to an offset.  The only special
+ * thing is that 0xffffffff in the file is used as a kind of "NULL
+ * pointer".  We map these null values to -1.
+ *)
+let get_offset = function
+  | 0xffffffff_l -> -1
+  | i -> Int32.to_int i
+
+(* Print an offset. *)
+let print_offset = function
+  | -1 -> "NULL"
+  | i -> sprintf "@%08x" i
+
+(* Print time. *)
+let print_time t =
+  let tm = Unix.gmtime t in
+  sprintf "%04d-%02d-%02d %02d:%02d:%02d"
+    (tm.Unix.tm_year + 1900) (tm.Unix.tm_mon + 1) tm.Unix.tm_mday
+    tm.Unix.tm_hour tm.Unix.tm_min tm.Unix.tm_sec
+
+(* Print UTF16LE, stopping at the first NUL code unit. *)
+let print_utf16 str =
+  let n = String.length str in
+  if n land 1 <> 0 then
+    print_binary_string str
+  else (
+    let rec loop i =
+      if i < n-1 then (
+        let c1 = Char.code (str.[i]) in
+        let c2 = Char.code (str.[i+1]) in
+        if c1 <> 0 || c2 <> 0 then (
+          (* High byte zero: emit the low byte raw; else a hex escape. *)
+          let c =
+            if c2 = 0 then String.make 1 (Char.chr c1)
+            else sprintf "\\u%04x" (c2 * 256 + c1) in
+          c :: loop (i+2)
+        ) else []
+      ) else []
+    in
+    let frags = loop 0 in
+    "L\"" ^ String.concat "" frags ^ "\""
+  )
+
+(* A map of int -> anything. *)
+module IntMap =
+  Map.Make (struct type t = int let compare = Pervasives.compare end)
+
+(* Print registry vk-record type field. *)
+let print_vk_type = function
+  | 0 -> "NONE"
+  | 1 -> "SZ"
+  | 2 -> "EXPAND_SZ"
+  | 3 -> "BINARY"
+  | 4 -> "DWORD"
+  | 5 -> "DWORD_BIG_ENDIAN"
+  | 6 -> "LINK"
+  | 7 -> "MULTI_SZ"
+  | 8 -> "RESOURCE_LIST"
+  | 9 -> "FULL_RESOURCE_DESCRIPTOR"
+  | 10 -> "RESOURCE_REQUIREMENTS_LIST"
+  | 11 -> "QWORD"
+  | i -> sprintf "UNKNOWN_VK_TYPE_%d" i
diff --git a/m4/.gitignore b/m4/.gitignore
index 01c8e2d..3f1a5a4 100644
--- a/m4/.gitignore
+++ b/m4/.gitignore
@@ -143,3 +143,4 @@ xsize.m4
 /strtoll.m4
 /strtoul.m4
 /strtoull.m4
+/xstrtol.m4
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 85892e8..5a27d5c 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -80,6 +80,8 @@ fish/tilde.c
 fish/time.c
 fuse/dircache.c
 fuse/guestmount.c
+hivex/example1.c
+hivex/example2.c
 hivex/hivex.c
 hivex/hivexget.c
 hivex/hivexml.c
-- 
1.6.5.2



More information about the Libguestfs mailing list