[Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_h

Hilko Bengen bengen at hilluzination.de
Fri Feb 9 14:52:20 UTC 2018


It was brought to my attention that dumping a registry hive spends a
lot of time in disk I/O because iconv_open() and iconv_close() are
called for every key. Every iconv_open() call causes
/usr/lib/.../gconv/$ENCODING.so to be opened and mapped.

The iconv_t handles are now cached in the hive_h struct; they are
opened on-demand and re-used.

On my ~10-year-old Lenovo T60, I have seen a 57% reduction in the
overall runtime of

    hivexregedit --export windows-8-enterprise-software.hive '\\'
---
 bootstrap            |  1 +
 configure.ac         |  2 ++
 lib/Makefile.am      |  2 ++
 lib/handle.c         | 42 +++++++++++++++++++++++++++++++++++++++++-
 lib/hivex-internal.h | 31 ++++++++++++++++++++++---------
 lib/node.c           |  6 +++---
 lib/utf16.c          | 38 ++++++++++++++++----------------------
 lib/value.c          | 10 +++++-----
 lib/write.c          |  4 ++--
 m4/.gitignore        |  2 ++
 10 files changed, 96 insertions(+), 42 deletions(-)

diff --git a/bootstrap b/bootstrap
index bd82477..373fad8 100755
--- a/bootstrap
+++ b/bootstrap
@@ -75,6 +75,7 @@ vc-list-files
 warnings
 xstrtol
 xstrtoll
+threadlib
 '
 
 $gnulib_tool			\
diff --git a/configure.ac b/configure.ac
index 547fb0d..8405774 100644
--- a/configure.ac
+++ b/configure.ac
@@ -38,7 +38,9 @@ AC_DEFINE([PACKAGE_VERSION_RELEASE],[hivex_release],[Release number])
 AC_DEFINE([PACKAGE_VERSION_EXTRA],["hivex_extra"],[Extra version string])
 
 gl_EARLY
+gl_THREADLIB_EARLY
 gl_INIT
+gl_THREADLIB
 
 AM_PROG_LIBTOOL
 
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 4a7cea1..62cdf35 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -38,6 +38,8 @@ libhivex_la_SOURCES = \
 	visit.c \
 	write.c
 
+libhivex_la_SOURCES += $(top_srcdir)/gnulib/lib/glthread/threadlib.c
+
 libhivex_la_LIBADD =  ../gnulib/lib/libgnu.la $(LTLIBOBJS)
 libhivex_la_LDFLAGS = \
 	-version-info 0:0:0 \
diff --git a/lib/handle.c b/lib/handle.c
index 9dcf81d..01b8d80 100644
--- a/lib/handle.c
+++ b/lib/handle.c
@@ -30,6 +30,8 @@
 #include <sys/stat.h>
 #include <errno.h>
 #include <assert.h>
+#include <iconv.h>
+#include <glthread/lock.h>
 
 #ifdef HAVE_MMAP
 #include <sys/mman.h>
@@ -62,6 +64,32 @@ header_checksum (const hive_h *h)
 
 #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
 
+iconv_t *
+_hivex_get_iconv (hive_h *h, recode_type t)
+{
+  glthread_lock_lock (&h->iconv_cache[t].mutex);
+  if (h->iconv_cache[t].handle == NULL) {
+    if (t == utf8_to_latin1)
+      h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8");
+    else if (t == latin1_to_utf8)
+      h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1");
+    else if (t == utf8_to_utf16le)
+      h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8");
+    else if (t == utf16le_to_utf8)
+      h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE");
+  } else {
+    /* reinitialize iconv context */
+    iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0);
+  }
+  return h->iconv_cache[t].handle;
+}
+
+void
+_hivex_release_iconv (hive_h *h, recode_type t)
+{
+  glthread_lock_unlock (&h->iconv_cache[t].mutex);
+}
+
 hive_h *
 hivex_open (const char *filename, int flags)
 {
@@ -164,11 +192,17 @@ hivex_open (const char *filename, int flags)
     goto error;
   }
 
+  for (int t=0; t<3; t++) {
+    glthread_lock_init (&h->iconv_cache[t].mutex);
+    h->iconv_cache[t].handle = NULL;
+  }
+
   /* Last modified time. */
   h->last_modified = le64toh ((int64_t) h->hdr->last_modified);
 
   if (h->msglvl >= 2) {
-    char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64);
+    char *name = _hivex_recode (h, utf16le_to_utf8,
+                                h->hdr->name, 64, NULL);
 
     fprintf (stderr,
              "hivex_open: header fields:\n"
@@ -424,6 +458,12 @@ hivex_close (hive_h *h)
   else
     r = 0;
   free (h->filename);
+  for (int t=0; t<3; t++) {
+    if (h->iconv_cache[t].handle != NULL) {
+      iconv_close (h->iconv_cache[t].handle);
+      h->iconv_cache[t].handle = NULL;
+    }
+  }
   free (h);
 
   return r;
diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
index 9a497ed..d04ae3c 100644
--- a/lib/hivex-internal.h
+++ b/lib/hivex-internal.h
@@ -22,6 +22,8 @@
 #include <stdarg.h>
 #include <stddef.h>
 #include <string.h>
+#include <iconv.h>
+#include <glthread/lock.h>
 
 #include "byte_conversions.h"
 
@@ -35,6 +37,13 @@
 #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0)
 #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0)
 
+typedef enum {
+  utf8_to_latin1 = 0,
+  latin1_to_utf8,
+  utf8_to_utf16le,
+  utf16le_to_utf8,
+} recode_type;
+
 struct hive_h {
   char *filename;
   int fd;
@@ -79,6 +88,11 @@ struct hive_h {
   /* Internal data for mmap replacement */
   void *p_winmap;
 #endif
+
+  struct {
+    gl_lock_t mutex;
+    iconv_t *handle;
+  } iconv_cache[4];
 };
 
 /* Format of registry blocks. NB. All fields are little endian. */
@@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list);
 extern size_t * _hivex_return_offset_list (offset_list *list);
 extern void _hivex_print_offset_list (offset_list *list, FILE *fp);
 
+/* handle.c */
+extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r);
+extern void  _hivex_release_iconv (hive_h *h, recode_type r);
+
 /* utf16.c */
-extern char * _hivex_recode (const char *input_encoding,
-                             const char *input, size_t input_len,
-                             const char *output_encoding, size_t *output_len);
-#define _hivex_windows_utf16_to_utf8(_input, _len) \
-  _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL)
-#define _hivex_windows_latin1_to_utf8(_input, _len) \
-  _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL)
-extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16);
+extern char * _hivex_recode (hive_h *h, recode_type r,
+                             const char *input, size_t input_len, size_t *output_len);
+extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16);
 extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len);
-extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16);
+extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16);
 
 /* util.c */
 extern void _hivex_free_strings (char **argv);
diff --git a/lib/node.c b/lib/node.c
index 36e61c4..21cd127 100644
--- a/lib/node.c
+++ b/lib/node.c
@@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node)
   }
   size_t flags = le16toh (nk->flags);
   if (flags & 0x20) {
-    return _hivex_windows_latin1_to_utf8 (nk->name, len);
+    return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL);
   } else {
-    return _hivex_windows_utf16_to_utf8 (nk->name, len);
+    return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL);
   }
 }
 
@@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node)
     return 0;
   }
 
-  return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20));
+  return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) & 0x20));
 }
 
 
diff --git a/lib/utf16.c b/lib/utf16.c
index 238f40a..c0f0b05 100644
--- a/lib/utf16.c
+++ b/lib/utf16.c
@@ -30,24 +30,21 @@
 #include "hivex-internal.h"
 
 char *
-_hivex_recode (const char *input_encoding, const char *input, size_t input_len,
-               const char *output_encoding, size_t *output_len)
+_hivex_recode (hive_h *h, recode_type t,
+               const char *input, size_t input_len, size_t *output_len)
 {
-  iconv_t ic = iconv_open (output_encoding, input_encoding);
-  if (ic == (iconv_t) -1)
-    return NULL;
-
   /* iconv(3) has an insane interface ... */
 
   size_t outalloc = input_len;
 
+  iconv_t *ic = _hivex_get_iconv (h, t);
  again:;
   size_t inlen = input_len;
   size_t outlen = outalloc;
   char *out = malloc (outlen + 1);
   if (out == NULL) {
     int err = errno;
-    iconv_close (ic);
+    _hivex_release_iconv (h, t);
     errno = err;
     return NULL;
   }
@@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len,
 
   size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp, &outlen);
   if (r == (size_t) -1) {
+    int err = errno;
     if (errno == E2BIG) {
-      int err = errno;
       /* Reset errno here because we don't want to accidentally
        * return E2BIG to a library caller.
        */
-      errno = 0;
       size_t prev = outalloc;
       /* Try again with a larger output buffer. */
       free (out);
       outalloc *= 2;
       if (outalloc < prev) {
-        iconv_close (ic);
+        _hivex_release_iconv (h, t);
         errno = err;
         return NULL;
       }
@@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len,
     }
     else {
       /* Else some conversion failure, eg. EILSEQ, EINVAL. */
-      int err = errno;
-      iconv_close (ic);
+      _hivex_release_iconv (h, t);
       free (out);
       errno = err;
       return NULL;
     }
   }
 
+  _hivex_release_iconv (h, t);
   *outp = '\0';
-  iconv_close (ic);
   if (output_len != NULL)
     *output_len = outp - out;
-
   return out;
 }
 
@@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len,
  * storing in the hive file, as needed.
  */
 char*
-_hivex_encode_string(const char *str, size_t *size, int *utf16)
+_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16)
 {
   char* outstr;
   *utf16 = 0;
-  outstr = _hivex_recode ("UTF-8", str, strlen(str),
-                          "LATIN1", size);
+  outstr = _hivex_recode (h, utf8_to_latin1,
+                          str, strlen(str), size);
   if (outstr != NULL)
     return outstr;
   *utf16 = 1;
-  outstr = _hivex_recode ("UTF-8", str, strlen(str),
-                          "UTF-16LE", size);
+  outstr = _hivex_recode (h, utf8_to_utf16le,
+                          str, strlen(str), size);
   return outstr;
 }
 
@@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len)
 }
 
 size_t
-_hivex_utf8_strlen (const char* str, size_t len, int utf16)
+_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16)
 {
-  const char *encoding = utf16 ? "UTF-16LE" : "LATIN1";
+  recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8;
   size_t ret = 0;
-  char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret);
+  char *buf = _hivex_recode (h, t, str, len, &ret);
   free(buf);
   return ret;
 }
diff --git a/lib/value.c b/lib/value.c
index 2dfe006..3257b53 100644
--- a/lib/value.c
+++ b/lib/value.c
@@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value)
     SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len);
     return 0;
   }
-  return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01));
+  return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) & 0x01));
 }
 
 char *
@@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value)
     return NULL;
   }
   if (flags & 0x01) {
-    return _hivex_windows_latin1_to_utf8 (vk->name, len);
+    return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL);
   } else {
-    return _hivex_windows_utf16_to_utf8 (vk->name, len);
+    return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL);
   }
 }
 
@@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value)
   if (slen < len)
     len = slen;
 
-  char *ret = _hivex_windows_utf16_to_utf8 (data, len);
+  char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL);
   free (data);
   if (ret == NULL)
     return NULL;
@@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value)
     }
     ret = ret2;
 
-    ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen);
+    ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL);
     ret[nr_strings] = NULL;
     if (ret[nr_strings-1] == NULL) {
       _hivex_free_strings (ret);
diff --git a/lib/write.c b/lib/write.c
index 33b64e4..70105c9 100644
--- a/lib/write.c
+++ b/lib/write.c
@@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name)
   size_t recoded_name_len;
   int use_utf16 = 0;
   char *recoded_name =
-    _hivex_encode_string (name, &recoded_name_len, &use_utf16);
+    _hivex_encode_string (h, name, &recoded_name_len, &use_utf16);
   if (recoded_name == NULL) {
     SET_ERRNO (EINVAL, "malformed name");
     return 0;
@@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node,
     static const char vk_id[2] = { 'v', 'k' };
     size_t recoded_name_len;
     int use_utf16;
-    char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len,
+    char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len,
                                                &use_utf16);
     seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len;
     size_t vk_offs = allocate_block (h, seg_len, vk_id);
diff --git a/m4/.gitignore b/m4/.gitignore
index 05ca27c..a19035c 100644
--- a/m4/.gitignore
+++ b/m4/.gitignore
@@ -138,3 +138,5 @@
 /xalloc.m4
 /xsize.m4
 /xstrtol.m4
+/thread.m4
+/yield.m4
-- 
2.11.0




More information about the Libguestfs mailing list