[Libguestfs] [PATCH] Add a cache for iconv_t handles to hive_t

Richard W.M. Jones rjones at redhat.com
Fri Feb 9 13:30:03 UTC 2018


On Fri, Feb 09, 2018 at 01:52:52AM +0100, Hilko Bengen wrote:
> It was brought to my attention that dumping a registry hive causes a
> lot of time spent in disk I/O activity because iconv_open() and
> iconv_close() are called for every key. Every iconv_open() call causes
> /usr/lib/.../gconv/$ENCODING.so to be opened and mapped.
> 
> The iconv_t handles are now cached in the hive_h struct; they are
> opened on-demand and re-used.
> 
> On my ~10 year old Lenovo T60, I have seen 57% savings in the overal
> runtime of running
>
>     hivexregedit --export windows-8-enterprise-software.hive '\\'
> ---
>  lib/handle.c         | 43 ++++++++++++++++++++++++++++++++++++++++++-
>  lib/hivex-internal.h | 31 ++++++++++++++++++++++---------
>  lib/node.c           |  6 +++---
>  lib/utf16.c          | 38 ++++++++++++++++----------------------
>  lib/value.c          | 10 +++++-----
>  lib/write.c          |  4 ++--
>  6 files changed, 90 insertions(+), 42 deletions(-)
> 
> diff --git a/lib/handle.c b/lib/handle.c
> index 9dcf81d..7d715df 100644
> --- a/lib/handle.c
> +++ b/lib/handle.c
> @@ -31,6 +31,9 @@
>  #include <errno.h>
>  #include <assert.h>
>  
> +#include <iconv.h>
> +#include <pthread.h>

This makes hivex depend unconditionally on pthread.  It's possible to
make this optional so that hivex would use locking when the main
program is already linked to pthread but would skip locking otherwise,
and it's a fairly simply change:

(1) Edit ‘bootstrap’ and add ‘threadlib’ to the list of modules near
the end.  You'll probably need to rerun ./bootstrap after this.

(2) Read ‘.gnulib/modules/threadlib’ and follow the instructions for
modifying configure.ac and Makefile.am.

(3) Replace #include <pthread.h> -> #include "glthread/lock.h".

(4) Replace any calls to pthread_mutex_* with gl_lock_*.  It's
probably not necessary to check return codes, unless they need to be
recursive in which case use gl_recursive_lock_* instead.

>  #ifdef HAVE_MMAP
>  #include <sys/mman.h>
>  #else
> @@ -62,6 +65,32 @@ header_checksum (const hive_h *h)
>  
>  #define HIVEX_OPEN_MSGLVL_MASK (HIVEX_OPEN_VERBOSE|HIVEX_OPEN_DEBUG)
>  
> +iconv_t *
> +_hivex_get_iconv (hive_h *h, recode_type t)
> +{
> +  pthread_mutex_lock (&h->iconv_cache[t].mutex);
> +  if (h->iconv_cache[t].handle == NULL) {
> +    if (t == utf8_to_latin1)
> +      h->iconv_cache[t].handle = iconv_open ("LATIN1", "UTF-8");
> +    else if (t == latin1_to_utf8)
> +      h->iconv_cache[t].handle = iconv_open ("UTF-8", "LATIN1");
> +    else if (t == utf8_to_utf16le)
> +      h->iconv_cache[t].handle = iconv_open ("UTF-16LE", "UTF-8");
> +    else if (t == utf16le_to_utf8)
> +      h->iconv_cache[t].handle = iconv_open ("UTF-8", "UTF-16LE");
> +  } else {
> +    /* reinitialize iconv context */
> +    iconv (h->iconv_cache[t].handle, NULL, 0, NULL, 0);
> +  }
> +  return h->iconv_cache[t].handle;
> +}
> +
> +void
> +_hivex_release_iconv (hive_h *h, recode_type t)
> +{
> +  pthread_mutex_unlock (&h->iconv_cache[t].mutex);
> +}
> +
>  hive_h *
>  hivex_open (const char *filename, int flags)
>  {
> @@ -164,11 +193,17 @@ hivex_open (const char *filename, int flags)
>      goto error;
>    }
>  
> +  for (int t=0; t<3; t++) {
> +    pthread_mutex_init (&h->iconv_cache[t].mutex, NULL);
> +    h->iconv_cache[t].handle = NULL;
> +  }
> +
>    /* Last modified time. */
>    h->last_modified = le64toh ((int64_t) h->hdr->last_modified);
>  
>    if (h->msglvl >= 2) {
> -    char *name = _hivex_windows_utf16_to_utf8 (h->hdr->name, 64);
> +    char *name = _hivex_recode (h, utf16le_to_utf8,
> +                                h->hdr->name, 64, NULL);
>  
>      fprintf (stderr,
>               "hivex_open: header fields:\n"
> @@ -424,6 +459,12 @@ hivex_close (hive_h *h)
>    else
>      r = 0;
>    free (h->filename);
> +  for (int t=0; t<3; t++) {
> +    if (h->iconv_cache[t].handle != NULL) {
> +      iconv_close (h->iconv_cache[t].handle);
> +      h->iconv_cache[t].handle = NULL;
> +    }
> +  }
>    free (h);
>  
>    return r;
> diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
> index 9a497ed..fefdc81 100644
> --- a/lib/hivex-internal.h
> +++ b/lib/hivex-internal.h
> @@ -23,6 +23,8 @@
>  #include <stddef.h>
>  #include <string.h>
>  
> +#include <iconv.h>
> +
>  #include "byte_conversions.h"
>  
>  #define STREQ(a,b) (strcmp((a),(b)) == 0)
> @@ -35,6 +37,13 @@
>  #define STRCASENEQLEN(a,b,n) (strncasecmp((a),(b),(n)) != 0)
>  #define STRPREFIX(a,b) (strncmp((a),(b),strlen((b))) == 0)
>  
> +typedef enum {
> +  utf8_to_latin1 = 0,
> +  latin1_to_utf8,
> +  utf8_to_utf16le,
> +  utf16le_to_utf8,
> +} recode_type;
> +
>  struct hive_h {
>    char *filename;
>    int fd;
> @@ -79,6 +88,11 @@ struct hive_h {
>    /* Internal data for mmap replacement */
>    void *p_winmap;
>  #endif
> +
> +  struct {
> +    pthread_mutex_t mutex;
> +    iconv_t *handle;
> +  } iconv_cache[4];
>  };
>  
>  /* Format of registry blocks. NB. All fields are little endian. */
> @@ -282,17 +296,16 @@ extern void _hivex_free_offset_list (offset_list *list);
>  extern size_t * _hivex_return_offset_list (offset_list *list);
>  extern void _hivex_print_offset_list (offset_list *list, FILE *fp);
>  
> +/* handle.c */
> +extern iconv_t * _hivex_get_iconv (hive_h *h, recode_type r);
> +extern void  _hivex_release_iconv (hive_h *h, recode_type r);
> +
>  /* utf16.c */
> -extern char * _hivex_recode (const char *input_encoding,
> -                             const char *input, size_t input_len,
> -                             const char *output_encoding, size_t *output_len);
> -#define _hivex_windows_utf16_to_utf8(_input, _len) \
> -  _hivex_recode ("UTF-16LE", _input, _len, "UTF-8", NULL)
> -#define _hivex_windows_latin1_to_utf8(_input, _len) \
> -  _hivex_recode ("LATIN1", _input, _len, "UTF-8", NULL)
> -extern char* _hivex_encode_string(const char *str, size_t *size, int *utf16);
> +extern char * _hivex_recode (hive_h *h, recode_type r,
> +                             const char *input, size_t input_len, size_t *output_len);
> +extern char* _hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16);
>  extern size_t _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len);
> -extern size_t _hivex_utf8_strlen (const char* str, size_t len, int utf16);
> +extern size_t _hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16);
>  
>  /* util.c */
>  extern void _hivex_free_strings (char **argv);
> diff --git a/lib/node.c b/lib/node.c
> index 36e61c4..21cd127 100644
> --- a/lib/node.c
> +++ b/lib/node.c
> @@ -90,9 +90,9 @@ hivex_node_name (hive_h *h, hive_node_h node)
>    }
>    size_t flags = le16toh (nk->flags);
>    if (flags & 0x20) {
> -    return _hivex_windows_latin1_to_utf8 (nk->name, len);
> +    return _hivex_recode (h, latin1_to_utf8, nk->name, len, NULL);
>    } else {
> -    return _hivex_windows_utf16_to_utf8 (nk->name, len);
> +    return _hivex_recode (h, utf16le_to_utf8, nk->name, len, NULL);
>    }
>  }
>  
> @@ -116,7 +116,7 @@ hivex_node_name_len (hive_h *h, hive_node_h node)
>      return 0;
>    }
>  
> -  return _hivex_utf8_strlen (nk->name, len, ! (le16toh (nk->flags) & 0x20));
> +  return _hivex_utf8_strlen (h, nk->name, len, ! (le16toh (nk->flags) & 0x20));
>  }
>  
>  
> diff --git a/lib/utf16.c b/lib/utf16.c
> index 238f40a..c0f0b05 100644
> --- a/lib/utf16.c
> +++ b/lib/utf16.c
> @@ -30,24 +30,21 @@
>  #include "hivex-internal.h"
>  
>  char *
> -_hivex_recode (const char *input_encoding, const char *input, size_t input_len,
> -               const char *output_encoding, size_t *output_len)
> +_hivex_recode (hive_h *h, recode_type t,
> +               const char *input, size_t input_len, size_t *output_len)
>  {
> -  iconv_t ic = iconv_open (output_encoding, input_encoding);
> -  if (ic == (iconv_t) -1)
> -    return NULL;
> -
>    /* iconv(3) has an insane interface ... */
>  
>    size_t outalloc = input_len;
>  
> +  iconv_t *ic = _hivex_get_iconv (h, t);
>   again:;
>    size_t inlen = input_len;
>    size_t outlen = outalloc;
>    char *out = malloc (outlen + 1);
>    if (out == NULL) {
>      int err = errno;
> -    iconv_close (ic);
> +    _hivex_release_iconv (h, t);
>      errno = err;
>      return NULL;
>    }
> @@ -56,18 +53,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len,
>  
>    size_t r = iconv (ic, (ICONV_CONST char **) &inp, &inlen, &outp, &outlen);
>    if (r == (size_t) -1) {
> +    int err = errno;
>      if (errno == E2BIG) {
> -      int err = errno;
>        /* Reset errno here because we don't want to accidentally
>         * return E2BIG to a library caller.
>         */
> -      errno = 0;
>        size_t prev = outalloc;
>        /* Try again with a larger output buffer. */
>        free (out);
>        outalloc *= 2;
>        if (outalloc < prev) {
> -        iconv_close (ic);
> +        _hivex_release_iconv (h, t);
>          errno = err;
>          return NULL;
>        }
> @@ -75,19 +71,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len,
>      }
>      else {
>        /* Else some conversion failure, eg. EILSEQ, EINVAL. */
> -      int err = errno;
> -      iconv_close (ic);
> +      _hivex_release_iconv (h, t);
>        free (out);
>        errno = err;
>        return NULL;
>      }
>    }
>  
> +  _hivex_release_iconv (h, t);
>    *outp = '\0';
> -  iconv_close (ic);
>    if (output_len != NULL)
>      *output_len = outp - out;
> -
>    return out;
>  }
>  
> @@ -95,17 +89,17 @@ _hivex_recode (const char *input_encoding, const char *input, size_t input_len,
>   * storing in the hive file, as needed.
>   */
>  char*
> -_hivex_encode_string(const char *str, size_t *size, int *utf16)
> +_hivex_encode_string (hive_h *h, const char *str, size_t *size, int *utf16)
>  {
>    char* outstr;
>    *utf16 = 0;
> -  outstr = _hivex_recode ("UTF-8", str, strlen(str),
> -                          "LATIN1", size);
> +  outstr = _hivex_recode (h, utf8_to_latin1,
> +                          str, strlen(str), size);
>    if (outstr != NULL)
>      return outstr;
>    *utf16 = 1;
> -  outstr = _hivex_recode ("UTF-8", str, strlen(str),
> -                          "UTF-16LE", size);
> +  outstr = _hivex_recode (h, utf8_to_utf16le,
> +                          str, strlen(str), size);
>    return outstr;
>  }
>  
> @@ -128,11 +122,11 @@ _hivex_utf16_string_len_in_bytes_max (const char *str, size_t len)
>  }
>  
>  size_t
> -_hivex_utf8_strlen (const char* str, size_t len, int utf16)
> +_hivex_utf8_strlen (hive_h *h, const char* str, size_t len, int utf16)
>  {
> -  const char *encoding = utf16 ? "UTF-16LE" : "LATIN1";
> +  recode_type t = utf16 ? utf16le_to_utf8 : latin1_to_utf8;
>    size_t ret = 0;
> -  char *buf = _hivex_recode(encoding, str, len, "UTF-8", &ret);
> +  char *buf = _hivex_recode (h, t, str, len, &ret);
>    free(buf);
>    return ret;
>  }
> diff --git a/lib/value.c b/lib/value.c
> index 2dfe006..3257b53 100644
> --- a/lib/value.c
> +++ b/lib/value.c
> @@ -209,7 +209,7 @@ hivex_value_key_len (hive_h *h, hive_value_h value)
>      SET_ERRNO (EFAULT, "key length is too long (%zu, %zu)", len, seg_len);
>      return 0;
>    }
> -  return _hivex_utf8_strlen (vk->name, len, ! (le16toh (vk->flags) & 0x01));
> +  return _hivex_utf8_strlen (h, vk->name, len, ! (le16toh (vk->flags) & 0x01));
>  }
>  
>  char *
> @@ -232,9 +232,9 @@ hivex_value_key (hive_h *h, hive_value_h value)
>      return NULL;
>    }
>    if (flags & 0x01) {
> -    return _hivex_windows_latin1_to_utf8 (vk->name, len);
> +    return _hivex_recode (h, latin1_to_utf8, vk->name, len, NULL);
>    } else {
> -    return _hivex_windows_utf16_to_utf8 (vk->name, len);
> +    return _hivex_recode (h, utf16le_to_utf8, vk->name, len, NULL);
>    }
>  }
>  
> @@ -471,7 +471,7 @@ hivex_value_string (hive_h *h, hive_value_h value)
>    if (slen < len)
>      len = slen;
>  
> -  char *ret = _hivex_windows_utf16_to_utf8 (data, len);
> +  char *ret = _hivex_recode (h, utf16le_to_utf8, data, len, NULL);
>    free (data);
>    if (ret == NULL)
>      return NULL;
> @@ -538,7 +538,7 @@ hivex_value_multiple_strings (hive_h *h, hive_value_h value)
>      }
>      ret = ret2;
>  
> -    ret[nr_strings-1] = _hivex_windows_utf16_to_utf8 (p, plen);
> +    ret[nr_strings-1] = _hivex_recode (h, utf16le_to_utf8, p, plen, NULL);
>      ret[nr_strings] = NULL;
>      if (ret[nr_strings-1] == NULL) {
>        _hivex_free_strings (ret);
> diff --git a/lib/write.c b/lib/write.c
> index 33b64e4..70105c9 100644
> --- a/lib/write.c
> +++ b/lib/write.c
> @@ -610,7 +610,7 @@ hivex_node_add_child (hive_h *h, hive_node_h parent, const char *name)
>    size_t recoded_name_len;
>    int use_utf16 = 0;
>    char *recoded_name =
> -    _hivex_encode_string (name, &recoded_name_len, &use_utf16);
> +    _hivex_encode_string (h, name, &recoded_name_len, &use_utf16);
>    if (recoded_name == NULL) {
>      SET_ERRNO (EINVAL, "malformed name");
>      return 0;
> @@ -959,7 +959,7 @@ hivex_node_set_values (hive_h *h, hive_node_h node,
>      static const char vk_id[2] = { 'v', 'k' };
>      size_t recoded_name_len;
>      int use_utf16;
> -    char* recoded_name = _hivex_encode_string (values[i].key, &recoded_name_len,
> +    char* recoded_name = _hivex_encode_string (h, values[i].key, &recoded_name_len,
>                                                 &use_utf16);
>      seg_len = sizeof (struct ntreg_vk_record) + recoded_name_len;
>      size_t vk_offs = allocate_block (h, seg_len, vk_id);

The rest looks OK.

Rich.

-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
virt-p2v converts physical machines to virtual machines.  Boot with a
live CD or over the network (PXE) and turn machines into KVM guests.
http://libguestfs.org/virt-v2v




More information about the Libguestfs mailing list