[Libguestfs] [PATCH] hivexml
Alex Nelson
ajnelson at cs.ucsc.edu
Wed Feb 1 21:50:40 UTC 2012
I will try to test this by tomorrow afternoon and let you know how well it works compared to what I tried previously.
--Alex
On Feb 1, 2012, at 13:08 , Todd Mummert wrote:
> I changed the name of the function to is_valid_xml_string() to avoid
> camel-case. I also briefly commented the function itself, since it
> does expect its input to be valid UTF-8. Also, the patch was missing
> an end-attribute call, which apparently didn't matter in this case
> since the element was being ended subsequently anyway -- but its
> omission was an oversight, not intentional.
>
> The updated patch to handle illegal XML characters that appear in
> registry values is below:
>
> [todd at tm-nc hivex]# cat hivexml.patch
> diff -urNp hivex-1.3.3-orig/xml/hivexml.c hivex-1.3.3-new/xml/hivexml.c
> --- hivex-1.3.3-orig/xml/hivexml.c 2011-09-22 09:17:09.000000000 -0400
> +++ hivex-1.3.3-new/xml/hivexml.c 2012-02-01 15:51:06.481728986 -0500
> @@ -33,6 +33,7 @@
> #endif
>
> #include <libxml/xmlwriter.h>
> +#include <libxml/chvalid.h>
>
> #include "hivex.h"
>
> @@ -208,6 +209,26 @@ filetime_to_8601 (int64_t windows_ticks)
> return ret;
> }
>
> +/*
> + * Check that a UTF-8 string contains only valid XML characters.
> + * There is an assumption that the input string is valid UTF-8.
> + */
> +static int
> +is_valid_xml_string(const char *string)
> +{
> + int c;
> + int pos = 0;
> + int len = strlen(string);
> + int charlen = len;
> + while ((c = xmlGetUTF8Char(string+pos, &charlen)) >= 0) {
> + if (xmlIsCharQ(c) == 0)
> + return 0;
> + pos += charlen;
> + charlen = len - pos;
> + }
> + return 1;
> +}
> +
> static int
> node_start (hive_h *h, void *writer_v, hive_node_h node, const char *name)
> {
> @@ -265,6 +286,20 @@ end_value (xmlTextWriterPtr writer)
> XML_CHECK (xmlTextWriterEndElement, (writer));
> }
>
> +static void
> +start_string(xmlTextWriterPtr writer, const char *encoding)
> +{
> + XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
> + if (encoding)
> + XML_CHECK (xmlTextWriterWriteAttribute, (writer, BAD_CAST
> "encoding", BAD_CAST encoding));
> +}
> +
> +static void
> +end_string(xmlTextWriterPtr writer)
> +{
> + XML_CHECK (xmlTextWriterEndElement, (writer));
> +}
> +
> static int
> value_string (hive_h *h, void *writer_v, hive_node_h node, hive_value_h value,
> hive_type t, size_t len, const char *key, const char *str)
> @@ -292,9 +327,14 @@ value_string (hive_h *h, void *writer_v,
> type = "unknown";
> }
>
> - start_value (writer, key, type, NULL);
> + int validXML = is_valid_xml_string(str);
> + start_value (writer, key, type, validXML ? NULL : "base64");
> XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
> - XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
> + if (validXML)
> + XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST str));
> + else
> + XML_CHECK (xmlTextWriterWriteBase64, (writer, str, 0, strlen(str)));
> +
> XML_CHECK (xmlTextWriterEndAttribute, (writer));
> end_value (writer);
> return 0;
> @@ -310,9 +350,15 @@ value_multiple_strings (hive_h *h, void
>
> size_t i;
> for (i = 0; argv[i] != NULL; ++i) {
> - XML_CHECK (xmlTextWriterStartElement, (writer, BAD_CAST "string"));
> - XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
> - XML_CHECK (xmlTextWriterEndElement, (writer));
> + int validXML = is_valid_xml_string(argv[i]);
> + start_string(writer, validXML ? NULL : "base64");
> + XML_CHECK (xmlTextWriterStartAttribute, (writer, BAD_CAST "value"));
> + if (validXML)
> + XML_CHECK (xmlTextWriterWriteString, (writer, BAD_CAST argv[i]));
> + else
> + XML_CHECK (xmlTextWriterWriteBase64, (writer, argv[i], 0,
> strlen(argv[i])));
> + XML_CHECK (xmlTextWriterEndAttribute, (writer));
> + end_string(writer);
> }
>
> end_value (writer);
> [todd at tm-nc hivex]#
>
> _______________________________________________
> Libguestfs mailing list
> Libguestfs at redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
More information about the Libguestfs
mailing list