[PATCH 1/2] util: xml: Introduce infrastructure to support custom XML validators

Mon Sep 26 11:38:10 UTC 2022

On 9/23/22 17:42, Peter Krempa wrote:
> The XML from libxml2 we use in libvirt has in many cases very bad error
> messages, which make it very difficult to point to the specific problem
> with the validated XML.
> 
> This patch adds infrastructure which will allow users to specify a
> custom XML validator program which will be used to validate the XML
> before we invoke libxml's validator.
> 
> The configuration is done via a global variable as normally the daemon
> config is not accessible from inside the runtime.
> 
> Signed-off-by: Peter Krempa <pkrempa at redhat.com>
> ---
>  src/libvirt_private.syms |  1 +
>  src/util/virxml.c        | 93 +++++++++++++++++++++++++++++++++++++++-
>  src/util/virxml.h        |  3 ++
>  3 files changed, 95 insertions(+), 2 deletions(-)
> 
> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index 25794bc2f4..3e90cd4599 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -3678,6 +3678,7 @@ virXMLNodeNameEqual;
>  virXMLNodeSanitizeNamespaces;
>  virXMLNodeToString;
>  virXMLParseHelper;
> +virXMLParseSetCustomValidator;
>  virXMLPickShellSafeComment;
>  virXMLPropEnum;
>  virXMLPropEnumDefault;
> diff --git a/src/util/virxml.c b/src/util/virxml.c
> index d6e2e5dd91..aea9b3aac3 100644
> --- a/src/util/virxml.c
> +++ b/src/util/virxml.c
> @@ -33,6 +33,7 @@
>  #include "virfile.h"
>  #include "virstring.h"
>  #include "virutil.h"
> +#include "vircommand.h"
>  #include "configmake.h"
> 
>  #define VIR_FROM_THIS VIR_FROM_XML
> @@ -41,6 +42,8 @@
>          virReportErrorHelper(from, code, __FILE__, \
>                               __FUNCTION__, __LINE__, __VA_ARGS__)
> 
> +static char *virXMLCustomValidatorPath;
> +
>  /* Internal data to be passed to SAX parser and used by error handler. */
>  struct virParserData {
>      int domcode;
> @@ -1029,6 +1032,68 @@ catchXMLError(void *ctx, const char *msg G_GNUC_UNUSED, ...)
>      }
>  }
> 
> +
> +/**
> + * virXMLParseHelperValidateCustom:
> + *
> + * Invokes an external validator program configured in the
> + * 'virXMLCustomValidatorPath' global variable and captures the validation
> + * output. The program is invoked as:
> + *
> + *  /path/to/program /path/to/schema
> + *
> + * The XML to validate is fed on stdin of the program. The program is expected
> + * to return 0 on successful validation and non-zero on failure to validate,
> + * in which case STDOUT of the program is used in the error message reported
> + * by libvirt.
> + */
> +static int
> +virXMLParseHelperValidateCustom(const char *schema,
> +                                const char *filename,
> +                                const char *xmlStr)
> +{
> +    g_autoptr(virCommand) cmd = virCommandNewArgList(virXMLCustomValidatorPath,
> +                                                     schema, NULL);
> +    g_autofree char *filebuf = NULL;
> +    g_autofree char *outbuf = NULL;
> +    int exitstatus = 0;
> +
> +    if (filename) {
> +        /* virsh uses 10 MiB as max XML size */
> +        if (virFileReadAll(filename, 10 * 1024 * 1024, &filebuf) < 0)
> +            return -1;
> +
> +        xmlStr = filebuf;
> +    }
> +
> +    if (!virFileIsExecutable(virXMLCustomValidatorPath)) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("custom XML validator program '%s' is not executable"),
> +                       virXMLCustomValidatorPath);
> +        return -1;
> +    }
> +
> +    virCommandSetInputBuffer(cmd, xmlStr);
> +    virCommandSetOutputBuffer(cmd, &outbuf);
> +    virCommandDoAsyncIO(cmd);
> +
> +    if (virCommandRunAsync(cmd, NULL) < 0)
> +        return -1;
> +
> +    if (virCommandWait(cmd, &exitstatus) < 0)
> +        return -1;
> +

You can use plain:

  virCommandSetInputBuffer();
  virCommandSetOutputBuffer();

  if (virCommandRun(cmd, &exitstatus) < 0) ...

No need for the Async versions; we don't need to do anything else while
the command is running. Async is mostly for when we pass FDs instead of
buffers and need to read from those FDs to avoid blocking the command.

Also, in the next patch you claim stderr is also captured. For that to
be true, you'll need to add this in the appropriate place:

  virCommandSetErrorBuffer(cmd, &errbuf);

> +    if (exitstatus != 0) {
> +        virReportError(VIR_ERR_XML_INVALID_SCHEMA,
> +                       _("Unable to validate doc against %s\n%s"),
> +                       schema, outbuf);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +

Michal