[libvirt] [PATCHv6 1/5] virstring.h/c: Util method for finding regexp patterns in some strings

Daniel P. Berrange berrange at redhat.com
Wed Feb 19 16:46:22 UTC 2014


On Thu, Jan 23, 2014 at 10:28:29AM +0100, Manuel VIVES wrote:
> ---
>  po/POTFILES.in           |    1 +
>  src/libvirt_private.syms |    1 +
>  src/util/virstring.c     |   97 ++++++++++++++++++++++++++++++++++++++++++++++
>  src/util/virstring.h     |    3 ++
>  4 files changed, 102 insertions(+)

Can you add something to tests/virstringtest.c to validate your
new method with various interesting input strings?

> diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
> index d1a58f9..68ca39d 100644
> --- a/src/libvirt_private.syms
> +++ b/src/libvirt_private.syms
> @@ -1763,6 +1763,7 @@ virStorageFileResize;
>  # util/virstring.h
>  virArgvToString;
>  virAsprintfInternal;
> +virSearchRegex;

Let's call this new method 'virStringSearchRegex' so we have a normal
name prefix.

> diff --git a/src/util/virstring.c b/src/util/virstring.c
> index 8d0ca70..3c93450 100644
> --- a/src/util/virstring.c
> +++ b/src/util/virstring.c
> @@ -23,6 +23,7 @@
>  
>  #include <stdlib.h>
>  #include <stdio.h>
> +#include <regex.h>
>  
>  #include "c-ctype.h"
>  #include "virstring.h"
> @@ -645,3 +646,99 @@ int virStringSortRevCompare(const void *a, const void *b)
>  
>      return strcmp(*sb, *sa);
>  }
> +
> +/**
> + * virSearchRegex:
> + * Allows you to get the nth occurrence of a substring in sourceString which matches
> + * a POSIX Extended regular expression pattern.
> + * If there is no substring, result is not modified.
> + * return -1 on error, 0 if not found and 1 if found.
> + *
> + * @sourceString: String to parse
> + * @occurrence: return occurrence 'n' (starting from 0) of a sub-string that
> + *              matches the pattern.
> + * @regexp: POSIX Extended regular expression pattern used for matching
> + * @result: nth occurrence substring matching the @regexp pattern
> + * @code
> +    char *source = "6853a496-1c10-472e-867a-8244937bd6f0
> +                    773ab075-4cd7-4fc2-8b6e-21c84e9cb391
> +                    bbb3c75c-d60f-43b0-b802-fd56b84a4222
> +                    60c04aa1-0375-4654-8d9f-e149d9885273
> +                    4548d465-9891-4c34-a184-3b1c34a26aa8";
> +    char *ret1=NULL;
> +    char *ret2=NULL;
> +    char *ret3=NULL;
> +    virSearchRegex(source,
> +                   4,
> +                   "([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})",
> +                   &ret1);
> +                            //ret1 = "4548d465-9891-4c34-a184-3b1c34a26aa8"
> +    virSearchRegex(source,
> +                   0,
> +                   "([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})",
> +                   &ret2);
> +                            //ret2 = "6853a496-1c10-472e-867a-8244937bd6f0"
> +    virSearchRegex(source,
> +                   1,
> +                   "([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})",
> +                   &ret3);
> +                            //ret3 = "773ab075-4cd7-4fc2-8b6e-21c84e9cb391"
> + * @endcode

> +int
> +virSearchRegex(const char *sourceString,
> +               unsigned int occurrence,
> +               const char *regexp,
> +               char **result)

This kind of usage leads to pretty inefficient code. Why not just
change it to return 'char ***matches' and thus give the caller all
possible matches in one call? And 'unsigned int occurrence' could
become 'size_t maxMatches' to limit the number of matches returned.




> +{
> +    regex_t pregUuidBracket;

This variable name is a little odd - let's just call it 're'.

> +    size_t i = 0;
> +    size_t nmatch = 0;
> +    regmatch_t *pmatch = NULL;
> +    int ret = -1;
> +    int regError = -1;
> +
> +    regError = regcomp(&pregUuidBracket, regexp, REG_EXTENDED);
> +    if (regError != 0) {
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("Error while compiling regular expression: %d"),
> +                       regError);
> +        goto cleanup;
> +    }
> +    nmatch = pregUuidBracket.re_nsub;
> +    if (VIR_ALLOC_N(pmatch, nmatch) < 0)
> +        goto cleanup;
> +
> +    while (i < (occurrence+1)) {
> +        if (regexec(&pregUuidBracket, sourceString, nmatch, pmatch, 0) == 0) {
> +            regoff_t start = pmatch[0].rm_so;
> +            regoff_t end = pmatch[0].rm_eo;
> +            if (i == occurrence ||
> +                (occurrence > i && regexec(&pregUuidBracket, &sourceString[end],
> +                                         nmatch, pmatch, 0) != 0)) {
> +                /* We copy only if i == position (so that it is the uuid we're looking for),
> +                 * or position > i AND there is no matches left in the rest of the string
> +                 * (this is the case where we give a biggest @occurence than the
> +                 * number of matches and we want to return the last one)
> +                 */
> +                if (VIR_STRNDUP(*result, sourceString + start, end - start) < 0)
> +                    goto cleanup;
> +
> +                ret = 1;
> +                goto cleanup;
> +            }
> +            sourceString = &sourceString[end];
> +        } else {
> +            break;
> +            ret = 0;
> +            goto cleanup;
> +        }
> +        ++i;
> +    }
> +
> +cleanup:
> +    regfree(&pregUuidBracket);
> +    VIR_FREE(pmatch);
> +    return ret;
> +}

Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org       -o-       http://live.gnome.org/gtk-vnc :|




More information about the libvir-list mailing list