[augeas-devel] augeas: master - Support for case-insensitive regexps

David Lutterkort lutter at fedoraproject.org
Thu Jan 14 18:19:52 UTC 2010


Gitweb:        http://git.fedorahosted.org/git/augeas.git?p=augeas.git;a=commitdiff;h=3856b3f49d05cea313811e01ce3e11e545006f09
Commit:        3856b3f49d05cea313811e01ce3e11e545006f09
Parent:        a929fdfb8a07683983ae601bbff4c1ed7edaeab0
Author:        David Lutterkort <lutter at redhat.com>
AuthorDate:    Tue Jan 12 15:23:48 2010 -0800
Committer:     David Lutterkort <lutter at redhat.com>
CommitterDate: Wed Jan 13 10:27:34 2010 -0800

Support for case-insensitive regexps

Add nocase argument to make_regexp, and adjust all uses of make_regexp to
pass that in. For concatenation and union of regexps we have to expand
case-insensitive regexps when they are mixed with case-sensitive regexps.

Adjust all uses of make_regexp.
---
 src/lens.c   |   31 ++++++++++++---
 src/parser.y |    2 +-
 src/pathx.c  |    2 +-
 src/regexp.c |  119 +++++++++++++++++++++++++++++++++++++++++++++-------------
 src/regexp.h |    7 +++-
 5 files changed, 126 insertions(+), 35 deletions(-)

diff --git a/src/lens.c b/src/lens.c
index 89eec6c..b093a22 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -235,12 +235,30 @@ static struct regexp *subtree_atype(struct info *info,
     const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
     char *pat;
     struct regexp *result = NULL;
+    char *ks = NULL, *vs = NULL;
+    int nocase;
+
+    if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
+        ks = regexp_expand_nocase(ktype);
+        vs = regexp_expand_nocase(vtype);
+        ERR_NOMEM(ks == NULL || vs == NULL, info);
+        if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
+            ERR_NOMEM(true, info);
+        nocase = 0;
+    } else {
+        if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
+            ERR_NOMEM(pat == NULL, info);
 
-    if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
-        ERR_NOMEM(pat == NULL, info);
-
-    result = make_regexp(info, pat);
+        nocase = 0;
+        if (ktype != NULL)
+            nocase = ktype->nocase;
+        else if (vtype != NULL)
+            nocase = vtype->nocase;
+    }
+    result = make_regexp(info, pat, nocase);
  error:
+    free(ks);
+    free(vs);
     return result;
 }
 
@@ -327,6 +345,7 @@ static struct regexp *make_regexp_from_string(struct info *info,
     if (r != NULL) {
         r->info = ref(info);
         r->pattern = ref(string);
+        r->nocase = 0;
     }
     return r;
 }
@@ -350,7 +369,7 @@ static struct regexp *restrict_regexp(struct regexp *r) {
         return NULL;
     }
 
-    r = make_regexp(r->info, nre);
+    r = make_regexp(r->info, nre, r->nocase);
     if (regexp_compile(r) != 0)
         abort();
     return r;
diff --git a/src/parser.y b/src/parser.y
index b0a52b5..d6c00e1 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -452,7 +452,7 @@ static struct term *make_value_term(enum value_tag tag, char *value,
     term->type = make_base_type(T_STRING);
   } else {
     term->type = make_base_type(T_REGEXP);
-    term->value->regexp = make_regexp(term->info, value);
+    term->value->regexp = make_regexp(term->info, value, 0);
   }
   return term;
 }
diff --git a/src/pathx.c b/src/pathx.c
index e5eb8fe..27b4e40 100644
--- a/src/pathx.c
+++ b/src/pathx.c
@@ -641,7 +641,7 @@ static void func_regexp(struct state *state) {
         return;
     }
 
-    struct regexp *rx = make_regexp(NULL, pat);
+    struct regexp *rx = make_regexp(NULL, pat, 0);
     if (rx == NULL) {
         FREE(pat);
         STATE_ENOMEM;
diff --git a/src/regexp.c b/src/regexp.c
index 55075df..d4d5f89 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -109,9 +109,11 @@ void print_regexp(FILE *out, struct regexp *r) {
         FREE(rx);
     }
     fputc('/', out);
+    if (r->nocase)
+        fputc('i', out);
 }
 
-struct regexp *make_regexp(struct info *info, char *pat) {
+struct regexp *make_regexp(struct info *info, char *pat, int nocase) {
     struct regexp *regexp;
 
     make_ref(regexp);
@@ -119,6 +121,7 @@ struct regexp *make_regexp(struct info *info, char *pat) {
 
     make_ref(regexp->pattern);
     regexp->pattern->str = pat;
+    regexp->nocase = nocase;
     return regexp;
 }
 
@@ -161,28 +164,50 @@ struct regexp *make_regexp_literal(struct info *info, const char *text) {
             *p++ = *t;
         }
     }
-    return make_regexp(info, pattern);
+    return make_regexp(info, pattern, 0);
 }
 
 struct regexp *
 regexp_union(struct info *info, struct regexp *r1, struct regexp *r2) {
-    const char *p1 = r1->pattern->str;
-    const char *p2 = r2->pattern->str;
-    char *s;
+    struct regexp *r[2];
 
-    if (asprintf(&s, "(%s)|(%s)", p1, p2) == -1)
-        return NULL;
-    return make_regexp(info, s);
+    r[0] = r1;
+    r[1] = r2;
+    return regexp_union_n(info, 2, r);
+}
+
+char *regexp_expand_nocase(struct regexp *r) {
+    const char *p = r->pattern->str;
+    char *s = NULL;
+    size_t len;
+    int ret;
+
+    if (! r->nocase)
+        return strdup(p);
+
+    ret = fa_expand_nocase(p, strlen(p), &s, &len);
+    ERR_NOMEM(ret == REG_ESPACE, r->info);
+    BUG_ON(ret != REG_NOERROR, r->info, NULL);
+ error:
+    return s;
 }
 
 struct regexp *
 regexp_union_n(struct info *info, int n, struct regexp **r) {
     size_t len = 0;
-    char *pat, *p;
+    char *pat = NULL, *p, *expanded = NULL;
+    int nnocase = 0, npresent = 0;
+    int ret;
 
     for (int i=0; i < n; i++)
-        if (r[i] != NULL)
+        if (r[i] != NULL) {
             len += strlen(r[i]->pattern->str) + strlen("()|");
+            npresent += 1;
+            if (r[i]->nocase)
+                nnocase += 1;
+        }
+
+    bool mixedcase = nnocase > 0 && nnocase < npresent;
 
     if (len == 0)
         return NULL;
@@ -198,37 +223,60 @@ regexp_union_n(struct info *info, int n, struct regexp **r) {
         if (added > 0)
             *p++ = '|';
         *p++ = '(';
-        p = stpcpy(p, r[i]->pattern->str);
+        if (mixedcase && r[i]->nocase) {
+            expanded = regexp_expand_nocase(r[i]);
+            ERR_BAIL(r[i]->info);
+            len += strlen(expanded) - strlen(r[i]->pattern->str);
+            ret = REALLOC_N(pat, len);
+            ERR_NOMEM(ret < 0, info);
+            p = pat + strlen(pat);
+            p = stpcpy(p, expanded);
+            FREE(expanded);
+        } else {
+            p = stpcpy(p, r[i]->pattern->str);
+        }
         *p++ = ')';
         added += 1;
     }
-    return make_regexp(info, pat);
+    *p = '\0';
+    return make_regexp(info, pat, nnocase == npresent);
+ error:
+    FREE(expanded);
+    FREE(pat);
+    return NULL;
 }
 
 struct regexp *
 regexp_concat(struct info *info, struct regexp *r1, struct regexp *r2) {
-    const char *p1 = r1->pattern->str;
-    const char *p2 = r2->pattern->str;
-    char *s;
+    struct regexp *r[2];
 
-    if (asprintf(&s, "(%s)(%s)", p1, p2) == -1)
-        return NULL;
-    return make_regexp(info, s);
+    r[0] = r1;
+    r[1] = r2;
+    return regexp_concat_n(info, 2, r);
 }
 
 struct regexp *
 regexp_concat_n(struct info *info, int n, struct regexp **r) {
     size_t len = 0;
-    char *pat, *p;
+    char *pat = NULL, *p, *expanded = NULL;
+    int nnocase = 0, npresent = 0;
+    int ret;
 
     for (int i=0; i < n; i++)
-        if (r[i] != NULL)
+        if (r[i] != NULL) {
             len += strlen(r[i]->pattern->str) + strlen("()");
+            npresent += 1;
+            if (r[i]->nocase)
+                nnocase += 1;
+        }
+
+    bool mixedcase = nnocase > 0 && nnocase < npresent;
 
     if (len == 0)
         return NULL;
 
-    if (ALLOC_N(pat, len+1) < 0)
+    len += 1;
+    if (ALLOC_N(pat, len) < 0)
         return NULL;
 
     p = pat;
@@ -236,10 +284,26 @@ regexp_concat_n(struct info *info, int n, struct regexp **r) {
         if (r[i] == NULL)
             continue;
         *p++ = '(';
-        p = stpcpy(p, r[i]->pattern->str);
+        if (mixedcase && r[i]->nocase) {
+            expanded = regexp_expand_nocase(r[i]);
+            ERR_BAIL(r[i]->info);
+            len += strlen(expanded) - strlen(r[i]->pattern->str);
+            ret = REALLOC_N(pat, len);
+            ERR_NOMEM(ret < 0, info);
+            p = pat + strlen(pat);
+            p = stpcpy(p, expanded);
+            FREE(expanded);
+        } else {
+            p = stpcpy(p, r[i]->pattern->str);
+        }
         *p++ = ')';
     }
-    return make_regexp(info, pat);
+    *p = '\0';
+    return make_regexp(info, pat, nnocase == npresent);
+ error:
+    FREE(expanded);
+    FREE(pat);
+    return NULL;
 }
 
 struct regexp *
@@ -276,7 +340,7 @@ regexp_minus(struct info *info, struct regexp *r1, struct regexp *r2) {
     if (regexp_c_locale(&s, NULL) < 0)
         goto error;
 
-    result = make_regexp(info, s);
+    result = make_regexp(info, s, fa_is_nocase(fa));
     s = NULL;
 
  done:
@@ -309,7 +373,7 @@ regexp_iter(struct info *info, struct regexp *r, int min, int max) {
     } else {
         ret = asprintf(&s, "(%s){%d,%d}", p, min, max);
     }
-    return (ret == -1) ? NULL : make_regexp(info, s);
+    return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
 }
 
 struct regexp *
@@ -322,7 +386,7 @@ regexp_maybe(struct info *info, struct regexp *r) {
         return NULL;
     p = r->pattern->str;
     ret = asprintf(&s, "(%s)?", p);
-    return (ret == -1) ? NULL : make_regexp(info, s);
+    return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
 }
 
 struct regexp *regexp_make_empty(struct info *info) {
@@ -334,6 +398,7 @@ struct regexp *regexp_make_empty(struct info *info) {
         /* Casting away the CONST for EMPTY_PATTERN is ok since it
            is protected against changes because REF == REF_MAX */
         regexp->pattern = (struct string *) empty_pattern;
+        regexp->nocase = 0;
     }
     return regexp;
 }
@@ -357,6 +422,8 @@ static int regexp_compile_internal(struct regexp *r, const char **c) {
         CALLOC(r->re, 1);
 
     re_syntax_options = syntax;
+    if (r->nocase)
+        re_syntax_options |= RE_ICASE;
     *c = re_compile_pattern(r->pattern->str, strlen(r->pattern->str), r->re);
     re_syntax_options = old_syntax;
 
diff --git a/src/regexp.h b/src/regexp.h
index 783b123..3c4e5c0 100644
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -31,6 +31,7 @@ struct regexp {
     struct info              *info;
     struct string            *pattern;
     struct re_pattern_buffer *re;
+    unsigned int              nocase : 1;
 };
 
 void print_regexp(FILE *out, struct regexp *regexp);
@@ -38,7 +39,7 @@ void print_regexp(FILE *out, struct regexp *regexp);
 /* Make a regexp with pattern PAT, which is not copied. Ownership
  * of INFO is taken.
  */
-struct regexp *make_regexp(struct info *info, char *pat);
+struct regexp *make_regexp(struct info *info, char *pat, int nocase);
 
 /* Return 1 if R is an empty pattern, i.e. one consisting of nothing but
    '(' and ')' characters, 0 otherwise */
@@ -109,6 +110,10 @@ void regexp_release(struct regexp *regexp);
 
 /* Produce a printable representation of R */
 char *regexp_escape(const struct regexp *r);
+
+/* If R is case-insensitive, expand its pattern so that it matches the same
+ * strings even when used in a case-sensitive match. */
+char *regexp_expand_nocase(struct regexp *r);
 #endif
 
 




More information about the augeas-devel mailing list