[augeas-devel] augeas: master - Support for case-insensitive regexps
David Lutterkort
lutter at fedoraproject.org
Thu Jan 14 18:19:52 UTC 2010
Gitweb: http://git.fedorahosted.org/git/augeas.git?p=augeas.git;a=commitdiff;h=3856b3f49d05cea313811e01ce3e11e545006f09
Commit: 3856b3f49d05cea313811e01ce3e11e545006f09
Parent: a929fdfb8a07683983ae601bbff4c1ed7edaeab0
Author: David Lutterkort <lutter at redhat.com>
AuthorDate: Tue Jan 12 15:23:48 2010 -0800
Committer: David Lutterkort <lutter at redhat.com>
CommitterDate: Wed Jan 13 10:27:34 2010 -0800
Support for case-insensitive regexps
Add a nocase argument to make_regexp, and adjust all uses of make_regexp to
pass it in. For concatenation and union of regexps, we have to expand
case-insensitive regexps when they are mixed with case-sensitive regexps.
---
src/lens.c | 31 ++++++++++++---
src/parser.y | 2 +-
src/pathx.c | 2 +-
src/regexp.c | 119 +++++++++++++++++++++++++++++++++++++++++++++-------------
src/regexp.h | 7 +++-
5 files changed, 126 insertions(+), 35 deletions(-)
diff --git a/src/lens.c b/src/lens.c
index 89eec6c..b093a22 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -235,12 +235,30 @@ static struct regexp *subtree_atype(struct info *info,
const char *vpat = (vtype == NULL) ? ENC_NULL : vtype->pattern->str;
char *pat;
struct regexp *result = NULL;
+ char *ks = NULL, *vs = NULL;
+ int nocase;
+
+ if (ktype != NULL && vtype != NULL && ktype->nocase != vtype->nocase) {
+ ks = regexp_expand_nocase(ktype);
+ vs = regexp_expand_nocase(vtype);
+ ERR_NOMEM(ks == NULL || vs == NULL, info);
+ if (asprintf(&pat, "(%s)%s(%s)%s", ks, ENC_EQ, vs, ENC_SLASH) < 0)
+ ERR_NOMEM(true, info);
+ nocase = 0;
+ } else {
+ if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
+ ERR_NOMEM(pat == NULL, info);
- if (asprintf(&pat, "(%s)%s(%s)%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
- ERR_NOMEM(pat == NULL, info);
-
- result = make_regexp(info, pat);
+ nocase = 0;
+ if (ktype != NULL)
+ nocase = ktype->nocase;
+ else if (vtype != NULL)
+ nocase = vtype->nocase;
+ }
+ result = make_regexp(info, pat, nocase);
error:
+ free(ks);
+ free(vs);
return result;
}
@@ -327,6 +345,7 @@ static struct regexp *make_regexp_from_string(struct info *info,
if (r != NULL) {
r->info = ref(info);
r->pattern = ref(string);
+ r->nocase = 0;
}
return r;
}
@@ -350,7 +369,7 @@ static struct regexp *restrict_regexp(struct regexp *r) {
return NULL;
}
- r = make_regexp(r->info, nre);
+ r = make_regexp(r->info, nre, r->nocase);
if (regexp_compile(r) != 0)
abort();
return r;
diff --git a/src/parser.y b/src/parser.y
index b0a52b5..d6c00e1 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -452,7 +452,7 @@ static struct term *make_value_term(enum value_tag tag, char *value,
term->type = make_base_type(T_STRING);
} else {
term->type = make_base_type(T_REGEXP);
- term->value->regexp = make_regexp(term->info, value);
+ term->value->regexp = make_regexp(term->info, value, 0);
}
return term;
}
diff --git a/src/pathx.c b/src/pathx.c
index e5eb8fe..27b4e40 100644
--- a/src/pathx.c
+++ b/src/pathx.c
@@ -641,7 +641,7 @@ static void func_regexp(struct state *state) {
return;
}
- struct regexp *rx = make_regexp(NULL, pat);
+ struct regexp *rx = make_regexp(NULL, pat, 0);
if (rx == NULL) {
FREE(pat);
STATE_ENOMEM;
diff --git a/src/regexp.c b/src/regexp.c
index 55075df..d4d5f89 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -109,9 +109,11 @@ void print_regexp(FILE *out, struct regexp *r) {
FREE(rx);
}
fputc('/', out);
+ if (r->nocase)
+ fputc('i', out);
}
-struct regexp *make_regexp(struct info *info, char *pat) {
+struct regexp *make_regexp(struct info *info, char *pat, int nocase) {
struct regexp *regexp;
make_ref(regexp);
@@ -119,6 +121,7 @@ struct regexp *make_regexp(struct info *info, char *pat) {
make_ref(regexp->pattern);
regexp->pattern->str = pat;
+ regexp->nocase = nocase;
return regexp;
}
@@ -161,28 +164,50 @@ struct regexp *make_regexp_literal(struct info *info, const char *text) {
*p++ = *t;
}
}
- return make_regexp(info, pattern);
+ return make_regexp(info, pattern, 0);
}
struct regexp *
regexp_union(struct info *info, struct regexp *r1, struct regexp *r2) {
- const char *p1 = r1->pattern->str;
- const char *p2 = r2->pattern->str;
- char *s;
+ struct regexp *r[2];
- if (asprintf(&s, "(%s)|(%s)", p1, p2) == -1)
- return NULL;
- return make_regexp(info, s);
+ r[0] = r1;
+ r[1] = r2;
+ return regexp_union_n(info, 2, r);
+}
+
+char *regexp_expand_nocase(struct regexp *r) {
+ const char *p = r->pattern->str;
+ char *s = NULL;
+ size_t len;
+ int ret;
+
+ if (! r->nocase)
+ return strdup(p);
+
+ ret = fa_expand_nocase(p, strlen(p), &s, &len);
+ ERR_NOMEM(ret == REG_ESPACE, r->info);
+ BUG_ON(ret != REG_NOERROR, r->info, NULL);
+ error:
+ return s;
}
struct regexp *
regexp_union_n(struct info *info, int n, struct regexp **r) {
size_t len = 0;
- char *pat, *p;
+ char *pat = NULL, *p, *expanded = NULL;
+ int nnocase = 0, npresent = 0;
+ int ret;
for (int i=0; i < n; i++)
- if (r[i] != NULL)
+ if (r[i] != NULL) {
len += strlen(r[i]->pattern->str) + strlen("()|");
+ npresent += 1;
+ if (r[i]->nocase)
+ nnocase += 1;
+ }
+
+ bool mixedcase = nnocase > 0 && nnocase < npresent;
if (len == 0)
return NULL;
@@ -198,37 +223,60 @@ regexp_union_n(struct info *info, int n, struct regexp **r) {
if (added > 0)
*p++ = '|';
*p++ = '(';
- p = stpcpy(p, r[i]->pattern->str);
+ if (mixedcase && r[i]->nocase) {
+ expanded = regexp_expand_nocase(r[i]);
+ ERR_BAIL(r[i]->info);
+ len += strlen(expanded) - strlen(r[i]->pattern->str);
+ ret = REALLOC_N(pat, len);
+ ERR_NOMEM(ret < 0, info);
+ p = pat + strlen(pat);
+ p = stpcpy(p, expanded);
+ FREE(expanded);
+ } else {
+ p = stpcpy(p, r[i]->pattern->str);
+ }
*p++ = ')';
added += 1;
}
- return make_regexp(info, pat);
+ *p = '\0';
+ return make_regexp(info, pat, nnocase == npresent);
+ error:
+ FREE(expanded);
+ FREE(pat);
+ return NULL;
}
struct regexp *
regexp_concat(struct info *info, struct regexp *r1, struct regexp *r2) {
- const char *p1 = r1->pattern->str;
- const char *p2 = r2->pattern->str;
- char *s;
+ struct regexp *r[2];
- if (asprintf(&s, "(%s)(%s)", p1, p2) == -1)
- return NULL;
- return make_regexp(info, s);
+ r[0] = r1;
+ r[1] = r2;
+ return regexp_concat_n(info, 2, r);
}
struct regexp *
regexp_concat_n(struct info *info, int n, struct regexp **r) {
size_t len = 0;
- char *pat, *p;
+ char *pat = NULL, *p, *expanded = NULL;
+ int nnocase = 0, npresent = 0;
+ int ret;
for (int i=0; i < n; i++)
- if (r[i] != NULL)
+ if (r[i] != NULL) {
len += strlen(r[i]->pattern->str) + strlen("()");
+ npresent += 1;
+ if (r[i]->nocase)
+ nnocase += 1;
+ }
+
+ bool mixedcase = nnocase > 0 && nnocase < npresent;
if (len == 0)
return NULL;
- if (ALLOC_N(pat, len+1) < 0)
+ len += 1;
+ if (ALLOC_N(pat, len) < 0)
return NULL;
p = pat;
@@ -236,10 +284,26 @@ regexp_concat_n(struct info *info, int n, struct regexp **r) {
if (r[i] == NULL)
continue;
*p++ = '(';
- p = stpcpy(p, r[i]->pattern->str);
+ if (mixedcase && r[i]->nocase) {
+ expanded = regexp_expand_nocase(r[i]);
+ ERR_BAIL(r[i]->info);
+ len += strlen(expanded) - strlen(r[i]->pattern->str);
+ ret = REALLOC_N(pat, len);
+ ERR_NOMEM(ret < 0, info);
+ p = pat + strlen(pat);
+ p = stpcpy(p, expanded);
+ FREE(expanded);
+ } else {
+ p = stpcpy(p, r[i]->pattern->str);
+ }
*p++ = ')';
}
- return make_regexp(info, pat);
+ *p = '\0';
+ return make_regexp(info, pat, nnocase == npresent);
+ error:
+ FREE(expanded);
+ FREE(pat);
+ return NULL;
}
struct regexp *
@@ -276,7 +340,7 @@ regexp_minus(struct info *info, struct regexp *r1, struct regexp *r2) {
if (regexp_c_locale(&s, NULL) < 0)
goto error;
- result = make_regexp(info, s);
+ result = make_regexp(info, s, fa_is_nocase(fa));
s = NULL;
done:
@@ -309,7 +373,7 @@ regexp_iter(struct info *info, struct regexp *r, int min, int max) {
} else {
ret = asprintf(&s, "(%s){%d,%d}", p, min, max);
}
- return (ret == -1) ? NULL : make_regexp(info, s);
+ return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
}
struct regexp *
@@ -322,7 +386,7 @@ regexp_maybe(struct info *info, struct regexp *r) {
return NULL;
p = r->pattern->str;
ret = asprintf(&s, "(%s)?", p);
- return (ret == -1) ? NULL : make_regexp(info, s);
+ return (ret == -1) ? NULL : make_regexp(info, s, r->nocase);
}
struct regexp *regexp_make_empty(struct info *info) {
@@ -334,6 +398,7 @@ struct regexp *regexp_make_empty(struct info *info) {
/* Casting away the CONST for EMPTY_PATTERN is ok since it
is protected against changes because REF == REF_MAX */
regexp->pattern = (struct string *) empty_pattern;
+ regexp->nocase = 0;
}
return regexp;
}
@@ -357,6 +422,8 @@ static int regexp_compile_internal(struct regexp *r, const char **c) {
CALLOC(r->re, 1);
re_syntax_options = syntax;
+ if (r->nocase)
+ re_syntax_options |= RE_ICASE;
*c = re_compile_pattern(r->pattern->str, strlen(r->pattern->str), r->re);
re_syntax_options = old_syntax;
diff --git a/src/regexp.h b/src/regexp.h
index 783b123..3c4e5c0 100644
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -31,6 +31,7 @@ struct regexp {
struct info *info;
struct string *pattern;
struct re_pattern_buffer *re;
+ unsigned int nocase : 1;
};
void print_regexp(FILE *out, struct regexp *regexp);
@@ -38,7 +39,7 @@ void print_regexp(FILE *out, struct regexp *regexp);
/* Make a regexp with pattern PAT, which is not copied. Ownership
* of INFO is taken.
*/
-struct regexp *make_regexp(struct info *info, char *pat);
+struct regexp *make_regexp(struct info *info, char *pat, int nocase);
/* Return 1 if R is an empty pattern, i.e. one consisting of nothing but
'(' and ')' characters, 0 otherwise */
@@ -109,6 +110,10 @@ void regexp_release(struct regexp *regexp);
/* Produce a printable representation of R */
char *regexp_escape(const struct regexp *r);
+
+/* If R is case-insensitive, expand its pattern so that it matches the same
+ * strings even when used in a case-sensitive match. */
+char *regexp_expand_nocase(struct regexp *r);
#endif
More information about the augeas-devel
mailing list