[augeas-devel] [PATCH] Keep re_cset_as_string from including an explicit '\0'
David Lutterkort
dlutter at redhat.com
Sat May 10 00:39:52 UTC 2008
2 files changed, 49 insertions(+), 53 deletions(-)
src/fa.c | 92 ++++++++++++++++++++++++++------------------------------
tests/fatest.c | 10 +++---
# HG changeset patch
# User David Lutterkort <dlutter at redhat.com>
# Date 1210379967 25200
# Node ID 2049984893424b72354a4fe11c345bd90682b70e
# Parent 2d5bb9bd00887894ccc52dadf248a67a03995a4d
Keep re_cset_as_string from including an explicit '\0'
We use NUL-terminated strings; that means we cannot use a CSET
representation that mentions '\0' explicitly.
diff -r 2d5bb9bd0088 -r 204998489342 src/fa.c
--- a/src/fa.c Fri May 09 15:48:38 2008 -0700
+++ b/src/fa.c Fri May 09 17:39:27 2008 -0700
@@ -2839,33 +2839,48 @@ static char *re_cset_as_string(const str
static char *re_cset_as_string(const struct re *re) {
const uchar rbrack = ']';
const uchar dash = '-';
+ const uchar nul = '\0';
+
static const char *const empty_set = "[]";
- static const char *const empty_nset = "[^]";
static const char *const total_set = "(.|\n)";
char *result = NULL, *s;
int from, to, negate;
- size_t set_len, nset_len, len;
- int set_rbrack, set_dash, nset_rbrack, nset_dash;
+ size_t set_len, len;
+ int incl_rbrack, incl_dash;
int r;
set_len = strlen(empty_set);
- nset_len = strlen(empty_nset);
+
+ /* We can not include NUL explicitly in a CSET since we use ordinary
+ NUL delimited strings to represent them. That means that we need to
+ use negated representation if NUL is to be included (and vice versa)
+ */
+ negate = bitset_get(re->cset, nul);
+ if (negate) {
+ for (from = UCHAR_MIN;
+ from <= UCHAR_MAX && bitset_get(re->cset, from);
+ from += 1);
+ if (from > UCHAR_MAX) {
+ /* Special case: the set matches every character */
+ return strdup(total_set);
+ }
+ }
/* See if ']' and '-' will be explicitly included in the character set
- (SET_RBRACK, SET_DASH) or in the negated character set (NSET_RBRACK,
- NSET_DASH) As we loop over the character set, we reset these flags
- if they are in the set/negated set, but not mentioned explicitly
+ (INCL_RBRACK, INCL_DASH) As we loop over the character set, we reset
+ these flags if they are in the set, but not mentioned explicitly
*/
- set_rbrack = bitset_get(re->cset, rbrack);
- set_dash = bitset_get(re->cset, dash);
- nset_rbrack = !set_rbrack;
- nset_dash = !set_dash;
+ incl_rbrack = bitset_get(re->cset, rbrack) != negate;
+ incl_dash = bitset_get(re->cset, dash) != negate;
for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
- int include = bitset_get(re->cset, from);
+ while (from <= UCHAR_MAX && bitset_get(re->cset, from) == negate)
+ from += 1;
+ if (from > UCHAR_MAX)
+ break;
for (to = from;
- to < UCHAR_MAX && (bitset_get(re->cset, to+1) == include);
+ to < UCHAR_MAX && (bitset_get(re->cset, to+1) != negate);
to++);
if (to == from && (from == rbrack || from == dash))
@@ -2876,35 +2891,18 @@ static char *re_cset_as_string(const str
to -= 1;
len = (to == from) ? 1 : ((to == from + 1) ? 2 : 3);
- if (include) {
- if (from < rbrack && rbrack < to)
- set_rbrack = 0;
- if (from < dash && dash < to)
- set_dash = 0;
- set_len += len;
- } else {
- if (from < rbrack && rbrack < to)
- nset_rbrack = 0;
- if (from < dash && dash < to)
- nset_dash = 0;
- nset_len += len;
- }
+
+ if (from < rbrack && rbrack < to)
+ incl_rbrack = 0;
+ if (from < dash && dash < to)
+ incl_dash = 0;
+ set_len += len;
}
- set_len += set_rbrack + set_dash;
- nset_len += nset_rbrack + nset_dash;
+ set_len += incl_rbrack + incl_dash;
+ if (negate)
+ set_len += 1; /* For the ^ */
- if (nset_len == strlen(empty_nset)) {
- /* Special case: the set matches every character */
- return strdup(total_set);
- }
-
- if (set_len < nset_len) {
- negate = 0;
- r = ALLOC_N(result, set_len + 1);
- } else {
- negate = 1;
- r = ALLOC_N(result, nset_len + 1);
- }
+ r = ALLOC_N(result, set_len + 1);
if (r < 0)
return NULL;
@@ -2912,7 +2910,7 @@ static char *re_cset_as_string(const str
*s++ = '[';
if (negate)
*s++ = '^';
- if ((negate && nset_rbrack) || (!negate && set_rbrack))
+ if (incl_rbrack)
*s++ = rbrack;
for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
@@ -2921,7 +2919,7 @@ static char *re_cset_as_string(const str
if (from > UCHAR_MAX)
break;
for (to = from;
- to < UCHAR_MAX && (bitset_get(re->cset, to+1) == ! negate);
+ to < UCHAR_MAX && (bitset_get(re->cset, to+1) != negate);
to++);
if (to == from && (from == rbrack || from == dash))
@@ -2942,13 +2940,9 @@ static char *re_cset_as_string(const str
*s++ = to;
}
}
- if (negate) {
- if (nset_dash)
- *s++ = dash;
- } else {
- if (set_dash)
- *s++ = dash;
- }
+ if (incl_dash)
+ *s++ = dash;
+
*s = ']';
return result;
diff -r 2d5bb9bd0088 -r 204998489342 tests/fatest.c
--- a/tests/fatest.c Fri May 09 15:48:38 2008 -0700
+++ b/tests/fatest.c Fri May 09 17:39:27 2008 -0700
@@ -96,14 +96,15 @@ static fa_t make_fa(CuTest *tc, const ch
int r;
r = fa_compile(regexp, &fa);
- CuAssertIntEquals(tc, exp_err, r);
if (exp_err == REG_NOERROR) {
if (r != REG_NOERROR)
print_regerror(r, regexp);
+ CuAssertIntEquals(tc, REG_NOERROR, r);
CuAssertPtrNotNull(tc, fa);
mark(fa);
assertAsRegexp(tc, fa);
} else {
+ CuAssertIntEquals(tc, exp_err, r);
CuAssertPtrEquals(tc, NULL, fa);
}
return fa;
@@ -393,12 +394,13 @@ static void testAsRegexp(CuTest *tc) {
assertFaAsRegexp(tc, "abcd");
assertFaAsRegexp(tc, "ab|cd");
assertFaAsRegexp(tc, "[a-z]+");
- assertFaAsRegexp(tc, "[]a]+");
+ assertFaAsRegexp(tc, "[]a-]+");
+ assertFaAsRegexp(tc, "[A-CE-GI-LN-QS-Z]");
}
static void testAsRegexpMinus(CuTest *tc) {
- fa_t fa1 = make_good_fa(tc, "[a-z]+");
- fa_t fa2 = make_good_fa(tc, "baseurl");
+ fa_t fa1 = make_good_fa(tc, "[A-Za-z]+");
+ fa_t fa2 = make_good_fa(tc, "Deny(Users|Groups|Other)");
fa_t fa = mark(fa_minus(fa1, fa2));
char *re;
int r;
More information about the augeas-devel
mailing list