[augeas-devel] [PATCH] Keep re_cset_as_string from including an explicit '\0'

David Lutterkort dlutter at redhat.com
Sat May 10 00:39:52 UTC 2008


2 files changed, 49 insertions(+), 53 deletions(-)
src/fa.c       |   92 ++++++++++++++++++++++++++------------------------------
tests/fatest.c |   10 +++---


# HG changeset patch
# User David Lutterkort <dlutter at redhat.com>
# Date 1210379967 25200
# Node ID 2049984893424b72354a4fe11c345bd90682b70e
# Parent  2d5bb9bd00887894ccc52dadf248a67a03995a4d
Keep re_cset_as_string from including an explicit '\0'

We use NUL-terminated strings; that means we cannot use a CSET
representation that mentions '\0' explicitly.

diff -r 2d5bb9bd0088 -r 204998489342 src/fa.c
--- a/src/fa.c	Fri May 09 15:48:38 2008 -0700
+++ b/src/fa.c	Fri May 09 17:39:27 2008 -0700
@@ -2839,33 +2839,48 @@ static char *re_cset_as_string(const str
 static char *re_cset_as_string(const struct re *re) {
     const uchar rbrack = ']';
     const uchar dash = '-';
+    const uchar nul = '\0';
+
     static const char *const empty_set = "[]";
-    static const char *const empty_nset = "[^]";
     static const char *const total_set = "(.|\n)";
 
     char *result = NULL, *s;
     int from, to, negate;
-    size_t set_len, nset_len, len;
-    int set_rbrack, set_dash, nset_rbrack, nset_dash;
+    size_t set_len, len;
+    int incl_rbrack, incl_dash;
     int r;
 
     set_len = strlen(empty_set);
-    nset_len = strlen(empty_nset);
+
+    /* We cannot include NUL explicitly in a CSET since we use ordinary
+       NUL-terminated strings to represent them. That means that we need to
+       use the negated representation if NUL is to be included (and vice versa).
+    */
+    negate = bitset_get(re->cset, nul);
+    if (negate) {
+        for (from = UCHAR_MIN;
+             from <= UCHAR_MAX && bitset_get(re->cset, from);
+             from += 1);
+        if (from > UCHAR_MAX) {
+            /* Special case: the set matches every character */
+            return strdup(total_set);
+        }
+    }
 
     /* See if ']' and '-' will be explicitly included in the character set
-       (SET_RBRACK, SET_DASH) or in the negated character set (NSET_RBRACK,
-       NSET_DASH) As we loop over the character set, we reset these flags
-       if they are in the set/negated set, but not mentioned explicitly
+       (INCL_RBRACK, INCL_DASH) As we loop over the character set, we reset
+       these flags if they are in the set, but not mentioned explicitly
     */
-    set_rbrack = bitset_get(re->cset, rbrack);
-    set_dash = bitset_get(re->cset, dash);
-    nset_rbrack = !set_rbrack;
-    nset_dash = !set_dash;
+    incl_rbrack = bitset_get(re->cset, rbrack) != negate;
+    incl_dash = bitset_get(re->cset, dash) != negate;
 
     for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
-        int include = bitset_get(re->cset, from);
+        while (from <= UCHAR_MAX && bitset_get(re->cset, from) == negate)
+            from += 1;
+        if (from > UCHAR_MAX)
+            break;
         for (to = from;
-             to < UCHAR_MAX && (bitset_get(re->cset, to+1) == include);
+             to < UCHAR_MAX && (bitset_get(re->cset, to+1) != negate);
              to++);
 
         if (to == from && (from == rbrack || from == dash))
@@ -2876,35 +2891,18 @@ static char *re_cset_as_string(const str
             to -= 1;
 
         len = (to == from) ? 1 : ((to == from + 1) ? 2 : 3);
-        if (include) {
-            if (from < rbrack && rbrack < to)
-                set_rbrack = 0;
-            if (from < dash && dash < to)
-                set_dash = 0;
-            set_len += len;
-        } else {
-            if (from < rbrack && rbrack < to)
-                nset_rbrack = 0;
-            if (from < dash && dash < to)
-                nset_dash = 0;
-            nset_len += len;
-        }
+
+        if (from < rbrack && rbrack < to)
+            incl_rbrack = 0;
+        if (from < dash && dash < to)
+            incl_dash = 0;
+        set_len += len;
     }
-    set_len += set_rbrack + set_dash;
-    nset_len += nset_rbrack + nset_dash;
+    set_len += incl_rbrack + incl_dash;
+    if (negate)
+        set_len += 1;        /* For the ^ */
 
-    if (nset_len == strlen(empty_nset)) {
-        /* Special case: the set matches every character */
-        return strdup(total_set);
-    }
-
-    if (set_len < nset_len) {
-        negate = 0;
-        r = ALLOC_N(result, set_len + 1);
-    } else {
-        negate = 1;
-        r = ALLOC_N(result, nset_len + 1);
-    }
+    r = ALLOC_N(result, set_len + 1);
     if (r < 0)
         return NULL;
 
@@ -2912,7 +2910,7 @@ static char *re_cset_as_string(const str
     *s++ = '[';
     if (negate)
         *s++ = '^';
-    if ((negate && nset_rbrack) || (!negate && set_rbrack))
+    if (incl_rbrack)
         *s++ = rbrack;
 
     for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
@@ -2921,7 +2919,7 @@ static char *re_cset_as_string(const str
         if (from > UCHAR_MAX)
             break;
         for (to = from;
-             to < UCHAR_MAX && (bitset_get(re->cset, to+1) == ! negate);
+             to < UCHAR_MAX && (bitset_get(re->cset, to+1) != negate);
              to++);
 
         if (to == from && (from == rbrack || from == dash))
@@ -2942,13 +2940,9 @@ static char *re_cset_as_string(const str
             *s++ = to;
         }
     }
-    if (negate) {
-        if (nset_dash)
-            *s++ = dash;
-    } else {
-        if (set_dash)
-            *s++ = dash;
-    }
+    if (incl_dash)
+        *s++ = dash;
+
     *s = ']';
 
     return result;
diff -r 2d5bb9bd0088 -r 204998489342 tests/fatest.c
--- a/tests/fatest.c	Fri May 09 15:48:38 2008 -0700
+++ b/tests/fatest.c	Fri May 09 17:39:27 2008 -0700
@@ -96,14 +96,15 @@ static fa_t make_fa(CuTest *tc, const ch
     int r;
 
     r = fa_compile(regexp, &fa);
-    CuAssertIntEquals(tc, exp_err, r);
     if (exp_err == REG_NOERROR) {
         if (r != REG_NOERROR)
             print_regerror(r, regexp);
+        CuAssertIntEquals(tc, REG_NOERROR, r);
         CuAssertPtrNotNull(tc, fa);
         mark(fa);
         assertAsRegexp(tc, fa);
     } else {
+        CuAssertIntEquals(tc, exp_err, r);
         CuAssertPtrEquals(tc, NULL, fa);
     }
     return fa;
@@ -393,12 +394,13 @@ static void testAsRegexp(CuTest *tc) {
     assertFaAsRegexp(tc, "abcd");
     assertFaAsRegexp(tc, "ab|cd");
     assertFaAsRegexp(tc, "[a-z]+");
-    assertFaAsRegexp(tc, "[]a]+");
+    assertFaAsRegexp(tc, "[]a-]+");
+    assertFaAsRegexp(tc, "[A-CE-GI-LN-QS-Z]");
 }
 
 static void testAsRegexpMinus(CuTest *tc) {
-    fa_t fa1 = make_good_fa(tc, "[a-z]+");
-    fa_t fa2 = make_good_fa(tc, "baseurl");
+    fa_t fa1 = make_good_fa(tc, "[A-Za-z]+");
+    fa_t fa2 = make_good_fa(tc, "Deny(Users|Groups|Other)");
     fa_t fa = mark(fa_minus(fa1, fa2));
     char *re;
     int r;




More information about the augeas-devel mailing list