[augeas-devel] [PATCH 3/6] libfa: new function fa_expand_char_ranges
David Lutterkort
lutter at redhat.com
Fri Oct 23 17:30:10 UTC 2009
---
src/fa.c | 148 ++++++++++++++++++++++++++++++++++------------------
src/fa.h | 13 ++++-
src/fa_sym.version | 1 +
tests/fatest.c | 13 +++++
4 files changed, 123 insertions(+), 52 deletions(-)
diff --git a/src/fa.c b/src/fa.c
index 6fcb09f..c8d87fb 100644
--- a/src/fa.c
+++ b/src/fa.c
@@ -182,6 +182,9 @@ struct re {
struct { /* CSET */
bool negate;
bitset *cset;
+ /* If set, do not use character ranges when converting back
+ * to a string */
+ unsigned int no_ranges:1;
};
struct { /* CHAR */
uchar c;
@@ -199,6 +202,8 @@ struct re_parse {
const char *rx; /* Current position in regex */
const char *rend; /* Last char of rx+ 1 */
int error; /* error code */
+ /* Whether new CSETs should have the no_ranges flag set */
+ unsigned int no_ranges:1;
};
/* String with explicit length, used when converting re to string */
@@ -2960,10 +2965,11 @@ static struct re *make_re_char(uchar c) {
return re;
}
-static struct re *make_re_char_set(bool negate) {
+static struct re *make_re_char_set(bool negate, bool no_ranges) {
struct re *re = make_re(CSET);
if (re) {
re->negate = negate;
+ re->no_ranges = no_ranges;
re->cset = bitset_init(UCHAR_NUM);
if (re->cset == NULL)
re_unref(re);
@@ -3054,7 +3060,7 @@ static struct re *parse_simple_exp(struct re_parse *parse) {
if (match(parse, '[')) {
bool negate = match(parse, '^');
- re = make_re_char_set(negate);
+ re = make_re_char_set(negate, parse->no_ranges);
if (re == NULL) {
parse->error = REG_ESPACE;
goto error;
@@ -3088,7 +3094,7 @@ static struct re *parse_simple_exp(struct re_parse *parse) {
}
}
} else if (match(parse, '.')) {
- re = make_re_char_set(1);
+ re = make_re_char_set(1, parse->no_ranges);
if (re == NULL) {
parse->error = REG_ESPACE;
goto error;
@@ -3440,31 +3446,37 @@ static int re_cset_as_string(const struct re *re, struct re_str *str) {
incl_rbrack = cset_contains(re, rbrack) != negate;
incl_dash = cset_contains(re, dash) != negate;
- for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
- while (from <= UCHAR_MAX && cset_contains(re, from) == negate)
- from += 1;
- if (from > UCHAR_MAX)
- break;
- for (to = from;
- to < UCHAR_MAX && (cset_contains(re, to+1) != negate);
- to++);
-
- if (to == from && (from == rbrack || from == dash))
- continue;
- if (from == rbrack || from == dash)
- from += 1;
- if (to == rbrack || to == dash)
- to -= 1;
+ if (re->no_ranges) {
+ for (from = UCHAR_MIN; from <= UCHAR_MAX; from++)
+ if (cset_contains(re, from) != negate)
+ str->len += 1;
+ } else {
+ for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
+ while (from <= UCHAR_MAX && cset_contains(re, from) == negate)
+ from += 1;
+ if (from > UCHAR_MAX)
+ break;
+ for (to = from;
+ to < UCHAR_MAX && (cset_contains(re, to+1) != negate);
+ to++);
+
+ if (to == from && (from == rbrack || from == dash))
+ continue;
+ if (from == rbrack || from == dash)
+ from += 1;
+ if (to == rbrack || to == dash)
+ to -= 1;
- len = (to == from) ? 1 : ((to == from + 1) ? 2 : 3);
+ len = (to == from) ? 1 : ((to == from + 1) ? 2 : 3);
- if (from < rbrack && rbrack < to)
- incl_rbrack = 0;
- if (from < dash && dash < to)
- incl_dash = 0;
- str->len += len;
+ if (from < rbrack && rbrack < to)
+ incl_rbrack = 0;
+ if (from < dash && dash < to)
+ incl_dash = 0;
+ str->len += len;
+ }
+ str->len += incl_rbrack + incl_dash;
}
- str->len += incl_rbrack + incl_dash;
if (negate)
str->len += 1; /* For the ^ */
@@ -3479,31 +3491,40 @@ static int re_cset_as_string(const struct re *re, struct re_str *str) {
if (incl_rbrack)
*s++ = rbrack;
- for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
- while (from <= UCHAR_MAX && cset_contains(re, from) == negate)
- from += 1;
- if (from > UCHAR_MAX)
- break;
- for (to = from;
- to < UCHAR_MAX && (cset_contains(re, to+1) != negate);
- to++);
-
- if (to == from && (from == rbrack || from == dash))
- continue;
- if (from == rbrack || from == dash)
- from += 1;
- if (to == rbrack || to == dash)
- to -= 1;
-
- if (to == from) {
- *s++ = from;
- } else if (to == from + 1) {
- *s++ = from;
- *s++ = to;
- } else {
- *s++ = from;
- *s++ = '-';
- *s++ = to;
+ if (re->no_ranges) {
+ for (from = UCHAR_MIN; from <= UCHAR_MAX; from++) {
+ if (from == rbrack || from == dash)
+ continue;
+ if (cset_contains(re, from) != negate)
+ *s++ = from;
+ }
+ } else {
+ for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
+ while (from <= UCHAR_MAX && cset_contains(re, from) == negate)
+ from += 1;
+ if (from > UCHAR_MAX)
+ break;
+ for (to = from;
+ to < UCHAR_MAX && (cset_contains(re, to+1) != negate);
+ to++);
+
+ if (to == from && (from == rbrack || from == dash))
+ continue;
+ if (from == rbrack || from == dash)
+ from += 1;
+ if (to == rbrack || to == dash)
+ to -= 1;
+
+ if (to == from) {
+ *s++ = from;
+ } else if (to == from + 1) {
+ *s++ = from;
+ *s++ = to;
+ } else {
+ *s++ = from;
+ *s++ = '-';
+ *s++ = to;
+ }
}
}
if (incl_dash)
@@ -3645,7 +3666,7 @@ static int convert_trans_to_re(struct state *s) {
to = t->to;
}
if (re == NULL) {
- re = make_re_char_set(0);
+ re = make_re_char_set(0, 0);
if (re == NULL)
goto error;
}
@@ -3935,6 +3956,31 @@ int fa_restrict_alphabet(const char *regexp, size_t regexp_len,
return result;
}
+int fa_expand_char_ranges(const char *regexp, size_t regexp_len,
+ char **newregexp, size_t *newregexp_len) {
+ int result;
+ struct re *re = NULL;
+ struct re_parse parse;
+ struct re_str str;
+
+ *newregexp = NULL;
+ MEMZERO(&parse, 1);
+ parse.rx = regexp;
+ parse.rend = regexp + regexp_len;
+ parse.error = REG_NOERROR;
+ parse.no_ranges = 1;
+ re = parse_regexp(&parse);
+ if (parse.error != REG_NOERROR)
+ return parse.error;
+
+ MEMZERO(&str, 1);
+ result = re_as_string(re, &str);
+ *newregexp = str.rx;
+ *newregexp_len = str.len;
+ re_unref(re);
+ return result;
+}
+
static void print_char(FILE *out, uchar c) {
/* We escape '/' as '\\/' since dot chokes on bare slashes in labels;
Also, a space ' ' is shown as '\s' */
diff --git a/src/fa.h b/src/fa.h
index 4e7bc52..bf2b341 100644
--- a/src/fa.h
+++ b/src/fa.h
@@ -212,11 +212,22 @@ int fa_as_regexp(struct fa *fa, char **regexp, size_t *regexp_len);
* outside of a character set.
*
* Return a positive value if REGEXP is not syntactically valid; the value
- * returned is one of the REG_ERRCODE_T POSIX error codes.
+ * returned is one of the REG_ERRCODE_T POSIX error codes. Return 0 on
+ * success and -1 if an allocation fails.
*/
int fa_restrict_alphabet(const char *regexp, size_t regexp_len,
char **newregexp, size_t *newregexp_len,
char from, char to);
+
+/* Convert REGEXP into one that does not use ranges inside character
+ * classes.
+ *
+ * Return a positive value if REGEXP is not syntactically valid; the value
+ * returned is one of the REG_ERRCODE_T POSIX error codes. Return 0 on
+ * success and -1 if an allocation fails.
+ */
+int fa_expand_char_ranges(const char *regexp, size_t regexp_len,
+ char **newregexp, size_t *newregexp_len);
#endif
diff --git a/src/fa_sym.version b/src/fa_sym.version
index a489d86..bc8554a 100644
--- a/src/fa_sym.version
+++ b/src/fa_sym.version
@@ -20,5 +20,6 @@ FA_1.0.0 {
fa_ambig_example;
fa_as_regexp;
fa_restrict_alphabet;
+ fa_expand_char_ranges;
local: *;
};
diff --git a/tests/fatest.c b/tests/fatest.c
index 2a7e7c6..ee2cbc3 100644
--- a/tests/fatest.c
+++ b/tests/fatest.c
@@ -499,6 +499,18 @@ static void testRestrictAlphabet(CuTest *tc) {
CuAssertIntEquals(tc, REG_EBRACE, r);
}
+static void testExpandCharRanges(CuTest *tc) {
+ const char *re = "[1-3]*|[a-b]([^\nU-X][^\n])*";
+ char *nre;
+ size_t nre_len;
+ int r;
+
+ r = fa_expand_char_ranges(re, strlen(re), &nre, &nre_len);
+ CuAssertIntEquals(tc, 0, r);
+ CuAssertStrEquals(tc, "[123]*|[ab]([^\nUVWX].)*", nre);
+ CuAssertIntEquals(tc, strlen(nre), nre_len);
+}
+
int main(int argc, char **argv) {
if (argc == 1) {
char *output = NULL;
@@ -520,6 +532,7 @@ int main(int argc, char **argv) {
SUITE_ADD_TEST(suite, testRangeEnd);
SUITE_ADD_TEST(suite, testNul);
SUITE_ADD_TEST(suite, testRestrictAlphabet);
+ SUITE_ADD_TEST(suite, testExpandCharRanges);
CuSuiteRun(suite);
CuSuiteSummary(suite, &output);
--
1.6.2.5
More information about the augeas-devel
mailing list