[augeas-devel] [PATCH] Store character sets as bitsets
David Lutterkort
dlutter at redhat.com
Fri May 9 04:33:28 UTC 2008
1 file changed, 53 insertions(+), 48 deletions(-)
src/fa.c | 101 ++++++++++++++++++++++++++++++++------------------------------
# HG changeset patch
# User David Lutterkort <dlutter at redhat.com>
# Date 1210307539 25200
# Node ID cdce7caf8dcebd02e36faf0f21636cc085aa031d
# Parent e86bdb57c7f6108beca25214a9eb20a8c2e8dbf9
Store character sets as bitsets
Also clean up the definition of the bitset type and make the bitset_*
functions const correct
diff -r e86bdb57c7f6 -r cdce7caf8dce src/fa.c
--- a/src/fa.c Thu May 08 21:25:25 2008 -0700
+++ b/src/fa.c Thu May 08 21:32:19 2008 -0700
@@ -85,6 +85,37 @@ struct trans {
};
};
+/*
+ * Bitsets
+ */
+#define UINT_BIT (sizeof(unsigned int) * CHAR_BIT)
+
+typedef unsigned int bitset;
+
+static bitset *bitset_init(size_t nbits) {
+ bitset *bs;
+ if (ALLOC_N(bs, (nbits + UINT_BIT) / UINT_BIT) == -1)
+ return NULL;
+ return bs;
+}
+
+static inline void bitset_clr(bitset *bs, unsigned int bit) {
+ bs[bit/UINT_BIT] &= ~(1 << (bit % UINT_BIT));
+}
+
+static inline void bitset_set(bitset *bs, unsigned int bit) {
+ bs[bit/UINT_BIT] |= 1 << (bit % UINT_BIT);
+}
+
+ATTRIBUTE_PURE
+static inline int bitset_get(const bitset * const bs, unsigned int bit) {
+ return (bs[bit/UINT_BIT] >> bit % UINT_BIT) & 1;
+}
+
+static void bitset_free(bitset *bs) {
+ free(bs);
+}
+
/*
* Representation of a parsed regular expression. The regular expression is
@@ -129,8 +160,8 @@ struct re {
struct re *exp2;
};
struct { /* CSET */
- int negate;
- uchar *cset;
+ int negate;
+ bitset *cset;
};
struct { /* CHAR */
uchar c;
@@ -215,12 +246,12 @@ static void print_char_set(struct re *se
else
printf("[");
for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
- while (set->cset[from] == set->negate)
+ while (bitset_get(set->cset, from) == set->negate)
from += 1;
if (from > UCHAR_MAX)
break;
for (to = from;
- to < UCHAR_MAX && (set->cset[to+1] == ! set->negate);
+ to < UCHAR_MAX && (bitset_get(set->cset, to+1) == !set->negate);
to++);
if (to == from) {
printf("%c", from);
@@ -1169,32 +1200,6 @@ static struct state *step(struct state *
return NULL;
}
-#define UINT_BIT (sizeof(unsigned int) * CHAR_BIT)
-
-typedef unsigned int *bitset_t;
-
-static bitset_t bitset_init(size_t nbits) {
- bitset_t bs;
- CALLOC(bs, (nbits + UINT_BIT) / UINT_BIT);
- return bs;
-}
-
-static void bitset_clr(bitset_t bs, unsigned int bit) {
- bs[bit/UINT_BIT] &= ~(1 << (bit % UINT_BIT));
-}
-
-static void bitset_set(bitset_t bs, unsigned int bit) {
- bs[bit/UINT_BIT] |= 1 << (bit % UINT_BIT);
-}
-
-static int bitset_get(bitset_t bs, unsigned int bit) {
- return bs[bit/UINT_BIT] & (1 << (bit % UINT_BIT));
-}
-
-static void bitset_free(bitset_t bs) {
- free(bs);
-}
-
struct state_list {
struct state_list_node *first;
struct state_list_node *last;
@@ -1292,7 +1297,7 @@ static void minimize_hopcroft(struct fa
/* An ss->used x nsigma matrix of lists of states */
struct state_set **reverse;
CALLOC(reverse, nstates * nsigma);
- bitset_t reverse_nonempty = bitset_init(nstates * nsigma);
+ bitset *reverse_nonempty = bitset_init(nstates * nsigma);
struct state_set **partition;
CALLOC(partition, nstates);
@@ -1312,14 +1317,14 @@ static void minimize_hopcroft(struct fa
*/
int *pending = NULL;
size_t npending = 0, spending = 0;
- bitset_t pending2 = bitset_init(nstates * nsigma);
+ bitset *pending2 = bitset_init(nstates * nsigma);
struct state_set *split = state_set_init(-1, S_NONE);
- bitset_t split2 = bitset_init(nstates);
+ bitset *split2 = bitset_init(nstates);
int *refine;
CALLOC(refine, nstates);
- bitset_t refine2 = bitset_init(nstates);
+ bitset *refine2 = bitset_init(nstates);
struct state_set **splitblock;
CALLOC(splitblock, nstates);
@@ -1736,7 +1741,7 @@ struct fa *fa_concat(struct fa *fa1, str
return fa1;
}
-static struct fa *fa_make_char_set(uchar *cset, int negate) {
+static struct fa *fa_make_char_set(bitset *cset, int negate) {
struct fa *fa = fa_make_empty();
struct state *s = fa->initial;
struct state *t = add_state(fa, 1);
@@ -1744,12 +1749,12 @@ static struct fa *fa_make_char_set(uchar
int r;
while (from <= UCHAR_MAX) {
- while (from <= UCHAR_MAX && cset[from] == negate)
+ while (from <= UCHAR_MAX && bitset_get(cset, from) == negate)
from += 1;
if (from > UCHAR_MAX)
break;
int to = from;
- while (to < UCHAR_MAX && (cset[to + 1] == !negate))
+ while (to < UCHAR_MAX && (bitset_get(cset, to + 1) == !negate))
to += 1;
r = add_new_trans(s, t, from, to);
if (r < 0)
@@ -2400,7 +2405,7 @@ static void free_re(struct re *re) {
} else if (re->type == ITER) {
re_unref(re->exp);
} else if (re->type == CSET) {
- free(re->cset);
+ bitset_free(re->cset);
}
free(re);
}
@@ -2465,7 +2470,7 @@ static struct re *make_re_char_set(int n
struct re *re = make_re(CSET);
if (re) {
re->negate = negate;
- CALLOC(re->cset, UCHAR_NUM);
+ re->cset = bitset_init(UCHAR_NUM);
if (re->cset == NULL)
re_unref(re);
}
@@ -2520,8 +2525,8 @@ static char parse_char(const char **rege
static void add_re_char(struct re *re, uchar from, uchar to) {
assert(re->type == CSET);
- for (int c = from; c <= to; c++)
- re->cset[c] = 1;
+ for (unsigned int c = from; c <= to; c++)
+ bitset_set(re->cset, c);
}
static void parse_char_class(const char **regexp, struct re *re,
@@ -2847,15 +2852,15 @@ static char *re_cset_as_string(const str
NSET_DASH) As we loop over the character set, we reset these flags
if they are in the set/negated set, but not mentioned explicitly
*/
- set_rbrack = re->cset[rbrack];
- set_dash = re->cset[dash];
+ set_rbrack = bitset_get(re->cset, rbrack);
+ set_dash = bitset_get(re->cset, dash);
nset_rbrack = !set_rbrack;
nset_dash = !set_dash;
for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
- int include = re->cset[from];
+ int include = bitset_get(re->cset, from);
for (to = from;
- to < UCHAR_MAX && (re->cset[to+1] == include);
+ to < UCHAR_MAX && (bitset_get(re->cset, to+1) == include);
to++);
if (to == from && (from == rbrack || from == dash))
@@ -2906,12 +2911,12 @@ static char *re_cset_as_string(const str
*s++ = rbrack;
for (from = UCHAR_MIN; from <= UCHAR_MAX; from = to+1) {
- while (from <= UCHAR_MAX && re->cset[from] == negate)
+ while (from <= UCHAR_MAX && bitset_get(re->cset, from) == negate)
from += 1;
if (from > UCHAR_MAX)
break;
for (to = from;
- to < UCHAR_MAX && (re->cset[to+1] == ! negate);
+ to < UCHAR_MAX && (bitset_get(re->cset, to+1) == ! negate);
to++);
if (to == from && (from == rbrack || from == dash))
@@ -3063,7 +3068,7 @@ static int convert_trans_to_re(struct st
int cnt = 0;
uchar chr = UCHAR_MIN;
for (int c = 0; c < UCHAR_NUM; c++) {
- if (trans[t].re->cset[c]) {
+ if (bitset_get(trans[t].re->cset, c)) {
cnt += 1;
chr = c;
}
More information about the augeas-devel
mailing list