[augeas-devel] augeas: master - * src/get.c: reduce the number of calls to regexp_match
David Lutterkort
lutter at fedoraproject.org
Wed Feb 18 22:03:25 UTC 2009
Gitweb: http://git.fedorahosted.org/git/augeas.git?p=augeas.git;a=commitdiff;h=b2bd8000adf84db7572c77a02f24eb2a275f51f7
Commit: b2bd8000adf84db7572c77a02f24eb2a275f51f7
Parent: 8050fb09c9e7a59193aed911b9a138b123e7a2f9
Author: David Lutterkort <lutter at redhat.com>
AuthorDate: Sat Feb 14 21:44:15 2009 -0800
Committer: David Lutterkort <lutter at redhat.com>
CommitterDate: Tue Feb 17 10:09:35 2009 -0800
* src/get.c: reduce the number of calls to regexp_match
Only call the regexp matcher at the very beginning of get/parse, and once
for every match inside an iteration. Use re_registers to keep track of the
substrings being worked on in favor of 'struct split'
---
src/get.c | 314 ++++++++++++++++++++++++++----------------------------------
1 files changed, 136 insertions(+), 178 deletions(-)
diff --git a/src/get.c b/src/get.c
index 90d1da8..e66dd83 100644
--- a/src/get.c
+++ b/src/get.c
@@ -33,15 +33,6 @@
#define assert_error(state, format, args ...) \
assert_error_at(__FILE__, __LINE__, &(state->info), format, ## args)
-/* A substring in the overall string we are matching. A lens is supposed
- to process the whole string from start[0] ... start[size]
-*/
-struct split {
- struct split *next;
- const char *start; /* The start of the text to process */
- int size;
-};
-
struct seq {
struct seq *next;
const char *name;
@@ -50,15 +41,36 @@ struct seq {
struct state {
struct info info;
- struct split *split;
const char *text;
- const char *pos;
struct seq *seqs;
char *key;
char *value; /* GET_STORE leaves a value here */
struct lns_error *error;
+ /* We use the registers from a regular expression match to keep track
+ * of the substring we are currently looking at. REGS are the registers
+ * from the last regexp match; NREG is the number of the register
+ * in REGS that describes the substring we are currently looking at.
+ *
+ * We adjust NREG as we traverse lenses either because we know the
+ * grouping structure of lenses (for L_STAR, the child lens is always
+ * in NREG + 1) or by looking at the number of groups in a sublens (as
+ * we move from one child of L_CONCAT to the next, we need to add 1 +
+ * number of groups of that child to NREG) How NREG is adjusted is
+ * closely related to how the REGEXP_* routines build up bigger regexps
+ * from smaller ones.
+ */
+ struct re_registers *regs;
+ uint nreg;
};
+#define REG_START(state) ((state)->regs->start[(state)->nreg])
+#define REG_END(state) ((state)->regs->end[(state)->nreg])
+#define REG_SIZE(state) (REG_END(state) - REG_START(state))
+#define REG_POS(state) ((state)->text + REG_START(state))
+#define REG_VALID(state) ((state)->nreg < (state)->regs->num_regs)
+#define REG_MATCHED(state) (REG_VALID(state) \
+ && (state)->regs->start[(state)->nreg] >= 0)
+
static void get_error(struct state *state, struct lens *lens,
const char *format, ...)
ATTRIBUTE_FORMAT(printf, 3, 4);
@@ -82,7 +94,10 @@ static void get_error(struct state *state, struct lens *lens,
return;
CALLOC(state->error, 1);
state->error->lens = ref(lens);
- state->error->pos = state->pos - state->text;
+ if (REG_MATCHED(state))
+ state->error->pos = REG_START(state);
+ else
+ state->error->pos = 0;
va_start(ap, format);
r = vasprintf(&state->error->message, format, ap);
va_end(ap);
@@ -175,7 +190,10 @@ static void get_expected_error(struct state *state, struct lens *l) {
char word[wordlen+1];
char *p, *pat;
- strncpy(word, state->split->start, wordlen);
+ if (REG_MATCHED(state))
+ strncpy(word, REG_POS(state), wordlen);
+ else
+ strncpy(word, state->text, wordlen);
word[wordlen] = '\0';
for (p = word; *p != '\0' && *p != '\n'; p++);
*p = '\0';
@@ -189,43 +207,13 @@ static void get_expected_error(struct state *state, struct lens *l) {
* Splitting of the input text
*/
-static char *token_from_split(struct split *split) {
- return strndup(split->start, split->size);
-}
-
-static struct split *split_append(struct split **split, struct split *tail,
- const char *text, int start, int end) {
- struct split *sp;
-
- if (ALLOC(sp) < 0)
- return NULL;
-
- sp->start = text + start;
- sp->size = end - start;
- list_tail_cons(*split, tail, sp);
- return tail;
-}
-
-static struct split *next_split(struct state *state) {
- if (state->split != NULL) {
- state->split = state->split->next;
- if (state->split != NULL)
- state->pos = state->split->start + state->split->size;
- }
- return state->split;
-}
-
-static struct split *set_split(struct state *state, struct split *split) {
- state->split = split;
- if (split != NULL)
- state->pos = split->start + split->size;
- return split;
+static char *token(struct state *state) {
+ return strndup(REG_POS(state), REG_SIZE(state));
}
static void regexp_match_error(struct state *state, struct lens *lens,
- int count, struct split *split,
- struct regexp *r) {
- char *text = strndup(split->start, split->size);
+ int count, struct regexp *r) {
+ char *text = strndup(REG_POS(state), REG_SIZE(state));
char *pat = regexp_escape(r);
if (count == -1) {
@@ -241,84 +229,35 @@ static void regexp_match_error(struct state *state, struct lens *lens,
free(text);
}
-/* Refine a tree split OUTER according to the L_CONCAT lens LENS */
-static struct split *split_concat(struct state *state, struct lens *lens) {
- assert(lens->tag == L_CONCAT);
-
- int count = 0;
- struct split *outer = state->split;
- struct re_registers regs;
- struct split *split = NULL;
- struct regexp *ctype = lens->ctype;
-
- MEMZERO(®s, 1);
- count = regexp_match(ctype, outer->start, outer->size, 0, ®s);
- if (count < 0) {
- regexp_match_error(state, lens, count, outer, ctype);
- return NULL;
- }
-
- int reg = 1;
- struct split *tail = NULL;
- for (int i=0; i < lens->nchildren; i++) {
- assert(reg < regs.num_regs);
- assert(regs.start[reg] != -1);
- tail = split_append(&split, tail,
- outer->start, regs.start[reg], regs.end[reg]);
- if (tail == NULL)
- goto error;
- reg += 1 + regexp_nsub(lens->children[i]->ctype);
- }
- assert(reg < regs.num_regs);
- done:
- free(regs.start);
- free(regs.end);
- return split;
- error:
- list_free(split);
- goto done;
-}
-
-static struct split *split_iter(struct state *state, struct lens *lens) {
- assert(lens->tag == L_STAR);
+/* Modifies STATE->REGS and STATE->NREG. The caller must save these
+ * if they are still needed
+ *
+ * Return the number of characters matched
+ */
+static int match(struct state *state, struct lens *lens,
+ struct regexp *re, uint size, uint start) {
+ struct re_registers *regs;
+ int count;
- int count = 0;
- struct split *outer = state->split;
- struct split *split = NULL;
- struct regexp *ctype = lens->child->ctype;
+ if (ALLOC(regs) < 0)
+ return -1;
- int pos = 0;
- struct split *tail = NULL;
- while (pos < outer->size) {
- count = regexp_match(ctype, outer->start, outer->size, pos, NULL);
- if (count == -1) {
- break;
- } else if (count < -1) {
- regexp_match_error(state, lens, count, outer, ctype);
- goto error;
- }
- tail = split_append(&split, tail, outer->start, pos, pos + count);
- if (tail == NULL)
- goto error;
- pos += count;
+ count = regexp_match(re, state->text, size, start, regs);
+ if (count < -1) {
+ regexp_match_error(state, lens, count, re);
+ return -1;
}
- return split;
- error:
- list_free(split);
- return NULL;
+ state->regs = regs;
+ state->nreg = 0;
+ return count;
}
-static int applies(struct state *state, struct lens *lens) {
- struct split *split = state->split;
- int count;
- count = regexp_match(lens->ctype, split->start, split->size, 0, NULL);
- if (count == -1)
- return 0;
- else if (count < -1) {
- regexp_match_error(state, lens, count, split, lens->ctype);
- return 0;
+static void free_regs(struct state *state) {
+ if (state->regs != NULL) {
+ free(state->regs->start);
+ free(state->regs->end);
+ FREE(state->regs);
}
- return (count == split->size);
}
static struct tree *get_lens(struct lens *lens, struct state *state);
@@ -389,7 +328,7 @@ static struct skel *parse_del(struct lens *lens, struct state *state) {
struct skel *skel = NULL;
skel = make_skel(lens);
- skel->text = token_from_split(state->split);
+ skel->text = token(state);
return skel;
}
@@ -401,7 +340,7 @@ static struct tree *get_store(struct lens *lens, struct state *state) {
if (state->value != NULL) {
get_error(state, lens, "More than one store in a subtree");
} else {
- state->value = token_from_split(state->split);
+ state->value = token(state);
}
return tree;
}
@@ -414,7 +353,7 @@ static struct skel *parse_store(struct lens *lens,
static struct tree *get_key(struct lens *lens, struct state *state) {
assert(lens->tag == L_KEY);
- state->key = token_from_split(state->split);
+ state->key = token(state);
return NULL;
}
@@ -438,15 +377,18 @@ static struct tree *get_union(struct lens *lens, struct state *state) {
assert(lens->tag == L_UNION);
struct tree *tree = NULL;
int applied = 0;
+ uint old_nreg = state->nreg;
+ state->nreg += 1;
for (int i=0; i < lens->nchildren; i++) {
- struct lens *l = lens->children[i];
- if (applies(state, l)) {
- tree = get_lens(l, state);
+ if (REG_MATCHED(state)) {
+ tree = get_lens(lens->children[i], state);
applied = 1;
break;
}
+ state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
}
+ state->nreg = old_nreg;
if (!applied)
get_expected_error(state, lens);
return tree;
@@ -457,15 +399,19 @@ static struct skel *parse_union(struct lens *lens, struct state *state,
assert(lens->tag == L_UNION);
struct skel *skel = NULL;
int applied = 0;
+ uint old_nreg = state->nreg;
+ state->nreg += 1;
for (int i=0; i < lens->nchildren; i++) {
struct lens *l = lens->children[i];
- if (applies(state, l)) {
+ if (REG_MATCHED(state)) {
skel = parse_lens(l, state, dict);
applied = 1;
break;
}
+ state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
}
+ state->nreg = old_nreg;
if (! applied)
get_expected_error(state, lens);
@@ -475,27 +421,25 @@ static struct skel *parse_union(struct lens *lens, struct state *state,
static struct tree *get_concat(struct lens *lens, struct state *state) {
assert(lens->tag == L_CONCAT);
struct tree *tree = NULL;
- struct split *oldsplit = state->split;
- struct split *split = split_concat(state, lens);
+ uint old_nreg = state->nreg;
- set_split(state, split);
+ state->nreg += 1;
for (int i=0; i < lens->nchildren; i++) {
struct tree *t = NULL;
- if (state->split == NULL) {
+ if (! REG_VALID(state)) {
get_error(state, lens->children[i],
"Not enough components in concat");
- list_free(split);
free_tree(tree);
+ state->nreg = old_nreg;
return NULL;
}
t = get_lens(lens->children[i], state);
list_append(tree, t);
- next_split(state);
+ state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
}
+ state->nreg = old_nreg;
- set_split(state, oldsplit);
- list_free(split);
return tree;
}
@@ -503,17 +447,15 @@ static struct skel *parse_concat(struct lens *lens, struct state *state,
struct dict **dict) {
assert(lens->tag == L_CONCAT);
struct skel *skel = make_skel(lens);
- struct split *oldsplit = state->split;
- struct split *split = split_concat(state, lens);
+ uint old_nreg = state->nreg;
- set_split(state, split);
+ state->nreg += 1;
for (int i=0; i < lens->nchildren; i++) {
struct skel *sk = NULL;
struct dict *di = NULL;
- if (state->split == NULL) {
+ if (! REG_VALID(state)) {
get_error(state, lens->children[i],
"Not enough components in concat");
- list_free(split);
free_skel(skel);
return NULL;
}
@@ -521,57 +463,71 @@ static struct skel *parse_concat(struct lens *lens, struct state *state,
sk = parse_lens(lens->children[i], state, &di);
list_append(skel->skels, sk);
dict_append(dict, di);
- next_split(state);
+ state->nreg += 1 + regexp_nsub(lens->children[i]->ctype);
}
+ state->nreg = old_nreg;
- set_split(state, oldsplit);
- list_free(split);
return skel;
}
static struct tree *get_quant_star(struct lens *lens, struct state *state) {
assert(lens->tag == L_STAR);
+ struct lens *child = lens->child;
struct tree *tree = NULL, *tail = NULL;
- struct split *oldsplit = state->split;
- struct split *split = split_iter(state, lens);
+ struct re_registers *old_regs = state->regs;
+ uint old_nreg = state->nreg;
+ uint end = REG_END(state);
+ uint start = REG_START(state);
+ uint size = end - start;
- set_split(state, split);
- while (state->split != NULL) {
+ while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
struct tree *t = NULL;
+
t = get_lens(lens->child, state);
list_tail_cons(tree, tail, t);
- next_split(state);
+
+ start += REG_SIZE(state);
+ size -= REG_SIZE(state);
+ free_regs(state);
}
- if (state->pos != oldsplit->start + oldsplit->size) {
+ state->regs = old_regs;
+ state->nreg = old_nreg;
+ if (size != 0) {
get_error(state, lens, "Short iteration");
+ state->error->pos = start;
}
- set_split(state, oldsplit);
- list_free(split);
return tree;
}
static struct skel *parse_quant_star(struct lens *lens, struct state *state,
struct dict **dict) {
assert(lens->tag == L_STAR);
- struct split *oldsplit = state->split;
- struct split *split = split_iter(state, lens);
+ struct lens *child = lens->child;
struct skel *skel = make_skel(lens), *tail = NULL;
+ struct re_registers *old_regs = state->regs;
+ uint old_nreg = state->nreg;
+ uint end = REG_END(state);
+ uint start = REG_START(state);
+ uint size = end - start;
*dict = NULL;
- set_split(state, split);
- while (state->split != NULL) {
+ while (size > 0 && match(state, child, child->ctype, end, start) > 0) {
struct skel *sk;
struct dict *di = NULL;
+
sk = parse_lens(lens->child, state, &di);
list_tail_cons(skel->skels, tail, sk);
dict_append(dict, di);
- next_split(state);
+
+ start += REG_SIZE(state);
+ size -= REG_SIZE(state);
+ free_regs(state);
}
- if (state->pos != oldsplit->start + oldsplit->size) {
+ state->regs = old_regs;
+ state->nreg = old_nreg;
+ if (size != 0) {
get_error(state, lens, "Short iteration");
}
- set_split(state, oldsplit);
- list_free(split);
return skel;
}
@@ -579,9 +535,15 @@ static struct tree *get_quant_maybe(struct lens *lens, struct state *state) {
assert(lens->tag == L_MAYBE);
struct tree *tree = NULL;
- if (applies(state, lens->child)) {
+ /* Check that our child matched. For a construct like (r)?, the
+ * matcher will report a zero length match for group 0, even if r
+ * does not match at all
+ */
+ state->nreg += 1;
+ if (REG_MATCHED(state)) {
tree = get_lens(lens->child, state);
}
+ state->nreg -= 1;
return tree;
}
@@ -590,9 +552,12 @@ static struct skel *parse_quant_maybe(struct lens *lens, struct state *state,
assert(lens->tag == L_MAYBE);
struct skel *skel = NULL;
- if (applies(state, lens->child)) {
+
+ state->nreg += 1;
+ if (REG_MATCHED(state)) {
skel = parse_lens(lens->child, state, dict);
}
+ state->nreg -= 1;
if (skel == NULL)
skel = make_skel(lens);
return skel;
@@ -674,22 +639,15 @@ static struct tree *get_lens(struct lens *lens, struct state *state) {
struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
struct lns_error **err) {
struct state state;
- struct split split;
struct tree *tree = NULL;
- int partial = 1;
+ uint size = strlen(text);
+ int partial;
MEMZERO(&state, 1);
- MEMZERO(&split, 1);
state.info = *info;
state.info.ref = UINT_MAX;
- state.pos = text;
-
- split.start = text;
- split.size = strlen(text);
- state.split = &split;
state.text = text;
- state.pos = text;
/* We are probably being overly cautious here: if the lens can't process
* all of TEXT, we should really fail somewhere in one of the sublenses.
@@ -697,7 +655,7 @@ struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
* try to process, hoping we'll get a more specific error, and if that
* fails, we throw our arms in the air and say 'something went wrong'
*/
- partial = !applies(&state, lens);
+ partial = match(&state, lens, lens->ctype, size, 0) != size;
tree = get_lens(lens, &state);
@@ -714,6 +672,8 @@ struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
get_error(&state, lens, "Get did not match entire input");
}
+ free_regs(&state);
+
if (err != NULL) {
*err = state.error;
} else {
@@ -774,22 +734,18 @@ static struct skel *parse_lens(struct lens *lens, struct state *state,
struct skel *lns_parse(struct lens *lens, const char *text, struct dict **dict,
struct lns_error **err) {
struct state state;
- struct split split;
struct skel *skel = NULL;
+ uint size = strlen(text);
+ int partial;
MEMZERO(&state, 1);
- MEMZERO(&split, 1);
state.info.ref = UINT_MAX;
state.text = text;
- split.start = text;
- split.size = strlen(text);
-
- state.split = &split;
state.text = text;
- state.pos = text;
- if (applies(&state, lens)) {
+ partial = match(&state, lens, lens->ctype, size, 0) != size;
+ if (! partial) {
*dict = NULL;
skel = parse_lens(lens, &state, dict);
@@ -813,6 +769,8 @@ struct skel *lns_parse(struct lens *lens, const char *text, struct dict **dict,
get_error(&state, lens, "parse can not process entire input");
}
+ free_regs(&state);
+
if (err != NULL) {
*err = state.error;
} else {
More information about the augeas-devel
mailing list