[augeas-devel] [PATCH 4/6] Match trees on label and value, not just label
David Lutterkort
lutter at redhat.com
Mon Aug 31 22:09:42 UTC 2009
We still only use one level in the tree for matching in the put direction,
but take the values of nodes into account.
We had fail_ tests in place to document the deficiency of the tree matcher,
but with that deficiency gone, these now become passing tests.
---
src/lens.c | 128 ++++++++++++++++++++++++++---------
src/lens.h | 52 ++++++++++++++
src/put.c | 102 +++++++++++++++++-----------
tests/modules/fail_concat_atype.aug | 18 -----
tests/modules/fail_iter_atype.aug | 12 ---
tests/modules/fail_union_atype.aug | 15 ----
tests/modules/pass_concat_atype.aug | 18 +++++
tests/modules/pass_iter_atype.aug | 14 ++++
tests/modules/pass_union_atype.aug | 13 ++++
9 files changed, 257 insertions(+), 115 deletions(-)
delete mode 100644 tests/modules/fail_concat_atype.aug
delete mode 100644 tests/modules/fail_iter_atype.aug
delete mode 100644 tests/modules/fail_union_atype.aug
create mode 100644 tests/modules/pass_concat_atype.aug
create mode 100644 tests/modules/pass_iter_atype.aug
create mode 100644 tests/modules/pass_union_atype.aug
diff --git a/src/lens.c b/src/lens.c
index 32ed3f9..7342a48 100644
--- a/src/lens.c
+++ b/src/lens.c
@@ -29,7 +29,8 @@
static const int const type_offs[] = {
offsetof(struct lens, ctype),
offsetof(struct lens, atype),
- offsetof(struct lens, ktype)
+ offsetof(struct lens, ktype),
+ offsetof(struct lens, vtype)
};
static const int ntypes = sizeof(type_offs)/sizeof(type_offs[0]);
@@ -42,8 +43,6 @@ static struct value *typecheck_concat(struct info *,
static struct value *typecheck_iter(struct info *info, struct lens *l);
static struct value *typecheck_maybe(struct info *info, struct lens *l);
-static struct regexp *make_key_regexp(struct info *info, const char *pat);
-
/* Lens names for pretty printing */
static const char *const tags[] = {
"del", "store", "key", "label", "seq", "counter", "concat", "union",
@@ -51,7 +50,7 @@ static const char *const tags[] = {
};
static const struct string digits_string = {
- .ref = REF_MAX, .str = (char *) "[0-9]+/"
+ .ref = REF_MAX, .str = (char *) "[0-9]+"
};
static const struct string *const digits_pat = &digits_string;
@@ -219,13 +218,17 @@ struct value *lns_make_concat(struct info *info,
struct value *lns_make_subtree(struct info *info, struct lens *l) {
struct lens *lens;
+ const char *kpat = (l->ktype == NULL) ? ENC_NULL : l->ktype->pattern->str;
+ const char *vpat = (l->vtype == NULL) ? ENC_NULL : l->vtype->pattern->str;
+ char *pat;
+
+ if (asprintf(&pat, "%s%s%s%s", kpat, ENC_EQ, vpat, ENC_SLASH) < 0)
+ return NULL;
lens = make_lens_unop(L_SUBTREE, info, l);
lens->ctype = ref(l->ctype);
- lens->atype = ref(l->ktype);
+ lens->atype = make_regexp(info, pat);
lens->value = lens->key = 0;
- if (lens->atype == NULL)
- lens->atype = make_key_regexp(info, "");
return make_lens_value(lens);
}
@@ -296,6 +299,25 @@ static struct regexp *make_regexp_from_string(struct info *info,
return r;
}
+static struct regexp *restrict_regexp(struct regexp *r) {
+ char *nre = NULL;
+ size_t nre_len;
+ int ret;
+
+ ret = fa_restrict_alphabet(r->pattern->str, strlen(r->pattern->str),
+ &nre, &nre_len,
+ RESERVED_FROM, RESERVED_TO);
+ assert(nre_len == strlen(nre));
+ // FIXME: Tell the user what's wrong
+ if (ret != 0)
+ return NULL;
+
+ r = make_regexp(r->info, nre);
+ if (regexp_compile(r) != 0)
+ abort();
+ return r;
+}
+
struct value *lns_make_prim(enum lens_tag tag, struct info *info,
struct regexp *regexp, struct string *string) {
struct lens *lens = NULL;
@@ -369,18 +391,19 @@ struct value *lns_make_prim(enum lens_tag tag, struct info *info,
if (tag == L_SEQ) {
lens->ktype =
make_regexp_from_string(info, (struct string *) digits_pat);
+ if (lens->ktype == NULL)
+ goto error;
} else if (tag == L_KEY) {
- lens->ktype = make_key_regexp(info, lens->regexp->pattern->str);
+ lens->ktype = restrict_regexp(lens->regexp);
} else if (tag == L_LABEL) {
- struct regexp *r = make_regexp_literal(info, lens->string->str);
- if (r == NULL)
- goto error;
- if (REALLOC_N(r->pattern->str, strlen(r->pattern->str) + 2) == -1) {
- unref(r, regexp);
+ lens->ktype = make_regexp_literal(info, lens->string->str);
+ if (lens->ktype == NULL)
goto error;
- }
- strcat(r->pattern->str, "/");
- lens->ktype = r;
+ }
+
+ /* Set the vtype */
+ if (tag == L_STORE) {
+ lens->vtype = restrict_regexp(lens->regexp);
}
return make_lens_value(lens);
@@ -449,7 +472,9 @@ static struct value *typecheck_union(struct info *info,
return exn;
}
-static struct value *ambig_check(struct info *info, struct fa *fa1, struct fa *fa2,
+static struct value *ambig_check(struct info *info,
+ struct fa *fa1, struct fa *fa2,
+ struct regexp *r1, struct regexp *r2,
const char *msg) {
char *upv, *pv, *v;
size_t upv_len;
@@ -463,6 +488,8 @@ static struct value *ambig_check(struct info *info, struct fa *fa1, struct fa *f
char *e_pv = escape(pv, -1);
char *e_v = escape(v, -1);
exn = make_exn_value(ref(info), "%s", msg);
+ exn_printf_line(exn, " First regexp: /%s/", escape(r1->pattern->str, -1));
+ exn_printf_line(exn, " Second regexp: /%s/", escape(r2->pattern->str, -1));
exn_printf_line(exn, " '%s' can be split into", e_upv);
exn_printf_line(exn, " '%s|=|%s'\n", e_u, e_pv);
exn_printf_line(exn, " and");
@@ -491,7 +518,7 @@ static struct value *ambig_concat_check(struct info *info, const char *msg,
if (result != NULL)
goto done;
- result = ambig_check(info, fa1, fa2, msg);
+ result = ambig_check(info, fa1, fa2, r1, r2, msg);
done:
fa_free(fa1);
fa_free(fa2);
@@ -530,7 +557,7 @@ static struct value *ambig_iter_check(struct info *info, const char *msg,
fas = fa_iter(fa, 0, -1);
- result = ambig_check(info, fa, fas, msg);
+ result = ambig_check(info, fa, fas, r, r, msg);
done:
fa_free(fa);
@@ -578,18 +605,6 @@ static struct value *typecheck_maybe(struct info *info, struct lens *l) {
return exn;
}
-static struct regexp *make_key_regexp(struct info *info, const char *pat) {
- struct regexp *regexp;
- size_t len = strlen(pat) + 4;
-
- make_ref(regexp);
- make_ref(regexp->pattern);
- regexp->info = ref(info);
- CALLOC(regexp->pattern->str, len);
- snprintf(regexp->pattern->str, len, "(%s)/", pat);
- return regexp;
-}
-
void free_lens(struct lens *lens) {
if (lens == NULL)
return;
@@ -651,6 +666,57 @@ void lens_release(struct lens *lens) {
}
/*
+ * Encoding of tree levels
+ */
+char *enc_format(const char *e, size_t len) {
+ size_t size = 0;
+ char *result = NULL, *r;
+ const char *k = e;
+
+ while (*k && k - e < len) {
+ char *eq, *slash, *v;
+ eq = strchr(k, ENC_EQ_CH);
+ slash = strchr(eq, ENC_SLASH_CH);
+ assert(eq != NULL && slash != NULL);
+ v = eq + 1;
+
+ size += 6; /* Surrounding braces */
+ if (k != eq)
+ size += 1 + (eq - k) + 1;
+ if (v != slash)
+ size += 4 + (slash - v) + 1;
+ k = slash + 1;
+ }
+ if (ALLOC_N(result, size + 1) < 0)
+ return NULL;
+
+ k = e;
+ r = result;
+ while (*k && k - e < len) {
+ char *eq, *slash, *v;
+ eq = strchr(k, ENC_EQ_CH);
+ slash = strchr(eq, ENC_SLASH_CH);
+ assert(eq != NULL && slash != NULL);
+ v = eq + 1;
+
+ r = stpcpy(r, " { \"");
+ if (k != eq) {
+ r = stpcpy(r, "\"");
+ r = stpncpy(r, k, eq - k);
+ r = stpcpy(r, "\"");
+ }
+ if (v != slash) {
+ r = stpcpy (r, " = \"");
+ r = stpncpy(r, v, slash - v);
+ r = stpcpy(r, "\"");
+ }
+ r = stpcpy(r, " }");
+ k = slash + 1;
+ }
+ return result;
+}
+
+/*
* Local variables:
* indent-tabs-mode: nil
* c-indent-level: 4
diff --git a/src/lens.h b/src/lens.h
index 37fc26c..32af92d 100644
--- a/src/lens.h
+++ b/src/lens.h
@@ -40,6 +40,16 @@ enum lens_tag {
L_MAYBE
};
+/* A lens. The way the type information is computed is a little
+ * delicate. There are various regexps involved to form the final type:
+ *
+ * CTYPE - the concrete type, used to parse file -> tree
+ * ATYPE - the abstract type, used to parse tree -> file
+ * KTYPE - the 'key' type, matching the label that this lens
+ * can produce, or NULL if no label is produced
+ * VTYPE - the 'value' type, matching the value that this lens
+ * can produce, or NULL if no value is produced
+ */
struct lens {
unsigned int ref;
enum lens_tag tag;
@@ -47,6 +57,7 @@ struct lens {
struct regexp *ctype;
struct regexp *atype;
struct regexp *ktype;
+ struct regexp *vtype;
unsigned int value : 1;
unsigned int key : 1;
unsigned int consumes_value : 1;
@@ -131,6 +142,47 @@ void lns_put(FILE *out, struct lens *lens, struct tree *tree,
regular expressions */
void lens_release(struct lens *lens);
void free_lens(struct lens *lens);
+
+/*
+ * Encoding of tree levels into strings
+ */
+
+/* Special characters used when encoding one level of the tree as a string.
+ * We encode one tree node as KEY . ENC_EQ . VALUE . ENC_SLASH; if KEY or
+ * VALUE are NULL, we use ENC_NULL, which is the empty string. This has the
+ * effect that NULL strings are treated the same as empty strings.
+ *
+ * This encoding is used both for actual trees in the put direction, and to
+ * produce regular expressions describing one level in the tree (we
+ * disregard subtrees)
+ *
+ * For this to work, neither ENC_EQ nor ENC_SLASH can be allowed in a
+ * VALUE; we do this behind the scenes by rewriting regular expressions for
+ * values.
+ */
+#define ENC_EQ "\003"
+#define ENC_SLASH "\004"
+#define ENC_NULL ""
+#define ENC_EQ_CH (ENC_EQ[0])
+#define ENC_SLASH_CH (ENC_SLASH[0])
+
+/* The reserved range of characters that we do not allow in user-supplied
+ regular expressions, since we need them for internal bookkeeping.
+
+ This range must include the ENC_* characters
+*/
+#define RESERVED_FROM '\001'
+#define RESERVED_TO ENC_SLASH_CH
+
+/* The length of the string S encoded */
+#define ENCLEN(s) ((s) == NULL ? strlen(ENC_NULL) : strlen(s))
+#define ENCSTR(s) ((s) == NULL ? ENC_NULL : s)
+
+/* Format an encoded level as
+ * { key1 = value1 } { key2 = value2 } .. { keyN = valueN }
+ */
+char *enc_format(const char *e, size_t len);
+
#endif
diff --git a/src/put.c b/src/put.c
index 26483df..7d725e5 100644
--- a/src/put.c
+++ b/src/put.c
@@ -24,6 +24,7 @@
#include <stdarg.h>
#include "syntax.h"
+#include "memory.h"
/* Data structure to keep track of where we are in the tree. The split
* describes a sublist of the list of siblings in the current tree. The
@@ -35,15 +36,18 @@
* part of the split anymore (NULL if we are talking about all the siblings
* of TREE)
*
- * LABELS is a string containing all the labels of the siblings joined with
- * '/' as a separator. We are currently looking at a part of that string,
- * namely the END - START characters starting at LABELS + START.
+ * ENC is a string containing the encoding of the current position in the
+ * tree. The encoding is
+ * <label>=<value>/<label>=<value>/.../<label>=<value>/
+ * where the label/value pairs come from TREE and its
+ * siblings. The encoding uses ENC_EQ instead of the '=' above to avoid
+ * clashes with legitimate values, and encodes NULL values as ENC_NULL.
*/
struct split {
struct split *next;
struct tree *tree;
struct tree *follow;
- char *labels;
+ char *enc;
size_t start;
size_t end;
};
@@ -88,15 +92,31 @@ static void put_error(struct state *state, struct lens *lens,
state->error->message = NULL;
}
+ATTRIBUTE_PURE
+static int enclen(const char *key, const char *value) {
+ return ENCLEN(key) + strlen(ENC_EQ) + ENCLEN(value)
+ + strlen(ENC_SLASH);
+}
+
+static char *encpcpy(char *e, const char *key, const char *value) {
+ e = stpcpy(e, ENCSTR(key));
+ e = stpcpy(e, ENC_EQ);
+ e = stpcpy(e, ENCSTR(value));
+ e = stpcpy(e, ENC_SLASH);
+ return e;
+}
+
static void regexp_match_error(struct state *state, struct lens *lens,
int count, struct split *split,
struct regexp *r) {
- char *text = strndup(split->labels + split->start,
+ // FIXME: Split the regexp and encoding back
+ // into something resembling a tree level
+ char *text = strndup(split->enc + split->start,
split->end - split->start);
char *pat = regexp_escape(r);
if (count == -1) {
- put_error(state, lens, "Failed to match /%s/ with %s", pat, text);
+ put_error(state, lens, "Failed to match /%s/ with %s", pat, enc_format(text, strlen(text)));
} else if (count == -2) {
put_error(state, lens, "Internal error matching /%s/ with %s",
pat, text);
@@ -108,45 +128,48 @@ static void regexp_match_error(struct state *state, struct lens *lens,
free(text);
}
+static void free_split(struct split *split) {
+ if (split == NULL)
+ return;
+
+ free(split->enc);
+ free(split);
+}
+
+/* Encode the list of TREE's children as a string.
+ */
static struct split *make_split(struct tree *tree) {
struct split *split;
- CALLOC(split, 1);
+
+ if (ALLOC(split) < 0)
+ return NULL;
split->tree = tree;
- split->start = 0;
- for (struct tree *t = tree; t != NULL; t = t->next) {
- if (t->label != NULL)
- split->end += strlen(t->label);
- split->end += 1;
- }
- char *labels;
- CALLOC(labels, split->end + 1);
- char *l = labels;
- for (struct tree *t = tree; t != NULL; t = t->next) {
- if (t->label != NULL)
- l = stpcpy(l, t->label);
- l = stpcpy(l, "/");
+ list_for_each(t, tree) {
+ split->end += enclen(t->label, t->value);
}
- split->labels = labels;
- return split;
-}
-static void free_split(struct split *split) {
- if (split == NULL)
- return;
+ if (ALLOC_N(split->enc, split->end + 1) < 0)
+ goto error;
- free(split->labels);
- free(split);
+ char *enc = split->enc;
+ list_for_each(t, tree) {
+ enc = encpcpy(enc, t->label, t->value);
+ }
+ return split;
+ error:
+ free_split(split);
+ return NULL;
}
static struct split *split_append(struct split **split, struct split *tail,
struct tree *tree, struct tree *follow,
- char *labels, size_t start, size_t end) {
+ char *enc, size_t start, size_t end) {
struct split *sp;
CALLOC(sp, 1);
sp->tree = tree;
sp->follow = follow;
- sp->labels = labels;
+ sp->enc = enc;
sp->start = start;
sp->end = end;
list_tail_cons(*split, tail, sp);
@@ -180,17 +203,18 @@ static struct split *split_concat(struct state *state, struct lens *lens) {
struct regexp *atype = lens->atype;
/* Fast path for leaf nodes, which will always lead to an empty split */
- if (outer->tree == NULL && strlen(outer->labels) == 0
+ // FIXME: This doesn't match the empty encoding
+ if (outer->tree == NULL && strlen(outer->enc) == 0
&& regexp_is_empty_pattern(atype)) {
for (int i=0; i < lens->nchildren; i++) {
tail = split_append(&split, tail, NULL, NULL,
- outer->labels, 0, 0);
+ outer->enc, 0, 0);
}
return split;
}
MEMZERO(&regs, 1);
- count = regexp_match(atype, outer->labels, outer->end,
+ count = regexp_match(atype, outer->enc, outer->end,
outer->start, &regs);
if (count >= 0 && count != outer->end - outer->start)
count = -1;
@@ -206,11 +230,11 @@ static struct split *split_concat(struct state *state, struct lens *lens) {
assert(regs.start[reg] != -1);
struct tree *follow = cur;
for (int j = regs.start[reg]; j < regs.end[reg]; j++) {
- if (outer->labels[j] == '/')
+ if (outer->enc[j] == ENC_SLASH_CH)
follow = follow->next;
}
tail = split_append(&split, tail, cur, follow,
- outer->labels, regs.start[reg], regs.end[reg]);
+ outer->enc, regs.start[reg], regs.end[reg]);
cur = follow;
reg += 1 + regexp_nsub(lens->children[i]->atype);
}
@@ -232,7 +256,7 @@ static struct split *split_iter(struct state *state, struct lens *lens) {
int pos = outer->start;
struct split *tail = NULL;
while (pos < outer->end) {
- count = regexp_match(atype, outer->labels, outer->end, pos, NULL);
+ count = regexp_match(atype, outer->enc, outer->end, pos, NULL);
if (count == -1) {
break;
} else if (count < -1) {
@@ -242,11 +266,11 @@ static struct split *split_iter(struct state *state, struct lens *lens) {
struct tree *follow = cur;
for (int j = pos; j < pos + count; j++) {
- if (outer->labels[j] == '/')
+ if (outer->enc[j] == ENC_SLASH_CH)
follow = follow->next;
}
tail = split_append(&split, tail, cur, follow,
- outer->labels, pos, pos + count);
+ outer->enc, pos, pos + count);
cur = follow;
pos += count;
}
@@ -261,7 +285,7 @@ static int applies(struct lens *lens, struct state *state) {
int count;
struct split *split = state->split;
- count = regexp_match(lens->atype, split->labels, split->end,
+ count = regexp_match(lens->atype, split->enc, split->end,
split->start, NULL);
if (count < -1) {
regexp_match_error(state, lens, count, split, lens->atype);
diff --git a/tests/modules/fail_concat_atype.aug b/tests/modules/fail_concat_atype.aug
deleted file mode 100644
index 3cf582d..0000000
--- a/tests/modules/fail_concat_atype.aug
+++ /dev/null
@@ -1,18 +0,0 @@
-module Fail_concat_atype =
-
- (* This passes the ctype check for unambiguous concatenation *)
- (* because the STORE's keep everything copacetic, but in the *)
- (* PUT direction, we can't tell how to split a tree *)
- (* { "a" = .. } { "b" = .. } { "a" = .. } *)
- (* solely by looking at tree labels. *)
- (* Ultimately, Augeas should check ful tree schemas as the *)
- (* atype, but for now we stick to just tree labels *)
-
- let ab = [ key /a/ . store /1/ ] . ([ key /b/ . store /2/ ]?)
- let ba = ([ key /b/ . store /3/ ])? . [ key /a/ . store /4/ ]
- let lns = ab . ba
-
-
-(* Local Variables: *)
-(* mode: caml *)
-(* End: *)
diff --git a/tests/modules/fail_iter_atype.aug b/tests/modules/fail_iter_atype.aug
deleted file mode 100644
index 480927b..0000000
--- a/tests/modules/fail_iter_atype.aug
+++ /dev/null
@@ -1,12 +0,0 @@
-module Fail_iter_atype =
-
- (* Similar to the Fail_concat_atype check *)
-
- let a (r:regexp) = [ key /a/ . store r ]
- let aa = (a /1/) . (a /2/)
- let lns = ((a /3/) | aa)*
-
-
-(* Local Variables: *)
-(* mode: caml *)
-(* End: *)
diff --git a/tests/modules/fail_union_atype.aug b/tests/modules/fail_union_atype.aug
deleted file mode 100644
index fcbdac4..0000000
--- a/tests/modules/fail_union_atype.aug
+++ /dev/null
@@ -1,15 +0,0 @@
-module Fail_union_atype =
- (* This is illegal, otherwise we don't know which alternative *)
- (* to take for a tree { "a" = "?" } *)
-
- let del_str (s:string) = del s s
-
- let lns = [ key /a/ . store /b/ . del_str " (l)" ]
- | [ key /a/ . store /c/ . del_str " (r)" ]
-
- (* To make this a passing test, make sure that this also works: *)
- (* test lns put "ac (r)" after set "a" "b" = "ab (l)" *)
-
-(* Local Variables: *)
-(* mode: caml *)
-(* End: *)
diff --git a/tests/modules/pass_concat_atype.aug b/tests/modules/pass_concat_atype.aug
new file mode 100644
index 0000000..04301ae
--- /dev/null
+++ b/tests/modules/pass_concat_atype.aug
@@ -0,0 +1,18 @@
+module Pass_concat_atype =
+
+ (* This passes both the ctype and atype check for unambiguous *)
+ (* concatenation because the STORE's keep everything copacetic.*)
+ (* If we would only look at tree labels, we'd get a type error *)
+ (* in the PUT direction, because we couldn't tell how to split *)
+ (* the tree *)
+ (* { "a" = .. } { "b" = .. } { "a" = .. } *)
+ (* solely by looking at tree labels. *)
+
+ let ab = [ key /a/ . store /1/ ] . ([ key /b/ . store /2/ ]?)
+ let ba = ([ key /b/ . store /3/ ])? . [ key /a/ . store /4/ ]
+ let lns = ab . ba
+
+
+(* Local Variables: *)
+(* mode: caml *)
+(* End: *)
diff --git a/tests/modules/pass_iter_atype.aug b/tests/modules/pass_iter_atype.aug
new file mode 100644
index 0000000..afaeaed
--- /dev/null
+++ b/tests/modules/pass_iter_atype.aug
@@ -0,0 +1,14 @@
+module Pass_iter_atype =
+
+ (* Similar to the Pass_concat_atype check; verify that the *)
+ (* typechecker takes tree values into account in the PUT direction, *)
+ (* and not just tree labels. *)
+
+ let a (r:regexp) = [ key /a/ . store r ]
+ let aa = (a /1/) . (a /2/)
+ let lns = ((a /3/) | aa)*
+
+
+(* Local Variables: *)
+(* mode: caml *)
+(* End: *)
diff --git a/tests/modules/pass_union_atype.aug b/tests/modules/pass_union_atype.aug
new file mode 100644
index 0000000..ea87709
--- /dev/null
+++ b/tests/modules/pass_union_atype.aug
@@ -0,0 +1,13 @@
+(* Test that we take the right branch in a union based solely on *)
+(* differing values associated with a tree node *)
+module Pass_union_atype =
+ let del_str (s:string) = del s s
+
+ let lns = [ key /a/ . store /b/ . del_str " (l)" ]
+ | [ key /a/ . store /c/ . del_str " (r)" ]
+
+ test lns put "ac (r)" after set "a" "b" = "ab (l)"
+
+(* Local Variables: *)
+(* mode: caml *)
+(* End: *)
--
1.6.2.5
More information about the augeas-devel
mailing list