[augeas-devel] augeas: master - * src/jmt.c: add caller filtering

David Lutterkort lutter at fedoraproject.org
Mon Feb 8 20:09:08 UTC 2010


Gitweb:        http://git.fedorahosted.org/git/augeas.git?p=augeas.git;a=commitdiff;h=00459b6074d8296deea7328651b029548165c7ea
Commit:        00459b6074d8296deea7328651b029548165c7ea
Parent:        8164848e52c6ff855aac25c62e1f83b750267724
Author:        David Lutterkort <lutter at redhat.com>
AuthorDate:    Tue Jan 12 13:25:04 2010 -0800
Committer:     David Lutterkort <lutter at redhat.com>
CommitterDate: Mon Feb 8 11:55:55 2010 -0800

* src/jmt.c: add caller filtering

Caller filtering is needed to weed out seemingly ambiguous parse trees (see
Section 5.3 of Jim/Mandelbaum's paper)

Also adds two pathological lenses and tests to
pass_simple_recursion.aug. Those tests fail without caller filtering.
---
 src/jmt.c                               |  263 ++++++++++++++++++++----------
 tests/modules/pass_simple_recursion.aug |   11 ++
 2 files changed, 186 insertions(+), 88 deletions(-)

diff --git a/src/jmt.c b/src/jmt.c
index a089748..9f0072b 100644
--- a/src/jmt.c
+++ b/src/jmt.c
@@ -196,10 +196,10 @@ struct jmt {
 };
 
 enum item_reason {
-    R_ROOT = 0,
-    R_COMPLETE = 1 << 1,
-    R_PREDICT = 1 << 2,
-    R_SCAN = 1 << 3
+    R_ROOT = 1,
+    R_COMPLETE = R_ROOT << 1,
+    R_PREDICT = R_COMPLETE << 1,
+    R_SCAN = R_PREDICT << 1
 };
 
 /* The reason an item was added for parse reconstruction; can be R_SCAN,
@@ -210,6 +210,7 @@ struct link {
     ind_t            from_set;   /* R_COMPLETE, R_PREDICT, R_SCAN */
     ind_t            from_item;  /* R_COMPLETE, R_PREDICT, R_SCAN */
     ind_t            to_item;    /* R_COMPLETE */
+    ind_t            caller;     /* number of a state */
 };
 
 struct item {
@@ -293,7 +294,8 @@ static ind_t parse_add_item(struct jmt_parse *parse, ind_t j,
                             struct state *s, ind_t k,
                             enum item_reason reason, ind_t lens,
                             ind_t from_set,
-                            ind_t from_item, ind_t to_item) {
+                            ind_t from_item, ind_t to_item,
+                            ind_t caller) {
 
     ensure(from_item == EPS || from_item < parse->sets[from_set]->items.used,
            parse);
@@ -334,7 +336,7 @@ static ind_t parse_add_item(struct jmt_parse *parse, ind_t j,
         struct link *lnk = item->links + i;
         if (lnk->reason == reason && lnk->lens == lens
             && lnk->from_set == from_set && lnk->from_item == from_item
-            && lnk->to_item == to_item)
+            && lnk->to_item == to_item && lnk->caller == caller)
             return result;
     }
 
@@ -349,6 +351,7 @@ static ind_t parse_add_item(struct jmt_parse *parse, ind_t j,
     lnk->from_set = from_set;
     lnk->from_item = from_item;
     lnk->to_item = to_item;
+    lnk->caller = caller;
  error:
     return result;
 }
@@ -364,7 +367,7 @@ static void parse_add_complete(struct jmt_parse *parse, ind_t j,
                                ind_t k, ind_t itemk,
                                ind_t lens,
                                ind_t item) {
-    parse_add_item(parse, j, s, i, R_COMPLETE, lens, k, itemk, item);
+    parse_add_item(parse, j, s, i, R_COMPLETE, lens, k, itemk, item, IND_MAX);
 }
 
 /* Same as parse_add_complete, but mark the item also as a predict
@@ -375,9 +378,9 @@ static void parse_add_predict_complete(struct jmt_parse *parse, ind_t j,
                                        struct state *s, ind_t i,
                                        ind_t k, ind_t itemk,
                                        ind_t lens,
-                                       ind_t item) {
+                                       ind_t item, ind_t caller) {
     parse_add_item(parse, j, s, i, R_COMPLETE|R_PREDICT,
-                   lens, k, itemk, item);
+                   lens, k, itemk, item, caller);
 }
 
 /* Add item (s, j) to E_j and record that it was added because of a
@@ -387,8 +390,10 @@ static ind_t parse_add_predict(struct jmt_parse *parse, ind_t j,
                                struct state *s, ind_t from) {
 
     ensure(from < parse->sets[j]->items.used, parse);
+    struct state *t = item_state(parse, j, from);
 
-    return parse_add_item(parse, j, s, j, R_PREDICT, EPS, j, from, EPS);
+    return parse_add_item(parse, j, s, j, R_PREDICT, EPS, j, from, EPS,
+                          t->num);
  error:
     return IND_MAX;
 }
@@ -403,7 +408,7 @@ static void parse_add_scan(struct jmt_parse *parse, ind_t j,
                            ind_t lens, ind_t k, ind_t item) {
     ensure(item < parse->sets[k]->items.used, parse);
 
-    parse_add_item(parse, j, s, i, R_SCAN, lens, k, item, EPS);
+    parse_add_item(parse, j, s, i, R_SCAN, lens, k, item, EPS, IND_MAX);
  error:
     return;
 }
@@ -424,6 +429,13 @@ static bool is_scan(const struct link *lnk) {
 }
 
 ATTRIBUTE_PURE
+static bool is_last_sibling(const struct link *lnk) {
+    if (is_complete(lnk))
+        return false;
+    return lnk->reason & (R_PREDICT|R_ROOT);
+}
+
+ATTRIBUTE_PURE
 static bool returns(const struct state *s, ind_t l) {
     for (ind_t i = 0; i < s->nret; i++)
         if (s->ret[i] == l)
@@ -479,7 +491,7 @@ static void ncaller(struct jmt_parse *parse, ind_t j, ind_t item,
             parse_add_predict_complete(parse, j,
                                        u->to, i,
                                        j, item, u->lens,
-                                       pred);
+                                       pred, t->num);
         }
     }
 }
@@ -496,7 +508,7 @@ static void ncallee(struct jmt_parse *parse, ind_t j, ATTRIBUTE_UNUSED ind_t ite
             parse_add_predict_complete(parse, j,
                                        u->to, j,
                                        j, pred, u->lens,
-                                       pred);
+                                       pred, t->num);
         }
     }
 }
@@ -646,13 +658,14 @@ jmt_parse(struct jmt *jmt, const char *text, size_t text_len)
     ERR_BAIL(jmt);
 
     /* INIT */
-    parse_add_item(parse, 0, jmt->start, 0, R_ROOT, EPS, EPS, EPS, EPS);
+    parse_add_item(parse, 0, jmt->start, 0, R_ROOT, EPS, EPS, EPS, EPS,
+                   jmt->lens);
     /* NINIT */
     if (is_return(jmt->start)) {
         for_each_trans(x, jmt->start) {
             if (returns(jmt->start, x->lens))
                 parse_add_predict_complete(parse, 0, x->to, 0,
-                                           0, 0, x->lens, 0);
+                                           0, 0, x->lens, 0, 0);
         }
     }
 
@@ -774,83 +787,113 @@ static void build_trace(const char *msg, ind_t start, ind_t end,
     }
 }
 
-static int ambig_check(struct jmt_visitor *visitor, struct lens *lens,
-                        ind_t k, struct item *x) {
-    ind_t count = 0;
-    ind_t nonpred = 0;
-
-    for (ind_t i=0; i < x->nlinks; i++) {
-        if (!is_predict(x->links + i) || is_complete(x->links + i)) {
-            nonpred = i;
-            count += 1;
-        }
-    }
+static int add_sibling(struct array *siblings, ind_t lnk) {
+    int r;
+    ind_t ind;
 
-    if (count > 1) {
-        (*visitor->error)(lens, visitor->data, k,
-                 "Ambiguous parse: %d links in state (%d, %d) in E_%d",
-                 x->nlinks, x->state->num, x->parent, k);
+    r = array_add(siblings, &ind);
+    if (r < 0)
         return -1;
-    }
-    if (count == 1 && x->links[0].reason == R_PREDICT) {
-        struct link tmp = x->links[0];
-        x->links[0] = x->links[nonpred];
-        x->links[nonpred] = tmp;
-    }
+    *array_elem(*siblings, ind, ind_t) = lnk;
     return 0;
 }
 
-static int
-build_tree(struct jmt_parse *parse, ind_t k, ind_t item, struct lens *lens,
-           struct jmt_visitor *visitor, int lvl);
+/* Return true if CALLER is a possible caller for the link LNK which starts
+ * at item X.
+ *
+ * FIXME: We can get rid of the caller field on a link if we distinguish
+ * between NCALLER and NCALLEE in the Earley graph, rather than collapse
+ * them both into links with reason PREDICT|COMPLETE
+ */
+static bool is_caller(struct item *x, struct link *lnk, ind_t caller) {
+    if (lnk->reason & R_ROOT)
+        return caller == lnk->caller;
 
-static int
-build_children(struct jmt_parse *parse, ind_t k, ind_t item, ind_t start,
-               struct jmt_visitor *visitor, int lvl) {
-    struct item *x = set_item(parse, k, item);
-    struct lens *lens = lens_of_parse(parse, x->links->lens);
-    ind_t end = k;
+    if (! is_predict(lnk))
+        return false;
 
-    if (start == end) {
-        /* This completion corresponds to a nullable nonterminal
-         * that match epsilon. Reconstruct the full parse tree
-         * for matching epsilon */
-        build_nullable(parse, start, visitor, lens, lvl);
-        return end;
+    if (is_complete(lnk)) {
+        /* NCALLER: caller == t
+         * NCALLEE: caller == s */
+        return caller == lnk->caller;
     }
+    /* PREDICT: caller == t || caller == s */
+    return caller == lnk->caller || caller == x->state->num;
+}
 
-    /* x the first item in a list of siblings; visit items (x->from_set,
-     * x->from_item) in order, which will visit x and its siblings in the
-     * parse tree from right to left */
-    while (is_complete(x->links) || is_scan(x->links)) {
-        struct lens *sub = lens_of_parse(parse, x->links->lens);
-        if (sub->recursive) {
-            build_tree(parse, k, item, sub, visitor, lvl+1);
-            ERR_BAIL(parse);
+/* Traverse the siblings of x and check that the callee's set of callers
+ * contains CALLER. When a path ending with a call from CALLER exists,
+ * record the number of the corresponding link for each item in
+ * SIBLINGS. The links are recorded in left-to-right order, i.e. the number
+ * of the first link to follow is in the last entry in SIBLINGS.
+ *
+ * Returns 0 if there is a path ending in a callee of CALLER. Return -1 if
+ * there is none. Return -2 if there are multiple such paths. Return -3 for
+ * any other error.
+ */
+static int filter_siblings(struct jmt_visitor *visitor, struct lens *lens,
+                          ind_t k, ind_t item, ind_t caller,
+                          struct array *siblings) {
+    struct jmt_parse *parse = visitor->parse;
+    struct item *x = set_item(parse, k, item);
+    ind_t nlast = 0;
+    int r;
+
+    for (ind_t lnk = 0; lnk < x->nlinks; lnk++)
+        if (is_last_sibling(x->links + lnk))
+            nlast += 1;
+
+    if (nlast > 0 && nlast < x->nlinks)
+        goto ambig;
+
+    if (nlast == x->nlinks) {
+        for (ind_t lnk = 0; lnk < x->nlinks; lnk++) {
+            if (is_caller(x, x->links + lnk, caller)) {
+                siblings->used = 0;
+                r = add_sibling(siblings, lnk);
+                if (r < 0) {
+                    ERR_REPORT(parse, AUG_ENOMEM, NULL);
+                    return -3;
+                }
+                return 0;
+            }
+        }
+        return -1;
+    } else {
+        /* nlast == 0 */
+        ind_t found = IND_MAX;
+        for (ind_t lnk = 0; lnk < x->nlinks; lnk++) {
+            struct link *l = x->links + lnk;
+            r = filter_siblings(visitor, lens,
+                                l->from_set, l->from_item, caller,
+                                siblings);
+            if (r == -1)
+                continue;
+            if (r == 0) {
+                if (found != IND_MAX)
+                    goto ambig;
+                else
+                    found = lnk;
+            } else {
+                return r;
+            }
+        }
+        if (found == IND_MAX) {
+            return -1;
         } else {
-            if (debugging("cf.jmt.visit"))
-                build_trace("T", x->links->from_set, k, x, lvl+1);
-            if (visitor->terminal != NULL) {
-                (*visitor->terminal)(sub,
-                                     x->links->from_set, k, visitor->data);
-                ERR_BAIL(parse);
+            r = add_sibling(siblings, found);
+            if (r < 0) {
+                ERR_REPORT(parse, AUG_ENOMEM, NULL);
+                return -3;
             }
+            return 0;
         }
-        k = x->links->from_set;
-        item = x->links->from_item;
-        x = set_item(parse, k, item);
-        // FIXME: This is not clean - we are checking if
-        // we would leave because we'd leave the span [start .. end]
-        // That kinda works, but it's a kludge around the trouble we
-        // have with COMPLETE + PREDICT nodes
-        if (x->links->from_set < start)
-            break;
-        if (ambig_check(visitor, lens, k, x) < 0)
-            return end;
-        ERR_BAIL(parse);
     }
- error:
-    return end;
+ ambig:
+    (*visitor->error)(lens, visitor->data, k,
+                      "Ambiguous parse: %d links in state (%d, %d) in E_%d",
+                      x->nlinks, x->state->num, x->parent, k);
+    return -2;
 }
 
 static void visit_enter(struct jmt_visitor *visitor, struct lens *lens,
@@ -872,6 +915,10 @@ static void visit_exit(struct jmt_visitor *visitor, struct lens *lens,
 }
 
 static int
+build_children(struct jmt_parse *parse, ind_t k, ind_t item,
+               struct jmt_visitor *visitor, int lvl, ind_t caller);
+
+static int
 build_tree(struct jmt_parse *parse, ind_t k, ind_t item, struct lens *lens,
            struct jmt_visitor *visitor, int lvl) {
     struct item *x = set_item(parse, k, item);
@@ -887,9 +934,6 @@ build_tree(struct jmt_parse *parse, ind_t k, ind_t item, struct lens *lens,
         return end;
     }
 
-    if (ambig_check(visitor, lens, k, x) < 0)
-        return end;
-
     ensure(is_complete(x->links), parse);
 
     visit_enter(visitor, lens, start, end, x, lvl);
@@ -898,12 +942,13 @@ build_tree(struct jmt_parse *parse, ind_t k, ind_t item, struct lens *lens,
     /* x is a completion item. (k, x->to_item) is its first child in the
      * parse tree. */
     if (! is_predict(x->links)) {
-        item = x->links->to_item;
+        struct link *lnk = x->links;
+        struct item *sib = set_item(parse, lnk->from_set, lnk->from_item);
+        ind_t caller = sib->state->num;
+
+        item = lnk->to_item;
         x = set_item(parse, k, item);
-        if (ambig_check(visitor, lens, k, x) < 0)
-            return end;
-        ERR_BAIL(parse);
-        build_children(parse, k, item, start, visitor, lvl);
+        build_children(parse, k, item, visitor, lvl, caller);
         ERR_BAIL(parse);
     }
 
@@ -913,6 +958,47 @@ build_tree(struct jmt_parse *parse, ind_t k, ind_t item, struct lens *lens,
     return end;
 }
 
+static int
+build_children(struct jmt_parse *parse, ind_t k, ind_t item,
+               struct jmt_visitor *visitor, int lvl, ind_t caller) {
+    struct item *x = set_item(parse, k, item);
+    struct lens *lens = lens_of_parse(parse, x->links->lens);
+    struct array siblings;
+    ind_t end = k;
+    int r;
+
+    array_init(&siblings, sizeof(ind_t));
+    r = filter_siblings(visitor, lens, k, item, caller, &siblings);
+    if (r < 0)
+        goto error;
+
+    /* x the first item in a list of siblings; visit items (x->from_set,
+     * x->from_item) in order, which will visit x and its siblings in the
+     * parse tree from right to left */
+    for (ind_t i = siblings.used - 1; i > 0; i--) {
+        ind_t lnk = *array_elem(siblings, i, ind_t);
+        struct lens *sub = lens_of_parse(parse, x->links[lnk].lens);
+        if (sub->recursive) {
+            build_tree(parse, k, item, sub, visitor, lvl+1);
+            ERR_BAIL(parse);
+        } else {
+            if (debugging("cf.jmt.visit"))
+                build_trace("T", x->links->from_set, k, x, lvl+1);
+            if (visitor->terminal != NULL) {
+                (*visitor->terminal)(sub,
+                                     x->links->from_set, k, visitor->data);
+                ERR_BAIL(parse);
+            }
+        }
+        k = x->links[lnk].from_set;
+        item = x->links[lnk].from_item;
+        x = set_item(parse, k, item);
+    }
+ error:
+    array_release(&siblings);
+    return end;
+}
+
 void jmt_visit(struct jmt_visitor *visitor, size_t *len) {
     struct jmt_parse *parse = visitor->parse;
     ind_t k = parse->nsets - 1;     /* Current Earley set */
@@ -943,7 +1029,8 @@ void jmt_visit(struct jmt_visitor *visitor, size_t *len) {
     visit_enter(visitor, lens, 0, k, NULL, 0);
     ERR_BAIL(parse);
 
-    *len = build_children(parse, k, item, 0, visitor, 0);
+    *len = build_children(parse, k, item, visitor, 0,
+                          parse->jmt->start->num);
     ERR_BAIL(parse);
 
     visit_exit(visitor, lens, 0, k, NULL, 0);
diff --git a/tests/modules/pass_simple_recursion.aug b/tests/modules/pass_simple_recursion.aug
index 849c861..0a8f48f 100644
--- a/tests/modules/pass_simple_recursion.aug
+++ b/tests/modules/pass_simple_recursion.aug
@@ -51,3 +51,14 @@ test prim_nullable get "x" = { "x" }
 let rec ambig = [ ambig . label "x" ] | del /s+/ "s"
 test ambig get "" = *
 test ambig get "s" = *
+
+(* Test link filtering. These tests cause seemingly ambiguous parses, which
+ * need to be disambiguated by filtering links in the Earley graph. See
+ * section 5.3 in the paper *)
+let rec unamb1 = [ label "x" . unamb1 . store /y/ ] | [ key "z" ]
+test unamb1 get "zyy" = { "x" = "y" { "x" = "y" { "z" } } }
+
+
+let rec unamb2 = del /u*/ "" . [ unamb2 . key /x/ ] | del /s*/ ""
+test unamb2 get "sx" = { "x" }
+test unamb2 get "x" = { "x" }




More information about the augeas-devel mailing list