[Libguestfs] [PATCH 3/6] inspection: Add rules compiler to the generator.

Richard W.M. Jones rjones at redhat.com
Sat Dec 5 12:48:31 UTC 2015


This adds a "prolog-inspired" compiler to the generator, which we can
use to generate guestfs inspection.  The compiler generates C code
which should be reasonably efficient.

Note this change requires ocamllex and ocamlyacc, but these are
normally part of any OCaml distribution, since they are part of the
OCaml compiler.  We also import some GPL-licensed gnulib code for
handling sets, which is OK because we're going to use these in a
standalone program (in following commits).
---
 .gitignore                   |   4 +
 README                       |   4 +
 bootstrap                    |   2 +
 generator/Makefile.am        |  39 ++-
 generator/rules_compiler.ml  | 801 +++++++++++++++++++++++++++++++++++++++++++
 generator/rules_compiler.mli |  21 ++
 generator/rules_parser.mly   | 129 +++++++
 generator/rules_scanner.mll  | 113 ++++++
 generator/types.ml           |  65 ++++
 generator/utils.ml           |  16 +
 generator/utils.mli          |   6 +
 m4/guestfs_ocaml.m4          |   2 +
 12 files changed, 1200 insertions(+), 2 deletions(-)
 create mode 100644 generator/rules_compiler.ml
 create mode 100644 generator/rules_compiler.mli
 create mode 100644 generator/rules_parser.mly
 create mode 100644 generator/rules_scanner.mll

diff --git a/.gitignore b/.gitignore
index 11557b6..288a853 100644
--- a/.gitignore
+++ b/.gitignore
@@ -221,7 +221,11 @@ Makefile.in
 /generator/files-generated.txt
 /generator/generator
 /generator/.pod2text.data*
+/generator/rules_parser.ml
+/generator/rules_parser.mli
+/generator/rules_scanner.ml
 /generator/stamp-generator
+/generator/stamp-rules-parser
 /get-kernel/.depend
 /get-kernel/stamp-virt-get-kernel.pod
 /get-kernel/virt-get-kernel
diff --git a/README b/README
index 2c79c0d..26198fc 100644
--- a/README
+++ b/README
@@ -88,6 +88,10 @@ The full requirements are described below.
 |              |             |   | Optional if compiling from tarball.     |
 |              |             |   | To build generated files and OCaml bindings.
 +--------------+-------------+---+-----------------------------------------+
+| ocamllex     | 3.11        |R/O| Required if compiling from git.         |
+| ocamlyacc    |             |   | Optional if compiling from tarball.     |
+|              |             |   | To build generated files and OCaml bindings.
++--------------+-------------+---+-----------------------------------------+
 | findlib      |             |R/O| Required if compiling from git.         |
 |              |             |   | Optional if compiling from tarball.     |
 |              |             |   | To build generated files and OCaml bindings.
diff --git a/bootstrap b/bootstrap
index 5df6f0f..f932c0c 100755
--- a/bootstrap
+++ b/bootstrap
@@ -36,6 +36,7 @@ gnulib_tool=$GNULIB_SRCDIR/gnulib-tool
 
 modules='
 accept4
+array-oset
 areadlink
 areadlinkat
 arpa_inet
@@ -97,6 +98,7 @@ warnings
 xalloc
 xalloc-die
 xgetcwd
+xoset
 xstrtol
 xstrtoll
 xvasprintf
diff --git a/generator/Makefile.am b/generator/Makefile.am
index 9177e6f..a0fb7bd 100644
--- a/generator/Makefile.am
+++ b/generator/Makefile.am
@@ -18,6 +18,9 @@
 include $(top_srcdir)/subdir-rules.mk
 
 # In alphabetical order.
+#
+# Note we include ocamllex/ocamlyacc-generated files here, since
+# we want to distribute these in the tarball for convenience.
 sources = \
 	actions.ml \
 	actions.mli \
@@ -48,6 +51,13 @@ sources = \
 	prepopts.mli \
 	python.ml \
 	ruby.ml \
+	rules_compiler.ml \
+	rules_compiler.mli \
+	rules_parser.ml \
+	rules_parser.mli \
+	rules_parser.mly \
+	rules_scanner.ml \
+	rules_scanner.mll \
 	structs.ml \
 	structs.mli \
 	tests_c_api.ml \
@@ -88,6 +98,9 @@ objects = \
 	bindtests.cmo \
 	errnostring.cmo \
 	customize.cmo \
+	rules_scanner.cmo \
+	rules_parser.cmo \
+	rules_compiler.cmo \
 	main.cmo
 
 EXTRA_DIST = $(sources) files-generated.txt
@@ -101,6 +114,22 @@ if HAVE_OCAML
 generator: $(objects)
 	$(OCAMLFIND) ocamlc $(OCAMLCFLAGS) -linkpkg $^ -o $@
 
+rules_parser.ml rules_parser.mli: stamp-rules-parser
+
+stamp-rules-parser: rules_parser.mly
+	rm -f $@
+	$(OCAMLYACC) $<
+	touch $@
+
+rules_scanner.ml: rules_scanner.mll
+	$(OCAMLLEX) $<
+
+# Apparently because rules_parser.mli and rules_scanner.ml may not
+# exist before the Makefile is run, the pattern dependencies below
+# don't add these rules automatically, so we have to be explicit.
+rules_parser.cmi: rules_parser.mli
+rules_scanner.cmi: rules_scanner.ml
+
 # Dependencies.
 %.cmi: %.mli
 	$(OCAMLFIND) ocamlc $(OCAMLCFLAGS) -c $< -o $@
@@ -111,7 +140,7 @@ generator: $(objects)
 
 depend: .depend
 
-.depend: $(wildcard $(abs_srcdir)/*.mli) $(wildcard $(abs_srcdir)/*.ml)
+.depend: $(wildcard $(abs_srcdir)/*.mli) $(wildcard $(abs_srcdir)/*.ml) rules_parser.ml rules_parser.mli rules_scanner.ml
 	rm -f $@ $@-t
 	$(OCAMLFIND) ocamldep -I ../ocaml -I $(abs_srcdir) $^ | \
 	  $(SED) 's/ *$$//' | \
@@ -154,6 +183,12 @@ stamp-generator: generator
 
 CLEANFILES = $(noinst_DATA) $(noinst_PROGRAM) *.cmi *.cmo *~
 
-DISTCLEANFILES = .depend .pod2text.data.version.2
+DISTCLEANFILES = \
+	.depend \
+	.pod2text.data.version.2 \
+	rules_parser.ml \
+	rules_parser.mli \
+	rules_scanner.ml \
+	stamp-rules-parser
 
 SUFFIXES = .cmo .cmi .cmx .ml .mli .mll .mly
diff --git a/generator/rules_compiler.ml b/generator/rules_compiler.ml
new file mode 100644
index 0000000..5530408
--- /dev/null
+++ b/generator/rules_compiler.ml
@@ -0,0 +1,801 @@
+(* libguestfs
+ * Copyright (C) 2009-2015 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *)
+
+(* This is the compiler that turns inspection rules into C code. *)
+
+open Printf
+
+open Utils
+open Types
+open Pr
+open Docstrings
+
+module StringSet = Set.Make (String)
+
+let (//) = Filename.concat
+
+type env = {
+  free_vars : string list;
+  assign_vars : string list;
+
+  (* Name of the C environment struct. *)
+  env_struct : string;
+}
+
+let rec compile filename () =
+  let rules = parse filename in
+  type_checking filename rules;
+
+  generate_header ~extra_inputs:[filename] CStyle GPLv2plus;
+
+  pr "\
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <error.h>
+
+/* XXX At the moment we have to hard-code any headers needed by
+ * C code snippets from the input here.  We could fix this by
+ * allowing the source to define a C prologue.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+
+#include \"gl_oset.h\"
+#include \"gl_xoset.h\"
+#include \"gl_array_oset.h\"
+
+#include \"inspection.h\"
+
+#include \"guestfs-internal-all.h\"
+
+/* Disable a few warnings, so we can take a few short-cuts with the
+ * generated code.
+ */
+#pragma GCC diagnostic ignored \"-Wunused-variable\"
+#pragma GCC diagnostic ignored \"-Wunused-macros\"
+#pragma GCC diagnostic ignored \"-Wunused-function\"
+
+static gl_oset_t
+new_string_set (void)
+{
+  /* Note that we don't need a dispose function because all the
+   * strings added to the set will be \"owned\" by other code, either
+   * static (all_strings) or owned by facts.
+   */
+  return gl_oset_create_empty (GL_ARRAY_OSET,
+                               (gl_setelement_compar_fn) strcmp, NULL);
+}
+
+static void
+add_all_strings (gl_oset_t set)
+{
+  size_t i;
+
+  for (i = 0; all_strings[i] != NULL; ++i)
+    gl_oset_add (set, all_strings[i]);
+}
+
+";
+
+  get_all_strings filename rules;
+
+  (* Give each rule a unique number.  The number is used for rule
+   * function names, environments, code functions and so on.
+   * eg: 'rule_0 ()', 'struct rule_0_env', 'rule_0_code_0 ()'.
+   *)
+  iteri (
+    fun i rule ->
+      rule.rule_fn <- sprintf "rule_%d" i
+  ) rules;
+
+  (* Create the environment struct for each rule.  This contains all
+   * the variables either consumed or set by the function.
+   *)
+  let rules =
+    List.map (
+      fun rule ->
+        let env = compile_rule_environment filename rule in
+        (rule, env)
+    ) rules in
+
+  (* Write all C code snippets to functions. *)
+  List.iter (
+    fun (rule, env) ->
+      let j = ref 0 in
+      let rec loop = function
+        | And (e1, e2) | Or (e1, e2) -> loop e1; loop e2
+        | BoolCode code ->
+           code.code_fn <- sprintf "%s_code_%d" rule.rule_fn !j;
+           incr j;
+           compile_bool_code filename rule env code
+        | AssignCode (vs, row_count, code) ->
+           code.code_fn <- sprintf "%s_code_%d" rule.rule_fn !j;
+           incr j;
+           compile_assign_code filename rule env vs row_count code
+        | Term _ | Not _ | True | False -> ()
+      in
+      loop rule.body
+  ) rules;
+
+  (* Compile all rules into functions. *)
+  List.iter (fun (rule, env) -> compile_rule filename rule env) rules;
+
+  pr "\
+void
+rules (void)
+{
+  clear_true_facts ();
+  clear_false_facts ();
+
+  /* Loop over all the rules until no more true facts can be added. */
+  for (;;) {
+    size_t nr_true_facts = count_true_facts ();
+
+";
+
+  List.iter (
+    fun (rule, env) ->
+      pr "    if (verbose)\n";
+      pr "      printf (\"trying rule %%s\\n\", %S);\n"
+         (string_of_term rule.head);
+      pr "    %s ();\n" rule.rule_fn;
+      pr "\n";
+  ) rules;
+
+  pr "    /* Added a true fact during this iteration? */
+    if (nr_true_facts == count_true_facts ())
+      break;
+  } /* for (;;) */
+}
+
+/* EOF */\n"
+
+and get_all_strings filename rules =
+  let rec loop = function
+    | True | False | BoolCode _ | AssignCode _ -> []
+    | And (e1, e2) | Or (e1, e2) -> loop e1 @ loop e2
+    | Term term | Not term -> get_term_strings term
+  and get_term_strings { term_args = args } =
+    filter_map (function Variable _ -> None | Constant s -> Some s) args
+  in
+  let all_strings =
+    List.map (fun rule -> get_term_strings rule.head @ loop rule.body) rules in
+  let all_strings = List.concat all_strings in
+  let all_strings = sort_uniq all_strings in
+  pr "const char *all_strings[] = {\n";
+  pr "    ";
+  let col = ref 0 in
+  List.iter (
+    fun s ->
+      let len = String.length s in
+      if !col + len + 4 >= 72 then (
+        col := 0;
+        pr "\n    "
+      );
+      pr "%S, " s;
+      col := !col + len + 4;
+  ) all_strings;
+  if !col > 0 then pr "\n";
+  pr "    NULL\n";
+  pr "};\n";
+  pr "\n"
+
+(* Work the environment of a single rule.  Also write out the
+ * corresponding struct to the C file.
+ *)
+and compile_rule_environment filename rule =
+  (* The name of the C struct. *)
+  let env_struct = sprintf "%s_env" rule.rule_fn in
+
+  (* Divide all the variables which appear in the rule into:
+   *  - ones which we have to search for [free_vars],
+   *  - ones which are going to be returned by a C expression within
+   *    the body [assign_vars].
+   * We can do this statically.
+   * These sets are non-overlapping, so we just need to check which
+   * variables are returned by C expressions, and do an additional
+   * check that no C expressions are returning the same variable.
+   *)
+  (* Get the complete list of vars ... *)
+  let free_vars = Hashtbl.create 13 in
+  (* ... from the head *)
+  List.iter (
+    function
+    | Variable v ->
+       if Hashtbl.mem free_vars v then (
+         eprintf "%s: variable '%s' appears two or more times in a rule\n"
+                 filename v;
+         exit 1
+       );
+       Hashtbl.add free_vars v 1
+    | Constant _ -> ()
+  ) rule.head.term_args;
+  (* ... from the body *)
+  let rec loop = function
+    | And (e1, e2) | Or (e1, e2) -> loop e1; loop e2
+    | Term { term_args = args } | Not { term_args = args } ->
+       List.iter (
+         function
+         | Variable v -> Hashtbl.replace free_vars v 1
+         | Constant _ -> ()
+       ) args
+    | True | False
+    | BoolCode _ | AssignCode _ -> ()
+  in
+  loop rule.body;
+
+  let assign_vars = Hashtbl.create 13 in
+  let rec loop = function
+    | True | False | Term _ | Not _ | BoolCode _ -> ()
+    | And (e1, e2) | Or (e1, e2) -> loop e1; loop e2
+    | AssignCode (vs, _, _) ->
+       List.iter (
+         fun v ->
+           Hashtbl.remove free_vars v;
+           if Hashtbl.mem assign_vars v then (
+             eprintf "%s: variable '%s' appears two or more times in a C assignment expression in a rule\n"
+                     filename v;
+             exit 1
+           );
+           Hashtbl.add assign_vars v 1
+       ) vs
+  in
+  loop rule.body;
+  let free_vars = Hashtbl.fold (fun k _ ks -> k :: ks) free_vars [] in
+  let assign_vars = Hashtbl.fold (fun k _ ks -> k :: ks) assign_vars [] in
+
+  (* Write out the C struct. *)
+  pr "/* Environment struct for rule %s */\n" (string_of_term rule.head);
+  pr "struct %s {\n" env_struct;
+  if free_vars <> [] then (
+    pr "  /* free variables */\n";
+    List.iter (pr "  char *%s;\n") free_vars
+  );
+  if assign_vars <> [] then (
+    pr "  /* assigned vars */\n";
+    pr "  size_t nr_assign_vars;\n";
+    List.iter (pr "  char **%s;\n") assign_vars
+  );
+  pr "};\n";
+  pr "\n";
+
+  (* Return the OCaml env. *)
+  { free_vars = free_vars;
+    assign_vars = assign_vars;
+    env_struct = env_struct; }
+
+(* Compile a single rule to C code. *)
+and compile_rule filename rule env =
+  (* For each free variable we need to find the possible values for that
+   * variable.  If they appear within the body in a term like
+   * 'Foo(var)' then we can just look for matching facts and add
+   * them (at runtime).  If they don't, then we start with the list
+   * of all strings culled from the source + all strings from all facts.
+   *)
+  let free_vars = List.map (
+    fun v ->
+      let fact_lookups = ref [] in
+      let rec loop = function
+        | True | False | BoolCode _ | AssignCode _ -> ()
+        | And (e1, e2) | Or (e1, e2) -> loop e1; loop e2
+        | Term { term_name = term_name; term_args = args }
+        | Not { term_name = term_name; term_args = args } ->
+           (* If this term contains this variable at some position,
+            * then save that in the list of 'facts'.
+            *)
+           iteri (
+             fun arg_i ->
+               function
+               | Variable v' when v = v' ->
+                  fact_lookups := (term_name, arg_i) :: !fact_lookups
+               | Variable _ | Constant _ -> ()
+           ) args
+      in
+      loop rule.body;
+      let fact_lookups = sort_uniq !fact_lookups in
+
+      v, fact_lookups
+  ) env.free_vars in
+
+  pr "/* %s */\n" (string_of_term rule.head);
+  pr "static void\n";
+  pr "%s (void)\n" rule.rule_fn;
+  pr "{\n";
+  pr "  struct %s env;\n" env.env_struct;
+  pr "  bool added;\n";
+  pr "  size_t i;\n";
+  List.iter (
+    fun (v, _) ->
+      pr "  gl_oset_t search_%s;\n" v;
+      pr "  gl_oset_iterator_t iter_%s;\n" v;
+  ) free_vars;
+  pr "\n";
+
+  (* This is an optimization: If the rule contains no free variables,
+   * we only need to run it once.  This even applies if there are
+   * assigned variables, because the C code is supposed to be pure,
+   * ie. produce the same result every time it is called.
+   *)
+  if free_vars = [] then (
+    pr "  /* Because this rule contains no free variables, we only need\n";
+    pr "   * to evaluate it once.  This applies even if the rule runs\n";
+    pr "   * C code (see 'C code memoization' in guestfs-inspection(8)\n";
+    pr "   * for an explanation of why this is so).\n";
+    pr "   */\n";
+    pr "  static bool called = false;\n";
+    pr "  if (called)\n";
+    pr "    return;\n";
+    pr "  called = true;\n";
+    pr "\n";
+  );
+
+  if free_vars <> [] then
+    pr "  /* Build the sets we will use for searching each free variable. */\n";
+  List.iter (
+    function
+    | v, [] ->
+      (* The variable doesn't appear in any expressions, so
+       * add a note to the source.  Maybe emit a compiler warning? XXX
+       *)
+       pr "  search_%s = new_string_set ();\n" v;
+       pr "  /* Warning: variable '%s' is underspecified, so we will\n" v;
+       pr "   * search over all strings from the source and all facts.\n";
+       pr "   */\n";
+       pr "  add_all_strings (search_%s);\n" v;
+       pr "  add_all_fact_strings (search_%s);\n" v;
+       pr "\n"
+    | v, fact_lookups ->
+       pr "  search_%s = new_string_set ();\n" v;
+       List.iter (
+         fun (term_name, arg_i) ->
+           pr "  add_strings_from_facts (search_%s, %S, %d);\n"
+              v term_name arg_i
+       ) fact_lookups;
+       pr "\n"
+  ) free_vars;
+
+  (* Do a cartesian search over all [free_vars], substituting each set of
+   * variables, and evaluating the body.  If it evaluates to true,
+   * then we will add a new true fact!  (Or maybe several if we are
+   * dealing with a list assignment []={{...}}).  If it evaluates
+   * to false, we add a false fact.  It's also possible that we
+   * cannot evaluate the rule at all, because it contains unknown
+   * facts, in which case we end up adding NO new facts.
+   *)
+  if free_vars <> [] then (
+    pr "  /* Perform cartesian search over free variables. */\n";
+
+    List.iter (
+      fun (v, _) ->
+        pr "  iter_%s = gl_oset_iterator (search_%s);\n" v v;
+        pr "  while (gl_oset_iterator_next (&iter_%s,\n" v;
+        pr "                                (const void **)&env.%s)) {\n" v;
+    ) free_vars;
+
+  ) else (
+    (* If there are no free_vars, then we have to add a dummy loop
+     * around the next code so that the 'continue' statement can be used.
+     *)
+    pr "  do {\n";
+  );
+
+  (* Initialize any assign_vars in the env struct.  Note that the
+   * free_vars are initialized by the iterator loops above.
+   *)
+  List.iter (pr "  env.%s = NULL;\n") env.assign_vars;
+  if env.assign_vars <> [] then pr "  env.nr_assign_vars = 0;\n";
+
+  (* We can only do this optimization if assign_vars = [],
+   * because we don't know what the C code (returning those vars)
+   * may give us yet.  XXX Actually we could be looser with this:
+   * we only need to check that the head term contains no assigned
+   * variables.
+   *)
+  if env.assign_vars = [] then (
+    pr "  {\n";
+    pr "    /* If the fact already exists, don't bother doing any work. */\n";
+    pr "    CLEANUP_FREE fact *fact = create_fact (%S, %d"
+       rule.head.term_name (List.length rule.head.term_args);
+    List.iter (function
+                | Variable v -> pr ", env.%s" v
+                | Constant s -> pr ", %S" s)
+              rule.head.term_args;
+    pr ");\n";
+    pr "\n";
+    pr "    if (is_fact (true, fact) || is_fact (false, fact))\n";
+    pr "      continue;\n";
+    pr "  }\n";
+    pr "\n";
+  );
+
+  (* Evaluate the expression on the right hand side. *)
+  let rec eval result = function
+    | True ->
+       pr "  %s = 1;\n" result
+    | False ->
+       pr "  %s = 0;\n" result
+    | BoolCode code ->
+       pr "  %s = %s (&env);\n" result code.code_fn
+    | AssignCode (_, _, code) ->
+       pr "  %s (&env);\n" code.code_fn;
+       (* The result of AssignCode is always true (else it would
+        * have exited in the call above).  Hence:
+        *)
+       pr "  %s = 1;\n" result
+    | And (e1, e2) ->
+       let re1 = sprintf "r_%d" (unique ()) in
+       pr "  int %s;\n" re1;
+       eval re1 e1;
+       pr "  if (%s != 1)\n" re1;
+       pr "    %s = %s;\n" result re1;
+       pr "  else {\n";
+       let re2 = sprintf "r_%d" (unique ()) in
+       pr "    int %s;\n" re2;
+       eval re2 e2;
+       pr "    %s = %s;\n" result re2;
+       pr "  }\n";
+    | Or (e1, e2) ->
+       let re1 = sprintf "r_%d" (unique ()) in
+       pr "  int %s;\n" re1;
+       eval re1 e1;
+       pr "  if (%s == 1)\n" re1;
+       pr "    %s = %s;\n" result re1;
+       pr "  else {\n";
+       let re2 = sprintf "r_%d" (unique ()) in
+       pr "    int %s;\n" re2;
+       eval re2 e2;
+       pr "    %s = %s;\n" result re2;
+       pr "  }\n";
+    | Term term ->
+       pr "  {\n";
+       pr "    CLEANUP_FREE fact *fact = create_fact (%S, %d"
+          term.term_name (List.length term.term_args);
+       List.iter (
+         function
+         | Variable v -> pr ", env.%s" v
+         | Constant s -> pr ", %S" s
+       ) term.term_args;
+       pr ");\n";
+       pr "    %s = is_fact (true, fact);\n" result;
+       pr "  }\n";
+    | Not term ->
+       pr "  {\n";
+       pr "    CLEANUP_FREE fact *fact = create_fact (%S, %d"
+          term.term_name (List.length term.term_args);
+       List.iter (
+         function
+         | Variable v -> pr ", env.%s" v
+         | Constant s -> pr ", %S" s
+       ) term.term_args;
+       pr ");\n";
+       pr "    %s = is_fact (false, fact);\n" result;
+       pr "  }\n";
+  in
+  pr "  /* Evaluate the RHS of the rule with this assignment of variables. */\n";
+  pr "  int result;\n";
+  eval "result" rule.body;
+  pr "  if (result == -1) /* not determined */ continue;\n";
+  let make_fact ?i ?(indent = 2) () =
+    let indent = spaces indent in
+    pr "%sCLEANUP_FREE fact *fact =\n" indent;
+    pr "%s  create_fact (%S, %d"
+       indent rule.head.term_name (List.length rule.head.term_args);
+    List.iter (
+      function
+      | Variable v ->
+         if not (List.mem v env.assign_vars) then
+           pr ", env.%s" v
+         else (
+           let i = match i with Some i -> i | None -> assert false in
+           pr ", env.%s[%s]" v i
+         )
+      | Constant s -> pr ", %S" s
+    ) rule.head.term_args;
+    pr ");\n";
+  in
+  pr "  if (result > 0) /* true */ {\n";
+  if env.assign_vars = [] then (
+    make_fact ~indent:4 ();
+    pr "    added = add_fact (true, fact);\n";
+    pr "    if (added && verbose) {\n";
+    pr "      printf (\"added new fact \");\n";
+    pr "      print_fact (true, fact, stdout);\n";
+    pr "      printf (\"\\n\");\n";
+    pr "    }\n";
+  ) else (
+    pr "    for (i = 0; i < env.nr_assign_vars; ++i) {\n";
+    make_fact ~i:"i" ~indent:6 ();
+    pr "      added = add_fact (true, fact);\n";
+    pr "      if (added && verbose) {\n";
+    pr "        printf (\"added new fact \");\n";
+    pr "        print_fact (true, fact, stdout);\n";
+    pr "        printf (\"\\n\");\n";
+    pr "      }\n";
+    pr "    }\n";
+  );
+  pr "  }\n";
+  pr "  if (result == 0) /* false */ {\n";
+  if env.assign_vars = [] then (
+    make_fact ~indent:4 ();
+    pr "\n";
+    pr "    added = add_fact (false, fact);\n";
+    pr "    if (added && verbose) {\n";
+    pr "      printf (\"added new fact \");\n";
+    pr "      print_fact (false, fact, stdout);\n";
+    pr "      printf (\"\\n\");\n";
+    pr "    }\n";
+  ) else (
+    pr "    for (i = 0; i < env.nr_assign_vars; ++i) {\n";
+    make_fact ~i:"i" ~indent:6 ();
+    pr "      added = add_fact (false, fact);\n";
+    pr "      if (added && verbose) {\n";
+    pr "        printf (\"added new fact \");\n";
+    pr "        print_fact (false, fact, stdout);\n";
+    pr "        printf (\"\\n\");\n";
+    pr "      }\n";
+    pr "    }\n";
+  );
+  pr "  }\n";
+
+  (* Free any assign_vars.  The free_vars don't have to be freed
+   * because the iterator loop handles them.
+   *)
+  List.iter (
+    fun v ->
+      pr "  for (i = 0; i < env.nr_assign_vars; ++i)\n";
+      pr "    free (env.%s[i]);\n" v;
+      pr "  free (env.%s);\n" v
+  ) env.assign_vars;
+
+  if free_vars <> [] then (
+    List.iter (
+      fun (v, _) ->
+        pr "  }\n";
+        pr "  gl_oset_iterator_free (&iter_%s);\n" v
+    ) (List.rev free_vars);
+  ) else (
+    pr "  } while (0);\n";
+  );
+  pr "\n";
+
+  List.iter (
+    function
+    | v, _ ->
+       pr "  gl_oset_free (search_%s);\n" v
+  ) free_vars;
+
+  pr "}\n";
+  pr "\n"
+
+(* Compile a BoolCode snippet from a rule into a function. *)
+and compile_bool_code filename rule env code =
+  (* Create a function which wraps the C code. *)
+  let lineno = code.code_loc.Lexing.pos_lnum in
+  let code_wrapper_fn = sprintf "%s_wrapper" code.code_fn in
+  List.iter (fun v -> pr "#define %s (_env->%s)\n" v v) env.free_vars;
+  pr "static int\n";
+  pr "%s (struct %s *_env)\n" code_wrapper_fn env.env_struct;
+  pr "{\n";
+  pr "#line %d \"%s\"\n" lineno filename;
+  pr "%s\n" code.code;
+  pr "}\n";
+  List.iter (pr "#undef %s\n") env.free_vars;
+  pr "\n";
+
+  (* Create the function itself. *)
+  pr "static int\n";
+  pr "%s (struct %s *env)\n" code.code_fn env.env_struct;
+  pr "{\n";
+  pr "  int r;\n";
+  pr "\n";
+  pr "  if (verbose)\n";
+  pr "    printf (\"running C function %%s:%%d\\n\",\n";
+  pr "            \"%s\", %d);\n" filename lineno;
+  pr "\n";
+  pr "  r = %s (env);\n" code_wrapper_fn;
+  pr "\n";
+  pr "  /* If the C function returns -1, it causes us to exit at once. */\n";
+  pr "  if (r == -1)\n";
+  pr "    error (EXIT_FAILURE, 0,\n";
+  pr "           \"%%s:%%d: C function failed - see earlier errors\",\n";
+  pr "           \"%s\", %d);\n"
+     filename code.code_loc.Lexing.pos_lnum;
+  pr "\n";
+  pr "  return r;\n";
+  pr "}\n";
+  pr "\n";
+
+(* Compile assignment code (AssignCode) snippet into a function. *)
+and compile_assign_code filename rule env vs row_count code =
+  (* Create a function for setting a row in the result. *)
+  let set_vars_fn = sprintf "%s_set_row" code.code_fn in
+  let set_vars_alias = sprintf "set_%s" (String.concat "_" vs) in
+  pr "static void\n";
+  pr "%s (struct %s *_env, %s)\n"
+     set_vars_fn env.env_struct
+     (String.concat ", "
+                    (List.map (sprintf "const char *%s") vs));
+  pr "{\n";
+  pr "  size_t _i = _env->nr_assign_vars;\n";
+  pr "\n";
+  List.iter (
+    fun v ->
+      pr "  _env->%s = realloc (_env->%s, (_i+1) * sizeof (char *));\n" v v;
+      pr "  if (_env->%s == NULL)\n" v;
+      pr "    error (EXIT_FAILURE, errno, \"realloc\");\n";
+      pr "  _env->%s[_i] = strdup (%s);\n" v v;
+      pr "  if (_env->%s[_i] == NULL)\n" v;
+      pr "    error (EXIT_FAILURE, errno, \"strdup\");\n"
+  ) vs;
+  pr "  _env->nr_assign_vars++;\n";
+  pr "}\n";
+  pr "\n";
+
+  (* Create a function which wraps the C code. *)
+  let lineno = code.code_loc.Lexing.pos_lnum in
+  let code_wrapper_fn = sprintf "%s_wrapper" code.code_fn in
+  List.iter (fun v -> pr "#define %s (_env->%s)\n" v v) env.free_vars;
+  pr "#define %s(%s) %s (_env, %s)\n"
+     set_vars_alias (String.concat ", " vs)
+     set_vars_fn (String.concat ", " (List.map (fun v -> "("^v^")") vs));
+  pr "static int\n";
+  pr "%s (struct %s *_env)\n" code_wrapper_fn env.env_struct;
+  pr "{\n";
+  pr "#line %d \"%s\"\n" lineno filename;
+  pr "%s\n" code.code;
+  pr "}\n";
+  List.iter (pr "#undef %s\n") env.free_vars;
+  pr "#undef %s\n" set_vars_alias;
+  pr "\n";
+
+  (* Create the function itself. *)
+  pr "static void\n";
+  pr "%s (struct %s *env)\n" code.code_fn env.env_struct;
+  pr "{\n";
+  pr "  int r;\n";
+  pr "\n";
+  pr "  if (verbose)\n";
+  pr "    printf (\"running C function %%s:%%d\\n\",\n";
+  pr "            \"%s\", %d);\n" filename lineno;
+  pr "\n";
+  pr "  r = %s (env);\n" code_wrapper_fn;
+  pr "\n";
+  pr "  /* If the C function returns -1, it causes us to exit at once. */\n";
+  pr "  if (r == -1)\n";
+  pr "    error (EXIT_FAILURE, 0,\n";
+  pr "           \"%%s:%%d: C function failed - see earlier errors\",\n";
+  pr "           \"%s\", %d);\n"
+     filename code.code_loc.Lexing.pos_lnum;
+  pr "\n";
+  pr "  /* Check the set_* function was called the expected number\n";
+  pr "   * of times.\n";
+  pr "   */\n";
+  (match row_count with
+   | RowsOne ->
+      pr "  if (env->nr_assign_vars != 1)\n"
+   | RowsZeroOrMore ->
+      pr "  if (0) /* no check necessary for (var)* assignment */\n"
+   | RowsZeroOrOne ->
+      pr "  if (env->nr_assign_vars > 1)\n"
+   | RowsOneOrMore ->
+      pr "  if (env->nr_assign_vars < 1)\n"
+  );
+  pr "    error (EXIT_FAILURE, 0,\n";
+  pr "           \"%%s:%%d: C function called %%s incorrect number of times (%%zu)\",\n";
+  pr "           \"%s\", %d, \"%s\", env->nr_assign_vars);\n"
+     filename code.code_loc.Lexing.pos_lnum set_vars_alias;
+  pr "}\n";
+  pr "\n";
+
+(* Parse the input. *)
+and parse filename =
+  let lexbuf = Lexing.from_channel (open_in filename) in
+  let rules = ref [] in
+  (try
+      while true do
+        let rule = Rules_parser.rule Rules_scanner.token lexbuf in
+        (*printf "%s\n" (string_of_rule rule);*)
+        rules := rule :: !rules
+      done
+   with
+   | End_of_file -> ()
+   | Rules_scanner.Error (msg, _, lineno, charno) ->
+      eprintf "%s: %d: %d: %s\n" filename lineno charno msg;
+      exit 1
+   | Parsing.Parse_error ->
+      let p = Lexing.lexeme_start_p lexbuf in
+      eprintf "%s: %d: %d: syntax error\n"
+              filename
+              p.Lexing.pos_lnum
+              (p.Lexing.pos_cnum - p.Lexing.pos_bol);
+      exit 1
+  );
+  let rules = List.rev !rules in
+  rules
+
+(* Minimal type checking. *)
+and type_checking filename rules =
+  check_term_rhs filename rules;
+  check_term_arity filename rules
+
+(* If a term appears on the right hand side in any expression, then
+ * the term must also appear on the left hand side of a rule.
+ *)
+and check_term_rhs filename rules =
+  let names = List.map (fun { head = { term_name = name } } -> name) rules in
+  let names =
+    List.fold_left (fun set x -> StringSet.add x set) StringSet.empty names in
+
+  let errors = ref 0 in
+  List.iter (
+    fun { body = body } ->
+      visit_terms (
+        fun { term_name = name } ->
+          if not (StringSet.mem name names) then (
+            eprintf "%s: '%s' appears in a rule expression, but does not appear on the left hand side of any rule.  Maybe there is a typo?\n"
+                    filename name;
+            incr errors
+          )
+      ) body
+  ) rules;
+  if !errors > 0 then exit 1
+
+(* Check the arity of terms is the same wherever they appear. *)
+and check_term_arity filename rules =
+  let hash = Hashtbl.create (List.length rules) in (* name -> arity *)
+
+  let errors = ref 0 in
+
+  let check_arity { term_name = name; term_args = args } =
+    let arity = List.length args in
+    try
+      let expected_arity = Hashtbl.find hash name in
+      if arity <> expected_arity then (
+        eprintf "%s: '%s' has different number of parameters (has %d, expected %d).  It must have the same number of parameters throughout the program.\n"
+                filename name arity expected_arity;
+        incr errors
+      )
+    with
+      (* The first time we've seen this term. *)
+      Not_found -> Hashtbl.add hash name arity
+  in
+
+  List.iter (
+    fun { head = head; body = body } ->
+      check_arity head;
+      visit_terms check_arity body
+  ) rules;
+
+  if !errors > 0 then exit 1
+
+and visit_terms f = function
+  | And (e1, e2)
+  | Or (e1, e2) -> visit_terms f e1; visit_terms f e2
+  | Term t
+  | Not t -> f t
+  | True | False | BoolCode _ | AssignCode _ -> ()
+
+and unique =
+  let i = ref 0 in
+  fun () -> incr i; !i
diff --git a/generator/rules_compiler.mli b/generator/rules_compiler.mli
new file mode 100644
index 0000000..2bc5274
--- /dev/null
+++ b/generator/rules_compiler.mli
@@ -0,0 +1,21 @@
+(* libguestfs
+ * Copyright (C) 2009-2015 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *)
+
+(* This is the compiler that turns inspection rules into C code. *)
+
+val compile : string -> unit -> unit
diff --git a/generator/rules_parser.mly b/generator/rules_parser.mly
new file mode 100644
index 0000000..871ee02
--- /dev/null
+++ b/generator/rules_parser.mly
@@ -0,0 +1,129 @@
+/* libguestfs -*- text -*-
+ * Copyright (C) 2009-2015 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+%{
+open Types
+%}
+
+%token <string> STRING /* string literal */
+%token <string> UID    /* uppercase identifier */
+%token <string> LID    /* lowercase identifier */
+
+%token TRUE            /* true (keyword) */
+%token FALSE           /* false (keyword) */
+
+%token LPAREN RPAREN   /* ( ... ) */
+%token <string> CODE   /* {{ .. }} containing C code */
+%token STAR            /* * (zero or more rows) */
+%token QUESTION        /* ? (zero or one row) */
+%token PLUS            /* + (one or more rows) */
+%token DOT             /* . */
+%token IMPLIC          /* :- (implication) */
+%token COMMA           /* , (AND operator) */
+%token SEMI            /* , (OR operator) */
+%token NOT             /* ! */
+%token EQUALS          /* = */
+
+/* These operators are arranged from lowest to highest precedence. */
+%left IMPLIC
+%left SEMI
+%left COMMA
+%nonassoc NOT
+
+%start rule
+%type <Types.rule> rule
+
+%%
+
+rule:     head DOT
+            { { head = $1; body = True;
+                rule_loc = symbol_start_pos (); rule_fn = "" } }
+        | head IMPLIC body DOT
+            { { head = $1; body = $3;
+                rule_loc = symbol_start_pos (); rule_fn = "" } }
+        ;
+
+head:   term
+            { $1 }
+        ;
+
+term:     UID
+            { { term_name = $1; term_args = [] } }
+        | UID LPAREN term_args RPAREN
+            { { term_name = $1; term_args = $3 } }
+        ;
+
+term_args:
+          term_arg
+            { [ $1 ] }
+        | term_arg COMMA term_args
+            { $1 :: $3 }
+        ;
+
+term_arg:
+          LID
+            { Variable $1 }
+        | STRING
+            { Constant $1 }
+        ;
+
+body:   expr
+            { $1 }
+        ;
+
+expr:     TRUE
+            { True }
+        | FALSE
+            { False }
+        | term
+            { Term $1 }
+        | CODE
+            { BoolCode { code = $1;
+                         code_loc = symbol_start_pos ();
+                         code_fn = "" } }
+        | LPAREN result_bindings RPAREN EQUALS CODE
+            { AssignCode ($2, RowsOne,
+                         { code = $5;
+                           code_loc = symbol_start_pos (); code_fn = "" }) }
+        | LPAREN result_bindings RPAREN STAR EQUALS CODE
+            { AssignCode ($2, RowsZeroOrMore,
+                         { code = $6;
+                           code_loc = symbol_start_pos (); code_fn = "" }) }
+        | LPAREN result_bindings RPAREN QUESTION EQUALS CODE
+            { AssignCode ($2, RowsZeroOrOne,
+                         { code = $6;
+                           code_loc = symbol_start_pos (); code_fn = "" }) }
+        | LPAREN result_bindings RPAREN PLUS EQUALS CODE
+            { AssignCode ($2, RowsOneOrMore,
+                         { code = $6;
+                           code_loc = symbol_start_pos (); code_fn = "" }) }
+        | NOT term
+            { Not $2 }
+        | expr COMMA expr
+            { And ($1, $3) }
+        | expr SEMI expr
+            { Or ($1, $3) }
+        | LPAREN expr RPAREN
+            { $2 }
+        ;
+
+result_bindings:
+          LID
+            { [ $1 ] }
+        | LID COMMA result_bindings
+            { $1 :: $3 }
diff --git a/generator/rules_scanner.mll b/generator/rules_scanner.mll
new file mode 100644
index 0000000..f486ba4
--- /dev/null
+++ b/generator/rules_scanner.mll
@@ -0,0 +1,113 @@
+(* libguestfs -*- text -*-
+ * Copyright (C) 2009-2015 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *)
+
+{
+open Rules_parser
+
+let string_of_lexbuf = Lexing.lexeme
+
+(* Errors raised by the lexer. *)
+exception Error of string * string * int * int
+
+let raise_error lexbuf msg =
+  let p = Lexing.lexeme_start_p lexbuf in
+  raise (Error (msg, p.Lexing.pos_fname,
+                     p.Lexing.pos_lnum,
+                     p.Lexing.pos_cnum - p.Lexing.pos_bol))
+
+(* Store "..." strings. *)
+let string_buf = Buffer.create 256
+let reset_string_buffer () = Buffer.clear string_buf
+let store_string_char c = Buffer.add_char string_buf c
+let get_string_buffer () = Buffer.contents string_buf
+
+let char_for_backslash = function
+  | 'n' -> '\010'
+  | 'r' -> '\013'
+  | 'b' -> '\008'
+  | 't' -> '\009'
+  | c   -> c
+
+(* Store {{ CODE }} sections. *)
+let code_buf = Buffer.create 256
+let reset_code_buffer () = Buffer.clear code_buf
+let store_code_char c = Buffer.add_char code_buf c
+let get_code_buffer () = Buffer.contents code_buf
+}
+
+(* Characters that can appear within an identifier (after the first
+ * character which is treated specially below).
+ *)
+let id_char = ['a'-'z' 'A'-'Z' '0'-'9' '_']
+
+(* Whitespace. *)
+let ws = [' ' '\t']
+
+(* Backslash escapes within strings. *)
+let backslash_escapes = ['\\' '\'' '"' 'n' 't' 'b' 'r']
+
+rule token = parse
+        | "/*"       { comment lexbuf; token lexbuf }
+        | '('        { LPAREN }
+        | ')'        { RPAREN }
+        | '*'        { STAR }
+        | '?'        { QUESTION }
+        | '+'        { PLUS }
+        | '.'        { DOT }
+        | ":-"       { IMPLIC }
+        | ','        { COMMA }
+        | ';'        { SEMI }
+        | '!'        { NOT }
+        | '='        { EQUALS }
+        | '"'        { reset_string_buffer ();
+                       string lexbuf;
+                       STRING (get_string_buffer ()) }
+        | "{{"       { reset_code_buffer ();
+                       code lexbuf;
+                       CODE (get_code_buffer ()) }
+        | "true"     { TRUE }
+        | "false"    { FALSE }
+        | ['A'-'Z'] id_char* { UID (string_of_lexbuf lexbuf) }
+        | ['a'-'z' '_'] id_char* { LID (string_of_lexbuf lexbuf) }
+        | '\n'       { Lexing.new_line lexbuf; token lexbuf }
+        | ws         { token lexbuf }
+        | eof        { raise End_of_file }
+        | _          { raise_error lexbuf "unexpected character in input" }
+
+(* Discard C-style comments. *)
+and comment = parse
+        | "*/"       { () }
+        | eof        { raise_error lexbuf "unterminated comment" }
+        | '\n'       { Lexing.new_line lexbuf; comment lexbuf }
+        | _          { comment lexbuf }
+
+(* Store "..." strings. *)
+and string = parse
+        | '"'        { () }
+        | eof        { raise_error lexbuf "unterminated string" }
+        | '\n'       { raise_error lexbuf "strings cannot contain newline characters" }
+        | '\\' (backslash_escapes as c)
+                     { store_string_char (char_for_backslash c); string lexbuf }
+        | _ as c     { store_string_char c; string lexbuf }
+
+(* Store {{ ... }} (CODE) sections containing C code. *)
+and code = parse
+        | "}}"       { () }
+        | eof        { raise_error lexbuf "unterminated code section" }
+        | '\n' as c  { Lexing.new_line lexbuf; store_code_char c; code lexbuf }
+        | _ as c     { store_code_char c; code lexbuf }
diff --git a/generator/types.ml b/generator/types.ml
index f2d9750..cceb0f6 100644
--- a/generator/types.ml
+++ b/generator/types.ml
@@ -18,6 +18,8 @@
 
 (* Please read generator/README first. *)
 
+open Printf
+
 (* Types used to describe the API. *)
 
 type style = ret * args * optargs
@@ -421,3 +423,66 @@ type call_optargt =
   | CallOInt64 of string * int64
   | CallOString of string * string
   | CallOStringList of string * string list
+
+(* Used by the rules compiler. *)
+
+type rule = {
+  head : term;
+  body : expr;
+  rule_loc : Lexing.position;
+  mutable rule_fn : string;
+}
+(* The type of a parsed rule from the source. *)
+
+and term = { term_name : string; term_args : term_arg list }
+
+and term_arg = Variable of string | Constant of string
+
+and expr =
+  | True                        (* used for facts *)
+  | False                       (* false (keyword) *)
+  | Term of term
+  | Not of term                 (* ! term *)
+  | And of expr * expr          (* expr, expr *)
+  | Or of expr * expr           (* expr; expr *)
+  | BoolCode of code            (* {{ ... }} *)
+  | AssignCode of string list * row_count * code (* (a,b)={{ ... }} *)
+
+and code = {
+  code : string;
+  code_loc : Lexing.position;
+  mutable code_fn : string;
+}
+
+and row_count = RowsOne | RowsZeroOrMore | RowsOneOrMore | RowsZeroOrOne
+
+let rec string_of_rule { head = head; body = body } =
+  sprintf "%s :-\n\t%s." (string_of_term head) (string_of_expr body)
+
+and string_of_term = function
+  | { term_name = term_name; term_args = [] } ->
+      sprintf "%s" term_name
+  | { term_name = term_name; term_args = args } ->
+      sprintf "%s(%s)" term_name
+              (String.concat ", " (List.map string_of_term_arg args))
+
+and string_of_term_arg = function
+  | Variable s -> s
+  | Constant s -> sprintf "%S" s
+
+and string_of_expr = function
+  | True -> "true"
+  | False -> "true"
+  | Term term -> string_of_term term
+  | Not term -> sprintf "!%s" (string_of_term term)
+  | And (e1, e2) -> sprintf "(%s,%s)" (string_of_expr e1) (string_of_expr e2)
+  | Or (e1, e2) -> sprintf "(%s;%s)" (string_of_expr e1) (string_of_expr e2)
+  | BoolCode _ -> "{{ // code }}"
+  | AssignCode (bindings, RowsOne, _) ->
+     sprintf "(%s)={{ // code }}" (String.concat ", " bindings)
+  | AssignCode (bindings, RowsZeroOrMore, _) ->
+     sprintf "(%s)*={{ // code }}" (String.concat ", " bindings)
+  | AssignCode (bindings, RowsOneOrMore, _) ->
+     sprintf "(%s)+={{ // code }}" (String.concat ", " bindings)
+  | AssignCode (bindings, RowsZeroOrOne, _) ->
+     sprintf "(%s)?={{ // code }}" (String.concat ", " bindings)
diff --git a/generator/utils.ml b/generator/utils.ml
index 7d47430..6b4497d 100644
--- a/generator/utils.ml
+++ b/generator/utils.ml
@@ -231,6 +231,22 @@ let mapi f xs =
   in
   loop 0 xs
 
+let uniq ?(cmp = Pervasives.compare) xs =
+  let rec loop acc = function
+    | [] -> acc
+    | [x] -> x :: acc
+    | x :: (y :: _ as xs) when cmp x y = 0 ->
+       loop acc xs
+    | x :: (y :: _ as xs) ->
+       loop (x :: acc) xs
+  in
+  List.rev (loop [] xs)
+
+let sort_uniq ?(cmp = Pervasives.compare) xs =
+  let xs = List.sort cmp xs in
+  let xs = uniq ~cmp xs in
+  xs
+
 let count_chars c str =
   let count = ref 0 in
   for i = 0 to String.length str - 1 do
diff --git a/generator/utils.mli b/generator/utils.mli
index e0f30c3..392e9d6 100644
--- a/generator/utils.mli
+++ b/generator/utils.mli
@@ -84,6 +84,12 @@ val iteri : (int -> 'a -> unit) -> 'a list -> unit
 
 val mapi : (int -> 'a -> 'b) -> 'a list -> 'b list
 
+val uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list
+  (** Uniquify a list (the list must be sorted first). *)
+
+val sort_uniq : ?cmp:('a -> 'a -> int) -> 'a list -> 'a list
+  (** Sort and uniquify a list. *)
+
 val count_chars : char -> string -> int
 (** Count number of times the character occurs in string. *)
 
diff --git a/m4/guestfs_ocaml.m4 b/m4/guestfs_ocaml.m4
index b3e9387..e213f80 100644
--- a/m4/guestfs_ocaml.m4
+++ b/m4/guestfs_ocaml.m4
@@ -29,6 +29,8 @@ AS_IF([test "x$enable_ocaml" != "xno"],[
     OCAMLFIND=
     AC_PROG_OCAML
     AC_PROG_FINDLIB
+    AC_PROG_OCAMLLEX
+    AC_PROG_OCAMLYACC
 
     dnl OCaml >= 3.11 is required.
     AC_MSG_CHECKING([if OCaml version >= 3.11])
-- 
2.5.0




More information about the Libguestfs mailing list