[Libguestfs] [PATCH v2 04/18] common/mlpcre: Allow some PCRE_* flags to be passed to pcre_compile.

Richard W.M. Jones rjones at redhat.com
Thu Sep 21 14:53:59 UTC 2017


Only five simple flags are allowed so far, and not all of them are
actually used in any code.  They are:

  ~anchored / PCRE_ANCHORED   - implicit ^...$ around regexp
  ~caseless / PCRE_CASELESS   - fold upper and lower case
  ~dotall / PCRE_DOTALL       - ‘.’ matches anything
  ~extended / PCRE_EXTENDED   - extended regular expressions
  ~multiline / PCRE_MULTILINE - ^ and $ match lines within the subject string
---
 common/mlpcre/PCRE.ml       |  2 +-
 common/mlpcre/PCRE.mli      | 10 ++++++++--
 common/mlpcre/pcre-c.c      | 41 ++++++++++++++++++++++++++++++++++++++---
 common/mlpcre/pcre_tests.ml | 22 +++++++++++++++-------
 4 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 0eb7eb2ec..753e247e4 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -22,7 +22,7 @@ exception Error of string * int
 
 type regexp
 
-external compile : string -> regexp = "guestfs_int_pcre_compile"
+external compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool -> ?extended:bool -> ?multiline:bool -> string -> regexp = "guestfs_int_pcre_compile_byte" "guestfs_int_pcre_compile"
 external matches : regexp -> string -> bool = "guestfs_int_pcre_matches"
 external sub : int -> string = "guestfs_int_pcre_sub"
 external subi : int -> int * int = "guestfs_int_pcre_subi"
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 634cc600c..fcf6fd25e 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -53,8 +53,14 @@ exception Error of string * int
 type regexp
 (** The type of a compiled regular expression. *)
 
-val compile : string -> regexp
-(** Compile a regular expression.  This can raise {!Error}. *)
+val compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool -> ?extended:bool -> ?multiline:bool -> string -> regexp
+(** Compile a regular expression.  This can raise {!Error}.
+
+    The flags [?anchored], [?caseless], [?dotall], [?extended],
+    [?multiline]
+    correspond to the [pcre_compile] flags [PCRE_ANCHORED] etc.
+    See pcreapi(3) for details of what they do.
+    All flags default to false. *)
 
 val matches : regexp -> string -> bool
 (** Test whether the regular expression matches the string.  This
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index 15775dad0..6dc30087d 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -112,22 +112,57 @@ Val_regexp (pcre *re)
   CAMLreturn (rv);
 }
 
+static int
+is_Some_true (value v)
+{
+  return
+    v != Val_int (0) /* !None */ &&
+    Bool_val (Field (v, 0)) /* Some true */;
+}
+
 value
-guestfs_int_pcre_compile (value pattv)
+guestfs_int_pcre_compile (value anchoredv, value caselessv, value dotallv,
+                          value extendedv, value multilinev,
+                          value pattv)
 {
-  CAMLparam1 (pattv);
+  CAMLparam5 (anchoredv, caselessv, dotallv, extendedv, multilinev);
+  CAMLxparam1 (pattv);
+  int options = 0;
   pcre *re;
   int errcode = 0;
   const char *err;
   int offset;
 
-  re = pcre_compile2 (String_val (pattv), 0, &errcode, &err, &offset, NULL);
+  /* Flag parameters are all ‘bool option’, defaulting to false. */
+  if (is_Some_true (anchoredv))
+    options |= PCRE_ANCHORED;
+  if (is_Some_true (caselessv))
+    options |= PCRE_CASELESS;
+  if (is_Some_true (dotallv))
+    options |= PCRE_DOTALL;
+  if (is_Some_true (extendedv))
+    options |= PCRE_EXTENDED;
+  if (is_Some_true (multilinev))
+    options |= PCRE_MULTILINE;
+
+  re = pcre_compile2 (String_val (pattv), options,
+                      &errcode, &err, &offset, NULL);
   if (re == NULL)
     raise_pcre_error (err, errcode);
 
   CAMLreturn (Val_regexp (re));
 }
 
+/* OCaml calls C functions from bytecode a bit differently when they
+ * have more than 5 parameters.
+ */
+value
+guestfs_int_pcre_compile_byte (value *argv, int argn)
+{
+  return guestfs_int_pcre_compile (argv[0], argv[1], argv[2], argv[3], argv[4],
+                                   argv[5]);
+}
+
 value
 guestfs_int_pcre_matches (value rev, value strv)
 {
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index b5f712d20..9d42914b9 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -18,9 +18,17 @@
 
 open Printf
 
-let compile patt =
-  eprintf "PCRE.compile %s\n%!" patt;
-  PCRE.compile patt
+let compile ?(anchored = false) ?(caseless = false)
+            ?(dotall = false) ?(extended = false) ?(multiline = false)
+            patt =
+  eprintf "PCRE.compile%s%s%s%s%s %s\n%!"
+          (if anchored then " ~anchored:true" else "")
+          (if caseless then " ~caseless:true" else "")
+          (if dotall then " ~dotall:true" else "")
+          (if extended then " ~extended:true" else "")
+          (if multiline then " ~multiline:true" else "")
+          patt;
+  PCRE.compile ~anchored ~caseless ~dotall ~extended ~multiline patt
 
 let matches re str =
   eprintf "PCRE.matches %s ->%!" str;
@@ -51,7 +59,7 @@ let () =
     let re0 = compile "a+b" in
     let re1 = compile "(a+)b" in
     let re2 = compile "(a+)(b*)" in
-    let re3 = compile "[^A-Za-z0-9_]" in
+    let re3 = compile ~caseless:true "[^a-z0-9_]" in
 
     assert (matches re0 "ccaaabbbb" = true);
     assert (sub 0 = "aaab");
@@ -90,9 +98,9 @@ let () =
      * patterns, and that could be problematic if PCRE was built
      * without Unicode support (XXX).
      *)
-    assert (replace ~global:true re3 "-" "this is a\xc2\xa3funny.name?"
-            (* = "this-is-a-funny-name-" if UTF-8 worked *)
-            = "this-is-a--funny-name-");
+    assert (replace ~global:true re3 "-" "this is a\xc2\xa3FUNNY.name?"
+            (* = "this-is-a-FUNNY-name-" if UTF-8 worked *)
+            = "this-is-a--FUNNY-name-");
   with
   | Not_found ->
      failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
-- 
2.13.2




More information about the Libguestfs mailing list