[Libguestfs] [PATCH v2 04/18] common/mlpcre: Allow some PCRE_* flags to be passed to pcre_compile.
Richard W.M. Jones
rjones at redhat.com
Thu Sep 21 14:53:59 UTC 2017
Only five simple flags are allowed so far, and not all of them are
actually used in any code. They are:
~anchored / PCRE_ANCHORED - implicit ^...$ around regexp
~caseless / PCRE_CASELESS - fold upper and lower case
~dotall / PCRE_DOTALL - ‘.’ matches anything
~extended / PCRE_EXTENDED - extended regular expressions
~multiline / PCRE_MULTILINE - ^ and $ match lines within the subject string
---
common/mlpcre/PCRE.ml | 2 +-
common/mlpcre/PCRE.mli | 10 ++++++++--
common/mlpcre/pcre-c.c | 41 ++++++++++++++++++++++++++++++++++++++---
common/mlpcre/pcre_tests.ml | 22 +++++++++++++++-------
4 files changed, 62 insertions(+), 13 deletions(-)
diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 0eb7eb2ec..753e247e4 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -22,7 +22,7 @@ exception Error of string * int
type regexp
-external compile : string -> regexp = "guestfs_int_pcre_compile"
+external compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool -> ?extended:bool -> ?multiline:bool -> string -> regexp = "guestfs_int_pcre_compile_byte" "guestfs_int_pcre_compile"
external matches : regexp -> string -> bool = "guestfs_int_pcre_matches"
external sub : int -> string = "guestfs_int_pcre_sub"
external subi : int -> int * int = "guestfs_int_pcre_subi"
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 634cc600c..fcf6fd25e 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -53,8 +53,14 @@ exception Error of string * int
type regexp
(** The type of a compiled regular expression. *)
-val compile : string -> regexp
-(** Compile a regular expression. This can raise {!Error}. *)
+val compile : ?anchored:bool -> ?caseless:bool -> ?dotall:bool -> ?extended:bool -> ?multiline:bool -> string -> regexp
+(** Compile a regular expression. This can raise {!Error}.
+
+ The flags [?anchored], [?caseless], [?dotall], [?extended],
+ [?multiline]
+ correspond to the [pcre_compile] flags [PCRE_ANCHORED] etc.
+ See pcreapi(3) for details of what they do.
+ All flags default to false. *)
val matches : regexp -> string -> bool
(** Test whether the regular expression matches the string. This
diff --git a/common/mlpcre/pcre-c.c b/common/mlpcre/pcre-c.c
index 15775dad0..6dc30087d 100644
--- a/common/mlpcre/pcre-c.c
+++ b/common/mlpcre/pcre-c.c
@@ -112,22 +112,57 @@ Val_regexp (pcre *re)
CAMLreturn (rv);
}
+static int
+is_Some_true (value v)
+{
+ return
+ v != Val_int (0) /* !None */ &&
+ Bool_val (Field (v, 0)) /* Some true */;
+}
+
value
-guestfs_int_pcre_compile (value pattv)
+guestfs_int_pcre_compile (value anchoredv, value caselessv, value dotallv,
+ value extendedv, value multilinev,
+ value pattv)
{
- CAMLparam1 (pattv);
+ CAMLparam5 (anchoredv, caselessv, dotallv, extendedv, multilinev);
+ CAMLxparam1 (pattv);
+ int options = 0;
pcre *re;
int errcode = 0;
const char *err;
int offset;
- re = pcre_compile2 (String_val (pattv), 0, &errcode, &err, &offset, NULL);
+ /* Flag parameters are all ‘bool option’, defaulting to false. */
+ if (is_Some_true (anchoredv))
+ options |= PCRE_ANCHORED;
+ if (is_Some_true (caselessv))
+ options |= PCRE_CASELESS;
+ if (is_Some_true (dotallv))
+ options |= PCRE_DOTALL;
+ if (is_Some_true (extendedv))
+ options |= PCRE_EXTENDED;
+ if (is_Some_true (multilinev))
+ options |= PCRE_MULTILINE;
+
+ re = pcre_compile2 (String_val (pattv), options,
+ &errcode, &err, &offset, NULL);
if (re == NULL)
raise_pcre_error (err, errcode);
CAMLreturn (Val_regexp (re));
}
+/* OCaml calls C functions from bytecode a bit differently when they
+ * have more than 5 parameters.
+ */
+value
+guestfs_int_pcre_compile_byte (value *argv, int argn)
+{
+ return guestfs_int_pcre_compile (argv[0], argv[1], argv[2], argv[3], argv[4],
+ argv[5]);
+}
+
value
guestfs_int_pcre_matches (value rev, value strv)
{
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index b5f712d20..9d42914b9 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -18,9 +18,17 @@
open Printf
-let compile patt =
- eprintf "PCRE.compile %s\n%!" patt;
- PCRE.compile patt
+let compile ?(anchored = false) ?(caseless = false)
+ ?(dotall = false) ?(extended = false) ?(multiline = false)
+ patt =
+ eprintf "PCRE.compile%s%s%s%s%s %s\n%!"
+ (if anchored then " ~anchored:true" else "")
+ (if caseless then " ~caseless:true" else "")
+ (if dotall then " ~dotall:true" else "")
+ (if extended then " ~extended:true" else "")
+ (if multiline then " ~multiline:true" else "")
+ patt;
+ PCRE.compile ~anchored ~caseless ~dotall ~extended ~multiline patt
let matches re str =
eprintf "PCRE.matches %s ->%!" str;
@@ -51,7 +59,7 @@ let () =
let re0 = compile "a+b" in
let re1 = compile "(a+)b" in
let re2 = compile "(a+)(b*)" in
- let re3 = compile "[^A-Za-z0-9_]" in
+ let re3 = compile ~caseless:true "[^a-z0-9_]" in
assert (matches re0 "ccaaabbbb" = true);
assert (sub 0 = "aaab");
@@ -90,9 +98,9 @@ let () =
* patterns, and that could be problematic if PCRE was built
* without Unicode support (XXX).
*)
- assert (replace ~global:true re3 "-" "this is a\xc2\xa3funny.name?"
- (* = "this-is-a-funny-name-" if UTF-8 worked *)
- = "this-is-a--funny-name-");
+ assert (replace ~global:true re3 "-" "this is a\xc2\xa3FUNNY.name?"
+ (* = "this-is-a-FUNNY-name-" if UTF-8 worked *)
+ = "this-is-a--FUNNY-name-");
with
| Not_found ->
failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
--
2.13.2
More information about the Libguestfs
mailing list