[Libguestfs] [PATCH 3/4] common/mlpcre: Add PCRE.replace function.

Richard W.M. Jones rjones at redhat.com
Wed Sep 20 14:50:24 UTC 2017


Similar to Perl s/// but lacks backreferences.
---
 common/mlpcre/PCRE.ml       | 25 +++++++++++++++++++++++++
 common/mlpcre/PCRE.mli      | 13 +++++++++++++
 common/mlpcre/pcre_tests.ml | 23 ++++++++++++++++++++++-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/common/mlpcre/PCRE.ml b/common/mlpcre/PCRE.ml
index 5269d41f8..d80021f8c 100644
--- a/common/mlpcre/PCRE.ml
+++ b/common/mlpcre/PCRE.ml
@@ -27,5 +27,30 @@ external matches : regexp -> string -> bool = "guestfs_int_pcre_matches"
 external sub : int -> string = "guestfs_int_pcre_sub"
 external subi : int -> int * int = "guestfs_int_pcre_subi"
 
+(* Replace the first match (every match if [global]) of patt in subj. *)
+let rec replace ?(global = false) patt subst subj =
+  if not (matches patt subj) then
+    subj (* no match: return the original string unchanged *)
+  else (
+    (* If patt matches "yyyy" in the original string then "xxxx" is
+     * the part before the match and "zzzz..." the part after it:
+     * "xxxxyyyyzzzzzzzzzzzzz"
+     *      ^   ^
+     *      i1  i2
+     *)
+    let i1, i2 = subi 0 in
+    let len = String.length subj in
+    let xs = String.sub subj 0 i1 (* "xxxx", part before the match *) in
+    (* An empty match (i1 = i2) consumes no input.  To guarantee that a
+     * global replace terminates, copy the next character through
+     * verbatim and resume the search after it. *)
+    let skip = if global && i1 = i2 && i2 < len then 1 else 0 in
+    let zs = String.sub subj (i2 + skip) (len - i2 - skip) (* rest *) in
+    (* An empty match at the very end of subj leaves nothing to rescan. *)
+    let more = global && (i1 < i2 || i2 < len) in
+    let zs = if more then replace ~global patt subst zs else zs in
+    xs ^ subst ^ String.sub subj i2 skip ^ zs
+  )
+
 let () =
   Callback.register_exception "PCRE.Error" (Error ("", 0))
diff --git a/common/mlpcre/PCRE.mli b/common/mlpcre/PCRE.mli
index 02f16d19d..634cc600c 100644
--- a/common/mlpcre/PCRE.mli
+++ b/common/mlpcre/PCRE.mli
@@ -91,3 +91,16 @@ val subi : int -> int * int
     for exact details).
 
     If there was no nth substring then this raises [Not_found]. *)
+
+val replace : ?global:bool -> regexp -> string -> string -> string
+(** [replace ?global patt subst subj] searches the subject string
+    ([subj]) for [patt] and returns a copy in which the match is
+    replaced by [subst], similar to the Perl function [s///].  If
+    [patt] does not match, [subj] is returned unchanged.
+
+    The [?global] flag defaults to false, so only the first
+    instance of [patt] in the string is replaced.  If set to true
+    then every instance of [patt] in the string is replaced.
+
+    Note that this function does not allow backreferences:
+    [subst] is inserted literally and captures in [patt] are ignored. *)
diff --git a/common/mlpcre/pcre_tests.ml b/common/mlpcre/pcre_tests.ml
index 316a4348e..b5f712d20 100644
--- a/common/mlpcre/pcre_tests.ml
+++ b/common/mlpcre/pcre_tests.ml
@@ -28,6 +28,12 @@ let matches re str =
   eprintf " %b\n%!" r;
   r
 
+let replace ?(global = false) patt subst subj = (* traces PCRE.replace *)
+  eprintf "PCRE.replace global:%b <patt> %s %s ->%!" global subst subj;
+  let r = PCRE.replace ~global patt subst subj in (* function under test *)
+  eprintf " %s\n%!" r; (* append the result to the trace line *)
+  r
+
 let sub i =
   eprintf "PCRE.sub %d ->%!" i;
   let r = PCRE.sub i in
@@ -45,6 +51,7 @@ let () =
     let re0 = compile "a+b" in
     let re1 = compile "(a+)b" in
     let re2 = compile "(a+)(b*)" in
+    let re3 = compile "[^A-Za-z0-9_]" in
 
     assert (matches re0 "ccaaabbbb" = true);
     assert (sub 0 = "aaab");
@@ -71,7 +78,21 @@ let () =
     assert (sub 0 = "a");
     assert (subi 0 = (2, 3));
     assert (subi 1 = (2, 3));
-    assert (subi 2 = (3, 3))
+    assert (subi 2 = (3, 3));
+
+    assert (replace re0 "dd" "abcabcaabccca" = "ddcabcaabccca");
+    assert (replace ~global:true re0 "dd" "abcabcaabccca" = "ddcddcddccca");
+
+    (* This example copies a usage from customize/firstboot.ml.
+     * "\xc2\xa3" is UTF-8 for the GBP sign.  Ideally PCRE would
+     * recognize that this is a single character, however doing that
+     * would involve passing the PCRE_UTF8 flag when compiling
+     * patterns, and that could be problematic if PCRE was built
+     * without Unicode support (XXX).
+     *)
+    assert (replace ~global:true re3 "-" "this is a\xc2\xa3funny.name?"
+            (* = "this-is-a-funny-name-" if UTF-8 worked *)
+            = "this-is-a--funny-name-");
   with
   | Not_found ->
      failwith "one of the PCRE.sub functions unexpectedly raised Not_found"
-- 
2.13.2




More information about the Libguestfs mailing list