[Libguestfs] [PATCH v3 05/22] common/mlstdutils: Add String.nsplit ?max parameter, and tests.

Richard W.M. Jones rjones at redhat.com
Fri Sep 22 07:36:06 UTC 2017


This idea was previously proposed by Tomáš Golembiovský in
https://www.redhat.com/archives/libguestfs/2017-January/msg00138.html
---
 common/mlstdutils/std_utils.ml       | 28 ++++++++++++++++------------
 common/mlstdutils/std_utils.mli      | 11 ++++++++---
 common/mlstdutils/std_utils_tests.ml | 29 +++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/common/mlstdutils/std_utils.ml b/common/mlstdutils/std_utils.ml
index b731b8fd5..37eef0348 100644
--- a/common/mlstdutils/std_utils.ml
+++ b/common/mlstdutils/std_utils.ml
@@ -147,18 +147,7 @@ module String = struct
       done;
       if not !r then s else Bytes.to_string b2
 
-    let rec nsplit sep str =
-      let len = length str in
-      let seplen = length sep in
-      let i = find str sep in
-      if i = -1 then [str]
-      else (
-        let s' = sub str 0 i in
-        let s'' = sub str (i+seplen) (len-i-seplen) in
-        s' :: nsplit sep s''
-      )
-
-    let split sep str =
+    let rec split sep str =
       let len = length sep in
       let seplen = length str in
       let i = find str sep in
@@ -167,6 +156,21 @@ module String = struct
         sub str 0 i, sub str (i + len) (seplen - i - len)
       )
 
+    and nsplit ?(max = 0) sep str =
+      if max < 0 then
+        invalid_arg "String.nsplit: max parameter should not be negative";
+
+      (* Stop when the limit is reached (max = 1; max = 0 means no limit) or
+       * when [sep] does not occur in [str]: the rest is the last element.
+       * NOTE(review): with max = 0 an empty [sep] may never terminate. *)
+      if max = 1 || find str sep = -1 then
+        [str]
+      else (
+        let s1, s2 = split sep str in
+        let max = if max = 0 then 0 else max - 1 in
+        s1 :: nsplit ~max sep s2
+      )
+
     let rec lines_split str =
       let buf = Buffer.create 16 in
       let len = length str in
diff --git a/common/mlstdutils/std_utils.mli b/common/mlstdutils/std_utils.mli
index d217e48d4..c08e51360 100644
--- a/common/mlstdutils/std_utils.mli
+++ b/common/mlstdutils/std_utils.mli
@@ -88,14 +88,19 @@ module String : sig
         [str] with [s2]. *)
     val replace_char : string -> char -> char -> string
     (** Replace character in string. *)
-    val nsplit : string -> string -> string list
-    (** [nsplit sep str] splits [str] into multiple strings at each
-        separator [sep]. *)
     val split : string -> string -> string * string
     (** [split sep str] splits [str] at the first occurrence of the
         separator [sep], returning the part before and the part after.
         If separator is not found, return the whole string and an
         empty string. *)
+    val nsplit : ?max:int -> string -> string -> string list
+    (** [nsplit ?max sep str] splits [str] into multiple strings at each
+        separator [sep].
+
+        As with the Perl split function, you can give an optional
+        [?max] parameter to limit the number of strings returned; the
+        final element then contains the remainder of the input string.
+        Default [0] means no limit; raises [Invalid_argument] if negative. *)
     val lines_split : string -> string list
     (** [lines_split str] splits [str] into lines, keeping continuation
         characters (i.e. [\] at the end of lines) into account. *)
diff --git a/common/mlstdutils/std_utils_tests.ml b/common/mlstdutils/std_utils_tests.ml
index ce49c7606..dcd237dab 100644
--- a/common/mlstdutils/std_utils_tests.ml
+++ b/common/mlstdutils/std_utils_tests.ml
@@ -18,6 +18,8 @@
 
 (* This file tests the Std_utils module. *)
 
+open Printf
+
 open OUnit2
 open Std_utils
 
@@ -26,6 +28,7 @@ let assert_equal_string = assert_equal ~printer:(fun x -> x)
 let assert_equal_int = assert_equal ~printer:(fun x -> string_of_int x)
 let assert_equal_int64 = assert_equal ~printer:(fun x -> Int64.to_string x)
 let assert_equal_stringlist = assert_equal ~printer:(fun x -> "(" ^ (String.escaped (String.concat "," x)) ^ ")")
+let assert_equal_stringpair = assert_equal ~printer:(fun (x, y) -> sprintf "%S, %S" x y)
 
 let test_subdirectory ctx =
   assert_equal_string "" (subdirectory "/foo" "/foo");
@@ -83,6 +86,30 @@ let test_string_find ctx =
   assert_equal_int (-1) (String.find "" "baz");
   assert_equal_int (-1) (String.find "foobar" "baz")
 
+(* Test Std_utils.String.split: first occurrence, empty and absent separator. *)
+let test_string_split ctx =
+  assert_equal_stringpair ("a", "b") (String.split " " "a b");
+  assert_equal_stringpair ("", "ab") (String.split " " " ab");
+  assert_equal_stringpair ("", "abc") (String.split "" "abc");
+  assert_equal_stringpair ("abc", "") (String.split " " "abc");
+  assert_equal_stringpair ("", "") (String.split " " "")
+
+(* Test Std_utils.String.nsplit, with and without the ?max limit. *)
+let test_string_nsplit ctx =
+  (* XXX Not clear if the next test case (empty input giving [""], not [])
+   * indicates an error in String.nsplit.  Historically it has worked so.
+   *)
+  assert_equal_stringlist [""] (String.nsplit " " "");
+  assert_equal_stringlist ["abc"] (String.nsplit " " "abc");
+  assert_equal_stringlist ["a"; "b"; "c"] (String.nsplit " " "a b c");
+  assert_equal_stringlist ["a"; "b"; "c"; ""] (String.nsplit " " "a b c ");
+  assert_equal_stringlist [""; "a"; "b"; "c"] (String.nsplit " " " a b c");
+  assert_equal_stringlist [""; "a"; "b"; "c"; ""] (String.nsplit " " " a b c ");
+  assert_equal_stringlist ["a b c d"] (String.nsplit ~max:1 " " "a b c d");
+  assert_equal_stringlist ["a"; "b c d"] (String.nsplit ~max:2 " " "a b c d");
+  assert_equal_stringlist ["a"; "b"; "c d"] (String.nsplit ~max:3 " " "a b c d");
+  assert_equal_stringlist ["a"; "b"; "c";  "d"] (String.nsplit ~max:10 " " "a b c d")
+
 (* Test Std_utils.String.lines_split. *)
 let test_string_lines_split ctx =
   assert_equal_stringlist [""] (String.lines_split "");
@@ -129,6 +156,8 @@ let suite =
       "strings.is_prefix" >:: test_string_is_prefix;
       "strings.is_suffix" >:: test_string_is_suffix;
       "strings.find" >:: test_string_find;
+      "strings.split" >:: test_string_split;
+      "strings.nsplit" >:: test_string_nsplit;
       "strings.lines_split" >:: test_string_lines_split;
       "strings.span" >:: test_string_span;
       "strings.chomp" >:: test_string_chomp;
-- 
2.13.2




More information about the Libguestfs mailing list