[Libguestfs] [PATCH v3 2/2] builder: Choose better weights in the planner.

Richard W.M. Jones rjones at redhat.com
Wed Oct 4 13:31:11 UTC 2017


---
 builder/builder.ml            | 82 +++++++++++++++++++++++++++++++++----------
 common/mlutils/unix_utils-c.c | 27 ++++++++++++++
 common/mlutils/unix_utils.ml  |  3 ++
 common/mlutils/unix_utils.mli |  4 +++
 m4/guestfs_libraries.m4       |  1 +
 5 files changed, 98 insertions(+), 19 deletions(-)

diff --git a/builder/builder.ml b/builder/builder.ml
index d8e625f68..90ff4645a 100644
--- a/builder/builder.ml
+++ b/builder/builder.ml
@@ -414,12 +414,57 @@ let main () =
     let is_not t = not (is t) in
     let remove = List.remove_assoc in
     let ret = ref [] in
-    let tr task weight otags = push_front (task, weight, otags) ret in
 
-    (* XXX Weights are not very smartly chosen.  At the moment I'm
-     * using a range [0..100] where 0 = free and 100 = expensive.  We
-     * could estimate weights better by looking at file sizes.
+    (* The scheme for weights ranges from 0 = free to 100 = most expensive:
+     *
+     *    0 = free operations like renaming a file in the same directory
+     *   10 = in-place conversions (like [qemu-img resize])
+     *   20 = copy or move a file between two local filesystems
+     *   30 = copy and convert a file between two local filesystems
+     *   40 = copy a file within the same local filesystem
+     *   50 = copy and convert a file within the same local filesystem
+     *   80 = copy, move, convert if source or target is on remote filesystem
+     *  100 = complex operations like virt-resize
+     *
+     * Copies and moves across different local filesystems are
+     * cheaper than copies within the same filesystem.  The
+     * theory because less bandwith is available if both source
+     * and destination hit the same device (except in the special
+     * case of moving within a filesystem which is free).
+     *
+     * We could estimate weights better by looking at file sizes.
      *)
+    let weight task otags =
+      let infile = List.assoc `Filename itags
+      and outfile = List.assoc `Filename otags in
+
+      (* If infile/outfile don't exist, get the containing directory. *)
+      let infile =
+        if Sys.file_exists infile then infile else Filename.dirname infile in
+      let outfile =
+        if Sys.file_exists outfile then outfile else Filename.dirname outfile in
+
+      match task with
+      | `Virt_resize -> 100     (* virt-resize is a special case*)
+      | (`Copy|`Move|`Pxzcat|`Disk_resize|`Convert) as task ->
+         if StatVFS.is_network_filesystem infile ||
+            StatVFS.is_network_filesystem outfile
+         then 80 (* NFS etc. *)
+         else (
+           let across = (lstat infile).st_dev <> (lstat outfile).st_dev in
+           match task, across with
+           | `Move, false -> 0     (* rename in same filesystem *)
+           | `Disk_resize, _ -> 10 (* in-place conversion *)
+           | `Move, true           (* move or copy between two filesystems *)
+           | `Copy, true -> 20
+           | (`Pxzcat|`Convert), true -> 30 (* convert between two local fses*)
+           | `Copy, false -> 40    (* copy within same filesystem *)
+           | (`Pxzcat|`Convert), false -> 50 (* convert with same local fs*)
+         )
+    in
+
+    (* Add a transition to the returned list. *)
+    let tr task otags = push_front (task, weight task otags, otags) ret in
 
     (* Since the final plan won't run in parallel, we don't only need
      * to choose unique tempfiles per transition, so this is OK:
@@ -431,17 +476,16 @@ let main () =
      * thing a copy does is to remove the template tag (since it's always
      * copied out of the cache directory).
      *)
-    tr `Copy 50 ((`Filename, output_filename) :: remove `Template itags);
-    tr `Copy 50 ((`Filename, tempfile) :: remove `Template itags);
+    tr `Copy ((`Filename, output_filename) :: remove `Template itags);
+    tr `Copy ((`Filename, tempfile) :: remove `Template itags);
 
     (* We can rename a file instead of copying, but don't rename the
-     * cache copy!  (XXX Also this is not free if copying across
-     * filesystems)
+     * cache copy!
      *)
     if is_not `Template then (
       if not output_is_block_dev then
-        tr `Rename 0 ((`Filename, output_filename) :: itags);
-      tr `Rename 0 ((`Filename, tempfile) :: itags);
+        tr `Move ((`Filename, output_filename) :: itags);
+      tr `Move ((`Filename, tempfile) :: itags)
     );
 
     if is `XZ then (
@@ -449,9 +493,9 @@ let main () =
        * to the output file or to a temp file.
        *)
       if not output_is_block_dev then
-        tr `Pxzcat 80
+        tr `Pxzcat
           ((`Filename, output_filename) :: remove `XZ (remove `Template itags));
-      tr `Pxzcat 80
+      tr `Pxzcat
         ((`Filename, tempfile) :: remove `XZ (remove `Template itags));
     )
     else (
@@ -462,11 +506,11 @@ let main () =
       let old_size = Int64.of_string (List.assoc `Size itags) in
       let headroom = 256L *^ 1024L *^ 1024L in
       if output_size >= old_size +^ headroom then (
-        tr `Virt_resize 100
+        tr `Virt_resize
           ((`Size, Int64.to_string output_size) ::
               (`Filename, output_filename) ::
               (`Format, output_format) :: (remove `Template itags));
-        tr `Virt_resize 100
+        tr `Virt_resize
           ((`Size, Int64.to_string output_size) ::
               (`Filename, tempfile) ::
               (`Format, output_format) :: (remove `Template itags))
@@ -485,15 +529,15 @@ let main () =
        *)
       else if output_size > old_size && is_not `Template
               && List.mem_assoc `Format itags then
-        tr `Disk_resize 60 ((`Size, Int64.to_string output_size) :: itags);
+        tr `Disk_resize ((`Size, Int64.to_string output_size) :: itags);
 
       (* qemu-img convert is always possible, and quicker.  It doesn't
        * resize, but it does change the format.
        *)
-      tr `Convert 60
+      tr `Convert
         ((`Filename, output_filename) :: (`Format, output_format) ::
             (remove `Template itags));
-      tr `Convert 60
+      tr `Convert
         ((`Filename, tempfile) :: (`Format, output_format) ::
             (remove `Template itags));
     );
@@ -528,7 +572,7 @@ let main () =
     in
     let print_task = function
       | `Copy -> printf "cp"
-      | `Rename -> printf "mv"
+      | `Move -> printf "mv"
       | `Pxzcat -> printf "pxzcat"
       | `Virt_resize -> printf "virt-resize"
       | `Disk_resize -> printf "qemu-img resize"
@@ -570,7 +614,7 @@ let main () =
       let cmd = [ "cp"; ifile; ofile ] in
       if run_command cmd <> 0 then exit 1
 
-    | itags, `Rename, otags ->
+    | itags, `Move, otags ->
       let ifile = List.assoc `Filename itags in
       let ofile = List.assoc `Filename otags in
       let cmd = [ "mv"; ifile; ofile ] in
diff --git a/common/mlutils/unix_utils-c.c b/common/mlutils/unix_utils-c.c
index 94097e95f..a43921741 100644
--- a/common/mlutils/unix_utils-c.c
+++ b/common/mlutils/unix_utils-c.c
@@ -28,10 +28,18 @@
 #include <errno.h>
 #include <sys/types.h>
 
+#ifdef HAVE_SYS_STATFS_H
+#include <sys/statfs.h>
+#endif
+
 #ifdef HAVE_SYS_STATVFS_H
 #include <sys/statvfs.h>
 #endif
 
+#ifdef HAVE_SYS_VFS_H
+#include <sys/vfs.h>
+#endif
+
 #if MAJOR_IN_MKDEV
 #include <sys/mkdev.h>
 #elif MAJOR_IN_SYSMACROS
@@ -60,6 +68,7 @@ extern value guestfs_int_mllib_fsync_file (value filenamev);
 extern value guestfs_int_mllib_mkdtemp (value val_pattern);
 extern value guestfs_int_mllib_realpath (value pathv);
 extern value guestfs_int_mllib_statvfs_statvfs (value pathv);
+extern value guestfs_int_mllib_statvfs_is_network_filesystem (value pathv);
 
 /* NB: This is a "noalloc" call. */
 value
@@ -322,3 +331,21 @@ guestfs_int_mllib_statvfs_statvfs (value pathv)
 
   CAMLreturn (rv);
 }
+
+/* NB: This is a "noalloc" call. */
+value
+guestfs_int_mllib_statvfs_is_network_filesystem (value pathv)
+{
+#ifdef HAVE_STATFS
+  struct statfs buf;
+
+  if (statfs (String_val (pathv), &buf) == -1)
+    unix_error (errno, (char *) "statvfs", pathv);
+
+  return Val_bool (buf.f_type == 0xff534d42 /* CIFS_MAGIC_NUMBER */ ||
+                   buf.f_type == 0x6969 /* NFS_SUPER_MAGIC */ ||
+                   buf.f_type == 0x517b /* SMB_SUPER_MAGIC */);
+#else
+  return Val_bool (0);
+#endif
+}
diff --git a/common/mlutils/unix_utils.ml b/common/mlutils/unix_utils.ml
index 085eff65b..6912282c8 100644
--- a/common/mlutils/unix_utils.ml
+++ b/common/mlutils/unix_utils.ml
@@ -80,4 +80,7 @@ module StatVFS = struct
     "guestfs_int_mllib_statvfs_statvfs"
 
   let free_space { f_bsize = bsize; f_bavail = bavail } = bsize *^ bavail
+
+  external is_network_filesystem : string -> bool =
+    "guestfs_int_mllib_statvfs_is_network_filesystem" "noalloc"
 end
diff --git a/common/mlutils/unix_utils.mli b/common/mlutils/unix_utils.mli
index bd182129c..4df72aa55 100644
--- a/common/mlutils/unix_utils.mli
+++ b/common/mlutils/unix_utils.mli
@@ -116,4 +116,8 @@ module StatVFS : sig
   val free_space : statvfs -> int64
   (** [free_space (statvfs path)] returns the free space available on the
       filesystem that contains [path], in bytes. *)
+
+  val is_network_filesystem : string -> bool
+  (** [is_network_filesystem path] returns true if [path] is located on
+      a network filesystem such as NFS or CIFS. *)
 end
diff --git a/m4/guestfs_libraries.m4 b/m4/guestfs_libraries.m4
index ddd9e2b7f..5b3983fae 100644
--- a/m4/guestfs_libraries.m4
+++ b/m4/guestfs_libraries.m4
@@ -82,6 +82,7 @@ AC_CHECK_FUNCS([\
     setrlimit \
     setxattr \
     sigaction \
+    statfs \
     statvfs \
     sync])
 
-- 
2.13.2




More information about the Libguestfs mailing list