[Libguestfs] [PATCH v2 05/17] v2v: factor out size checks

Roman Kagan rkagan at virtuozzo.com
Tue Aug 11 17:00:24 UTC 2015


Factor the size checks out into separate functions.  In addition, move
the definitions of the functions they call ahead of the definitions of
the new functions, for better readability.

Signed-off-by: Roman Kagan <rkagan at virtuozzo.com>
---
 v2v/v2v.ml | 378 +++++++++++++++++++++++++++++++------------------------------
 1 file changed, 190 insertions(+), 188 deletions(-)

diff --git a/v2v/v2v.ml b/v2v/v2v.ml
index c20cbf0..53456ea 100644
--- a/v2v/v2v.ml
+++ b/v2v/v2v.ml
@@ -183,6 +183,194 @@ let init_targets overlays source output output_format =
 
   output#prepare_targets source targets
 
+(* Conversion can fail if there is no space on the guest filesystems
+ * (RHBZ#1139543).  To avoid this situation, check there is some
+ * headroom.  Mainly we care about the root filesystem.
+ *)
+let check_free_space mpstats =
+  message (f_"Checking for sufficient free disk space in the guest");
+  List.iter (
+    fun { mp_path = mp;
+          mp_statvfs = { G.bfree = bfree; blocks = blocks; bsize = bsize } } ->
+      (* Ignore small filesystems. *)
+      let total_size = blocks *^ bsize in
+      if total_size > 100_000_000L then (
+        (* bfree = free blocks for root user *)
+        let free_bytes = bfree *^ bsize in
+        let needed_bytes =
+          match mp with
+          | "/" ->
+            (* We may install some packages, and they would usually go
+             * on the root filesystem.
+             *)
+            20_000_000L
+          | "/boot" ->
+            (* We usually regenerate the initramfs, which has a
+             * typical size of 20-30MB.  Hence:
+             *)
+            50_000_000L
+          | _ ->
+            (* For everything else, just make sure there is some free space. *)
+            10_000_000L in
+
+        if free_bytes < needed_bytes then
+          error (f_"not enough free space for conversion on filesystem '%s'.  %Ld bytes free < %Ld bytes needed")
+            mp free_bytes needed_bytes
+      )
+  ) mpstats
+
+(* Estimate the space required on the target for each disk.  It is the
+ * maximum space that might be required, but in reasonable cases much
+ * less space would actually be needed.
+ *
+ * As a starting point we could take ov_virtual_size (plus a tiny
+ * overhead for qcow2 headers etc) as the maximum.  However that's not
+ * very useful.  Other information we have available is:
+ *
+ * - The list of filesystems across the source disk(s).
+ *
+ * - The disk used/free of each of those filesystems, and the
+ * filesystem type.
+ *
+ * Note that we do NOT have the used size of the source disk (because
+ * it may be remote).
+ *
+ * How do you attribute filesystem usage through to backing disks,
+ * since one filesystem might span multiple disks?
+ *
+ * How do you account for non-filesystem usage (eg. swap, partitions
+ * that libguestfs cannot read, the space between LVs/partitions)?
+ *
+ * Another wildcard is that although we try to run {c fstrim} on each
+ * source filesystem, it can fail in some common scenarios.  Also
+ * qemu-img will do zero detection.  Both of these can be big wins when
+ * they work.
+ *
+ * The algorithm used here is this:
+ *
+ * (1) Calculate the total virtual size of all guest filesystems.
+ * eg: [ "/boot" = 500 MB, "/" = 2.5 GB ], total = 3 GB
+ *
+ * (2) Calculate the total virtual size of all source disks.
+ * eg: [ sda = 1 GB, sdb = 3 GB ], total = 4 GB
+ *
+ * (3) The ratio of (1):(2) is the maximum that could be freed up if
+ * all filesystems were effectively zeroed out during the conversion.
+ * eg. ratio = 3/4
+ *
+ * (4) Work out how much filesystem space we are likely to save if
+ * fstrim works, but exclude a few cases where fstrim will probably
+ * fail (eg. filesystems that don't support fstrim).  This is the
+ * conversion saving.
+ * eg. [ "/boot" = 200 MB used, "/" = 1 GB used ], saving = 3 - 1.2 = 1.8 GB
+ *
+ * (5) Scale the conversion saving (4) by the ratio (3), and allocate
+ * that saving across all source disks in proportion to their
+ * virtual size.
+ * eg. scaled saving is 1.8 * 3/4 = 1.35 GB
+ *     sda has 1/4 of total virtual size, so it gets a saving of 1.35/4
+ *     sda final estimated size = 1 - (1.35/4) = 0.6625 GB
+ *     sdb has 3/4 of total virtual size, so it gets a saving of 3 * 1.35 / 4
+ *     sdb final estimated size = 3 - (3*1.35/4) = 1.9875 GB
+ *)
+let estimate_target_size mpstats targets =
+  let sum = List.fold_left (+^) 0L in
+
+  (* (1) *)
+  let fs_total_size =
+    sum (
+      List.map (fun { mp_statvfs = s } -> s.G.blocks *^ s.G.bsize) mpstats
+    ) in
+  if verbose () then
+    printf "estimate_target_size: fs_total_size = %Ld [%s]\n%!"
+      fs_total_size (human_size fs_total_size);
+
+  (* (2) *)
+  let source_total_size =
+    sum (List.map (fun t -> t.target_overlay.ov_virtual_size) targets) in
+  if verbose () then
+    printf "estimate_target_size: source_total_size = %Ld [%s]\n%!"
+      source_total_size (human_size source_total_size);
+
+  if source_total_size = 0L then     (* Avoid divide by zero error. *)
+    targets
+  else (
+    (* (3) Store the ratio as a float to avoid overflows later. *)
+    let ratio =
+      Int64.to_float fs_total_size /. Int64.to_float source_total_size in
+    if verbose () then
+      printf "estimate_target_size: ratio = %.3f\n%!" ratio;
+
+    (* (4) *)
+    let fs_free =
+      sum (
+        List.map (
+          function
+          (* On filesystems supported by fstrim, assume we can save all
+           * the free space.
+           *)
+          | { mp_vfs = "ext2"|"ext3"|"ext4"|"xfs"; mp_statvfs = s } ->
+            s.G.bfree *^ s.G.bsize
+
+          (* fstrim is only supported on NTFS very recently, and has a
+           * lot of limitations.  So make the safe assumption for now
+           * that it's not going to work.
+           *)
+          | { mp_vfs = "ntfs" } -> 0L
+
+          (* For other filesystems, sorry we can't free anything :-/ *)
+          | _ -> 0L
+        ) mpstats
+      ) in
+    if verbose () then
+      printf "estimate_target_size: fs_free = %Ld [%s]\n%!"
+        fs_free (human_size fs_free);
+    let scaled_saving = Int64.of_float (Int64.to_float fs_free *. ratio) in
+    if verbose () then
+      printf "estimate_target_size: scaled_saving = %Ld [%s]\n%!"
+        scaled_saving (human_size scaled_saving);
+
+    (* (5) *)
+    let targets = List.map (
+      fun ({ target_overlay = ov } as t) ->
+        let size = ov.ov_virtual_size in
+        let proportion =
+          Int64.to_float size /. Int64.to_float source_total_size in
+        let estimated_size =
+          size -^ Int64.of_float (proportion *. Int64.to_float scaled_saving) in
+        if verbose () then
+          printf "estimate_target_size: %s: %Ld [%s]\n%!"
+            ov.ov_sd estimated_size (human_size estimated_size);
+        { t with target_estimated_size = Some estimated_size }
+    ) targets in
+
+    targets
+  )
+
+let get_mpstats g =
+  (* Collect statvfs information from the guest mountpoints. *)
+  let mpstats = List.map (
+    fun (dev, path) ->
+      let statvfs = g#statvfs path in
+      let vfs = g#vfs_type dev in
+      { mp_dev = dev; mp_path = path; mp_statvfs = statvfs; mp_vfs = vfs }
+  ) (g#mountpoints ()) in
+
+  if verbose () then (
+    (* This is useful for debugging speed / fstrim issues. *)
+    printf "mpstats:\n";
+    List.iter (print_mpstat Pervasives.stdout) mpstats
+  );
+
+  mpstats
+
+let check_target_free_space mpstats source targets output =
+  (* Estimate space required on target for each disk.  Note this is a max. *)
+  message (f_"Estimating space required on target for each disk");
+  let targets = estimate_target_size mpstats targets in
+
+  output#check_target_free_space source targets
+
 let rec main () =
   (* Handle the command line. *)
   let input, output,
@@ -222,32 +410,9 @@ let rec main () =
   message (f_"Inspecting the overlay");
   let inspect = inspect_source g root_choice in
 
-  (* The guest free disk space check and the target free space
-   * estimation both require statvfs information from mountpoints, so
-   * get that information first.
-   *)
-  let mpstats = List.map (
-    fun (dev, path) ->
-      let statvfs = g#statvfs path in
-      let vfs = g#vfs_type dev in
-      { mp_dev = dev; mp_path = path; mp_statvfs = statvfs; mp_vfs = vfs }
-  ) (g#mountpoints ()) in
-
-  if verbose () then (
-    (* This is useful for debugging speed / fstrim issues. *)
-    printf "mpstats:\n";
-    List.iter (print_mpstat Pervasives.stdout) mpstats
-  );
-
-  (* Check there is enough free space to perform conversion. *)
-  message (f_"Checking for sufficient free disk space in the guest");
+  let mpstats = get_mpstats g in
   check_free_space mpstats;
-
-  (* Estimate space required on target for each disk.  Note this is a max. *)
-  message (f_"Estimating space required on target for each disk");
-  let targets = estimate_target_size mpstats targets in
-
-  output#check_target_free_space source targets;
+  check_target_free_space mpstats source targets output;
 
   (* Conversion. *)
   let guestcaps =
@@ -597,41 +762,6 @@ and inspect_source g root_choice =
   if verbose () then printf "%s%!" (string_of_inspect inspect);
   inspect
 
-(* Conversion can fail if there is no space on the guest filesystems
- * (RHBZ#1139543).  To avoid this situation, check there is some
- * headroom.  Mainly we care about the root filesystem.
- *)
-and check_free_space mpstats =
-  List.iter (
-    fun { mp_path = mp;
-          mp_statvfs = { G.bfree = bfree; blocks = blocks; bsize = bsize } } ->
-      (* Ignore small filesystems. *)
-      let total_size = blocks *^ bsize in
-      if total_size > 100_000_000L then (
-        (* bfree = free blocks for root user *)
-        let free_bytes = bfree *^ bsize in
-        let needed_bytes =
-          match mp with
-          | "/" ->
-            (* We may install some packages, and they would usually go
-             * on the root filesystem.
-             *)
-            20_000_000L
-          | "/boot" ->
-            (* We usually regenerate the initramfs, which has a
-             * typical size of 20-30MB.  Hence:
-             *)
-            50_000_000L
-          | _ ->
-            (* For everything else, just make sure there is some free space. *)
-            10_000_000L in
-
-        if free_bytes < needed_bytes then
-          error (f_"not enough free space for conversion on filesystem '%s'.  %Ld bytes free < %Ld bytes needed")
-            mp free_bytes needed_bytes
-      )
-  ) mpstats
-
 (* Perform the fstrim.  The trimming bit is easy.  Dealing with the
  * [--no-trim] parameter .. not so much.
  *)
@@ -691,134 +821,6 @@ and do_fstrim g no_trim inspect =
       )
   ) fses
 
-(* Estimate the space required on the target for each disk.  It is the
- * maximum space that might be required, but in reasonable cases much
- * less space would actually be needed.
- *
- * As a starting point we could take ov_virtual_size (plus a tiny
- * overhead for qcow2 headers etc) as the maximum.  However that's not
- * very useful.  Other information we have available is:
- *
- * - The list of filesystems across the source disk(s).
- *
- * - The disk used/free of each of those filesystems, and the
- * filesystem type.
- *
- * Note that we do NOT have the used size of the source disk (because
- * it may be remote).
- *
- * How do you attribute filesystem usage through to backing disks,
- * since one filesystem might span multiple disks?
- *
- * How do you account for non-filesystem usage (eg. swap, partitions
- * that libguestfs cannot read, the space between LVs/partitions)?
- *
- * Another wildcard is that although we try to run {c fstrim} on each
- * source filesystem, it can fail in some common scenarios.  Also
- * qemu-img will do zero detection.  Both of these can be big wins when
- * they work.
- *
- * The algorithm used here is this:
- *
- * (1) Calculate the total virtual size of all guest filesystems.
- * eg: [ "/boot" = 500 MB, "/" = 2.5 GB ], total = 3 GB
- *
- * (2) Calculate the total virtual size of all source disks.
- * eg: [ sda = 1 GB, sdb = 3 GB ], total = 4 GB
- *
- * (3) The ratio of (1):(2) is the maximum that could be freed up if
- * all filesystems were effectively zeroed out during the conversion.
- * eg. ratio = 3/4
- *
- * (4) Work out how much filesystem space we are likely to save if
- * fstrim works, but exclude a few cases where fstrim will probably
- * fail (eg. filesystems that don't support fstrim).  This is the
- * conversion saving.
- * eg. [ "/boot" = 200 MB used, "/" = 1 GB used ], saving = 3 - 1.2 = 1.8
- *
- * (5) Scale the conversion saving (4) by the ratio (3), and allocate
- * that saving across all source disks in proportion to their
- * virtual size.
- * eg. scaled saving is 1.8 * 3/4 = 1.35 GB
- *     sda has 1/4 of total virtual size, so it gets a saving of 1.35/4
- *     sda final estimated size = 1 - (1.35/4) = 0.6625 GB
- *     sdb has 3/4 of total virtual size, so it gets a saving of 3 * 1.35 / 4
- *     sdb final estimate size = 3 - (3*1.35/4) = 1.9875 GB
- *)
-and estimate_target_size mpstats targets =
-  let sum = List.fold_left (+^) 0L in
-
-  (* (1) *)
-  let fs_total_size =
-    sum (
-      List.map (fun { mp_statvfs = s } -> s.G.blocks *^ s.G.bsize) mpstats
-    ) in
-  if verbose () then
-    printf "estimate_target_size: fs_total_size = %Ld [%s]\n%!"
-      fs_total_size (human_size fs_total_size);
-
-  (* (2) *)
-  let source_total_size =
-    sum (List.map (fun t -> t.target_overlay.ov_virtual_size) targets) in
-  if verbose () then
-    printf "estimate_target_size: source_total_size = %Ld [%s]\n%!"
-      source_total_size (human_size source_total_size);
-
-  if source_total_size = 0L then     (* Avoid divide by zero error. *)
-    targets
-  else (
-    (* (3) Store the ratio as a float to avoid overflows later. *)
-    let ratio =
-      Int64.to_float fs_total_size /. Int64.to_float source_total_size in
-    if verbose () then
-      printf "estimate_target_size: ratio = %.3f\n%!" ratio;
-
-    (* (4) *)
-    let fs_free =
-      sum (
-        List.map (
-          function
-          (* On filesystems supported by fstrim, assume we can save all
-           * the free space.
-           *)
-          | { mp_vfs = "ext2"|"ext3"|"ext4"|"xfs"; mp_statvfs = s } ->
-            s.G.bfree *^ s.G.bsize
-
-          (* fstrim is only supported on NTFS very recently, and has a
-           * lot of limitations.  So make the safe assumption for now
-           * that it's not going to work.
-           *)
-          | { mp_vfs = "ntfs" } -> 0L
-
-          (* For other filesystems, sorry we can't free anything :-/ *)
-          | _ -> 0L
-        ) mpstats
-      ) in
-    if verbose () then
-      printf "estimate_target_size: fs_free = %Ld [%s]\n%!"
-        fs_free (human_size fs_free);
-    let scaled_saving = Int64.of_float (Int64.to_float fs_free *. ratio) in
-    if verbose () then
-      printf "estimate_target_size: scaled_saving = %Ld [%s]\n%!"
-        scaled_saving (human_size scaled_saving);
-
-    (* (5) *)
-    let targets = List.map (
-      fun ({ target_overlay = ov } as t) ->
-        let size = ov.ov_virtual_size in
-        let proportion =
-          Int64.to_float size /. Int64.to_float source_total_size in
-        let estimated_size =
-          size -^ Int64.of_float (proportion *. Int64.to_float scaled_saving) in
-        if verbose () then
-          printf "estimate_target_size: %s: %Ld [%s]\n%!"
-            ov.ov_sd estimated_size (human_size estimated_size);
-        { t with target_estimated_size = Some estimated_size }
-    ) targets in
-
-    targets
-  )
-
 (* Update the target_actual_size field in the target structure. *)
 and actual_target_size target =
   { target with target_actual_size = du target.target_file }
-- 
2.4.3




More information about the Libguestfs mailing list