[Libguestfs] [PATCH 5/5] Add a virt-builder-repository tool

Pino Toscano ptoscano at redhat.com
Wed Jan 4 16:53:39 UTC 2017


On Tuesday, 3 January 2017 11:18:56 CET Cédric Bosdonnat wrote:
> virt-builder-repository allows users to easily create or update
> a virt-builder source repository out of disk images. The tool can
> be run in either interactive or automated mode.
> ---

Lots of code to review, so this is not an exhaustive review, neither
in the style nor in what is actually done; for some coding style issues,
I'm usually noting only one occurrence, so please check them in all the
sources.

>  builder/builder_repository.ml       | 493 ++++++++++++++++++++++++++++++++++++

I'd call this source as repository_main.ml, similar to what was done
for virt-customize (although the reason was different), so there's a
pattern when the main source for an OCaml tool is not called "main.ml"
or "$tool.ml".

> diff --git a/builder/Makefile.am b/builder/Makefile.am
> index b1ccd49f3..d9f203381 100644
> --- a/builder/Makefile.am
> +++ b/builder/Makefile.am
>  
> -bin_PROGRAMS += virt-builder
> +bin_PROGRAMS += virt-builder virt-builder-repository

You should add virt-builder-repository once, either here or below.

> +# virt-builder repository creation tool
> +
> +bin_PROGRAMS += virt-builder-repository
> +
> +virt_builder_repository_SOURCES = $(REPOSITORY_SOURCES_C)
> +virt_builder_repository_CPPFLAGS = \
> +	-I. \
> +	-I$(top_builddir) \
> +	-I$(top_srcdir)/gnulib/lib -I$(top_builddir)/gnulib/lib \
> +	-I$(shell $(OCAMLC) -where) \
> +	-I$(top_srcdir)/gnulib/lib \
> +	-I$(top_srcdir)/src \
> +	-I$(top_srcdir)/fish
> +virt_builder_repository_CFLAGS = \
> +	-pthread \
> +	$(WARN_CFLAGS) $(WERROR_CFLAGS) \
> +	-Wno-unused-macros \
> +	$(LIBLZMA_CFLAGS) \
> +	$(LIBTINFO_CFLAGS) \
> +	$(LIBXML2_CFLAGS) \
> +	$(YAJL_CFLAGS)
> +REPOSITORY_BOBJECTS = $(REPOSITORY_SOURCES_ML:.ml=.cmo)
> +REPOSITORY_XOBJECTS = $(REPOSITORY_BOBJECTS:.cmo=.cmx)
> +
> +
> +if !HAVE_OCAMLOPT
> +REPOSITORY_OBJECTS = $(REPOSITORY_BOBJECTS)
> +else
> +REPOSITORY_OBJECTS = $(REPOSITORY_XOBJECTS)
> +endif
> +
> +virt_builder_repository_DEPENDENCIES = \
> +	$(REPOSITORY_OBJECTS) \
> +	../mllib/mllib.$(MLARCHIVE) \
> +	../customize/customize.$(MLARCHIVE) \
> +	$(top_srcdir)/ocaml-link.sh
> +virt_builder_repository_LINK = \
> +	$(top_srcdir)/ocaml-link.sh -cclib '$(OCAMLCLIBS)' -- \
> +	  $(OCAMLFIND) $(BEST) $(OCAMLFLAGS) $(OCAMLPACKAGES) $(OCAMLLINKFLAGS) \
> +	  $(REPOSITORY_OBJECTS) -o $@

I'd move the whole block quoted above after the same of
virt-builder, ...

> +man_MANS += virt-builder-repository.1
> +noinst_DATA += $(top_builddir)/website/virt-builder-repository.1.html
> +
> +virt-builder-repository.1 $(top_builddir)/website/virt-builder-repository.1.html: stamp-virt-builder-repository.pod
> +
> +stamp-virt-builder-repository.pod: virt-builder-repository.pod
> +	$(PODWRAPPER) \
> +	  --man virt-builder-repository.1 \
> +	  --html $(top_builddir)/website/virt-builder-repository.1.html \
> +	  --license GPLv2+ \
> +	  --warning safe \
> +	  $<
> +	touch $@

... and the same for this one, after the block of the documentation of
virt-builder.

> diff --git a/builder/builder_repository.ml b/builder/builder_repository.ml
> new file mode 100644
> index 000000000..682bc9405
> --- /dev/null
> +++ b/builder/builder_repository.ml
> @@ -0,0 +1,493 @@
> +let () = Random.self_init ()

Most probably this is not needed, since this tool will not need to get
random bytes.

> +type cmdline = {
> +  gpg : string;
> +  gpgkey : string;

Make this a string option ref, so it's clearer to distinguish whether
a key was actually specified.

> +    [ L"gpg-key" ],   Getopt.Set_string ("gpgkey", gpgkey),    s_"ID of the GPG key to sign the repo with";

I'd add a short alias, -K.

> +    | _ ->
> +      error (f_"too many parameters, at most one '/path/to/repo' is allowed")

  "too many parameters, only one path to repository is allowed"

> +let make_relative_path base absolute =
> +  if Filename.is_relative absolute then
> +    absolute
> +  else
> +    let expr = sprintf "^%s/\\(.+\\)$" (Str.quote base) in
> +    if Str.string_match (Str.regexp expr) absolute 0 then
> +      Str.matched_group 1 absolute
> +    else
> +      absolute

Hm the path in the "else" branch seems overkill; also, in one of the
patches by Tomáš [1] there's one small function, subfolder, which seems
to do the same job, so you could polish and add it to Common_utils first
(with unit tests, of course ;) ).

[1] https://www.redhat.com/archives/libguestfs/2016-December/msg00156.html

> +let osinfo_get_short_ids () =
> +  let get_ids xpathctx =
> +    xpath_string_default xpathctx "/libosinfo/os/short-id" "" in
> +
> +  let ids = Osinfo.osinfo_read_db get_ids in
> +  List.filter (fun id -> id <> "") ids

The last line can be simplified:
  List.filter ((<>) "") ids

Also, the function is ok (modulo changes proposed in patch #4), but
it is called every time an osinfo short-id is requested... meaning
re-reading fully the whole osinfo-db. Please add a cache for this,
reading the osinfo-db only once at the first access.

Furthermore, since it's just a collection of strings, I'd use a
StringSet for the cache, so the lookup is fast.

> +  (* Check that the paths are existing *)
> +  if not (Sys.file_exists (cmdline.repo)) then

No need for round brackets around cmdline.repo.

> +  (* Create a temporary folder to work in *)
> +  let tmpdir = Mkdtemp.temp_dir ~base_dir:cmdline.repo "virt-builder-repository." "" in

Too long line, please wrap it.

> +  let sigchecker = Sigchecker.create ~gpg:cmdline.gpg
> +                                     ~check_signature:false
> +                                     ~gpgkey:No_Key
> +                                     ~tmpdir:tmpdir in

When the variable for a labelled argument is named exactly like the
labelled argument, it can be simplified: ~foo:foo -> ~foo.

> +  let index =
> +    try
> +      let index_filename =
> +        List.find (
> +          fun filename -> Sys.file_exists (cmdline.repo // filename)

Since it is done every iteration (and also below),
cmdline.repo // filename could be saved in a variable at the beginning
in this try block.

> +      let downloader = Downloader.create ~curl:"curl" ~cache:None ~tmpdir:tmpdir in

Maybe the path to curl could be made configurable like in virt-builder.

> +      let source = { Sources.name = "input";
> +                     uri = (cmdline.repo // index_filename);

Extra round brackets.

> +  (* Check for index/interactive consistency *)
> +  if not cmdline.interactive && index == [] then
> +    error (f_"The repository needs to contain an index file when running in automated mode");

Messages for error should not start as capitalized sentence.

> +  let is_supported_format file =
> +    String.is_suffix file "qcow2" ||
> +    String.is_suffix file "raw" ||
> +    String.is_suffix file "img" in

This function could be in the same block of 'let images' below, as
it is not useful outside. Also, Filename.check_suffix could fit better
here.

Another note is that I've seen filenames for disk images with no
extension, e.g. "SomeDisk" or "instance-hda". Maybe we could pick
also those files, or try to detect files with unknown extension: the
problem is that there is no simple way IIRC to detect RAW images as
such.

> +  let images =
> +    let files = Array.to_list (Sys.readdir cmdline.repo) in
> +    List.filter (fun f -> is_supported_format f) files in

The last line can be simplified:
  List.filter is_supported files

> +    if r <> 0 then
> +      error (f_"cp command failed copying '%s'") file;

I guess "copying" can be left out from the error message, since
mentioning cp implies that already, sort of :)

> +  let process_image filename repo index interactive = (

No need for round brackets around whole functions, there is the "in"
at the end already.

Also, I'd move this big function as top-level in the file, making the
"main" easier to read.

> +    printf "Preparing %s...\n" filename;
> +
> +    let filepath = repo // filename in
> +    let qemuimg_cmd = "qemu-img info --output json " ^ (quote filepath) in
> +    let lines = external_command qemuimg_cmd in
> +    let line = String.concat "" lines in

I'd concat the lines with \n, because IIRC the error messages of the
yajl library include the line number information.

> +    let infos = yajl_tree_parse line in
> +    let format = object_get_string "format" infos in
> +    let size = object_get_number "virtual-size" infos in

The code for getting size and format of a disk image fits as separate
helper function IMHO.

> +    let xz_path = compress_to filepath cmdline.repo in
> +    let checksum = Checksums.get_checksum (Checksums.SHA512 "") xz_path in
> +    let compressed_size = (Unix.stat xz_path).Unix.st_size in
> +
> +    let ask message = (
> +      printf (f_ message);
> +      let value = read_line () in
> +      match value with
> +      | "" -> None
> +      | s -> Some s
> +    ) in
> +
> +    let rec ask_id () = (
> +      printf (f_"Identifier: ");

Small hint: when ask details about an image, either put the basename of
the file (i.e. "Identifier (foo.qcow2): ") or use the "info" functions
to print the progress (like the other tools do).

> +      if not (Str.string_match (Str.regexp "[a-zA-Z0-9-_.]+") id 0) then (
> +        printf (f_"Allowed characters are letters, digits, - _ and .\n");

Such messages should use 'warning' IMHO.

> +    let ask_arch () = (
> +      printf (f_"Architecture. Choose one from the list below:\n");
> +      let arches = ["x86_64"; "aarch64"; "armv7l"; "i686"; "ppc64"; "ppc64le"; "s390x" ] in
> +      iteri (
> +        fun i arch -> printf " [%d] %s\n" (i + 1) arch
> +      ) arches;
> +
> +      let i = ref 0 in
> +      let n = List.length arches in
> +      while !i < 1 || !i > n do
> +        let input = read_line () in
> +        if input = "exit" || input = "q" || input = "quit" then
> +          exit 0
> +        else
> +          try i := int_of_string input
> +          with Failure _ -> ()
> +      done;
> +      List.nth arches (!i - 1)
> +    ) in

While giving a choice of the common options is a good thing, I'd still
accept arbitrary architectures, as there is no limitation in other
parts of libguestfs. If the input cannot parsed as integer, use the
input string as-in.

> +        let osinfo_exists = List.exists (fun id -> osinfo = id) ids in

  List.exists ((=) osinfo) ids

> +      with
> +      | Failure _ -> ("", {Index.printable_name = None;

Hm weird indentation, I'd indent it like:

   with
   | Failure ->
      let entry = { Index.foo = ...;
                    ...
      } in
      ( "", entry }

> +      let printable_name =
> +        if printable_name = None && interactive then
> +          ask "Display name: "

Missing translation for this string.

> +      let arch =
> +        if arch = "" && interactive then
> +          ask_arch ()
> +        else (
> +          if arch = "" then
> +            error (f_"Missing architecture");
> +          arch
> +        ) in

Hmm convoluted conditions, I'd make it simplier:

  let arch =
    if arch = "" then (
      if interactive then ask_arch ()
      else error (f_"missing architecture for %s") short_id
    ) else arch in

> +      (id, {Index.printable_name = printable_name;

Space between '{' and Index.etc.

> +            osinfo = osinfo;
> +            file_uri = Filename.basename xz_path;
> +            arch = arch;
> +            signature_uri = None;
> +            checksums = Some [(Checksums.SHA512 checksum)];
> +            revision = revision;
> +            format = Some format;
> +            size = size;
> +            compressed_size = Some (Int64.of_int compressed_size);
> +            expand = expand;
> +            lvexpand = lvexpand;
> +            notes = notes;
> +            hidden = hidden;
> +            aliases = aliases;
> +            sigchecker = sigchecker;
> +            proxy = Curl.UnsetProxy})

Ditto.

> +  (* Generate entries for uncompressed images *)
> +  let written_ids =
> +  List.map (
> +    fun filename ->
> +      let (id, new_entry) = process_image filename cmdline.repo
> +                                          index cmdline.interactive in

No need for round brackets, as the return value is automatically
unwrapped.

> +
> +      Index.print_entry index_channel (id, new_entry);

Note print_entry is designed mostly as debugging helper, not as way to
recreate the native index again -- for example, text lines in notes are
not indented. I'd add a new function in Index_parser to do this
specifically (we can always rename the module to something more
fitting later on).

> +      output_string index_channel "\n";
> +      id
> +  ) images in
> +
> +  (* Write the unchanged entries *)
> +  List.iter (
> +    fun (id, {Index.printable_name = printable_name;
> +              osinfo = osinfo;
> +              file_uri = file_uri;
> +              arch = arch;
> +              signature_uri = signature_uri;
> +              checksums = checksums;
> +              revision = revision;
> +              format = format;
> +              size = size;
> +              compressed_size = compressed_size;
> +              expand = expand;
> +              lvexpand = lvexpand;
> +              notes = notes;
> +              hidden = hidden;
> +              aliases = aliases;
> +              sigchecker = sigchecker;
> +              proxy = proxy}) ->

No need to extract all the struct values to create a new struct just
for changing one or more values -- use the "with" keyword:

  let entry = { ... } in
  let new = { entry with size = 10; ... } in

> +    (* Remove the index file since we have the signed version of it *)
> +    Sys.remove (tmprepo // "index")

Small TODO item: command line option to not remove the unsigned index
when signing -- useful to keep the unsigned index in VCS/etc, while
publishing only the signed one.

> +        let cmd = [ "mv"; filepath;
> +                          (filepath ^ ".bak") ] in
> +        if run_command cmd <> 0 then
> +          error (f_"Failed to create %s backup copy") filename

I'd use a simple helper function, do_mv, for this and the mv below,
similar to do_cp in dib/utils.ml.

> +  List.iter (
> +    fun filename ->
> +      let cmd = [ "mv"; tmprepo // filename;
> +                        cmdline.repo ] in
> +      if run_command cmd <> 0 then
> +        error (f_"Failed to move %s in repository") (tmprepo // filename)
> +  ) (Array.to_list (Sys.readdir tmprepo));

No need to convert the Array returned by Sys.readdir to List to iterate
it, you can iterate just fine on an Array.

> diff --git a/builder/virt-builder-repository.pod b/builder/virt-builder-repository.pod
> new file mode 100644
> index 000000000..29d86b4ac
> --- /dev/null
> +++ b/builder/virt-builder-repository.pod
> + [template_id]
> + name=template display name
> + file=template_filename.qcow.xz
> + arch=x86_64
> + revision=0
> + size=0

Hm, this makes me think Index_parser.get_index could have a "template"
parameter and not abort for fields missing that are considered mandatory
when parsing a real index.

> +The file value needs to match the image name extended with the ".xz"
> +suffix. Other optional data can be prefilled, for more informations,
> +see the I<Creating and signing the index file> section in
> +L<virt-builder(1)> man page.

Use L<virt-builder(1)/Creating and signing the index file>, so it
creates the proper linking in HTML.

Thanks,
-- 
Pino Toscano
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20170104/8d2d0f2f/attachment.sig>


More information about the Libguestfs mailing list