[Libguestfs] [PATCH libnbd v2 1/3] copy: Store the preferred block size in the operations struct

Laszlo Ersek lersek at redhat.com
Wed Jun 29 14:56:20 UTC 2022


On 06/29/22 13:03, Richard W.M. Jones wrote:
> This will be used in a subsequent commit.  At the moment the preferred
> block size for all sources / destinations is simply calculated and
> stored.
> ---
>  copy/file-ops.c | 9 ++++++++-
>  copy/main.c     | 9 ++++++---
>  copy/nbd-ops.c  | 9 +++++++++
>  copy/nbdcopy.h  | 4 +++-
>  copy/null-ops.c | 1 +
>  copy/pipe-ops.c | 1 +
>  6 files changed, 28 insertions(+), 5 deletions(-)
> 
> diff --git a/copy/file-ops.c b/copy/file-ops.c
> index ab3787545c..3b901bcdb8 100644
> --- a/copy/file-ops.c
> +++ b/copy/file-ops.c
> @@ -241,7 +241,8 @@ seek_hole_supported (int fd)
>  
>  struct rw *
>  file_create (const char *name, int fd,
> -             off_t st_size, bool is_block, direction d)
> +             off_t st_size, blksize_t st_blksize,
> +             bool is_block, direction d)
>  {
>    struct rw_file *rwf = calloc (1, sizeof *rwf);
>    if (rwf == NULL) { perror ("calloc"); exit (EXIT_FAILURE); }
> @@ -262,6 +263,11 @@ file_create (const char *name, int fd,
>        perror ("lseek");
>        exit (EXIT_FAILURE);
>      }
> +    rwf->rw.preferred = 4096;
> +#ifdef BLKIOOPT
> +    if (ioctl (fd, BLKIOOPT, &rwf->rw.preferred))
> +      fprintf (stderr, "warning: cannot get optimal I/O size: %s: %m", name);
> +#endif

>From my reading of the kernel, this ioctl stores an "unsigned int",
which (in practice) will not set all bytes in the "preferred" uint64_t
field.

Additionally, if the ioctl fails, I'm not sure if we have a guarantee
that the destination will be left unchanged. I suggest:

#ifdef BLKIOOPT
  unsigned int blkioopt;

  if (ioctl (fd, BLKIOOPT, &blkioopt))
    rwf->rw.preferred = blkioopt;
  else {
    fprintf (stderr, "warning: cannot get optimal I/O size: %s: %m",
             name);
    rwf->rw.preferred = 4096;
  }
#else
  rwf->rw.preferred = 4096;
#endif

... however, I also suggest doing this *elsewhere* (not here). I think
this function should take a plain uint64_t "preferred" param, and simply
assign that to "rwf->rw.preferred", independently of "is_block". (There
is one call site only, so moving the logic out there does not lead to
code duplication.) I'll explain the reason below.

>      rwf->seek_hole_supported = seek_hole_supported (fd);
>      rwf->sector_size = 4096;
>  #ifdef BLKSSZGET
> @@ -282,6 +288,7 @@ file_create (const char *name, int fd,
>    else {
>      /* Regular file. */
>      rwf->rw.size = st_size;
> +    rwf->rw.preferred = st_blksize;
>      rwf->seek_hole_supported = seek_hole_supported (fd);
>      /* Possible efficient zero methods for regular file. */
>  #ifdef FALLOC_FL_PUNCH_HOLE
> diff --git a/copy/main.c b/copy/main.c
> index cc379e98bc..9b551de664 100644
> --- a/copy/main.c
> +++ b/copy/main.c
> @@ -513,7 +513,9 @@ open_local (const char *filename, direction d)
>      exit (EXIT_FAILURE);
>    }
>    if (S_ISBLK (stat.st_mode) || S_ISREG (stat.st_mode))
> -    return file_create (filename, fd, stat.st_size, S_ISBLK (stat.st_mode), d);
> +    return file_create (filename, fd,
> +                        stat.st_size, stat.st_blksize, S_ISBLK (stat.st_mode),
> +                        d);
>    else {
>      /* Probably stdin/stdout, a pipe or a socket. */
>      synchronous = true;        /* Force synchronous mode for pipes. */

I couldn't find any info on whether fstat() fills in "stat.st_blksize"
at all for block devices. I understand that with "is_block" set, the
potential garbage will be ignored in file_create() anyway, but passing
garbage *itself* is not great.

So I suggest to split

  S_ISBLK (stat.st_mode) || S_ISREG (stat.st_mode)

to two separate branche. Pass stat.st_blksize (as "uint64_t preferred")
on the S_ISREG branch. Do the following on the S_ISBLK branch:

  unsigned int blkioopt;

#ifdef BLKIOOPT
  if (ioctl (fd, BLKIOOPT, &blkioopt) == -1) {
    fprintf (stderr, "warning: cannot get optimal I/O size: %s: %m",
             filename);
    blkioopt = 4096;
  }
#else
  blkioopt = 4096;
#endif

  return file_create (filename, fd, stat.st_size, blkioopt, false, d);


> @@ -528,8 +530,9 @@ print_rw (struct rw *rw, const char *prefix, FILE *fp)
>    char buf[HUMAN_SIZE_LONGEST];
>  
>    fprintf (fp, "%s: %s \"%s\"\n", prefix, rw->ops->ops_name, rw->name);
> -  fprintf (fp, "%s: size=%" PRIi64 " (%s)\n",
> -           prefix, rw->size, human_size (buf, rw->size, NULL));
> +  fprintf (fp, "%s: size=%" PRIi64 " (%s), preferred block size=%" PRIu64 "\n",
> +           prefix, rw->size, human_size (buf, rw->size, NULL),
> +           rw->preferred);
>  }
>  
>  /* Default implementation of rw->ops->get_extents for backends which
> diff --git a/copy/nbd-ops.c b/copy/nbd-ops.c
> index 3bc26ba613..6c5d59c578 100644
> --- a/copy/nbd-ops.c
> +++ b/copy/nbd-ops.c
> @@ -113,11 +113,20 @@ open_one_nbd_handle (struct rw_nbd *rwn)
>     */
>    if (rwn->handles.len == 0) {
>      rwn->can_zero = nbd_can_zero (nbd) > 0;
> +
>      rwn->rw.size = nbd_get_size (nbd);
>      if (rwn->rw.size == -1) {

This existent code is fine, as nbd_get_size returns int64_t, and
"rwn->rw.size" also has type int64_t...

>        fprintf (stderr, "%s: %s: %s\n", prog, rwn->rw.name, nbd_get_error ());
>        exit (EXIT_FAILURE);
>      }
> +
> +    rwn->rw.preferred = nbd_get_block_size (nbd, LIBNBD_SIZE_PREFERRED);
> +    if (rwn->rw.preferred == -1) {

... but this is hard to read (albeit correct in practice).

In case nbd_get_block_size() fails, it will return (int64_t)-1, and we
end up assigning "rwn->rw.preferred" UINT64_MAX. In the comparison on
the next line, the (int)-1 will in practice be converted to uint64_t, so
we do the comparison against UINT64_MAX. Correct, but hard to read.

I'd use a separate "int64_t block_size" local variable for this:

  block_size = nbd_get_block_size (...);
  if (block_size == -1) {
    fprintf (...);
    exit (...);
  }
  rwn->rw.preferred = block_size == 0 ? 4096 : block_size;

But, replacing -1 with UINT64_MAX, or else (uint64_t)-1, would be
informative too.

> +      fprintf (stderr, "%s: %s: %s\n", prog, rwn->rw.name, nbd_get_error ());
> +      exit (EXIT_FAILURE);
> +    }
> +    if (rwn->rw.preferred == 0)
> +      rwn->rw.preferred = 4096;
>    }
>  
>    if (handles_append (&rwn->handles, nbd) == -1) {
> diff --git a/copy/nbdcopy.h b/copy/nbdcopy.h
> index 19797dfd66..9b02ddf270 100644
> --- a/copy/nbdcopy.h
> +++ b/copy/nbdcopy.h
> @@ -43,6 +43,7 @@ struct rw {
>    struct rw_ops *ops;           /* Operations. */
>    const char *name;             /* Printable name, for error messages etc. */
>    int64_t size;                 /* May be -1 for streams. */
> +  uint64_t preferred;           /* Preferred block size. */
>    /* Followed by private data for the particular subtype. */
>  };
>  
> @@ -53,7 +54,8 @@ typedef enum { READING, WRITING } direction;
>  
>  /* Create subtypes. */
>  extern struct rw *file_create (const char *name, int fd,
> -                               off_t st_size, bool is_block, direction d);
> +                               off_t st_size, blksize_t st_blksize,
> +                               bool is_block, direction d);
>  extern struct rw *nbd_rw_create_uri (const char *name,
>                                       const char *uri, direction d);
>  extern struct rw *nbd_rw_create_subprocess (const char **argv, size_t argc,
> diff --git a/copy/null-ops.c b/copy/null-ops.c
> index 1218a623e2..854e291613 100644
> --- a/copy/null-ops.c
> +++ b/copy/null-ops.c
> @@ -45,6 +45,7 @@ null_create (const char *name)
>    rw->rw.ops = &null_ops;
>    rw->rw.name = name;
>    rw->rw.size = INT64_MAX;
> +  rw->rw.preferred = 4 * 1024 * 1024;
>    return &rw->rw;
>  }
>  
> diff --git a/copy/pipe-ops.c b/copy/pipe-ops.c
> index 3c8b6c2b39..3815f824fc 100644
> --- a/copy/pipe-ops.c
> +++ b/copy/pipe-ops.c
> @@ -43,6 +43,7 @@ pipe_create (const char *name, int fd)
>    rwp->rw.ops = &pipe_ops;
>    rwp->rw.name = name;
>    rwp->rw.size = -1;
> +  rwp->rw.preferred = 4096;
>    rwp->fd = fd;
>    return &rwp->rw;
>  }
> 

Thanks,
Laszlo


More information about the Libguestfs mailing list