<div dir="ltr"><br><div class="gmail_extra"><br><br><div class="gmail_quote">2014/1/15 Michal Privoznik <span dir="ltr"><<a href="mailto:mprivozn@redhat.com" target="_blank">mprivozn@redhat.com</a>></span><br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div class="im">On 24.12.2013 09:56, Chunyan Liu wrote:<br>
> Btrfs has terrible performance when hosting VM images, even more when the guest<br>
> in those VM are also using btrfs as file system. One way to mitigate this bad<br>
> performance is to turn off COW attributes on VM files (since having copy on<br>
> write for this kind of data is not useful).<br>
><br>
> According to 'chattr' manpage, NOCOW could be set to new or empty file only on<br>
> btrfs, so this patch tries to add nocow feature option in volume xml and handle<br>
> it in vol-create, so that users could have a chance to set NOCOW to a new<br>
> volume if that happens on a btrfs like file system.<br>
><br>
> Signed-off-by: Chunyan Liu <<a href="mailto:cyliu@suse.com">cyliu@suse.com</a>><br>
><br>
> ---<br>
> This is a revised version to:<br>
>   <a href="http://www.redhat.com/archives/libvir-list/2013-December/msg00303.html" target="_blank">http://www.redhat.com/archives/libvir-list/2013-December/msg00303.html</a><br>
><br>
> Changes:<br>
>   * fix Daniel's comments<br>
><br>
> ---<br>
>  docs/<a href="http://formatstorage.html.in" target="_blank">formatstorage.html.in</a>           |   12 +++++---<br>
>  docs/schemas/storagefilefeatures.rng |    3 ++<br>
>  src/conf/storage_conf.c              |    9 ++++--<br>
>  src/storage/storage_backend.c        |    4 +-<br>
>  src/storage/storage_backend_fs.c     |   48 ++++++++++++++++++++++++++++++++++<br>
>  src/util/virstoragefile.c            |    1 +<br>
>  src/util/virstoragefile.h            |    1 +<br>
>  7 files changed, 69 insertions(+), 9 deletions(-)<br>
<br>
</div>Can you add a testcase that (at least) checks the RNG scheme? The more<br>
your test tests the better.<br>
<div><div class="h5"><br></div></div></blockquote><div>Michal, thanks for your comment. Sorry for late response. I'm on vacation these days.<br></div><div>Sure, I can add some testcase for that.<br></div><div><br></div>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div><div class="h5">
><br>
> diff --git a/docs/<a href="http://formatstorage.html.in" target="_blank">formatstorage.html.in</a> b/docs/<a href="http://formatstorage.html.in" target="_blank">formatstorage.html.in</a><br>
> index a089a31..3de1a2b 100644<br>
> --- a/docs/<a href="http://formatstorage.html.in" target="_blank">formatstorage.html.in</a><br>
> +++ b/docs/<a href="http://formatstorage.html.in" target="_blank">formatstorage.html.in</a><br>
> @@ -385,6 +385,7 @@<br>
>            &lt;compat&gt;1.1&lt;/compat&gt;<br>
>            &lt;features&gt;<br>
>              &lt;lazy_refcounts/&gt;<br>
> +            &lt;nocow/&gt;<br>
>            &lt;/features&gt;<br>
>          &lt;/target&gt;</pre><br>
><br>
> @@ -423,11 +424,14 @@<br>
>          <span class="since">Since 1.1.0</span><br>
>        </dd><br>
>        <dt><code>features</code></dt><br>
> -      <dd>Format-specific features. Only used for <code>qcow2</code> now.<br>
> -        Valid sub-elements are:<br>
> -        <ul><br>
> +      <dd>Format-specific features. Valid sub-elements are:<br>
> +      <ul><br>
>            <li><code>&lt;lazy_refcounts/&gt;</code> - allow delayed reference<br>
> -          counter updates. <span class="since">Since 1.1.0</span></li><br>
> +          counter updates. Only used for <code>qcow2</code> now.<br>
> +          <span class="since">Since 1.1.0</span></li><br>
> +          <li><code>&lt;nocow/&gt;</code> - turn off copy-on-write. Only valid<br>
> +          to volume on <code>btrfs</code>, can improve performance.<br>
> +          <span class="since">Since 1.2.2</span></li><br>
>          </ul><br>
>        </dd><br>
>      </dl><br>
> diff --git a/docs/schemas/storagefilefeatures.rng b/docs/schemas/storagefilefeatures.rng<br>
> index 424b4e2..0cf3513 100644<br>
> --- a/docs/schemas/storagefilefeatures.rng<br>
> +++ b/docs/schemas/storagefilefeatures.rng<br>
> @@ -17,6 +17,9 @@<br>
>            <element name='lazy_refcounts'><br>
>              <empty/><br>
>            </element><br>
> +          <element name='nocow'><br>
> +            <empty/><br>
> +          </element><br>
>          </optional><br>
>        </interleave><br>
>      </element><br>
> diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c<br>
> index 22e38c1..b6409a6 100644<br>
> --- a/src/conf/storage_conf.c<br>
> +++ b/src/conf/storage_conf.c<br>
> @@ -1398,9 +1398,6 @@ virStorageVolDefParseXML(virStoragePoolDefPtr pool,<br>
>          if ((n = virXPathNodeSet("./target/features/*", ctxt, &nodes)) < 0)<br>
>              goto error;<br>
><br>
> -        if (!ret->target.compat && VIR_STRDUP(ret->target.compat, "1.1") < 0)<br>
> -            goto error;<br>
> -<br>
>          if (!(ret->target.features = virBitmapNew(VIR_STORAGE_FILE_FEATURE_LAST)))<br>
>              goto error;<br>
><br>
> @@ -1412,6 +1409,12 @@ virStorageVolDefParseXML(virStoragePoolDefPtr pool,<br>
>                                 (const char*)nodes[i]->name);<br>
>                  goto error;<br>
>              }<br>
> +<br>
> +            if (f == VIR_STORAGE_FILE_FEATURE_LAZY_REFCOUNTS) {<br>
> +                if (!ret->target.compat && VIR_STRDUP(ret->target.compat, "1.1") < 0)<br>
> +                    goto error;<br>
> +            }<br>
> +<br>
>              ignore_value(virBitmapSetBit(ret->target.features, f));<br>
>          }<br>
>          VIR_FREE(nodes);<br>
> diff --git a/src/storage/storage_backend.c b/src/storage/storage_backend.c<br>
> index b08d646..b4ab866 100644<br>
> --- a/src/storage/storage_backend.c<br>
> +++ b/src/storage/storage_backend.c<br>
> @@ -423,7 +423,7 @@ virStorageBackendCreateRaw(virConnectPtr conn ATTRIBUTE_UNUSED,<br>
>          operation_flags |= VIR_FILE_OPEN_FORK;<br>
><br>
>      if ((fd = virFileOpenAs(vol->target.path,<br>
> -                            O_RDWR | O_CREAT | O_EXCL,<br>
> +                            O_RDWR | O_CREAT,<br>
<br>
</div></div>I don't think this is safe. I see why are you doing this [2], but what<br>
if user hadn't specified  nocow feature? Then we might overwrite an<br>
existing file. And we want to do that.<br>
<div class="im"><br></div></blockquote><div>I'm aware of the overwrite issue. But in fact, if there is an existing volume<br>with the same name, it will be checked out in earlier stage.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div class="im">
>                              vol->target.perms.mode,<br>
>                              vol->target.perms.uid,<br>
>                              vol->target.perms.gid,<br>
> @@ -729,7 +729,7 @@ virStorageBackendCreateQemuImgOpts(char **opts,<br>
>                      break;<br>
><br>
>                  /* coverity[dead_error_begin] */<br>
> -                case VIR_STORAGE_FILE_FEATURE_LAST:<br>
> +                default:<br>
<br>
</div>No, please no. The whole intent of having the enum enumerated explicitly<br>
is: whenever a new item is added to a enum compiler will find all the<br>
places that needs adjustment.<br>
<div class="im"><br>
>                      ;<br>
>                  }<br>
>                  virBufferAsprintf(&buf, "%s,",<br>
> diff --git a/src/storage/storage_backend_fs.c b/src/storage/storage_backend_fs.c<br>
> index 95783be..dc26f6d 100644<br>
> --- a/src/storage/storage_backend_fs.c<br>
> +++ b/src/storage/storage_backend_fs.c<br>
> @@ -51,6 +51,13 @@<br>
>  #include "virfile.h"<br>
>  #include "virlog.h"<br>
>  #include "virstring.h"<br>
> +#ifdef __linux__<br>
> +# include <sys/ioctl.h><br>
> +# include <linux/fs.h><br>
> +#ifndef FS_NOCOW_FL<br>
> +#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */<br>
> +#endif<br>
> +#endif<br>
><br>
>  #define VIR_FROM_THIS VIR_FROM_STORAGE<br>
><br>
> @@ -1061,6 +1068,7 @@ _virStorageBackendFileSystemVolBuild(virConnectPtr conn,<br>
>  {<br>
>      virStorageBackendBuildVolFrom create_func;<br>
>      int tool_type;<br>
> +    bool nocow;<br>
<br>
</div>This should rather be initialized to false ...[1]<br>
<div class="im"><br>
><br>
>      if (inputvol) {<br>
>          if (vol->target.encryption != NULL) {<br>
> @@ -1090,6 +1098,46 @@ _virStorageBackendFileSystemVolBuild(virConnectPtr conn,<br>
>          return -1;<br>
>      }<br>
><br>
> +    if (vol->target.features)<br>
> +        ignore_value(virBitmapGetBit(vol->target.features,<br>
> +                                     VIR_STORAGE_FILE_FEATURE_NOCOW, &nocow));<br>
> +    if (nocow) {<br>
<br>
</div>[1] .. otherwise we might use an uninitialized value here.<br>
<br>
> +#ifdef __linux__<br>
<br>
Okay, btrfs is currently supported only on linux.<br>
<div class="im"><br>
> +        /* create an empty file and set nocow flag.<br>
> +         * This could optimize performance on file system like btrfs.<br>
> +         */<br>
> +        int attr, fd;<br>
> +        int operation_flags = VIR_FILE_OPEN_FORCE_MODE | VIR_FILE_OPEN_FORCE_OWNER;<br>
> +        if (pool->def->type == VIR_STORAGE_POOL_NETFS)<br>
> +            operation_flags |= VIR_FILE_OPEN_FORK;<br>
> +<br>
> +        if ((fd = virFileOpenAs(vol->target.path,<br>
> +                                O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE,<br>
> +                                vol->target.perms.mode,<br>
> +                                vol->target.perms.uid,<br>
> +                                vol->target.perms.gid,<br>
> +                                operation_flags)) < 0) {<br>
> +            virReportSystemError(-fd,<br>
> +                                 _("Failed to create file '%s'"),<br>
> +                                 vol->target.path);<br>
> +            return -1;<br>
> +        }<br>
<br>
</div>2: as you say, btrfs allows setting NOCOW flag only on an empty file<br>
(I'm taking your word for it, haven't checked myself). However, with<br>
this approach we might get into troubles when 'nocow' wasn't specified<br>
in the XML. What would be more appropriate is to pass the NOCOW flag<br>
down the path and do ioctl() just after the virFileOpenAs in<br>
virStorageBackendCreateRaw().<br></blockquote><div><br></div><div>There are three create functions according to the vol format:<br>virStorageBackendCreateRaw, virStorageBackendCreateQemuImg and <br>virStorageBackendCreateQcowCreate. To pass down NOCOW flag, we need to handle it  <br>
in all three places. Last two will call 'qemu-img' and 'qcow-create' commands to create<br>the image, NOCOW option is not supported yet in those commands.<br><br></div><div>BTW, why need qcow-create, but not using 'qemu-img create -f qcow'.<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div class="im"><br>
> +<br>
> +        /* This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will<br>
> +         * be ignored since any failure of this operation should not block the<br>
> +         * left work.<br>
> +         */<br>
> +        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {<br>
> +            attr |= FS_NOCOW_FL;<br>
> +            ioctl(fd, FS_IOC_SETFLAGS, &attr);<br>
> +        }<br>
> +<br>
> +        VIR_FORCE_CLOSE(fd);<br>
> +#endif<br>
<br>
</div>I think this #endif is premature. Should we:<br>
<br>
#else<br>
    virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, _("nocow is not<br>
supported on this platform");<br>
#endif<br>
<br>
instead? BTW:<br>
<div><div class="h5"><br>
> +        if (virBitmapClearBit(vol->target.features, VIR_STORAGE_FILE_FEATURE_NOCOW) < 0)<br>
> +            return -1;<br>
> +    }<br>
> +<br>
>      if (create_func(conn, pool, vol, inputvol, flags) < 0)<br>
>          return -1;<br>
>      return 0;<br>
> diff --git a/src/util/virstoragefile.c b/src/util/virstoragefile.c<br>
> index e45236f..6e34e4c 100644<br>
> --- a/src/util/virstoragefile.c<br>
> +++ b/src/util/virstoragefile.c<br>
> @@ -62,6 +62,7 @@ VIR_ENUM_IMPL(virStorageFileFormat,<br>
>  VIR_ENUM_IMPL(virStorageFileFeature,<br>
>                VIR_STORAGE_FILE_FEATURE_LAST,<br>
>                "lazy_refcounts",<br>
> +              "nocow",<br>
>                )<br>
><br>
>  enum lv_endian {<br>
> diff --git a/src/util/virstoragefile.h b/src/util/virstoragefile.h<br>
> index 7bd2fe0..f45743a 100644<br>
> --- a/src/util/virstoragefile.h<br>
> +++ b/src/util/virstoragefile.h<br>
> @@ -62,6 +62,7 @@ VIR_ENUM_DECL(virStorageFileFormat);<br>
><br>
>  enum virStorageFileFeature {<br>
>      VIR_STORAGE_FILE_FEATURE_LAZY_REFCOUNTS = 0,<br>
> +    VIR_STORAGE_FILE_FEATURE_NOCOW = 1,<br>
<br>
</div></div>This = 1 shouldn't be needed, but doesn't hurt either.<br>
<br>
><br>
>      VIR_STORAGE_FILE_FEATURE_LAST<br>
>  };<br>
><br>
<br>
Looking forward to v3. Which reminds me, can you please note the version<br>
of the patch in the subject line?<br>
<span class=""><font color="#888888"><br>
Michal<br>
<br>
</font></span></blockquote></div><br></div></div>