[Libguestfs] [PATCH nbdkit 1/9] server: Implement extents/can_extents calls for plugins and filters.

Richard W.M. Jones rjones at redhat.com
Tue Mar 19 16:34:58 UTC 2019


This pair of calls allows plugins to describe which extents in the
virtual disk are allocated, holes or zeroes.
---
 docs/nbdkit-filter.pod  | 107 ++++++++
 docs/nbdkit-plugin.pod  | 103 +++++++
 include/nbdkit-common.h |  10 +-
 include/nbdkit-filter.h |  25 +-
 include/nbdkit-plugin.h |   6 +-
 server/internal.h       |   5 +
 server/extents.c        | 574 ++++++++++++++++++++++++++++++++++++++++
 server/filters.c        |  61 +++++
 server/plugins.c        |  56 +++-
 server/test-extents.c   | 151 +++++++++++
 .gitignore              |   1 +
 server/Makefile.am      |  18 +-
 12 files changed, 1111 insertions(+), 6 deletions(-)

diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod
index dc9a262..7b25c8e 100644
--- a/docs/nbdkit-filter.pod
+++ b/docs/nbdkit-filter.pod
@@ -350,6 +350,8 @@ calls.
 
 =head2 C<.can_zero>
 
+=head2 C<.can_extents>
+
 =head2 C<.can_fua>
 
 =head2 C<.can_multi_conn>
@@ -365,6 +367,8 @@ calls.
                   void *handle);
  int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
                   void *handle);
+ int (*can_extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+                     void *handle);
  int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
                  void *handle);
  int (*can_multi_conn) (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -513,6 +517,109 @@ value to return to the client.  The filter should never fail with
 C<EOPNOTSUPP> (while plugins have automatic fallback to C<.pwrite>,
 filters do not).
 
+=head2 C<.extents>
+
+ int (*extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+                 void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+                 struct nbdkit_extents_map *extents_map,
+                 int *err);
+
+This intercepts the plugin C<.extents> method and can be used to
+modify extent requests.
+
+This function will not be called if C<.can_extents> returned false; in
+turn, the filter should not call C<next_ops-E<gt>extents> if
+C<next_ops-E<gt>can_extents> did not return true.
+
+The C<extents_map> parameter passed to this function is empty
+(representing a fully allocated disk).  If the filter does not need to
+adjust extents from the underlying plugin it can simply pass this
+through to the layer below:
+
+ myfilter_extents (...)
+ {
+   return next_ops->extents (nxdata, count, offset, flags,
+                             extents_map, err);
+ }
+
+It is also possible for filters to transform the extents map received
+back from the layer below.  Usually this must be done by allocating a
+new, empty map which is passed to the layer below.  Without error
+handling it would look like this:
+
+ myfilter_extents (...)
+ {
+   struct nbdkit_extents_map *map2 = nbdkit_extents_new ();
+   next_ops->extents (nxdata, count, offset, flags, map2, err);
+   /* transform map2 and return results in extents_map */
+   nbdkit_extents_foreach (map2, transform_offset, extents_map, /*..*/);
+   nbdkit_extents_free (map2);
+ }
+
+If there is an error, C<.extents> should call C<nbdkit_error> with an
+error message B<and> return -1 with C<err> set to the positive errno
+value to return to the client.
+
+=head3 Allocating and freeing nbdkit_extents_map
+
+Two functions are provided to filters only for allocating and freeing
+the map:
+
+ struct nbdkit_extents_map *nbdkit_extents_new ();
+
+Allocates and returns a new, empty extents map.  The initial state is
+that the whole disk is allocated with data.
+
+On error this function can return C<NULL>.  In this case it calls
+C<nbdkit_error> and/or C<nbdkit_set_error> as required.
+
+ void nbdkit_extents_free (struct nbdkit_extents_map *);
+
+Frees an existing extents map.
+
+=head3 Iterating over nbdkit_extents_map
+
+One function is provided to filters only to iterate over the extents
+map:
+
+ typedef int (*nbdkit_extents_callback) (uint64_t offset, uint64_t length,
+                                         uint32_t type, void *opaque);
+
+ int nbdkit_extents_foreach (
+            const struct nbdkit_extents_map *extents_map,
+            nbdkit_extents_callback fn, void *opaque,
+            uint32_t flags,
+            uint64_t range_offset, uint64_t range_length);
+
+Iterate over the extents in ascending order, calling C<fn> for each.
+C<fn> should return 0 on success or -1 on failure.  If a call to C<fn>
+fails then C<nbdkit_extents_foreach> also fails immediately.
+
+C<nbdkit_extents_foreach> returns C<0> on success or C<-1> on failure.
+On internal failures it calls C<nbdkit_error> or C<nbdkit_set_error>.
+If C<fn> fails it is expected that C<fn> calls those functions as
+necessary.
+
+The C<flags> parameter can contain:
+
+=over 4
+
+=item C<NBDKIT_EXTENTS_FOREACH_FLAG_ONE>
+
+Only call C<fn> for the first extent, and then return.  (This is used
+to implement C<NBD_CMD_FLAG_REQ_ONE> in the NBD protocol.)
+
+=item C<NBDKIT_EXTENTS_FOREACH_FLAG_RANGE>
+
+If this flag is included then the C<range_offset> and C<range_length>
+parameters are used, so only extents overlapping
+C<[range_offset...range_offset+range_length-1]> are returned.
+
+If this flag is not included in C<flags> then the range parameters are
+I<ignored>.
+
+=back
+
 =head1 ERROR HANDLING
 
 If there is an error in the filter itself, the filter should call
diff --git a/docs/nbdkit-plugin.pod b/docs/nbdkit-plugin.pod
index 47545f3..7b7b38b 100644
--- a/docs/nbdkit-plugin.pod
+++ b/docs/nbdkit-plugin.pod
@@ -555,6 +555,20 @@ This callback is not required.  If omitted, then nbdkit always tries
 C<.zero> first if it is present, and gracefully falls back to
 C<.pwrite> if C<.zero> was absent or failed with C<EOPNOTSUPP>.
 
+=head2 C<.can_extents>
+
+ int can_extents (void *handle);
+
+This is called during the option negotiation phase to find out if the
+plugin supports detecting allocated (non-sparse) regions of the disk
+with the C<.extents> callback.
+
+If there is an error, C<.can_extents> should call C<nbdkit_error> with
+an error message and return C<-1>.
+
+This callback is not required.  If omitted, then we return true iff a
+C<.extents> callback has been defined.
+
 =head2 C<.can_fua>
 
  int can_fua (void *handle);
@@ -717,6 +731,95 @@ If there is an error, C<.zero> should call C<nbdkit_error> with an
 error message, and C<nbdkit_set_error> to record an appropriate error
 (unless C<errno> is sufficient), then return C<-1>.
 
+=head2 C<.extents>
+
+ int extents (void *handle, uint32_t count, uint64_t offset,
+              uint32_t flags,
+              struct nbdkit_extents_map *extents_map);
+
+During the data serving phase, this callback is used to detect
+allocated, sparse and zeroed regions of the disk.
+
+This function will not be called if C<.can_extents> returned false.
+nbdkit's default behaviour in this case is to treat the whole virtual
+disk as if it was allocated.
+
+The callback should detect and return the list of extents overlapping
+the range C<[offset...offset+count-1]>.  The C<extents_map> parameter
+points to an opaque object which the callback should fill in by
+calling C<nbdkit_extent_add> etc.  See L</Extents map> below.
+
+The C<flags> parameter may contain the flag C<NBDKIT_FLAG_REQ_ONE>
+which means that the client is only requesting information about the
+extent overlapping C<offset>.  The plugin may ignore this flag, or as
+an optimization it may return just a single extent for C<offset>.
+(Note that implementing this optimization correctly is difficult and
+subtle.  You must ensure that the upper limit of the single extent
+returned is correct.  If unsure then the safest option is to ignore
+this flag.)
+
+If there is an error, C<.extents> should call C<nbdkit_error> with an
+error message, and C<nbdkit_set_error> to record an appropriate error
+(unless C<errno> is sufficient), then return C<-1>.
+
+=head3 Extents map
+
+The plugin C<extents> callback is passed an opaque pointer C<struct
+nbdkit_extents_map *extents_map>.  This structure represents a list of
+L<filesystem extents|https://en.wikipedia.org/wiki/Extent_(file_systems)>
+describing which areas of the disk are allocated, which are sparse
+(“holes”), and, if supported, which are zeroes.  Initially the map
+describes a single allocated data extent covering the entire virtual
+disk.
+
+The C<extents> callback has to fill in any holes or zeroes areas by
+calling C<nbdkit_extent*> functions, to describe all extents
+overlapping the range C<[offset...offset+count-1]>.  (nbdkit will
+ignore extents outside this range so plugins may, if it is easier to
+implement, return all extent information for the whole disk.)
+
+The following functions are available for plugins or filters to call:
+
+ int nbdkit_extent_add (struct nbdkit_extents_map *extents_map,
+                        uint64_t offset, uint64_t length, uint32_t type);
+
+Add an extent covering C<[offset...offset+length-1]> of one of
+the following four types:
+
+=over 4
+
+=item C<type = 0>
+
+A normal, allocated data extent.
+
+=item C<type = NBDKIT_EXTENT_HOLE|NBDKIT_EXTENT_ZERO>
+
+An unallocated extent, a.k.a. a “hole”, which reads back as zeroes.
+This is the normal type of hole applicable to most disks.
+
+=item C<type = NBDKIT_EXTENT_ZERO>
+
+An allocated extent which is known to contain only zeroes.
+
+=item C<type = NBDKIT_EXTENT_HOLE>
+
+An unallocated extent (hole) which does not read back as zeroes.  Note
+this should only be used in specialized circumstances such as when
+writing a plugin for (or to emulate) certain SCSI drives which do not
+guarantee that trimmed blocks read back as zeroes.
+
+=back
+
+If the new extent overlaps some previous extent in the map then the
+overlapping part(s) are overwritten with the new type.  Adjacent
+extents of the same type may also be coalesced.  Thus you shouldn't
+assume that C<nbdkit_extent_add> always adds exactly one more extent
+to the map.
+
+C<nbdkit_extent_add> returns C<0> on success or C<-1> on failure.  On
+failure C<nbdkit_error> and/or C<nbdkit_set_error> has already been
+called.
+
 =head1 THREADS
 
 Each nbdkit plugin must declare its thread safety model by defining
diff --git a/include/nbdkit-common.h b/include/nbdkit-common.h
index cb9954e..8526171 100644
--- a/include/nbdkit-common.h
+++ b/include/nbdkit-common.h
@@ -1,5 +1,5 @@
 /* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -60,11 +60,15 @@ extern "C" {
 
 #define NBDKIT_FLAG_MAY_TRIM (1<<0) /* Maps to !NBD_CMD_FLAG_NO_HOLE */
 #define NBDKIT_FLAG_FUA      (1<<1) /* Maps to NBD_CMD_FLAG_FUA */
+#define NBDKIT_FLAG_REQ_ONE  (1<<2) /* Maps to NBD_CMD_FLAG_REQ_ONE */
 
 #define NBDKIT_FUA_NONE       0
 #define NBDKIT_FUA_EMULATE    1
 #define NBDKIT_FUA_NATIVE     2
 
+#define NBDKIT_EXTENT_HOLE    (1<<0) /* Same as NBD_STATE_HOLE */
+#define NBDKIT_EXTENT_ZERO    (1<<1) /* Same as NBD_STATE_ZERO */
+
 extern void nbdkit_error (const char *msg, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 extern void nbdkit_verror (const char *msg, va_list args);
 extern void nbdkit_debug (const char *msg, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
@@ -76,6 +80,10 @@ extern int nbdkit_parse_bool (const char *str);
 extern int nbdkit_read_password (const char *value, char **password);
 extern char *nbdkit_realpath (const char *path);
 
+struct nbdkit_extents_map;
+extern int nbdkit_extent_add (struct nbdkit_extents_map *,
+                              uint64_t offset, uint64_t length, uint32_t type);
+
 /* A static non-NULL pointer which can be used when you don't need a
  * per-connection handle.
  */
diff --git a/include/nbdkit-filter.h b/include/nbdkit-filter.h
index 71c06c8..4efb98e 100644
--- a/include/nbdkit-filter.h
+++ b/include/nbdkit-filter.h
@@ -1,5 +1,5 @@
 /* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -44,6 +44,19 @@ extern "C" {
 
 #define NBDKIT_FILTER_API_VERSION 2
 
+#define NBDKIT_EXTENTS_FOREACH_FLAG_ONE   1
+#define NBDKIT_EXTENTS_FOREACH_FLAG_RANGE 2
+
+extern struct nbdkit_extents_map *nbdkit_extents_new (void);
+extern void nbdkit_extents_free (struct nbdkit_extents_map *);
+typedef int (*nbdkit_extents_callback) (uint64_t offset, uint64_t length,
+                                        uint32_t type, void *opaque);
+extern int nbdkit_extents_foreach (const struct nbdkit_extents_map *,
+                                   nbdkit_extents_callback fn, void *opaque,
+                                   uint32_t flags,
+                                   uint64_t range_offset,
+                                   uint64_t range_length);
+
 typedef int nbdkit_next_config (void *nxdata,
                                 const char *key, const char *value);
 typedef int nbdkit_next_config_complete (void *nxdata);
@@ -58,6 +71,7 @@ struct nbdkit_next_ops {
   int (*is_rotational) (void *nxdata);
   int (*can_trim) (void *nxdata);
   int (*can_zero) (void *nxdata);
+  int (*can_extents) (void *nxdata);
   int (*can_fua) (void *nxdata);
   int (*can_multi_conn) (void *nxdata);
 
@@ -71,6 +85,9 @@ struct nbdkit_next_ops {
                int *err);
   int (*zero) (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
                int *err);
+  int (*extents) (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
+                  struct nbdkit_extents_map *extents_map,
+                  int *err);
 };
 
 struct nbdkit_filter {
@@ -120,6 +137,8 @@ struct nbdkit_filter {
                    void *handle);
   int (*can_zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
                    void *handle);
+  int (*can_extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+                      void *handle);
   int (*can_fua) (struct nbdkit_next_ops *next_ops, void *nxdata,
                   void *handle);
   int (*can_multi_conn) (struct nbdkit_next_ops *next_ops, void *nxdata,
@@ -140,6 +159,10 @@ struct nbdkit_filter {
   int (*zero) (struct nbdkit_next_ops *next_ops, void *nxdata,
                void *handle, uint32_t count, uint64_t offset, uint32_t flags,
                int *err);
+  int (*extents) (struct nbdkit_next_ops *next_ops, void *nxdata,
+                  void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+                  struct nbdkit_extents_map *extents_map,
+                  int *err);
 };
 
 #define NBDKIT_REGISTER_FILTER(filter)                                  \
diff --git a/include/nbdkit-plugin.h b/include/nbdkit-plugin.h
index d43b2f5..76cec54 100644
--- a/include/nbdkit-plugin.h
+++ b/include/nbdkit-plugin.h
@@ -1,5 +1,5 @@
 /* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -125,6 +125,10 @@ struct nbdkit_plugin {
   const char *magic_config_key;
 
   int (*can_multi_conn) (void *handle);
+
+  int (*can_extents) (void *handle);
+  int (*extents) (void *handle, uint32_t count, uint64_t offset, uint32_t flags,
+                  struct nbdkit_extents_map *extents_map);
 };
 
 extern void nbdkit_set_error (int err);
diff --git a/server/internal.h b/server/internal.h
index d40a82d..d0da213 100644
--- a/server/internal.h
+++ b/server/internal.h
@@ -274,6 +274,7 @@ struct backend {
   int (*is_rotational) (struct backend *, struct connection *conn);
   int (*can_trim) (struct backend *, struct connection *conn);
   int (*can_zero) (struct backend *, struct connection *conn);
+  int (*can_extents) (struct backend *, struct connection *conn);
   int (*can_fua) (struct backend *, struct connection *conn);
   int (*can_multi_conn) (struct backend *, struct connection *conn);
 
@@ -287,6 +288,10 @@ struct backend {
                uint64_t offset, uint32_t flags, int *err);
   int (*zero) (struct backend *, struct connection *conn, uint32_t count,
                uint64_t offset, uint32_t flags, int *err);
+  int (*extents) (struct backend *, struct connection *conn, uint32_t count,
+                  uint64_t offset, uint32_t flags,
+                  struct nbdkit_extents_map *extents_map,
+                  int *err);
 };
 
 /* plugins.c */
diff --git a/server/extents.c b/server/extents.c
new file mode 100644
index 0000000..f17ba05
--- /dev/null
+++ b/server/extents.c
@@ -0,0 +1,574 @@
+/* nbdkit
+ * Copyright (C) 2019 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include "minmax.h"
+
+#include "internal.h"
+
+/* Trivial implementation as an ordered list of extents.  We will
+ * probably have to change this to a more efficient data structure if
+ * we have plugins with lots of extents.
+ *
+ * Invariants:
+ * - extents are stored in ascending order
+ * - extents must not overlap
+ * - adjacent extents must not have the same type
+ *
+ * Gaps are treated as type 0 (allocated data).
+ */
+struct nbdkit_extents_map {
+  struct extent *extents;
+  size_t nr_extents, allocated;
+};
+
+struct extent {
+  uint64_t offset;
+  uint64_t length;
+  uint32_t type;
+};
+
+struct nbdkit_extents_map *
+nbdkit_extents_new (void)
+{
+  struct nbdkit_extents_map *r;
+
+  r = malloc (sizeof *r);
+  if (r == NULL) {
+    nbdkit_error ("nbdkit_extents_new: malloc: %m");
+    return NULL;
+  }
+  r->extents = NULL;
+  r->nr_extents = r->allocated = 0;
+  return r;
+}
+
+void
+nbdkit_extents_free (struct nbdkit_extents_map *map)
+{
+  if (map) {
+    free (map->extents);
+    free (map);
+  }
+}
+
+static ssize_t
+find_extent_recursive (const struct nbdkit_extents_map *map,
+                       uint64_t offset, size_t first, size_t last)
+{
+  size_t i, h;
+
+  assert (first <= last);
+  assert (offset >= map->extents[first].offset);
+
+  if (last - first <= 8) {
+    /* Do a linear search for the last bit. */
+    for (i = first+1; i <= last; ++i) {
+      if (offset < map->extents[i].offset)
+        return i-1;
+    }
+    return last;
+  }
+
+  h = first + (last - first) / 2;
+
+  if (offset < map->extents[h].offset)
+    return find_extent_recursive (map, offset, first, h-1);
+  else
+    return find_extent_recursive (map, offset, h, last);
+}
+
+/* Find the extent before or containing offset.  If offset is inside
+ * the extent, the extent index is returned.  If offset is not in any
+ * extent, but there is an extent with a lower offset, then the index
+ * of the nearest lower extent is returned.  If offset is before the
+ * zeroth extent then -1 is returned.
+ */
+static ssize_t
+find_extent (const struct nbdkit_extents_map *map, uint64_t offset)
+{
+  if (map->nr_extents == 0 || offset < map->extents[0].offset)
+    return -1;
+
+  return find_extent_recursive (map, offset, 0, map->nr_extents-1);
+}
+
+/* Erase extents [i...i+count-1] from the array.  Extents after are
+ * renumbered.
+ */
+static void
+erase_extents (struct nbdkit_extents_map *map,
+                size_t i, size_t count)
+{
+  assert (i+count <= map->nr_extents);
+
+  memmove (&map->extents[i],
+           &map->extents[i+count],
+           (map->nr_extents - (i+count)) * sizeof (struct extent));
+  map->nr_extents -= count;
+}
+
+/* Insert new_extent in the map at index i.  Existing extents at index
+ * [i...] are moved up by one.
+ */
+static int
+insert_extent (struct nbdkit_extents_map *map,
+               struct extent new_extent, size_t i)
+{
+  assert (i <= map->nr_extents);
+
+  if (map->nr_extents >= map->allocated) {
+    size_t new_allocated;
+    struct extent *new_extents;
+
+    new_allocated = map->allocated;
+    if (new_allocated == 0)
+      new_allocated = 1;
+    new_allocated *= 2;
+    new_extents =
+      realloc (map->extents, new_allocated * sizeof (struct extent));
+    if (new_extents == NULL) {
+      nbdkit_error ("extent_add: realloc: %m");
+      return -1;
+    }
+    map->allocated = new_allocated;
+    map->extents = new_extents;
+  }
+
+  memmove (&map->extents[i+1], &map->extents[i],
+           (map->nr_extents - i) * sizeof (struct extent));
+  map->nr_extents++;
+  map->extents[i] = new_extent;
+  return 0;
+}
+
+int
+nbdkit_extent_add (struct nbdkit_extents_map *map,
+                   uint64_t offset, uint64_t length, uint32_t type)
+{
+  ssize_t lo, hi;
+  bool coalesce_below, coalesce_above;
+  struct extent new_extent;
+
+  /* This might be considered an error, but be flexible for badly
+   * written plugins.
+   */
+  if (length == 0)
+    return 0;
+
+  /* Don't allow extents to exceeed the _signed_ 64 bit limit that is
+   * used in the rest of nbdkit.
+   */
+  if (offset > INT64_MAX ||
+      offset + length < offset ||
+      offset + length > INT64_MAX) {
+    nbdkit_error ("nbdkit_extent_add: "
+                  "extent cannot exceed signed 64 bit limit: "
+                  "offset=%" PRIu64 " length=%" PRIu64,
+                  offset, length);
+    return -1;
+  }
+
+ again:
+  lo = find_extent (map, offset);
+  hi = find_extent (map, offset + length - 1);
+
+  /* "lo" and "hi" are -1 or they point to the extent
+   * overlapping-or-below the corresponding endpoints of the new
+   * extent.
+   *
+   * The algorithm here has two parts: Firstly we remove any
+   * overlapping extents (this may involve erasing extents completely
+   * or shrinking them).  Secondly we insert the new extent at the
+   * right place in the array, if necessary allowing it to coalesce
+   * with the extent above or below.
+   *
+   * To remove overlapping extents we consider 7 cases:
+   *
+   * A.0   lo == -1 (there is no old extent before the new extent)
+   *     => do nothing
+   *
+   * A.1   ┌─────┐     offset of new extent exactly at
+   *       │ lo  │     beginning of lo and lo smaller/equal
+   *       └─────┘     to new extent
+   *       ↑
+   *       ▐▓▓▓▓▓▓▓▓▓
+   *     => erase lo
+   *     => creates situation A.0 or A.5
+   *
+   * A.2   ┌──────────┐   offset of new extent exactly at
+   *       │ lo       │   beginning of lo and lo bigger than
+   *       └──────────┘   new extent
+   *       ↑
+   *       ▐▓▓▓▓▓▓▓▓▓
+   *     => shrink lo “upwards”
+   *     => creates situation A.0 or A.5
+   *
+   * A.3   ┌──────────────┐  new extent in the middle
+   *       │ lo           │  of lo
+   *       └──────────────┘
+   *         ↑
+   *         ▓▓▓▓▓▓▓▓▓
+   *     => split lo
+   *     => creates situation A.5
+   *
+   * A.4   ┌─────┐     offset of new extent in the middle
+   *       │ lo  │     of lo
+   *       └─────┘
+   *         ↑
+   *         ▓▓▓▓▓▓▓▓▓
+   *     => shrink lo
+   *     => creates situation A.5
+   *
+   * A.5   ┌─────┐     offset of new extent after lo
+   *       │ lo  │
+   *       └─────┘
+   *               ↑
+   *               ▓▓▓▓▓▓▓▓▓
+   *     => do nothing
+   *
+   * B.0   lo == hi or hi == -1 (there is no higher extent)
+   *     => do nothing
+   *
+   * B.1          ┌─────┐     extent hi is fully covered
+   *              │ hi  │     by new extent
+   *              └─────┘
+   *                       ↑
+   *       ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓
+   *     => erase hi
+   *     => creates situation B.0
+   *
+   * B.2          ┌─────┐     extent hi is partly overlapped
+   *              │ hi  │     by new extent
+   *              └─────┘
+   *                 ↑
+   *       ▓▓▓▓▓▓▓▓▓▓▓
+   *     => shrink hi
+   *     => creates situation B.0
+   *
+   * In addition extents [lo+1...hi-1] (if they exist) are erased
+   * since they must be fully covered by the new extent.
+   *
+   * At the end we're in situation A.0/A.5 and B.0, and so no old
+   * extents overlap the new extent.
+   *
+   * The insertion can then be done with optional coalescing (see code
+   * below for details - this is a lot simpler).
+   */
+
+  /* A.0 */
+  if (lo == -1)
+    goto erase_middle_extents;
+  /* A.5 */
+  if (offset >= map->extents[lo].offset + map->extents[lo].length)
+    goto erase_middle_extents;
+
+  /* A.1 or A.2 */
+  if (offset == map->extents[lo].offset) {
+    if (map->extents[lo].offset + map->extents[lo].length <= offset + length) {
+      /* A.1 */
+      erase_extents (map, lo, 1);
+    }
+    else {
+      /* A.2 */
+      uint64_t end = map->extents[lo].offset + map->extents[lo].length;
+      map->extents[lo].offset = offset + length;
+      map->extents[lo].length = end - map->extents[lo].offset;
+    }
+    goto again;
+  }
+
+  /* A.3 */
+  if (map->extents[lo].offset < offset &&
+      map->extents[lo].offset + map->extents[lo].length > offset + length) {
+    new_extent.offset = offset + length;
+    new_extent.length =
+      map->extents[lo].offset + map->extents[lo].length - new_extent.offset;
+    new_extent.type = map->extents[lo].type;
+    map->extents[lo].length = offset - map->extents[lo].offset;
+    if (insert_extent (map, new_extent, lo+1) == -1)
+      return -1;
+    goto again;
+  }
+
+  /* We must be in situation A.4 assuming analysis above is exhaustive. */
+  assert (offset > map->extents[lo].offset);
+  assert (offset < map->extents[lo].offset + map->extents[lo].length);
+  assert (map->extents[lo].offset + map->extents[lo].length <= offset + length);
+
+  map->extents[lo].length = offset - map->extents[lo].offset;
+
+ erase_middle_extents:
+  /* Erase extents [lo+1..hi-1] if they exist as they must be
+   * completely covered by the new extent.
+   */
+  if (hi >= 0 && lo+1 < hi) {
+    erase_extents (map, lo+1, hi-lo-1);
+    goto again;
+  }
+
+  /* B.0 */
+  if (lo == hi || hi == -1)
+    goto insert;
+
+  /* B.1 */
+  if (map->extents[hi].offset + map->extents[hi].length <= offset + length) {
+    erase_extents (map, hi, 1);
+    goto again;
+  }
+
+  /* We must be in situation B.2 if the analysis above is exhaustive. */
+  assert (map->extents[hi].offset < offset + length);
+
+  uint64_t end = map->extents[hi].offset + map->extents[hi].length;
+  map->extents[hi].offset = offset + length;
+  map->extents[hi].length = end - map->extents[hi].offset;
+
+ insert:
+  /* Now we know there is no extent which overlaps with the new
+   * extent, and the new extent must be inserted after lo.
+   */
+
+  /* However we have to deal with possibly coalescing the new extent
+   * with lo and/or lo+1.
+   */
+  coalesce_below =
+    lo >= 0 &&
+    map->extents[lo].type == type &&
+    map->extents[lo].offset + map->extents[lo].length == offset;
+  coalesce_above =
+    lo+1 < map->nr_extents &&
+    map->extents[lo+1].type == type &&
+    map->extents[lo+1].offset == offset + length;
+
+  if (coalesce_below && coalesce_above) {
+    /* Coalesce lo + new extent + lo+1
+     * => Extend lo to cover the whole range and remove lo+1.
+     */
+    map->extents[lo].length =
+      map->extents[lo+1].offset + map->extents[lo+1].length
+      - map->extents[lo].offset;
+    erase_extents (map, lo+1, 1);
+  }
+  else if (coalesce_below) {
+    /* Coalesce lo + new extent.
+     * => Extend lo to cover new extent.
+     */
+    map->extents[lo].length = offset + length - map->extents[lo].offset;
+  }
+  else if (coalesce_above) {
+    /* Coalesce new extent + lo+1
+     * => Extend lo+1 to cover new extent.
+     */
+    map->extents[lo+1].length =
+      map->extents[lo+1].offset + map->extents[lo+1].length - offset;
+    map->extents[lo+1].offset = offset;
+  }
+  else {
+    /* Normal case: Allocate a new extent and insert it after lo. */
+    new_extent.offset = offset;
+    new_extent.length = length;
+    new_extent.type = type;
+    if (insert_extent (map, new_extent, lo+1) == -1)
+      return -1;
+  }
+
+  return 0;
+}
+
+int
+nbdkit_extents_foreach (const struct nbdkit_extents_map *map,
+                        nbdkit_extents_callback fn, void *opaque,
+                        uint32_t flags,
+                        uint64_t range_offset, uint64_t range_length)
+{
+  const bool flag_range = flags & NBDKIT_EXTENTS_FOREACH_FLAG_RANGE;
+  const bool flag_one = flags & NBDKIT_EXTENTS_FOREACH_FLAG_ONE;
+  uint64_t offset, next;
+  size_t i;
+
+  if (flag_range) {
+    if (range_offset + range_length < range_offset) {
+      nbdkit_error ("nbdkit_extents_foreach: invalid range: "
+                    "offset=%" PRIu64 " length=%" PRIu64,
+                    range_offset, range_length);
+      return -1;
+    }
+  }
+  else {
+    range_offset = 0;
+    range_length = INT64_MAX + UINT64_C(1);
+  }
+
+  for (i = 0, offset = range_offset;
+       offset < range_offset + range_length;
+       offset = next) {
+    next = offset;
+
+    if (i >= map->nr_extents) {
+      /* Beyond last extent => Type 0 to end of range. */
+      next = range_offset + range_length;
+      if (fn (offset, next - offset, 0, opaque) == -1)
+        return -1;
+      if (flag_one)
+        return 0;
+      continue;
+    }
+
+    if (offset < map->extents[i].offset) {
+      /* In a gap before the i'th extent => Type 0 to next offset. */
+      next = MIN (map->extents[i].offset, range_offset + range_length);
+      if (fn (offset, next - offset, 0, opaque) == -1)
+        return -1;
+      if (flag_one)
+        return 0;
+      continue;
+    }
+
+    if (offset < map->extents[i].offset + map->extents[i].length) {
+      /* In an extent. */
+      next = MIN (map->extents[i].offset + map->extents[i].length,
+                  range_offset + range_length);
+      if (fn (offset, next - offset, map->extents[i].type, opaque) == -1)
+        return -1;
+      if (flag_one)
+        return 0;
+      continue;
+    }
+
+    /* After an extent, increment i and iterate. */
+    i++;
+  }
+
+  return 0;
+}
+
+#ifdef IN_TEST_EXTENTS
+/* These functions are only compiled for and called from the unit tests. */
+
+void
+dump_extents_map (const struct nbdkit_extents_map *map)
+{
+  size_t i;
+
+  fprintf (stderr, "map %p:\n", map);
+  fprintf (stderr, "\tnr_extents=%zu allocated=%zu\n",
+           map->nr_extents, map->allocated);
+  for (i = 0; i < map->nr_extents; ++i) {
+    fprintf (stderr, "\textent %zu:", i);
+    fprintf (stderr,
+             "\toffset=%" PRIu64 " length=%" PRIu64 " "
+             "last=%" PRIu64 " type=%" PRIu32 "\n",
+             map->extents[i].offset, map->extents[i].length,
+             map->extents[i].offset + map->extents[i].length - 1,
+             map->extents[i].type);
+  }
+}
+
+void
+validate_extents_map (const struct nbdkit_extents_map *map)
+{
+  size_t i;
+  bool fail = false;
+
+  if (map->nr_extents > map->allocated) {
+    fprintf (stderr, "validate_extents_map: nr_extents > allocated\n");
+    fail = true;
+  }
+
+  for (i = 0; i < map->nr_extents; ++i) {
+    /* Extent length must be > 0. */
+    if (map->extents[i].length == 0) {
+      fprintf (stderr, "validate_extents_map: %zu: extent length must be > 0\n",
+               i);
+      fail = true;
+    }
+
+    /* Extent length must not wrap around, indicating that we have
+     * computed a negative length (as unsigned) somewhere.
+     */
+    if (map->extents[i].offset + map->extents[i].length <
+        map->extents[i].offset) {
+      fprintf (stderr, "validate_extents_map: %zu: negative length\n",
+               i);
+      fail = true;
+    }
+
+    if (i > 0) {
+      /* Extents are stored in strictly ascending order. */
+      if (map->extents[i-1].offset >= map->extents[i].offset) {
+        fprintf (stderr,
+                 "validate_extents_map: %zu: "
+                 "not strictly ascending order\n", i);
+        fail = true;
+      }
+
+      /* Adjacent extents must not overlap. */
+      if (map->extents[i-1].offset + map->extents[i-1].length >
+          map->extents[i].offset) {
+        fprintf (stderr,
+                 "validate_extents_map: %zu: "
+                 "overlapping extents\n", i);
+        fail = true;
+      }
+
+      /* Adjacent extents either have a different type or are
+       * separated by a gap.
+       */
+      if (map->extents[i-1].type == map->extents[i].type &&
+          map->extents[i-1].offset + map->extents[i-1].length ==
+          map->extents[i].offset) {
+        fprintf (stderr,
+                 "validate_extents_map: %zu: "
+                 "adjacent extents with same type not coalesced\n", i);
+        fail = true;
+      }
+    }
+  }
+
+  if (fail) {
+    dump_extents_map (map);
+    abort ();
+  }
+}
+
+#endif /* IN_TEST_EXTENTS */
diff --git a/server/filters.c b/server/filters.c
index 5b7abc4..e284080 100644
--- a/server/filters.c
+++ b/server/filters.c
@@ -309,6 +309,13 @@ next_can_zero (void *nxdata)
   return b_conn->b->can_zero (b_conn->b, b_conn->conn);
 }
 
+static int
+next_can_extents (void *nxdata)
+{
+  struct b_conn *b_conn = nxdata;
+  return b_conn->b->can_extents (b_conn->b, b_conn->conn);
+}
+
 static int
 next_can_fua (void *nxdata)
 {
@@ -364,6 +371,16 @@ next_zero (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
   return b_conn->b->zero (b_conn->b, b_conn->conn, count, offset, flags, err);
 }
 
+static int
+next_extents (void *nxdata, uint32_t count, uint64_t offset, uint32_t flags,
+              struct nbdkit_extents_map *extents_map,
+              int *err)
+{
+  struct b_conn *b_conn = nxdata;
+  return b_conn->b->extents (b_conn->b, b_conn->conn, count, offset, flags,
+                             extents_map, err);
+}
+
 static struct nbdkit_next_ops next_ops = {
   .get_size = next_get_size,
   .can_write = next_can_write,
@@ -371,6 +388,7 @@ static struct nbdkit_next_ops next_ops = {
   .is_rotational = next_is_rotational,
   .can_trim = next_can_trim,
   .can_zero = next_can_zero,
+  .can_extents = next_can_extents,
   .can_fua = next_can_fua,
   .can_multi_conn = next_can_multi_conn,
   .pread = next_pread,
@@ -378,6 +396,7 @@ static struct nbdkit_next_ops next_ops = {
   .flush = next_flush,
   .trim = next_trim,
   .zero = next_zero,
+  .extents = next_extents,
 };
 
 static int
@@ -511,6 +530,21 @@ filter_can_zero (struct backend *b, struct connection *conn)
     return f->backend.next->can_zero (f->backend.next, conn);
 }
 
+static int
+filter_can_extents (struct backend *b, struct connection *conn)
+{
+  struct backend_filter *f = container_of (b, struct backend_filter, backend);
+  void *handle = connection_get_handle (conn, f->backend.i);
+  struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+  debug ("%s: can_extents", f->name);
+
+  if (f->filter.can_extents)
+    return f->filter.can_extents (&next_ops, &nxdata, handle);
+  else
+    return f->backend.next->can_extents (f->backend.next, conn);
+}
+
 static int
 filter_can_fua (struct backend *b, struct connection *conn)
 {
@@ -646,6 +680,31 @@ filter_zero (struct backend *b, struct connection *conn,
                                   count, offset, flags, err);
 }
 
+static int
+filter_extents (struct backend *b, struct connection *conn,
+                uint32_t count, uint64_t offset, uint32_t flags,
+                struct nbdkit_extents_map *extents_map,
+                int *err)
+{
+  struct backend_filter *f = container_of (b, struct backend_filter, backend);
+  void *handle = connection_get_handle (conn, f->backend.i);
+  struct b_conn nxdata = { .b = f->backend.next, .conn = conn };
+
+  assert (!(flags & ~NBDKIT_FLAG_REQ_ONE));
+
+  debug ("%s: extents count=%" PRIu32 " offset=%" PRIu64 " flags=0x%" PRIx32,
+         f->name, count, offset, flags);
+
+  if (f->filter.extents)
+    return f->filter.extents (&next_ops, &nxdata, handle,
+                              count, offset, flags,
+                              extents_map, err);
+  else
+    return f->backend.next->extents (f->backend.next, conn,
+                                     count, offset, flags,
+                                     extents_map, err);
+}
+
 static struct backend filter_functions = {
   .free = filter_free,
   .thread_model = filter_thread_model,
@@ -667,6 +726,7 @@ static struct backend filter_functions = {
   .is_rotational = filter_is_rotational,
   .can_trim = filter_can_trim,
   .can_zero = filter_can_zero,
+  .can_extents = filter_can_extents,
   .can_fua = filter_can_fua,
   .can_multi_conn = filter_can_multi_conn,
   .pread = filter_pread,
@@ -674,6 +734,7 @@ static struct backend filter_functions = {
   .flush = filter_flush,
   .trim = filter_trim,
   .zero = filter_zero,
+  .extents = filter_extents,
 };
 
 /* Register and load a filter. */
diff --git a/server/plugins.c b/server/plugins.c
index 28b96ad..0bd523b 100644
--- a/server/plugins.c
+++ b/server/plugins.c
@@ -1,5 +1,5 @@
 /* nbdkit
- * Copyright (C) 2013-2018 Red Hat Inc.
+ * Copyright (C) 2013-2019 Red Hat Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -199,6 +199,8 @@ plugin_dump_fields (struct backend *b)
   HAS (trim);
   HAS (zero);
   HAS (can_multi_conn);
+  HAS (can_extents);
+  HAS (extents);
 #undef HAS
 
   /* Custom fields. */
@@ -392,6 +394,21 @@ plugin_can_zero (struct backend *b, struct connection *conn)
   return plugin_can_write (b, conn);
 }
 
+static int
+plugin_can_extents (struct backend *b, struct connection *conn)
+{
+  struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+
+  assert (connection_get_handle (conn, 0));
+
+  debug ("can_extents");
+
+  if (p->plugin.can_extents)
+    return p->plugin.can_extents (connection_get_handle (conn, 0));
+  else
+    return p->plugin.extents != NULL;
+}
+
 static int
 plugin_can_fua (struct backend *b, struct connection *conn)
 {
@@ -651,6 +668,41 @@ plugin_zero (struct backend *b, struct connection *conn,
   return r;
 }
 
+static int
+plugin_extents (struct backend *b, struct connection *conn,
+                uint32_t count, uint64_t offset, uint32_t flags,
+                struct nbdkit_extents_map *extents_map,
+                int *err)
+{
+  struct backend_plugin *p = container_of (b, struct backend_plugin, backend);
+  bool req_one = flags & NBDKIT_FLAG_REQ_ONE;
+  int r;
+
+  assert (connection_get_handle (conn, 0));
+  assert (!(flags & ~NBDKIT_FLAG_REQ_ONE));
+
+  debug ("extents count=%" PRIu32 " offset=%" PRIu64 " req_one=%d",
+         count, offset, req_one);
+
+  if (!count)
+    return 0;
+
+  if (p->plugin.extents) {
+    r = p->plugin.extents (connection_get_handle (conn, 0), count, offset,
+                           flags, extents_map);
+    if (r == -1)
+      *err = get_error (p);
+    return r;
+  }
+  else {
+    /* By default we assume that everything in the range is allocated.
+     * However as that is the default state for the empty extents_map
+     * we don't need to do anything here.
+     */
+    return 0;
+  }
+}
+
 static struct backend plugin_functions = {
   .free = plugin_free,
   .thread_model = plugin_thread_model,
@@ -672,6 +724,7 @@ static struct backend plugin_functions = {
   .is_rotational = plugin_is_rotational,
   .can_trim = plugin_can_trim,
   .can_zero = plugin_can_zero,
+  .can_extents = plugin_can_extents,
   .can_fua = plugin_can_fua,
   .can_multi_conn = plugin_can_multi_conn,
   .pread = plugin_pread,
@@ -679,6 +732,7 @@ static struct backend plugin_functions = {
   .flush = plugin_flush,
   .trim = plugin_trim,
   .zero = plugin_zero,
+  .extents = plugin_extents,
 };
 
 /* Register and load a plugin. */
diff --git a/server/test-extents.c b/server/test-extents.c
new file mode 100644
index 0000000..e07ecda
--- /dev/null
+++ b/server/test-extents.c
@@ -0,0 +1,151 @@
+/* nbdkit
+ * Copyright (C) 2018-2019 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Test the extents handling code (server/extents.c).  This stochastic
+ * test works by allocating an array representing a disk, allocating
+ * extents randomly while writing the same information into the disk
+ * array, and then reading the extents back and comparing it to what's
+ * in the disk array.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <time.h>
+#include <assert.h>
+
+#include "minmax.h"
+#include "random.h"
+
+#include "internal.h"
+
+extern void dump_extents_map (const struct nbdkit_extents_map *map);
+extern void validate_extents_map (const struct nbdkit_extents_map *map);
+
+#define DISK_SIZE (1<<10)
+static uint8_t disk[DISK_SIZE];
+
+static bool error_flagged = false;
+
+/* Stub for linking against server/extents.c. */
+void
+nbdkit_error (const char *fs, ...)
+{
+  error_flagged = true;
+}
+
+static int
+compare (uint64_t offset, uint64_t length, uint32_t type, void *mapv)
+{
+  const struct nbdkit_extents_map *map = mapv;
+  size_t j;
+
+  /* nbdkit_extents_foreach_range should ensure this is true even if
+   * there are extents which go below or above the range.
+   */
+  assert (offset >= 0);
+  assert (offset + length <= DISK_SIZE);
+
+  fprintf (stderr,
+           "compare: offset=%" PRIu64 " length=%" PRIu64 " "
+           "type=%" PRIu32 "\n",
+           offset, length, type);
+
+  for (j = offset; j < offset+length; ++j) {
+    if (disk[j] != type) {
+      fprintf (stderr, "error: disk[%zu] != expected type %" PRIu32 "\n",
+               j, type);
+      dump_extents_map (map);
+      exit (EXIT_FAILURE);
+    }
+  }
+
+  return 0;
+}
+
+int
+main (int argc, char *argv[])
+{
+  unsigned i;
+  uint64_t offset, length;
+  uint32_t type;
+  size_t j;
+  struct random_state random_state;
+  struct nbdkit_extents_map *map;
+
+  xsrandom (time (NULL), &random_state);
+
+  map = nbdkit_extents_new ();
+  assert (map != NULL);
+
+  for (i = 0; i < 1000; ++i) {
+    type = xrandom (&random_state);
+    type &= 3;
+    offset = xrandom (&random_state);
+    offset &= DISK_SIZE - 1;
+    /* XXX Change the distribution so we mainly choose small lengths
+     * but with a small possibility of choosing large lengths.
+     */
+    length = xrandom (&random_state);
+    length &= DISK_SIZE/8 - 1;
+    length++;
+
+    fprintf (stderr,
+             "insert: extent offset=%" PRIu64 " length=%" PRIu64 " "
+             "type=%" PRIu32 "\n",
+             offset, length, type);
+
+    assert (nbdkit_extent_add (map, offset, length, type) == 0);
+
+    for (j = offset; j < MIN (offset+length, DISK_SIZE); ++j)
+      disk[j] = type;
+
+    /* Ask the extents code to validate that the invariants are still
+     * true.  This calls abort(3) if not.
+     */
+    validate_extents_map (map);
+    dump_extents_map (map);
+  }
+
+  /* Read out the extents and compare them to the disk. */
+  assert (nbdkit_extents_foreach (map, compare, map,
+                                  NBDKIT_EXTENTS_FOREACH_FLAG_RANGE,
+                                  0, DISK_SIZE-1) == 0);
+
+  nbdkit_extents_free (map);
+
+  exit (EXIT_SUCCESS);
+}
diff --git a/.gitignore b/.gitignore
index dc42abd..1f8b2e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -63,6 +63,7 @@ Makefile.in
 /server/nbdkit.pc
 /server/protostrings.c
 /server/synopsis.c
+/server/test-extents
 /server/test-utils
 /stamp-h1
 /tests/disk
diff --git a/server/Makefile.am b/server/Makefile.am
index 5eb575e..5722f7b 100644
--- a/server/Makefile.am
+++ b/server/Makefile.am
@@ -43,6 +43,7 @@ nbdkit_SOURCES = \
 	connections.c \
 	crypto.c \
 	debug.c \
+	extents.c \
 	filters.c \
 	internal.h \
 	locks.c \
@@ -129,9 +130,22 @@ pkgconfig_DATA = nbdkit.pc
 
 # Unit testing
 
-TESTS = test-utils
+TESTS = \
+	test-extents \
+	test-utils
 
-check_PROGRAMS = test-utils
+check_PROGRAMS = \
+	test-extents \
+	test-utils
+
+test_extents_SOURCES = \
+	test-extents.c \
+	extents.c
+test_extents_CPPFLAGS = \
+	-DIN_TEST_EXTENTS=1 \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/common/include
+test_extents_CFLAGS = $(WARNINGS_CFLAGS) $(VALGRIND_CFLAGS)
 
 test_utils_SOURCES = \
 	test-utils.c \
-- 
2.20.1




More information about the Libguestfs mailing list