[Libguestfs] [PATCH nbdkit 3/3] Add partitioning plugin.

Richard W.M. Jones rjones at redhat.com
Mon Sep 17 15:01:25 UTC 2018


Create a partitioned virtual drive from a list of one or more files
each containing single partitions.  The plugin concatenates the files
together and generates a virtual partition table so that NBD clients
see a single partitioned disk.

For example:

  nbdkit partitioning boot.img swap.img root.img

creates a virtual disk with 3 partitions.
---
 configure.ac                                  |   2 +
 filters/partition/nbdkit-partition-filter.pod |   4 +
 plugins/file/nbdkit-file-plugin.pod           |   3 +-
 plugins/partitioning/Makefile.am              |  64 ++
 plugins/partitioning/crc32.c                  | 140 +++
 plugins/partitioning/crc32.h                  |  41 +
 .../nbdkit-partitioning-plugin.pod            | 118 +++
 plugins/partitioning/partitioning.c           | 837 ++++++++++++++++++
 plugins/split/nbdkit-split-plugin.pod         |   6 +-
 tests/Makefile.am                             |  10 +
 tests/test-partitioning1.sh                   |  83 ++
 tests/test-partitioning2.sh                   |  75 ++
 12 files changed, 1381 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index 2a4879a..3f99459 100644
--- a/configure.ac
+++ b/configure.ac
@@ -573,6 +573,7 @@ non_lang_plugins="\
         memory \
         nbd \
         null \
+        partitioning \
         pattern \
         random \
         split \
@@ -630,6 +631,7 @@ AC_CONFIG_FILES([Makefile
                  plugins/nbd/Makefile
                  plugins/null/Makefile
                  plugins/ocaml/Makefile
+                 plugins/partitioning/Makefile
                  plugins/pattern/Makefile
                  plugins/perl/Makefile
                  plugins/python/Makefile
diff --git a/filters/partition/nbdkit-partition-filter.pod b/filters/partition/nbdkit-partition-filter.pod
index ae72f3f..4a615b6 100644
--- a/filters/partition/nbdkit-partition-filter.pod
+++ b/filters/partition/nbdkit-partition-filter.pod
@@ -16,6 +16,9 @@ parameter, and count from 1.
 
 This works like the C<qemu-nbd -P> option.
 
+The opposite of this filter is L<nbdkit-partitioning-plugin(1)> which
+adds a virtual partition table to a file or files.
+
 =head1 NOTE
 
 Only MBR primary partitions and GPT partition tables are supported.
@@ -46,6 +49,7 @@ L<nbdkit(1)>,
 L<nbdkit-file-plugin(1)>,
 L<nbdkit-filter(3)>,
 L<nbdkit-offset-filter(1)>,
+L<nbdkit-partitioning-plugin(1)>,
 L<nbdkit-truncate-filter(1)>,
 L<parted(8)>.
 
diff --git a/plugins/file/nbdkit-file-plugin.pod b/plugins/file/nbdkit-file-plugin.pod
index 7d5c71b..cdd9d55 100644
--- a/plugins/file/nbdkit-file-plugin.pod
+++ b/plugins/file/nbdkit-file-plugin.pod
@@ -89,7 +89,8 @@ or block device efficiently or not.
 
 L<nbdkit(1)>,
 L<nbdkit-plugin(3)>,
-L<nbdkit-split-plugin(1)>.
+L<nbdkit-split-plugin(1)>,
+L<nbdkit-partitioning-plugin(1)>.
 
 =head1 AUTHORS
 
diff --git a/plugins/partitioning/Makefile.am b/plugins/partitioning/Makefile.am
new file mode 100644
index 0000000..e517a63
--- /dev/null
+++ b/plugins/partitioning/Makefile.am
@@ -0,0 +1,64 @@
+# nbdkit
+# Copyright (C) 2018 Red Hat Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+include $(top_srcdir)/common-rules.mk
+
+EXTRA_DIST = nbdkit-partitioning-plugin.pod
+
+plugin_LTLIBRARIES = nbdkit-partitioning-plugin.la
+
+nbdkit_partitioning_plugin_la_SOURCES = \
+	crc32.c \
+	crc32.h \
+	partitioning.c \
+	$(top_srcdir)/include/nbdkit-plugin.h
+
+nbdkit_partitioning_plugin_la_CPPFLAGS = \
+	-I$(top_srcdir)/include \
+	-I$(top_srcdir)/common/include \
+	-I.
+nbdkit_partitioning_plugin_la_CFLAGS = \
+	$(WARNINGS_CFLAGS)
+nbdkit_partitioning_plugin_la_LDFLAGS = \
+	-module -avoid-version -shared
+
+if HAVE_POD
+# Build the section 1 man page (plus an HTML copy) from the POD source.
+man_MANS = nbdkit-partitioning-plugin.1
+CLEANFILES += $(man_MANS)
+
+nbdkit-partitioning-plugin.1: nbdkit-partitioning-plugin.pod
+	$(PODWRAPPER) --section=1 --man $@ \
+	    --html $(top_builddir)/html/$@.html \
+	    $<
+
+endif HAVE_POD
diff --git a/plugins/partitioning/crc32.c b/plugins/partitioning/crc32.c
new file mode 100644
index 0000000..e707a50
--- /dev/null
+++ b/plugins/partitioning/crc32.c
@@ -0,0 +1,140 @@
+/* This code was taken from parted and indirectly from other sources
+ * as you can see from the messages below.  The license is compatible
+ * with the permissive license used in nbdkit.  - RWMJ 2018-09-16
+ */
+
+/*
+ * Dec 5, 2000 Matt Domsch <Matt_Domsch at dell.com>
+ * - Copied crc32.c from the linux/drivers/net/cipe directory.
+ * - Now pass seed as an arg
+ * - changed unsigned long to uint32_t, added #include<stdint.h>
+ * - changed len to be an unsigned long
+ * - changed crc32val to be a register
+ * - License remains unchanged!  It's still GPL-compatible!
+ */
+
+  /* ============================================================= */
+  /*  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or       */
+  /*  code or tables extracted from it, as desired without restriction.     */
+  /*                                                                        */
+  /*  First, the polynomial itself and its table of feedback terms.  The    */
+  /*  polynomial is                                                         */
+  /*  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0   */
+  /*                                                                        */
+  /*  Note that we take it "backwards" and put the highest-order term in    */
+  /*  the lowest-order bit.  The X^32 term is "implied"; the LSB is the     */
+  /*  X^31 term, etc.  The X^0 term (usually shown as "+1") results in      */
+  /*  the MSB being 1.                                                      */
+  /*                                                                        */
+  /*  Note that the usual hardware shift register implementation, which     */
+  /*  is what we're using (we're merely optimizing it by doing eight-bit    */
+  /*  chunks at a time) shifts bits into the lowest-order term.  In our     */
+  /*  implementation, that means shifting towards the right.  Why do we     */
+  /*  do it this way?  Because the calculated CRC must be transmitted in    */
+  /*  order from highest-order term to lowest-order term.  UARTs transmit   */
+  /*  characters in order from LSB to MSB.  By storing the CRC this way,    */
+  /*  we hand it to the UART in the order low-byte to high-byte; the UART   */
+  /*  sends each low-bit to high-bit; and the result is transmission bit    */
+  /*  by bit from highest- to lowest-order term without requiring any bit   */
+  /*  shuffling on our part.  Reception works similarly.                    */
+  /*                                                                        */
+  /*  The feedback terms table consists of 256, 32-bit entries.  Notes:     */
+  /*                                                                        */
+  /*      The table can be generated at runtime if desired; code to do so   */
+  /*      is shown later.  It might not be obvious, but the feedback        */
+  /*      terms simply represent the results of eight shift/xor opera-      */
+  /*      tions for all combinations of data and CRC register values.       */
+  /*                                                                        */
+  /*      The values must be right-shifted by eight bits by the "updcrc"    */
+  /*      logic; the shift must be unsigned (bring in zeroes).  On some     */
+  /*      hardware you could probably optimize the shift in assembler by    */
+  /*      using byte-swap instructions.                                     */
+  /*      polynomial $edb88320                                              */
+  /*                                                                        */
+  /*  --------------------------------------------------------------------  */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "crc32.h"
+
+static const uint32_t crc32_tab[] = {
+      0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+      0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+      0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+      0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+      0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+      0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+      0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+      0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+      0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+      0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+      0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+      0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+      0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+      0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+      0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+      0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+      0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+      0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+      0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+      0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+      0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+      0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+      0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+      0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+      0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+      0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+      0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+      0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+      0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+      0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+      0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+      0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+      0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+      0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+      0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+      0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+      0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+      0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+      0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+      0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+      0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+      0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+      0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+      0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+      0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+      0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+      0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+      0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+      0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+      0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+      0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+      0x2d02ef8dL
+   };
+
+/* Run LEN bytes starting at BUF through the table-driven CRC register,
+ * beginning from SEED, and return the resulting register value. */
+static uint32_t
+efi_crc32 (const void *buf, size_t len, uint32_t seed)
+{
+  const unsigned char *p = buf;
+  uint32_t crc = seed;
+
+  while (len-- > 0) {
+    /* Low byte of (crc ^ input) picks the table entry; the rest of
+     * the register shifts right one byte (unsigned, zero fill). */
+    const uint32_t feedback = crc32_tab[(crc ^ *p++) & 0xff];
+
+    crc = feedback ^ (crc >> 8);
+  }
+  return crc;
+}
+
+uint32_t
+crc32 (const void *buf, size_t len)
+{
+  return ~efi_crc32 (buf, len, UINT32_MAX);
+}
diff --git a/plugins/partitioning/crc32.h b/plugins/partitioning/crc32.h
new file mode 100644
index 0000000..6bd5d2d
--- /dev/null
+++ b/plugins/partitioning/crc32.h
@@ -0,0 +1,41 @@
+/* nbdkit
+ * Copyright (C) 2018 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_CRC32_H
+#define NBDKIT_CRC32_H
+
+#include <stddef.h> /* size_t, used in the prototype below */
+#include <stdint.h>
+
+extern uint32_t crc32 (const void *buf, size_t len);
+#endif /* NBDKIT_CRC32_H */
diff --git a/plugins/partitioning/nbdkit-partitioning-plugin.pod b/plugins/partitioning/nbdkit-partitioning-plugin.pod
new file mode 100644
index 0000000..07780e9
--- /dev/null
+++ b/plugins/partitioning/nbdkit-partitioning-plugin.pod
@@ -0,0 +1,118 @@
+=head1 NAME
+
+nbdkit-partitioning-plugin - create virtual disk from partitions
+
+=head1 SYNOPSIS
+
+ nbdkit partitioning [file=]part1 [[file=]part2 [file=]part3 ...]
+                     [partition-type=mbr|gpt]
+
+=head1 DESCRIPTION
+
+C<nbdkit-partitioning-plugin> is a plugin for L<nbdkit(1)> which
+creates a partitioned virtual drive from a list of one or more files
+each containing single partitions.  The plugin concatenates the files
+together and generates a virtual partition table so that NBD clients
+see a single partitioned disk.
+
+If you just want to concatenate files together (without adding a
+partition table) use L<nbdkit-split-plugin(1)>.  If you want to select
+a single partition from an existing disk, use
+L<nbdkit-partition-filter(1)>.
+
+The plugin supports read/write access.  To limit clients to read-only
+access use the I<-r> flag.
+
+=head2 Partition type
+
+You can choose either MBR (limited to 4 partitions) or GPT (limited to
+128 partitions) partition table type.
+
+If the C<partition-type> parameter is not supplied then the default is
+chosen as follows: If the number of files is E<gt> 4 then GPT is used.
+If the total file size is larger than supported by MBR (approximately
+2 TB), then GPT is used.  Otherwise MBR is used for maximum
+compatibility.
+
+=head2 Padding and alignment
+
+Partition sizes are automatically rounded up to a multiple of the 512
+byte sector size.  Padding may be added between partitions to provide
+the best alignment.  NBD clients may write to partitions, but will get
+an I/O error if they try to change the virtual partition table or any
+padding areas of the disk.
+
+=head1 EXAMPLES
+
+Create a virtual disk containing boot, swap and root partitions (note
+this will not be bootable since the virtual partition table does not
+contain a boot sector or boot loader):
+
+ nbdkit partitioning boot.img swap.img root.img
+
+From a bare ext4 filesystem, create a virtual partitioned disk:
+
+ nbdkit partitioning ext4fs.img
+
+If the size of the underlying file is a multiple of 512 bytes then this
+is a no-op:
+
+ nbdkit --filter=partition partitioning file partition=1
+
+=head1 PARAMETERS
+
+=over 4
+
+=item B<file=>FILENAME
+
+One or more files containing partition data.
+
+This parameter is required.
+
+C<file=> may be omitted.  To ensure that the filename does not end up
+being parsed accidentally as C<key=value>, prefix relative paths with
+C<./> (absolute paths do not need modification).
+
+=item B<partition-type=mbr>
+
+Add an MBR (DOS-style) partition table.  The MBR format is maximally
+compatible with clients, but only supports up to 4 partitions.
+
+=item B<partition-type=gpt>
+
+Add a GPT partition table.  This plugin supports up to 128 GPT
+partitions.  Note that as well as the virtual primary partition table
+added at the beginning of the disk, a virtual secondary partition
+table is added at the end, as required by GPT.
+
+=back
+
+=head1 LIMITS
+
+This plugin only supports primary MBR partitions, hence the limit of 4
+partitions with MBR.  This might be increased in future if we
+implement support for logical/extended partitions.
+
+This plugin only supports 128 GPT partitions.  The GPT format can
+support more, but this plugin does not implement that.
+
+Zero length partitions are not allowed and will cause nbdkit to exit
+with an error.
+
+The sector size is fixed at 512 bytes.
+
+=head1 SEE ALSO
+
+L<nbdkit(1)>,
+L<nbdkit-file-plugin(1)>,
+L<nbdkit-partition-filter(1)>,
+L<nbdkit-split-plugin(1)>,
+L<nbdkit-plugin(3)>.
+
+=head1 AUTHORS
+
+Richard W.M. Jones
+
+=head1 COPYRIGHT
+
+Copyright (C) 2018 Red Hat Inc.
diff --git a/plugins/partitioning/partitioning.c b/plugins/partitioning/partitioning.c
new file mode 100644
index 0000000..506b0a9
--- /dev/null
+++ b/plugins/partitioning/partitioning.c
@@ -0,0 +1,837 @@
+/* nbdkit
+ * Copyright (C) 2018 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <nbdkit-plugin.h>
+
+#include "byte-swapping.h"
+#include "isaligned.h"
+#include "iszero.h"
+#include "rounding.h"
+
+#include "crc32.h"
+
+/* Debug flag: -D partitioning.regions=1: Print the regions table. */
+int partitioning_debug_regions;
+
+#define SECTOR_SIZE UINT64_C(512) /* bytes; 64 bit so size sums stay 64 bit */
+
+/* Maximum size of MBR disks.  This is an approximation based on the
+ * known limit (2^32 sectors) and an estimate based on the amount of
+ * padding between partitions (the 5 * ALIGNMENT term below).
+ */
+#define MAX_MBR_DISK_SIZE (UINT32_MAX * SECTOR_SIZE - 5 * ALIGNMENT)
+
+#define GPT_PT_ENTRY_SIZE 128 /* bytes, stored in the GPT header */
+
+/* XXX Make these configurable in future? */
+#define ALIGNMENT (2048 * SECTOR_SIZE) /* 1 MiB; file regions start on it */
+#define PARTITION_ID 0x83 /* type byte written into each MBR entry */
+#define PARTITION_GUID "\xaf\x3d\xc6\x0f\x83\x84\x72\x47\x8e\x79\x3d\x69\xd8\x47\x7d\xe4"
+
+/* Files supplied on the command line, one per partition. */
+struct file {
+  const char *filename;         /* file= supplied on the command line */
+  int fd;                       /* descriptor, closed in partitioning_unload */
+  struct stat statbuf;          /* st_size is used as the partition length */
+  char guid[16];                /* random GUID used for GPT */
+};
+
+static struct file *files = NULL;
+static size_t nr_files = 0;
+
+/* partition-type parameter. */
+#define PARTTYPE_UNSET 0
+#define PARTTYPE_MBR   1
+#define PARTTYPE_GPT   2
+static int parttype = PARTTYPE_UNSET;
+
+/* Virtual disk regions (contiguous, in ascending order from offset 0). */
+enum region_type {
+  region_file,        /* contents of the i'th file */
+  region_data,        /* pointer to data (used for partition table) */
+  region_zero,        /* padding (alignment or sector round-up) */
+};
+
+struct region {
+  uint64_t start, len, end;    /* byte offsets; end = start + len - 1 */
+  enum region_type type;       /* selects which member of u is used */
+  union {
+    size_t i;                  /* region_file: i'th file */
+    const unsigned char *data; /* region_data: data (partition table) */
+  } u;                         /* not set for region_zero */
+};
+
+static struct region *regions = NULL;
+static size_t nr_regions = 0;
+
+/* Primary and secondary partition tables (secondary is only used for GPT). */
+static unsigned char *primary = NULL, *secondary = NULL;
+
+static void
+partitioning_load (void)
+{
+  srandom ((unsigned int) time (NULL)); /* seed random() from the clock */
+}
+
+static void
+partitioning_unload (void)
+{
+  size_t idx;
+
+  /* Close every file descriptor before dropping the files array. */
+  for (idx = 0; idx != nr_files; idx++)
+    close (files[idx].fd);
+  free (files);
+  /* regions[].u.data only ever aliases primary or secondary, so those
+   * buffers are released once each through the two pointers below.
+   */
+  free (regions);
+  free (primary);
+  free (secondary);
+}
+
+/* bsearch comparator: the key is a byte offset, the element a region.
+ * Yields 0 when start <= offset <= end, else which side to search.
+ */
+static int
+compare_offset (const void *offsetp, const void *regionp)
+{
+  const uint64_t *key = offsetp;
+  const struct region *r = regionp;
+
+  if (*key < r->start)
+    return -1;
+  return *key > r->end ? 1 : 0;
+}
+
+static struct region *
+get_region (uint64_t offset)
+{
+  const size_t elem_size = sizeof regions[0]; /* regions must stay sorted */
+  return bsearch (&offset, regions, nr_regions, elem_size, compare_offset);
+}
+
+/* Grow *objects by one slot of SIZE bytes.  On failure nothing changes. */
+static int
+expand (void **objects, size_t size, size_t *nr_objects)
+{
+  const size_t new_count = *nr_objects + 1;
+  void *grown = realloc (*objects, new_count * size);
+
+  if (grown != NULL) {
+    *objects = grown;
+    *nr_objects = new_count;
+    return 0;
+  }
+  nbdkit_error ("realloc: %m");
+  return -1;
+}
+
+/* Called once we have the list of filenames and have selected a
+ * partition type.  This creates the virtual disk layout as a list of
+ * contiguous regions:
+ *
+ *   - the primary partition table (region_data)
+ *   - for each file: zero padding up to the next ALIGNMENT boundary if
+ *     needed, the file itself (region_file), then zero padding up to
+ *     the next sector boundary if the file size is not a multiple of
+ *     SECTOR_SIZE
+ *   - for GPT only, the secondary partition table (region_data)
+ *
+ * Returns 0 on success or -1 (after calling nbdkit_error) on failure.
+ */
+static int create_partition_table (void);
+
+static int
+create_virtual_disk_layout (void)
+{
+  struct region region;
+  size_t i;
+
+  assert (nr_regions == 0);
+  assert (nr_files > 0);
+  assert (primary == NULL);
+  assert (secondary == NULL);
+
+  /* Allocate the virtual partition table. */
+  if (parttype == PARTTYPE_MBR) {
+    primary = calloc (1, SECTOR_SIZE);
+    if (primary == NULL) {
+      nbdkit_error ("calloc: %m");
+      return -1;
+    }
+  }
+  else /* PARTTYPE_GPT */ {
+    primary = calloc (34, SECTOR_SIZE);
+    if (primary == NULL) {
+      nbdkit_error ("calloc: %m");
+      return -1;
+    }
+    secondary = calloc (33, SECTOR_SIZE);
+    if (secondary == NULL) {
+      nbdkit_error ("calloc: %m");
+      return -1;
+    }
+  }
+
+  /* Virtual primary partition table region at the start of the disk:
+   * one sector for MBR, or 34 sectors (protective MBR, GPT header and
+   * 32 sectors of partition entries) for GPT.
+   */
+  region.start = 0;
+  region.len = (parttype == PARTTYPE_MBR ? 1 : 34) * SECTOR_SIZE;
+  region.end = region.start + region.len - 1;
+  region.type = region_data;
+  region.u.data = primary;
+  if (expand ((void *)&regions, sizeof (struct region), &nr_regions) == -1)
+    return -1;
+  regions[nr_regions-1] = region;
+
+  /* The partitions. */
+  for (i = 0; i < nr_files; ++i) {
+    uint64_t offset;
+
+    offset = regions[nr_regions-1].end + 1;
+    /* Because we add padding after each partition, this invariant
+     * must always be true.
+     */
+    assert (is_aligned (offset, SECTOR_SIZE));
+
+    /* Make sure each partition is aligned for best performance. */
+    if (!is_aligned (offset, ALIGNMENT)) {
+      region.start = offset;
+      region.end = (offset & ~(ALIGNMENT-1)) + ALIGNMENT - 1;
+      region.len = region.end - region.start + 1;
+      region.type = region_zero;
+      if (expand ((void *)&regions, sizeof (struct region), &nr_regions) == -1)
+        return -1;
+      regions[nr_regions-1] = region;
+    }
+
+    offset = regions[nr_regions-1].end + 1;
+    assert (is_aligned (offset, ALIGNMENT));
+
+    /* Create the partition region for this file. */
+    region.start = offset;
+    region.len = files[i].statbuf.st_size;
+    region.end = region.start + region.len - 1;
+    region.type = region_file;
+    region.u.i = i;
+    if (expand ((void *)&regions, sizeof (struct region), &nr_regions) == -1)
+      return -1;
+    regions[nr_regions-1] = region;
+
+    /* If the file size is not a multiple of SECTOR_SIZE then
+     * add a padding region at the end to round it up.
+     */
+    if (!is_aligned (files[i].statbuf.st_size, SECTOR_SIZE)) {
+      region.start = regions[nr_regions-1].end + 1;
+      region.len = SECTOR_SIZE - (files[i].statbuf.st_size & (SECTOR_SIZE-1));
+      region.end = region.start + region.len - 1;
+      region.type = region_zero;
+      if (expand ((void *)&regions, sizeof (struct region), &nr_regions) == -1)
+        return -1;
+      regions[nr_regions-1] = region;
+    }
+  }
+
+  /* For GPT add the virtual secondary/backup partition table. */
+  if (parttype == PARTTYPE_GPT) {
+    region.start = regions[nr_regions-1].end + 1;
+    region.len = 33 * SECTOR_SIZE;
+    region.end = region.start + region.len - 1;
+    region.type = region_data;
+    region.u.data = secondary;
+    if (expand ((void *)&regions, sizeof (struct region), &nr_regions) == -1)
+      return -1;
+    regions[nr_regions-1] = region;
+  }
+
+  if (partitioning_debug_regions) {
+    for (i = 0; i < nr_regions; ++i) {
+      nbdkit_debug ("region[%zu]: %" PRIx64 "-%" PRIx64 " type=%s",
+                    i, regions[i].start, regions[i].end,
+                    regions[i].type == region_file ?
+                    files[regions[i].u.i].filename :
+                    regions[i].type == region_data ?
+                    "data" : "zero");
+    }
+  }
+
+  /* Assert that the regions table looks sane. */
+  assert (nr_regions > 0);
+  assert (regions[0].start == 0);
+  for (i = 0; i < nr_regions; ++i) {
+    assert (regions[i].len > 0);
+    assert (regions[i].end >= regions[i].start);
+    assert (regions[i].len == regions[i].end - regions[i].start + 1);
+    if (i+1 < nr_regions) {
+      assert (regions[i].end + 1 == regions[i+1].start);
+    }
+  }
+
+  return create_partition_table ();
+}
+
+/* Create the partition table (and for GPT the secondary/backup). */
+static void create_mbr_partition_table (unsigned char *out);
+static void create_mbr_partition_table_entry (const struct region *, int bootable, int partition_id, unsigned char *);
+static void create_gpt_partition_header (const void *pt, int is_primary, unsigned char *out);
+static void create_gpt_partition_table (unsigned char *out);
+static void create_gpt_partition_table_entry (const struct region *region, int bootable, char partition_type_guid[16], unsigned char *out);
+static void create_gpt_protective_mbr (unsigned char *out);
+
+static int
+create_partition_table (void)
+{
+  unsigned char *primary_pt, *backup_pt;
+
+  /* By this point the caller has laid out the regions and allocated
+   * the in-memory partition table buffer(s).
+   */
+  assert (nr_regions > 0);
+  assert (primary != NULL);
+  assert (parttype != PARTTYPE_GPT || secondary != NULL);
+
+  if (parttype == PARTTYPE_MBR) {
+    assert (nr_files <= 4);
+    create_mbr_partition_table (primary);
+    return 0;
+  }
+
+  /* Everything below is GPT only. */
+  assert (nr_files <= 128);
+
+  /* LBA 0: protective MBR. */
+  create_gpt_protective_mbr (primary);
+
+  /* LBA 2-33: primary partition entries.  These are written before the
+   * header at LBA 1 because the header builder is handed the entries.
+   */
+  primary_pt = &primary[2*SECTOR_SIZE];
+  create_gpt_partition_table (primary_pt);
+  create_gpt_partition_header (primary_pt, 1, &primary[SECTOR_SIZE]);
+
+  /* Backup copy: entries fill the first 32 sectors of the secondary
+   * buffer and the backup header occupies its 33rd (last) sector.
+   */
+  backup_pt = secondary;
+  create_gpt_partition_table (backup_pt);
+  create_gpt_partition_header (backup_pt, 0, &secondary[32*SECTOR_SIZE]);
+
+  return 0;
+}
+
+static void
+create_mbr_partition_table (unsigned char *out)
+{
+  size_t n;
+
+  /* Each file region fills the 16 byte slot at 0x1be + 16 * file index;
+   * only the first file's entry is flagged bootable. */
+  for (n = 0; n < nr_regions; n++) {
+    const struct region *r = &regions[n];
+    if (r->type != region_file)
+      continue;
+    assert (r->u.i < 4);
+    create_mbr_partition_table_entry (r, r->u.i == 0, PARTITION_ID,
+                                      &out[0x1be + 16 * r->u.i]);
+  }
+  out[0x1fe] = 0x55;            /* boot signature, first byte */
+  out[0x1ff] = 0xaa;            /* boot signature, second byte */
+}
+
+static void
+chs_too_large (unsigned char *out)
+{
+  enum { CYLINDER = 1023, HEAD = 254, SECTOR = 63 };
+
+  out[0] = HEAD;                               /* head */
+  out[1] = ((CYLINDER >> 8) << 6) | SECTOR;    /* cyl bits 9-8, sector */
+  out[2] = CYLINDER & 0xff;                    /* cyl bits 7-0 */
+}
+
+static void
+create_mbr_partition_table_entry (const struct region *region,
+                                  int bootable, int partition_id,
+                                  unsigned char *out)
+{
+  const uint64_t first_lba = region->start / SECTOR_SIZE;
+  const uint64_t lba_count = DIV_ROUND_UP (region->len, SECTOR_SIZE);
+  uint32_t le_first, le_count;
+
+  assert (is_aligned (region->start, SECTOR_SIZE));
+
+  /* Both values are stored in 32 bit fields.  The total_size test in
+   * partitioning_config_complete should already have rejected disks
+   * that are too big for that.
+   */
+  assert (first_lba <= UINT32_MAX);
+  assert (lba_count <= UINT32_MAX);
+  le_first = htole32 (first_lba);
+  le_count = htole32 (lba_count);
+
+  /* 16 byte entry: status, CHS start, type, CHS end, LBA, length. */
+  out[0] = bootable ? 0x80 : 0;
+  chs_too_large (&out[1]);
+  out[4] = partition_id;
+  chs_too_large (&out[5]);
+  memcpy (&out[8], &le_first, 4);
+  memcpy (&out[12], &le_count, 4);
+}
+
+/* Build a GPT header in the sector at 'out'.
+ *
+ * 'pt' points to the 128 partition entries which this header
+ * describes.  They must already be filled in because their CRC is
+ * stored in the header.  'is_primary' selects between the primary
+ * header (LBA 1, entries at LBA 2) and the backup header (last LBA of
+ * the disk, entries starting 33 sectors before the end).
+ */
+static void
+create_gpt_partition_header (const void *pt, int is_primary,
+                             unsigned char *out)
+{
+  /* The fields are declared in the order they are stored on disk.
+   * sizeof this struct is what gets zeroed, what is reported as the
+   * header size and what the header CRC covers.
+   */
+  struct gpt_header {
+    char signature[8];
+    char revision[4];
+    uint32_t header_size;
+    uint32_t crc;
+    uint32_t reserved;
+    uint64_t current_lba;
+    uint64_t backup_lba;
+    uint64_t first_usable_lba;
+    uint64_t last_usable_lba;
+    char guid[16];
+    uint64_t partition_entries_lba;
+    uint32_t nr_partition_entries;
+    uint32_t size_partition_entry;
+    uint32_t crc_partitions;
+  } *header = (struct gpt_header *) out;
+  /* Number of sectors in the whole virtual disk. */
+  const uint64_t nr_lbas = (regions[nr_regions-1].end + 1) / SECTOR_SIZE;
+  uint64_t this_lba, other_lba, entries_lba;
+
+  /* The two headers point at each other and at their own copy of
+   * the partition entries.
+   */
+  if (is_primary) {
+    this_lba = 1;
+    other_lba = nr_lbas - 1;
+    entries_lba = 2;
+  }
+  else {
+    this_lba = nr_lbas - 1;
+    other_lba = 1;
+    entries_lba = nr_lbas - 33;
+  }
+
+  /* NOTE: 'guid' and 'reserved' are never assigned, so they keep the
+   * zeros written by this memset.
+   */
+  memset (header, 0, sizeof *header);
+  memcpy (header->signature, "EFI PART", 8);
+  memcpy (header->revision, "\0\0\1\0", 4); /* revision 1.0 */
+  header->header_size = htole32 (sizeof *header);
+  header->current_lba = htole64 (this_lba);
+  header->backup_lba = htole64 (other_lba);
+  header->first_usable_lba = htole64 (34);
+  header->last_usable_lba = htole64 (nr_lbas - 34);
+  header->partition_entries_lba = htole64 (entries_lba);
+  header->nr_partition_entries = htole32 (128);
+  header->size_partition_entry = htole32 (GPT_PT_ENTRY_SIZE);
+  header->crc_partitions = htole32 (crc32 (pt, GPT_PT_ENTRY_SIZE * 128));
+
+  /* Must be computed last: it covers every other field, and the crc
+   * field itself is still zero at this point.
+   */
+  header->crc = htole32 (crc32 (header, sizeof *header));
+}
+
+/* Write the GPT partition entries to 'out', one entry of
+ * GPT_PT_ENTRY_SIZE bytes per file region, packed consecutively in
+ * region order.
+ */
+static void
+create_gpt_partition_table (unsigned char *out)
+{
+  unsigned char *next_entry = out;
+  size_t r;
+
+  for (r = 0; r < nr_regions; ++r) {
+    const struct region *region = &regions[r];
+    size_t file_index;
+
+    /* Only regions backed by a file become partitions. */
+    if (region->type != region_file)
+      continue;
+
+    file_index = region->u.i;
+    assert (file_index < 128);
+    /* Only the first file is marked bootable. */
+    create_gpt_partition_table_entry (region, file_index == 0,
+                                      PARTITION_GUID, next_entry);
+    next_entry += GPT_PT_ENTRY_SIZE;
+  }
+}
+
+/* Write a single GPT partition entry for 'region' to 'out'.
+ *
+ * 'partition_type_guid' is copied into the entry as the partition
+ * type.  The unique GUID comes from the file backing the region.
+ * 'bootable' sets attribute value 4, otherwise the attributes are 0.
+ * The partition name is only written when the filename is shorter
+ * than 36 characters and consists purely of 7 bit ASCII; otherwise
+ * the name field is left untouched.
+ */
+static void
+create_gpt_partition_table_entry (const struct region *region,
+                                  int bootable, char partition_type_guid[16],
+                                  unsigned char *out)
+{
+  size_t i, len;
+  const char *filename;
+  struct gpt_entry {
+    char partition_type_guid[16];
+    char unique_guid[16];
+    uint64_t first_lba;
+    uint64_t last_lba;
+    uint64_t attributes;
+    char name[72];              /* UTF-16LE */
+  } *entry = (struct gpt_entry *) out;
+
+  assert (sizeof (struct gpt_entry) == GPT_PT_ENTRY_SIZE);
+
+  memcpy (entry->partition_type_guid, partition_type_guid, 16);
+
+  memcpy (entry->unique_guid, files[region->u.i].guid, 16);
+
+  entry->first_lba = htole64 (region->start / SECTOR_SIZE);
+  entry->last_lba = htole64 (region->end / SECTOR_SIZE);
+  entry->attributes = htole64 (bootable ? 4 : 0);
+
+  /* If the filename is 7 bit ASCII then this will reproduce it as a
+   * UTF-16LE string.
+   *
+   * Is this a security risk?  It reveals something about paths on the
+   * server to clients. XXX
+   */
+  filename = files[region->u.i].filename;
+  len = strlen (filename);
+  if (len >= 36)
+    return;
+
+  /* Plain char may be signed, in which case bytes >= 0x80 are
+   * negative and would never compare greater than 127.  Compare as
+   * unsigned char so that non-ASCII filenames are really rejected.
+   */
+  for (i = 0; i < len; ++i)
+    if ((unsigned char) filename[i] > 127)
+      return;
+
+  /* Each ASCII byte becomes one little endian 16 bit code unit. */
+  for (i = 0; i < len; ++i) {
+    entry->name[2*i] = filename[i];
+    entry->name[2*i+1] = 0;
+  }
+}
+
+/* Write the GPT protective MBR to 'out'.
+ *
+ * Protective MBR creates a partition with partition ID 0xee which
+ * covers the whole of the disk, or as much of the disk as
+ * expressible with MBR.
+ */
+static void
+create_gpt_protective_mbr (unsigned char *out)
+{
+  /* Largest end offset used for the MBR entry.  The cast forces the
+   * multiplication to be done in 64 bits whatever the type of
+   * SECTOR_SIZE, so it cannot wrap around at 32 bits.
+   */
+  const uint64_t max_end = (uint64_t) UINT32_MAX * SECTOR_SIZE;
+  struct region region;
+  uint64_t end;
+
+  /* Only start, end and len are meaningful for this temporary
+   * region; zero the rest so that no indeterminate fields are handed
+   * to create_mbr_partition_table_entry.
+   */
+  memset (&region, 0, sizeof region);
+
+  /* The partition begins immediately after the MBR itself. */
+  region.start = 512;
+  end = regions[nr_regions-1].end;
+  if (end > max_end)
+    end = max_end;
+  region.end = end;
+  region.len = region.end - region.start + 1;
+
+  create_mbr_partition_table_entry (&region, 0, 0xee, &out[0x1be]);
+
+  /* Boot signature. */
+  out[0x1fe] = 0x55;
+  out[0x1ff] = 0xaa;
+}
+
+/* Handle one key=value parameter from the command line.
+ *
+ * file=<FILENAME>: open the file read-write, stat it, reject empty
+ * files, give it a random GUID and append it to the global 'files'
+ * array.  Note that 'value' itself is stored as the filename; no copy
+ * is made.
+ *
+ * partition-type=mbr|dos|gpt (case insensitive): select the
+ * partition table type.
+ *
+ * Returns 0 on success.  On any error a message is reported with
+ * nbdkit_error, the file descriptor opened by this call (if any) is
+ * closed, and -1 is returned.
+ */
+static int
+partitioning_config (const char *key, const char *value)
+{
+  struct file file;
+  size_t i;
+  int err;
+
+  if (strcmp (key, "file") == 0) {
+    file.filename = value;
+    file.fd = open (file.filename, O_RDWR);
+    if (file.fd == -1) {
+      nbdkit_error ("%s: %m", file.filename);
+      return -1;
+    }
+    if (fstat (file.fd, &file.statbuf) == -1) {
+      /* Keep the errno from fstat for the %m in the message. */
+      err = errno;
+      close (file.fd);
+      errno = err;
+      nbdkit_error ("%s: stat: %m", file.filename);
+      return -1;
+    }
+
+    if (file.statbuf.st_size == 0) {
+      nbdkit_error ("%s: zero length partitions are not allowed",
+                    file.filename);
+      /* The file is not going to be added to 'files', so close it
+       * here or the descriptor would be leaked.
+       */
+      close (file.fd);
+      return -1;
+    }
+
+    /* Create a random GUID used as "Unique partition GUID".  However
+     * this doesn't follow GUID conventions so in theory could make an
+     * invalid value.  This is only used by GPT, and we store it in
+     * the file structure because it must be the same across primary
+     * and secondary PT entries.
+     */
+    for (i = 0; i < 16; ++i)
+      file.guid[i] = random () & 0xff;
+
+    if (expand ((void *)&files, sizeof (struct file), &nr_files) == -1) {
+      err = errno;
+      close (file.fd);
+      errno = err;
+      return -1;
+    }
+    /* expand has grown the array by one; fill in the new last slot. */
+    files[nr_files-1] = file;
+  }
+  else if (strcmp (key, "partition-type") == 0) {
+    if (strcasecmp (value, "mbr") == 0 || strcasecmp (value, "dos") == 0)
+      parttype = PARTTYPE_MBR;
+    else if (strcasecmp (value, "gpt") == 0)
+      parttype = PARTTYPE_GPT;
+    else {
+      nbdkit_error ("unknown partition-type: %s", value);
+      return -1;
+    }
+  }
+  else {
+    nbdkit_error ("unknown parameter '%s'", key);
+    return -1;
+  }
+
+  return 0;
+}
+
+/* Called once all parameters have been parsed.
+ *
+ * Checks the number of files, works out whether the disk can be
+ * described by MBR or needs GPT, picks a partition type if the user
+ * did not choose one, and finally creates the virtual disk layout.
+ * Returns -1 after reporting an error, otherwise the result of
+ * create_virtual_disk_layout.
+ */
+static int
+partitioning_config_complete (void)
+{
+  uint64_t total_size = 0;
+  size_t f;
+  int needs_gpt;
+
+  /* Not enough / too many files? */
+  if (nr_files == 0) {
+    nbdkit_error ("at least one file= parameter must be supplied");
+    return -1;
+  }
+  if (nr_files > 128) {
+    nbdkit_error ("too many files, the plugin supports a maximum of 128 files");
+    return -1;
+  }
+
+  for (f = 0; f < nr_files; ++f)
+    total_size += files[f].statbuf.st_size;
+
+  /* MBR is limited both in number of partitions and in disk size. */
+  needs_gpt = nr_files > 4 || total_size > MAX_MBR_DISK_SIZE;
+
+  /* The user explicitly asked for MBR but it cannot describe this disk. */
+  if (parttype == PARTTYPE_MBR && needs_gpt) {
+    nbdkit_error ("MBR partition table type supports a maximum of 4 partitions and a maximum virtual disk size of about 2 TB, but you requested %zu partition(s) and a total size of %" PRIu64 " bytes (> %" PRIu64 ").  Try using: partition-type=gpt",
+                  nr_files, total_size, (uint64_t) MAX_MBR_DISK_SIZE);
+    return -1;
+  }
+
+  /* Choose default parttype if not set. */
+  if (parttype == PARTTYPE_UNSET) {
+    if (!needs_gpt) {
+      parttype = PARTTYPE_MBR;
+      nbdkit_debug ("picking partition type MBR");
+    }
+    else {
+      parttype = PARTTYPE_GPT;
+      nbdkit_debug ("picking partition type GPT");
+    }
+  }
+
+  return create_virtual_disk_layout ();
+}
+
+#define partitioning_config_help \
+  "file=<FILENAME>  (required) File(s) containing partitions\n" \
+  "partition-type=mbr|gpt      Partition type"
+
+/* Create the per-connection handle.
+ *
+ * 'readonly' is ignored.  The same pointer is returned for every
+ * connection.
+ */
+static void *
+partitioning_open (int readonly)
+{
+  /* No handle is needed; the address of this static serves as a
+   * non-NULL pointer to return.
+   */
+  static int dummy_handle;
+
+  return &dummy_handle;
+}
+
+#define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
+
+/* Get the disk size: one byte past the end of the last region.
+ * 'handle' is unused.
+ */
+static int64_t
+partitioning_get_size (void *handle)
+{
+  const struct region *last_region;
+
+  assert (nr_regions > 0);
+  last_region = &regions[nr_regions-1];
+  return last_region->end + 1;
+}
+
+/* Read data.
+ *
+ * The request is served region by region: file regions are read from
+ * the backing file, data regions are copied from memory and zero
+ * regions read as zeroes.  Returns 0 on success, or -1 after
+ * reporting an error.
+ */
+static int
+partitioning_pread (void *handle, void *buf, uint32_t count, uint64_t offset)
+{
+  while (count > 0) {
+    const struct region *region = get_region (offset);
+    size_t n;
+
+    /* Never go past the end of the current region or the request. */
+    n = region->end - offset + 1;
+    if (n > count)
+      n = count;
+
+    if (region->type == region_file) {
+      const size_t file_index = region->u.i;
+      ssize_t r;
+
+      assert (file_index < nr_files);
+      r = pread (files[file_index].fd, buf, n, offset - region->start);
+      if (r == -1) {
+        nbdkit_error ("pread: %s: %m", files[file_index].filename);
+        return -1;
+      }
+      if (r == 0) {
+        nbdkit_error ("pread: %s: unexpected end of file",
+                      files[file_index].filename);
+        return -1;
+      }
+      /* A short read just advances by what was read; the loop
+       * picks up the remainder.
+       */
+      n = r;
+    }
+    else if (region->type == region_data)
+      memcpy (buf, &region->u.data[offset - region->start], n);
+    else if (region->type == region_zero)
+      memset (buf, 0, n);
+
+    count -= n;
+    buf += n;
+    offset += n;
+  }
+
+  return 0;
+}
+
+/* Write data.
+ *
+ * The request is processed region by region.  Writes to file regions
+ * go to the backing file.  The partition table and the padding are
+ * not writable: a write there only succeeds if it would leave the
+ * contents unchanged, otherwise it fails with EIO.  Returns 0 on
+ * success, or -1 after reporting an error.
+ */
+static int
+partitioning_pwrite (void *handle,
+                     const void *buf, uint32_t count, uint64_t offset)
+{
+  while (count > 0) {
+    const struct region *region = get_region (offset);
+    size_t n;
+
+    /* Never go past the end of the current region or the request. */
+    n = region->end - offset + 1;
+    if (n > count)
+      n = count;
+
+    if (region->type == region_file) {
+      const size_t file_index = region->u.i;
+      ssize_t r;
+
+      assert (file_index < nr_files);
+      r = pwrite (files[file_index].fd, buf, n, offset - region->start);
+      if (r == -1) {
+        nbdkit_error ("pwrite: %s: %m", files[file_index].filename);
+        return -1;
+      }
+      /* A short write just advances by what was written. */
+      n = r;
+    }
+    else if (region->type == region_data) {
+      /* Only the same data as is already present may be written. */
+      if (memcmp (&region->u.data[offset - region->start], buf, n) != 0) {
+        nbdkit_error ("attempt to change partition table of virtual disk");
+        errno = EIO;
+        return -1;
+      }
+    }
+    else if (region->type == region_zero) {
+      /* Only zeroes may be written. */
+      if (!is_zero (buf, n)) {
+        nbdkit_error ("write non-zeros to padding region");
+        errno = EIO;
+        return -1;
+      }
+    }
+
+    count -= n;
+    buf += n;
+    offset += n;
+  }
+
+  return 0;
+}
+
+/* Flush.
+ *
+ * Calls fdatasync on every backing file in turn.  Stops at the first
+ * failure, reports it and returns -1; returns 0 if every file was
+ * synced.  'handle' is unused.
+ */
+static int
+partitioning_flush (void *handle)
+{
+  size_t i;
+
+  for (i = 0; i < nr_files; ++i) {
+    if (fdatasync (files[i].fd) == -1) {
+      /* Name the file, as the pread and pwrite errors do, so the
+       * failure can be traced to a specific partition.
+       */
+      nbdkit_error ("fdatasync: %s: %m", files[i].filename);
+      return -1;
+    }
+  }
+
+  return 0;
+}
+
+static struct nbdkit_plugin plugin = {
+  .name              = "partitioning",
+  .version           = PACKAGE_VERSION,
+  .load              = partitioning_load,
+  .unload            = partitioning_unload,
+  .config            = partitioning_config,
+  .config_complete   = partitioning_config_complete,
+  .config_help       = partitioning_config_help,
+  .magic_config_key = "file",
+  .open              = partitioning_open,
+  .get_size          = partitioning_get_size,
+  .pread             = partitioning_pread,
+  .pwrite            = partitioning_pwrite,
+  .flush             = partitioning_flush,
+  /* In this plugin, errno is preserved properly along error return
+   * paths from failed system calls.
+   */
+  .errno_is_preserved = 1,
+};
+
+NBDKIT_REGISTER_PLUGIN(plugin)
diff --git a/plugins/split/nbdkit-split-plugin.pod b/plugins/split/nbdkit-split-plugin.pod
index 6f62eb3..c0b7272 100644
--- a/plugins/split/nbdkit-split-plugin.pod
+++ b/plugins/split/nbdkit-split-plugin.pod
@@ -12,6 +12,9 @@ C<nbdkit-split-plugin> is a file plugin for L<nbdkit(1)>.  One or more
 filenames may be given using the C<FILENAME> parameter.  These
 files are logically concatenated into a single disk image.
 
+If you want to add a virtual partition table, see
+L<nbdkit-partitioning-plugin(1)>.
+
 =head2 Differences from nbdkit-file-plugin
 
 Normally to serve a single file you should use
@@ -73,7 +76,8 @@ modification).
 
 L<nbdkit(1)>,
 L<nbdkit-plugin(3)>,
-L<nbdkit-file-plugin(1)>.
+L<nbdkit-file-plugin(1)>,
+L<nbdkit-partitioning-plugin(1)>.
 
 =head1 AUTHORS
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 23316ea..be485b4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -70,6 +70,8 @@ EXTRA_DIST = \
 	test-offset2.sh \
 	test-parallel-file.sh \
 	test-parallel-nbd.sh \
+	test-partitioning1.sh \
+	test-partitioning2.sh \
 	test-pattern.sh \
 	test-pattern-largest.sh \
 	test-pattern-largest-for-qemu.sh \
@@ -355,6 +357,14 @@ test_memory_SOURCES = test-memory.c test.h
 test_memory_CFLAGS = $(WARNINGS_CFLAGS) $(LIBGUESTFS_CFLAGS)
 test_memory_LDADD = libtest.la $(LIBGUESTFS_LIBS)
 
+# partitioning plugin test.
+TESTS += \
+	test-partitioning1.sh
+if HAVE_GUESTFISH
+TESTS += \
+	test-partitioning2.sh
+endif HAVE_GUESTFISH
+
 # pattern plugin test.
 TESTS += \
 	test-pattern.sh \
diff --git a/tests/test-partitioning1.sh b/tests/test-partitioning1.sh
new file mode 100755
index 0000000..8dd2af7
--- /dev/null
+++ b/tests/test-partitioning1.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018 Red Hat Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the partitioning plugin.
+#
+# Test 1: check that partitioning + partition filter = identity
+
+source ./functions.sh
+set -e
+set -x
+
+files="partitioning1.out partitioning1-p1 partitioning1-p2 partitioning1-p3 partitioning1-p4 partitioning1-p5 partitioning1-p6"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Test that qemu-img works
+if ! qemu-img --help >/dev/null; then
+    echo "$0: missing or broken qemu-img"
+    exit 77
+fi
+
+# Create some odd-sized partitions.  These exist to test alignment and
+# padding.
+truncate -s 1 partitioning1-p1
+truncate -s 511 partitioning1-p2
+truncate -s 10M partitioning1-p3
+truncate -s 1023 partitioning1-p4
+truncate -s 1 partitioning1-p5
+truncate -s 511 partitioning1-p6
+
+# Run nbdkit with partitioning plugin and partition filter.
+nbdkit -f -v -D partitioning.regions=1 -U - \
+       --filter=partition \
+       partitioning \
+       partitioning1-p1 partitioning1-p2 file-data partitioning1-p3 \
+       partition-type=mbr \
+       partition=3 \
+       --run 'qemu-img convert $nbd partitioning1.out'
+
+# Contents of partitioning1.out should be identical to file-data.
+cmp file-data partitioning1.out
+
+# Same test with GPT and more partitions.
+nbdkit -f -v -D partitioning.regions=1 -U - \
+       --filter=partition \
+       partitioning \
+       partitioning1-p1 partitioning1-p2 partitioning1-p3 \
+       partitioning1-p4 file-data partitioning1-p5 partitioning1-p6 \
+       partition-type=gpt \
+       partition=5 \
+       --run 'qemu-img convert $nbd partitioning1.out'
+
+cmp file-data partitioning1.out
diff --git a/tests/test-partitioning2.sh b/tests/test-partitioning2.sh
new file mode 100755
index 0000000..411f392
--- /dev/null
+++ b/tests/test-partitioning2.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# nbdkit
+# Copyright (C) 2018 Red Hat Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# * Neither the name of Red Hat nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+# USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+# Test the partitioning plugin.
+#
+# Test 2: Create a naked filesystem, embed in a partition, and try to
+# read/write it with guestfish.
+
+source ./functions.sh
+set -e
+set -x
+
+files="partitioning2.pid partitioning2.sock partitioning2.fs partitioning2.p1 partitioning2.p3"
+rm -f $files
+cleanup_fn rm -f $files
+
+# Test that mke2fs works
+if ! mke2fs -V; then
+    echo "$0: missing or broken mke2fs"
+    exit 77
+fi
+
+# Create partitions before and after.
+truncate -s 1 partitioning2.p1
+truncate -s 10M partitioning2.p3
+
+# Create the naked filesystem.
+truncate -s 20M partitioning2.fs
+mke2fs -F -t ext2 partitioning2.fs
+
+# Run nbdkit.
+start_nbdkit -P partitioning2.pid -U partitioning2.sock \
+             partitioning partitioning2.p1 partitioning2.fs partitioning2.p3 \
+             partition-type=gpt
+
+# Connect with guestfish and read/write stuff to partition 2.
+guestfish --format=raw -a "nbd://?socket=$PWD/partitioning2.sock" <<'EOF'
+  run
+  mount /dev/sda2 /
+  touch /hello
+  fill-pattern "abc" 10000 /pattern
+  ll /
+  umount /dev/sda2
+  sync
+EOF
-- 
2.19.0.rc0




More information about the Libguestfs mailing list