[Libguestfs] [PATCH v2 nbdkit] common: Improve pseudo-random number generation.

Richard W.M. Jones rjones at redhat.com
Fri Dec 28 20:55:23 UTC 2018


Currently we use non-cryptographically secure random numbers in two
places, the error filter (to inject errors at random) and the random
plugin.  For this we have used either random_r or a home-brew-ish
Linear Congruential Generator.  Use of random_r is problematic on BSDs
because it doesn't exist there.  Use of the LCG is simply a bad
choice.

Replace both uses with a better quality and faster PRNG from David
Blackman and Sebastiano Vigna called ‘xoshiro256** 1.0’
(http://xoshiro.di.unimi.it/).  This is released into the public
domain (where possible) so it compatible with the licensing of nbdkit.

This also fixes a bug in the random plugin where it could never
generate the byte 255 because I used modulo 255 instead of modulo 256
arithmetic.  Ooops.

This also adds a simple sniff test that the random plugin is producing
something which at least looks like random data.
---
 plugins/random/nbdkit-random-plugin.pod |   5 +-
 common/include/random.h                 | 101 ++++++++++++++++++++++++
 filters/error/error.c                   |  28 +++----
 plugins/random/random.c                 |  46 +++++------
 tests/test-random.c                     |  23 ++++++
 common/include/Makefile.am              |   1 +
 plugins/random/Makefile.am              |   1 +
 7 files changed, 160 insertions(+), 45 deletions(-)

diff --git a/plugins/random/nbdkit-random-plugin.pod b/plugins/random/nbdkit-random-plugin.pod
index 5dfe801..750daab 100644
--- a/plugins/random/nbdkit-random-plugin.pod
+++ b/plugins/random/nbdkit-random-plugin.pod
@@ -15,9 +15,8 @@ The size of the virtual disk must be specified using the C<size>
 parameter.  If you specify the C<seed> parameter then you will get the
 same random data over multiple runs with the same seed.
 
-The random data is generated using an I<in>secure method (a Linear
-Congruential Generator).  This plugin is mainly good for testing NBD
-clients.
+The random data is generated using an I<insecure> method.  This plugin
+is mainly good for testing NBD clients.
 
 =head1 PARAMETERS
 
diff --git a/common/include/random.h b/common/include/random.h
new file mode 100644
index 0000000..9d90352
--- /dev/null
+++ b/common/include/random.h
@@ -0,0 +1,101 @@
+/* nbdkit
+ * Copyright (C) 2018 Red Hat Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Red Hat nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef NBDKIT_RANDOM_H
+#define NBDKIT_RANDOM_H
+
+#include <stdint.h>
+
+/* Generate pseudo-random numbers, quickly, with explicit state.
+ *
+ * This is based on the xoshiro/xoroshiro generators by David Blackman
+ * and Sebastiano Vigna at http://xoshiro.di.unimi.it/ .  Specifically
+ * this is ‘xoshiro256** 1.0’.
+ *
+ * This does _NOT_ generate cryptographically secure random numbers
+ * (CSPRNG) and so should not be used when cryptography or security is
+ * required - use gcrypt if you need those.
+ */
+
+/* You can seed ‘struct random_state’ by setting the s[] elements
+ * directly - but not you must NOT set it all to zero.  OR if you have
+ * a 64 bit seed, you can use xsrandom below to initialize the state.
+ */
+struct random_state {
+  uint64_t s[4];
+};
+
+static inline uint64_t
+snext (uint64_t *seed)
+{
+  uint64_t z = (*seed += 0x9e3779b97f4a7c15);
+  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
+  z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
+  return z ^ (z >> 31);
+}
+
+static inline void
+xsrandom (uint64_t seed, struct random_state *state)
+{
+  state->s[0] = snext (&seed);
+  state->s[1] = snext (&seed);
+  state->s[2] = snext (&seed);
+  state->s[3] = snext (&seed);
+}
+
+static inline uint64_t
+rotl (const uint64_t x, int k)
+{
+  return (x << k) | (x >> (64 - k));
+}
+
+/* Returns 64 random bits.  Updates the state. */
+static inline uint64_t
+xrandom (struct random_state *state)
+{
+  const uint64_t result_starstar = rotl (state->s[1] * 5, 7) * 9;
+  const uint64_t t = state->s[1] << 17;
+
+  state->s[2] ^= state->s[0];
+  state->s[3] ^= state->s[1];
+  state->s[1] ^= state->s[2];
+  state->s[0] ^= state->s[3];
+
+  state->s[2] ^= t;
+
+  state->s[3] = rotl (state->s[3], 45);
+
+  return result_starstar;
+}
+
+#endif /* NBDKIT_RANDOM_H */
diff --git a/filters/error/error.c b/filters/error/error.c
index 8509a16..9f33910 100644
--- a/filters/error/error.c
+++ b/filters/error/error.c
@@ -44,6 +44,8 @@
 
 #include <nbdkit-filter.h>
 
+#include "random.h"
+
 #define THREAD_MODEL NBDKIT_THREAD_MODEL_PARALLEL
 
 struct error_settings {
@@ -222,17 +224,13 @@ error_config (nbdkit_next_config *next, void *nxdata,
   "                               Apply settings only to read/write/trim/zero"
 
 struct handle {
-#ifdef __GNU_LIBRARY__
-  struct random_data rd;
-  char rd_state[32];
-#endif
+  struct random_state random_state;
 };
 
 static void *
 error_open (nbdkit_next_open *next, void *nxdata, int readonly)
 {
   struct handle *h;
-  time_t t;
 
   if (next (nxdata, readonly) == -1)
     return NULL;
@@ -242,11 +240,7 @@ error_open (nbdkit_next_open *next, void *nxdata, int readonly)
     nbdkit_error ("malloc: %m");
     return NULL;
   }
-#ifdef __GNU_LIBRARY__
-  memset (&h->rd, 0, sizeof h->rd);
-  time (&t);
-  initstate_r (t, (char *) &h->rd_state, sizeof h->rd_state, &h->rd);
-#endif
+  xsrandom (0, &h->random_state);
   return h;
 }
 
@@ -264,7 +258,7 @@ random_error (struct handle *h,
               const struct error_settings *error_settings,
               const char *fn, int *err)
 {
-  int32_t rand;
+  uint64_t rand;
 
   if (error_settings->rate <= 0)       /* 0% = never inject */
     return false;
@@ -278,12 +272,12 @@ random_error (struct handle *h,
   if (error_settings->rate >= 1)       /* 100% = always inject */
     goto inject;
 
-#ifdef __GNU_LIBRARY__
-  random_r (&h->rd, &rand);
-#else
-  rand = random ();
-#endif
-  if (rand >= error_settings->rate * RAND_MAX)
+  /* To avoid the question if (double)1.0 * UINT64_MAX is
+   * representable in a 64 bit integer, and because we don't need all
+   * this precision anyway, let's work in 32 bits.
+   */
+  rand = xrandom (&h->random_state) & UINT32_MAX;
+  if (rand >= error_settings->rate * UINT32_MAX)
     return false;
 
  inject:
diff --git a/plugins/random/random.c b/plugins/random/random.c
index 8adc26e..72caaed 100644
--- a/plugins/random/random.c
+++ b/plugins/random/random.c
@@ -44,32 +44,14 @@
 #define NBDKIT_API_VERSION 2
 #include <nbdkit-plugin.h>
 
+#include "random.h"
+
 /* The size of disk in bytes (initialized by size=<SIZE> parameter). */
 static int64_t size = 0;
 
 /* Seed. */
 static uint32_t seed;
 
-/* We use a Linear Congruential Generator (LCG) to make low quality
- * but easy to compute random numbers.  However we're not quite using
- * it in the ordinary way.  In order to be able to read any byte of
- * data without needing to run the LCG from the start, the random data
- * is computed from the index and seed through two rounds of LCG:
- *
- * index i     LCG(seed) -> +i   -> LCG -> LCG -> mod 256 -> b[i]
- * index i+1   LCG(seed) -> +i+1 -> LCG -> LCG -> mod 256 -> b[i+1]
- * etc
- *
- * This LCG is the same one as used in glibc.
- */
-static inline uint32_t
-LCG (uint32_t s)
-{
-  s *= 1103515245;
-  s += 12345;
-  return s;
-}
-
 static void
 random_load (void)
 {
@@ -135,13 +117,27 @@ random_pread (void *handle, void *buf, uint32_t count, uint64_t offset,
 {
   uint32_t i;
   unsigned char *b = buf;
-  uint32_t s;
+  uint64_t s;
 
   for (i = 0; i < count; ++i) {
-    s = LCG (seed) + offset + i;
-    s = LCG (s);
-    s = LCG (s);
-    s = s % 255;
+    /* We use nbdkit common/include/random.h to make random numbers.
+     *
+     * However we're not quite using it in the ordinary way.  In order
+     * to be able to read any byte of data without needing to run the
+     * PRNG from the start, the random data is computed from the index
+     * and seed through three rounds of PRNG:
+     *
+     * index i     PRNG(seed+i)   -> PRNG -> PRNG -> mod 256 -> b[i]
+     * index i+1   PRNG(seed+i+1) -> PRNG -> PRNG -> mod 256 -> b[i+1]
+     * etc
+     */
+    struct random_state state;
+
+    xsrandom (seed + offset + i, &state);
+    xrandom (&state);
+    xrandom (&state);
+    s = xrandom (&state);
+    s &= 255;
     b[i] = s;
   }
   return 0;
diff --git a/tests/test-random.c b/tests/test-random.c
index 168331c..c02fdc2 100644
--- a/tests/test-random.c
+++ b/tests/test-random.c
@@ -48,6 +48,8 @@
 #define SIZE 1024*1024
 static char data[SIZE];
 
+static unsigned histogram[256];
+
 /* After reading the whole disk above, we then read randomly selected
  * subsets of the disk and compare the data (it should be identical).
  */
@@ -98,6 +100,27 @@ main (int argc, char *argv[])
   memcpy (data, rdata, SIZE);
   free (rdata);
 
+  /* Test that the data is sufficiently random using a simple
+   * histogram.  This just tests for gross errors and is not a
+   * complete statistical study.
+   */
+  for (i = 0; i < SIZE; ++i) {
+    unsigned char c = (unsigned char) data[i];
+    histogram[c]++;
+  }
+  for (i = 0; i < 256; ++i) {
+    const unsigned expected = SIZE / 256;
+    if (histogram[i] < 80 * expected / 100) {
+      fprintf (stderr, "test-random: "
+               "random data is not uniformly distributed\n"
+               "eg. byte %d occurs %u times (expected about %u times)\n",
+               i, histogram[i], expected);
+      exit (EXIT_FAILURE);
+    }
+  }
+
+  /* Randomly read parts of the disk to ensure we get the same data.
+   */
   srandom (time (NULL));
   for (i = 0; i < NR_READS; ++i) {
     offset = random ();
diff --git a/common/include/Makefile.am b/common/include/Makefile.am
index 81f4804..f96bab5 100644
--- a/common/include/Makefile.am
+++ b/common/include/Makefile.am
@@ -41,4 +41,5 @@ EXTRA_DIST = \
 	isaligned.h \
 	ispowerof2.h \
 	iszero.h \
+	random.h \
 	rounding.h
diff --git a/plugins/random/Makefile.am b/plugins/random/Makefile.am
index d990158..0a84774 100644
--- a/plugins/random/Makefile.am
+++ b/plugins/random/Makefile.am
@@ -41,6 +41,7 @@ nbdkit_random_plugin_la_SOURCES = \
 	$(top_srcdir)/include/nbdkit-plugin.h
 
 nbdkit_random_plugin_la_CPPFLAGS = \
+	-I$(top_srcdir)/common/include \
 	-I$(top_srcdir)/include
 nbdkit_random_plugin_la_CFLAGS = \
 	$(WARNINGS_CFLAGS)
-- 
2.19.2




More information about the Libguestfs mailing list