[Libguestfs] [PATCH v2 1/4] daemon: Translate device names if Linux device ordering is unstable (RHBZ#1804207).

Richard W.M. Jones rjones at redhat.com
Thu Mar 5 13:15:30 UTC 2020


Linux from around 5.6 now enumerates individual disks in any order
(whereas previously it enumerated only drivers in parallel).  This
means that /dev/sdX ordering is no longer stable - in particular we
cannot be sure that /dev/sda inside the guest is the first disk that
was attached to the appliance, /dev/sdb the second disk and so on.

However we can still use SCSI PCI device numbering as found in
/dev/disk/by-path.  Use this to translate device names in and out of
the appliance.

Thanks: Vitaly Kuznetsov, Paolo Bonzini, Dan Berrangé.
---
 TODO                             |  13 +++
 daemon/daemon.h                  |   2 +
 daemon/device-name-translation.c | 195 +++++++++++++++++++++++++++++--
 daemon/devsparts.ml              |  23 +++-
 daemon/guestfsd.c                |   5 +
 daemon/stubs-macros.h            |  10 +-
 lib/canonical-name.c             |   5 +
 7 files changed, 235 insertions(+), 18 deletions(-)

diff --git a/TODO b/TODO
index 066f633d4..064386ac2 100644
--- a/TODO
+++ b/TODO
@@ -189,6 +189,19 @@ might be replaced by direct use of the library (if this is easier).
 But it is very hard to be compatible between RHEL6 and RHEL5 when
 using the library directly.
 
+Properly fix device name translation
+------------------------------------
+
+Currently for Device parameters we pass a string name like "/dev/sda"
+or "/dev/sdb2" or "/dev/VG/LV" to the daemon.  Since Linux broke
+device enumeration (RHBZ#1804207) this works less well, requiring
+non-trivial translation within the daemon.
+
+It would be far better if in the generator we mapped "/dev/XXX" to a
+proper structure.  Device index + partition | LV | MD | ...
+This would be then used everywhere inside the daemon and mean we would
+no longer need device name translation at all.
+
 Visualization
 -------------
 
diff --git a/daemon/daemon.h b/daemon/daemon.h
index 170fb2537..24cf8585d 100644
--- a/daemon/daemon.h
+++ b/daemon/daemon.h
@@ -215,6 +215,8 @@ extern void notify_progress_no_ratelimit (uint64_t position, uint64_t total, con
 
 /* device-name-translation.c */
 extern char *device_name_translation (const char *device);
+extern void device_name_translation_init (void);
+extern void device_name_translation_free (void);
 extern char *reverse_device_name_translation (const char *device);
 
 /* stubs.c (auto-generated) */
diff --git a/daemon/device-name-translation.c b/daemon/device-name-translation.c
index ed826bbae..30173f5c2 100644
--- a/daemon/device-name-translation.c
+++ b/daemon/device-name-translation.c
@@ -27,12 +27,125 @@
 #include <dirent.h>
 #include <limits.h>
 #include <sys/stat.h>
+#include <errno.h>
+#include <error.h>
+
+#include "c-ctype.h"
 
 #include "daemon.h"
 
+static char **cache;
+static size_t cache_size;
+
+/**
+ * Cache daemon disk mapping.
+ *
+ * When the daemon starts up, populate a cache with the contents
+ * of /dev/disk/by-path.  It's easiest to use C<ls -lv> here
+ * since the names are sorted awkwardly.
+ */
+void
+device_name_translation_init (void)
+{
+  const char *by_path = "/dev/disk/by-path";
+  CLEANUP_FREE char *out = NULL, *err = NULL;
+  CLEANUP_FREE_STRING_LIST char **lines = NULL;
+  size_t i, j;
+  int r;
+
+  device_name_translation_free ();
+
+  r = command (&out, &err, "ls", "-1v", by_path, NULL);
+  if (r == -1)
+    error (EXIT_FAILURE, 0,
+           "failed to initialize device name translation cache: %s", err);
+
+  lines = split_lines (out);
+  if (lines == NULL)
+    error (EXIT_FAILURE, errno, "split_lines");
+
+  cache_size = guestfs_int_count_strings (lines);
+  cache = calloc (cache_size, sizeof (char *));
+  if (cache == NULL)
+    error (EXIT_FAILURE, errno, "calloc");
+
+  /* Delete entries for partitions.  (Mark them as NULL in the array
+   * and skip them below.)
+   */
+  for (i = 0; i < cache_size; ++i) {
+    if (strstr (lines[i], "-part"))
+      lines[i] = NULL;
+  }
+
+  /* Look up each device name.  It should be a symlink to /dev/sdX. */
+  for (i = j = 0; i < cache_size; ++i) {
+    if (lines[i] != NULL) {
+      CLEANUP_FREE char *full;
+      char *device;
+
+      if (asprintf (&full, "%s/%s", by_path, lines[i]) == -1)
+        error (EXIT_FAILURE, errno, "asprintf");
+
+      device = realpath (full, NULL);
+      if (device == NULL)
+        error (EXIT_FAILURE, errno, "realpath: %s", full);
+
+      /* Don't add the root device to the cache. */
+      if (is_root_device (device))
+        continue;
+
+      cache[j++] = device;
+    }
+  }
+
+  /* This is the final cache size because we deleted entries above. */
+  cache_size = j;
+}
+
+void
+device_name_translation_free (void)
+{
+  size_t i;
+
+  for (i = 0; i < cache_size; ++i)
+    free (cache[i]);
+  free (cache);
+  cache = NULL;
+  cache_size = 0;
+}
+
 /**
  * Perform device name translation.
  *
+ * Libguestfs defines a few standard formats for device names.
+ * (see also L<guestfs(3)/BLOCK DEVICE NAMING> and
+ * L<guestfs(3)/guestfs_canonical_device_name>).  They are:
+ *
+ * =over 4
+ *
+ * =item F</dev/sdX[N]>
+ *
+ * =item F</dev/hdX[N]>
+ *
+ * =item F</dev/vdX[N]>
+ *
+ * These mean the Nth partition on the Xth device.  Because
+ * Linux no longer enumerates devices in the order they are
+ * passed to qemu, we must translate these by looking up
+ * the actual device using /dev/disk/by-path/
+ *
+ * =item F</dev/mdX>
+ *
+ * =item F</dev/VG/LV>
+ *
+ * =item F</dev/mapper/...>
+ *
+ * =item F</dev/dm-N>
+ *
+ * These are not translated here.
+ *
+ * =back
+ *
  * It returns a newly allocated string which the caller must free.
  *
  * It returns C<NULL> on error.  B<Note> it does I<not> call
@@ -45,18 +158,59 @@ char *
 device_name_translation (const char *device)
 {
   int fd;
-  char *ret;
+  char *ret = NULL;
+  size_t len;
 
-  fd = open (device, O_RDONLY|O_CLOEXEC);
+  /* /dev/sdX[N] and aliases like /dev/vdX[N]. */
+  if (STRPREFIX (device, "/dev/") &&
+      strchr (device+5, '/') == NULL && /* not an LV name */
+      device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
+      ((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
+    ssize_t i;
+    const char *start;
+    char dev[16];
+
+    /* Translate to a disk index in /dev/disk/by-path sorted numerically. */
+    start = &device[5+len+1];
+    len = strspn (start, "abcdefghijklmnopqrstuvwxyz");
+    if (len >= sizeof dev - 1) {
+      fprintf (stderr, "unparseable device name: %s\n", device);
+      return NULL;
+    }
+    strcpy (dev, start);
+    dev[len] = '\0';
+
+    i = guestfs_int_drive_index (dev);
+    if (i >= 0 && i < (ssize_t) cache_size) {
+      /* Append the partition name if present. */
+      if (asprintf (&ret, "%s%s", cache[i], start+len) == -1)
+        return NULL;
+    }
+  }
+
+  /* If we didn't translate it above, continue with the same name. */
+  if (ret == NULL) {
+    ret = strdup (device);
+    if (ret == NULL)
+      return NULL;
+  }
+
+  /* Now check the device is openable. */
+  fd = open (ret, O_RDONLY|O_CLOEXEC);
   if (fd >= 0) {
     close (fd);
-    return strdup (device);
+    return ret;
   }
 
-  if (errno != ENXIO && errno != ENOENT)
+  if (errno != ENXIO && errno != ENOENT) {
+    perror (ret);
+    free (ret);
     return NULL;
+  }
 
-  /* If the name begins with "/dev/sd" then try the alternatives. */
+  free (ret);
+
+  /* If the original name begins with "/dev/sd" then try the alternatives. */
   if (!STRPREFIX (device, "/dev/sd"))
     return NULL;
   device += 7;                  /* device == "a1" etc. */
@@ -97,13 +251,34 @@ device_name_translation (const char *device)
 char *
 reverse_device_name_translation (const char *device)
 {
-  char *ret;
+  char *ret = NULL;
+  size_t i;
+
+  /* Look it up in the cache, and if found return the canonical name.
+   * If not found return a copy of the original string.
+   */
+  for (i = 0; i < cache_size; ++i) {
+    const size_t len = strlen (cache[i]);
+
+    if (STREQ (device, cache[i]) ||
+        (STRPREFIX (device, cache[i]) && c_isdigit (device[len]))) {
+      char drv[16];
+
+      guestfs_int_drive_name (i, drv);
+      if (asprintf (&ret, "/dev/sd%s%s", drv, &device[len]) == -1) {
+        reply_with_perror ("asprintf");
+        return NULL;
+      }
+      break;
+    }
+  }
 
-  /* Currently a no-op. */
-  ret = strdup (device);
   if (ret == NULL) {
-    reply_with_perror ("strdup");
-    return NULL;
+    ret = strdup (device);
+    if (ret == NULL) {
+      reply_with_perror ("strdup");
+      return NULL;
+    }
   }
 
   return ret;
diff --git a/daemon/devsparts.ml b/daemon/devsparts.ml
index 59e66e82e..b0703d3ab 100644
--- a/daemon/devsparts.ml
+++ b/daemon/devsparts.ml
@@ -23,9 +23,26 @@ open Std_utils
 
 open Utils
 
-let map_block_devices ~return_md f =
-  let devs = Sys.readdir "/sys/block" in
-  let devs = Array.to_list devs in
+let rec map_block_devices ~return_md f =
+  (* We need to get the devices in translated order.  Use the
+   * same command that we use in device-name-translation.c.
+   *)
+  let path = "/dev/disk/by-path" in
+  let devs = command "ls" ["-1v"; path] in
+  let devs = String.trimr devs in
+  let devs = String.nsplit "\n" devs in
+  (* Delete entries for partitions. *)
+  let devs = List.filter (fun dev -> String.find dev "-part" = (-1)) devs in
+  let devs =
+    List.filter_map (
+      fun file ->
+        let dev = Unix_utils.Realpath.realpath (sprintf "%s/%s" path file) in
+        (* Ignore non-/dev devices, and return without /dev/ prefix. *)
+        if String.is_prefix dev "/dev/" then
+          Some (String.sub dev 5 (String.length dev - 5))
+        else
+          None
+    ) devs in
   let devs = List.filter (
     fun dev ->
       String.is_prefix dev "sd" ||
diff --git a/daemon/guestfsd.c b/daemon/guestfsd.c
index 5400adf64..fd87f6520 100644
--- a/daemon/guestfsd.c
+++ b/daemon/guestfsd.c
@@ -233,6 +233,9 @@ main (int argc, char *argv[])
   _umask (0);
 #endif
 
+  /* Initialize device name translations cache. */
+  device_name_translation_init ();
+
   /* Connect to virtio-serial channel. */
   if (!channel)
     channel = VIRTIO_SERIAL_CHANNEL;
@@ -322,6 +325,8 @@ main (int argc, char *argv[])
   /* Enter the main loop, reading and performing actions. */
   main_loop (sock);
 
+  device_name_translation_free ();
+
   exit (EXIT_SUCCESS);
 }
 
diff --git a/daemon/stubs-macros.h b/daemon/stubs-macros.h
index e8b6c500b..235e79472 100644
--- a/daemon/stubs-macros.h
+++ b/daemon/stubs-macros.h
@@ -30,11 +30,6 @@
  */
 #define RESOLVE_DEVICE(path,path_out,is_filein)                         \
   do {									\
-    if (!is_device_parameter ((path))) {                                \
-      if (is_filein) cancel_receive ();                                 \
-      reply_with_error ("%s: %s: expecting a device name", __func__, (path)); \
-      return;							        \
-    }									\
     (path_out) = device_name_translation ((path));                      \
     if ((path_out) == NULL) {                                           \
       const int err = errno;                                            \
@@ -43,6 +38,11 @@
       reply_with_perror ("%s: %s", __func__, path);                     \
       return;							        \
     }                                                                   \
+    if (!is_device_parameter ((path_out))) {                            \
+      if (is_filein) cancel_receive ();                                 \
+      reply_with_error ("%s: %s: expecting a device name", __func__, (path)); \
+      return;							        \
+    }									\
   } while (0)
 
 /* All functions that take a mountable argument must call this macro.
diff --git a/lib/canonical-name.c b/lib/canonical-name.c
index 42a0fd2a6..efe45c5f1 100644
--- a/lib/canonical-name.c
+++ b/lib/canonical-name.c
@@ -37,6 +37,11 @@ guestfs_impl_canonical_device_name (guestfs_h *g, const char *device)
       strchr (device+5, '/') == NULL && /* not an LV name */
       device[5] != 'm' && /* not /dev/md - RHBZ#1414682 */
       ((len = strcspn (device+5, "d")) > 0 && len <= 2)) {
+    /* NB!  These do not need to be translated by
+     * device_name_translation.  They will be translated if necessary
+     * when the caller uses them in APIs which go through to the
+     * daemon.
+     */
     ret = safe_asprintf (g, "/dev/sd%s", &device[5+len+1]);
   }
   else if (STRPREFIX (device, "/dev/mapper/") ||
-- 
2.25.0




More information about the Libguestfs mailing list