[libvirt] [PATCH] nodedev: Add retry logic to read fibre channel files

John Ferlan jferlan at redhat.com
Fri Jul 22 13:14:50 UTC 2016


ping.   Even if the answer is - let's not fix this...

Tks -

John
On 06/29/2016 05:43 PM, John Ferlan wrote:
> https://bugzilla.redhat.com/show_bug.cgi?id=1319544
> 
> During processing of a vport_create event, udevEventHandleCallback
> will call udevProcessSCSIHost to read the fibre channel configuration
> files for wwpn, wwpn, and fabric_wwn; however, as it turns out those
> files may not have valid data. Rather than carry around invalid data,
> add some logic to re-read the files up to 5 times. If after 5 attempts
> things still fail, don't hold up processing any longer.
> 
> The "ffffffffffffffff" value is what seems to be used to initialize the
> wwnn and wwpn files, while "0" or "ffffffffffffffff" have been used to
> initialize the fabric_wwn file (see bz 1240912 for some details).
> 
> Signed-off-by: John Ferlan <jferlan at redhat.com>
> ---
> 
>  This has been hanging in a local branch for a while. Figured I'd at the
>  very least get some feedback and thoughts from others as to whether it's
>  worth making the adjustments.  Lots of details in the bz, the essentially
>  the tester is bypassing the libvirt create vport mechanism, then wants to
>  use the libvirt delete vHBA; however, the timing of things seems to have
>  gotten clogged up and not all the fields in the vHBA are read properly
>  thus the deletion cannot be done.  A workaround to the issue is to run
>  nodedev-dumpxml on the vHBA again and the data is read properly. This patch
>  went with a retry mechanism to try and ensure the data we've read is
>  correct. I'm a bit ambivalent about this, but it doesn't hurt to ask...
> 
> 
>  src/node_device/node_device_driver.c      |  4 ++--
>  src/node_device/node_device_linux_sysfs.c | 25 +++++++++++++++++++++++++
>  src/node_device/node_device_udev.c        |  9 ++++++++-
>  3 files changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/src/node_device/node_device_driver.c b/src/node_device/node_device_driver.c
> index 500caeb..838b3ee 100644
> --- a/src/node_device/node_device_driver.c
> +++ b/src/node_device/node_device_driver.c
> @@ -53,7 +53,7 @@ static int update_caps(virNodeDeviceObjPtr dev)
>      while (cap) {
>          switch (cap->data.type) {
>          case VIR_NODE_DEV_CAP_SCSI_HOST:
> -            nodeDeviceSysfsGetSCSIHostCaps(&dev->def->caps->data);
> +            ignore_value(nodeDeviceSysfsGetSCSIHostCaps(&dev->def->caps->data));
>              break;
>          case VIR_NODE_DEV_CAP_NET:
>              if (virNetDevGetLinkInfo(cap->data.net.ifname, &cap->data.net.lnk) < 0)
> @@ -292,7 +292,7 @@ nodeDeviceLookupSCSIHostByWWN(virConnectPtr conn,
>  
>          while (cap) {
>              if (cap->data.type == VIR_NODE_DEV_CAP_SCSI_HOST) {
> -                nodeDeviceSysfsGetSCSIHostCaps(&cap->data);
> +                ignore_value(nodeDeviceSysfsGetSCSIHostCaps(&cap->data));
>                  if (cap->data.scsi_host.flags &
>                      VIR_NODE_DEV_CAP_FLAG_HBA_FC_HOST) {
>                      if (STREQ(cap->data.scsi_host.wwnn, wwnn) &&
> diff --git a/src/node_device/node_device_linux_sysfs.c b/src/node_device/node_device_linux_sysfs.c
> index 549d32c..9363487 100644
> --- a/src/node_device/node_device_linux_sysfs.c
> +++ b/src/node_device/node_device_linux_sysfs.c
> @@ -41,6 +41,21 @@
>  
>  VIR_LOG_INIT("node_device.node_device_linux_sysfs");
>  
> +
> +/* nodeDeviceSysfsGetSCSIHostCaps:
> + * @d: Pointer to node device capabilities
> + *
> + * Read the various 'scsi_host' files for the device and fill in
> + * the relevant data for our internal representation. It is possible
> + * that the environment isn't completely setup or "stable" when we
> + * go to read, so check for invalid values and fail on those. In
> + * particular, if the wwnn or wwpn are read in as ffffffffffffffff
> + * or the fabric_wwn is read as 0 or ffffffffffffffff, then we need
> + * to force a retry.
> + *
> + * Returns 0 on success, -1 on bad failure, -2 on failure to indicate
> + * to retry
> + */
>  int
>  nodeDeviceSysfsGetSCSIHostCaps(virNodeDevCapDataPtr d)
>  {
> @@ -83,6 +98,16 @@ nodeDeviceSysfsGetSCSIHostCaps(virNodeDevCapDataPtr d)
>                       d->scsi_host.host);
>              goto cleanup;
>          }
> +
> +        if (STREQ(d->scsi_host.wwnn, "ffffffffffffffff") ||
> +            STREQ(d->scsi_host.wwpn, "ffffffffffffffff") ||
> +            STREQ(d->scsi_host.fabric_wwn, "0") ||
> +            STREQ(d->scsi_host.fabric_wwn, "ffffffffffffffff")) {
> +            VIR_WARN("Failed to get valid wwnn, wwpn, or fabric_wwn for host%d",
> +                     d->scsi_host.host);
> +            ret = -2;
> +            goto cleanup;
> +        }
>      }
>  
>      if (virIsCapableVport(NULL, d->scsi_host.host)) {
> diff --git a/src/node_device/node_device_udev.c b/src/node_device/node_device_udev.c
> index 76c60ea..54c9e58 100644
> --- a/src/node_device/node_device_udev.c
> +++ b/src/node_device/node_device_udev.c
> @@ -523,6 +523,7 @@ static int udevProcessSCSIHost(struct udev_device *device ATTRIBUTE_UNUSED,
>      virNodeDevCapDataPtr data = &def->caps->data;
>      char *filename = NULL;
>      char *str;
> +    int retry = 5;
>  
>      filename = last_component(def->sysfs_path);
>  
> @@ -534,7 +535,13 @@ static int udevProcessSCSIHost(struct udev_device *device ATTRIBUTE_UNUSED,
>          return -1;
>      }
>  
> -    nodeDeviceSysfsGetSCSIHostCaps(&def->caps->data);
> +    while (retry--) {
> +        if (nodeDeviceSysfsGetSCSIHostCaps(&def->caps->data) == -2) {
> +            sleep(1);
> +            continue;
> +        }
> +        break;
> +    }
>  
>      if (udevGenerateDeviceName(device, def, NULL) != 0)
>          return -1;
> 




More information about the libvir-list mailing list