rpms/kernel/devel kernel.spec, 1.405, 1.406 linux-2.6-firewire-git-pending.patch, 1.1, 1.2

Jarod Wilson (jwilson) fedora-extras-commits at redhat.com
Wed Feb 6 05:24:09 UTC 2008


Author: jwilson

Update of /cvs/pkgs/rpms/kernel/devel
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv22464

Modified Files:
	kernel.spec linux-2.6-firewire-git-pending.patch 
Log Message:
* Tue Feb 05 2008 Jarod Wilson <jwilson at redhat.com>
- Make FireWire I/O survive bus resets and device
  reconnections better



Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/kernel.spec,v
retrieving revision 1.405
retrieving revision 1.406
diff -u -r1.405 -r1.406
--- kernel.spec	5 Feb 2008 19:24:07 -0000	1.405
+++ kernel.spec	6 Feb 2008 05:23:33 -0000	1.406
@@ -1789,6 +1789,10 @@
 %kernel_variant_files -a /%{image_install_path}/xen*-%{KVERREL} -e /etc/ld.so.conf.d/kernelcap-%{KVERREL}.conf %{with_xen} xen
 
 %changelog
+* Tue Feb 05 2008 Jarod Wilson <jwilson at redhat.com>
+- Make FireWire I/O survive bus resets and device
+  reconnections better
+
 * Tue Feb 05 2008 Chuck Ebbert <cebbert at redhat.com>
 - atl2 network driver 2.0.4
 - ASUS Eeepc ACPI hotkey driver

linux-2.6-firewire-git-pending.patch:

Index: linux-2.6-firewire-git-pending.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-firewire-git-pending.patch,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- linux-2.6-firewire-git-pending.patch	28 Jan 2008 22:37:22 -0000	1.1
+++ linux-2.6-firewire-git-pending.patch	6 Feb 2008 05:23:33 -0000	1.2
@@ -103,28 +103,35 @@
 attempt to add a device with the same name of an already existing
 device.  http://bugzilla.kernel.org/show_bug.cgi?id=9828
 
-Impact of the bug:  Happens rarely, forces the user to unplug and replug
-the new device to get it working.
-
-The fix moves deregistration of the minor number and device_unregister()
-into a common rw_sem protected section.
-
-We also move the ref count increment from fw_device_op_open into an
-rw_sem protected section with the lookup of the device, so that the
-device pointer can't become invalid between lookup and usage.
+Impact of the bug:  Happens rarely (when shutdown of a device coincides
+with creation of another), forces the user to unplug and replug the new
+device to get it working.
+
+The fix is obvious:  Free the minor number *after* instead of *before*
+device_unregister().  This requires taking an additional reference to
+the fw_device as long as the IDR tree points to it.
+
+And while we are at it, we fix an additional race condition:
+fw_device_op_open() took its reference to the fw_device a little bit too
+late, and hence was in danger of accessing an already invalid fw_device.
 
 Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
-Signed-off-by: Jarod Wilson <jwilson at redhat.com>
 ---
- drivers/firewire/fw-cdev.c   |    6 ++++--
- drivers/firewire/fw-device.c |   10 ++++++----
- 2 files changed, 10 insertions(+), 6 deletions(-)
+ drivers/firewire/fw-cdev.c   |    8 +++++---
+ drivers/firewire/fw-device.c |   20 ++++++++++++++------
+ drivers/firewire/fw-device.h |    2 +-
+ 3 files changed, 20 insertions(+), 10 deletions(-)
 
 Index: linux/drivers/firewire/fw-device.c
 ===================================================================
 --- linux.orig/drivers/firewire/fw-device.c
 +++ linux/drivers/firewire/fw-device.c
-@@ -614,10 +614,12 @@ struct fw_device *fw_device_from_devt(de
+@@ -610,12 +610,14 @@ static DECLARE_RWSEM(idr_rwsem);
+ static DEFINE_IDR(fw_device_idr);
+ int fw_cdev_major;
+ 
+-struct fw_device *fw_device_from_devt(dev_t devt)
++struct fw_device *fw_device_get_by_devt(dev_t devt)
  {
  	struct fw_device *device;
  
@@ -135,11 +142,7 @@
  	up_read(&idr_rwsem);
  
  	return device;
- }
- 
-@@ -625,17 +627,17 @@ static void fw_device_shutdown(struct wo
- {
- 	struct fw_device *device =
+@@ -627,13 +629,14 @@ static void fw_device_shutdown(struct wo
  		container_of(work, struct fw_device, work.work);
  	int minor = MINOR(device->device.devt);
  
@@ -149,22 +152,50 @@
 -
  	fw_device_cdev_remove(device);
  	device_for_each_child(&device->device, NULL, shutdown_unit);
+ 	device_unregister(&device->device);
 +
 +	down_write(&idr_rwsem);
- 	device_unregister(&device->device);
 +	idr_remove(&fw_device_idr, minor);
 +	up_write(&idr_rwsem);
++	fw_device_put(device);
  }
  
  static struct device_type fw_device_type = {
- 	.release	= fw_device_release,
- };
+@@ -682,10 +685,13 @@ static void fw_device_init(struct work_s
+ 	}
+ 
+ 	err = -ENOMEM;
++
++	fw_device_get(device);
+ 	down_write(&idr_rwsem);
+ 	if (idr_pre_get(&fw_device_idr, GFP_KERNEL))
+ 		err = idr_get_new(&fw_device_idr, device, &minor);
+ 	up_write(&idr_rwsem);
++
+ 	if (err < 0)
+ 		goto error;
+ 
+@@ -741,7 +747,9 @@ static void fw_device_init(struct work_s
+ 	idr_remove(&fw_device_idr, minor);
+ 	up_write(&idr_rwsem);
+  error:
+-	put_device(&device->device);
++	fw_device_put(device);		/* fw_device_idr's reference */
++
++	put_device(&device->device);	/* our reference */
+ }
+ 
+ static int update_unit(struct device *dev, void *data)
 Index: linux/drivers/firewire/fw-cdev.c
 ===================================================================
 --- linux.orig/drivers/firewire/fw-cdev.c
 +++ linux/drivers/firewire/fw-cdev.c
-@@ -112,14 +112,16 @@ static int fw_device_op_open(struct inod
- 	device = fw_device_from_devt(inode->i_rdev);
+@@ -109,15 +109,17 @@ static int fw_device_op_open(struct inod
+ 	struct client *client;
+ 	unsigned long flags;
+ 
+-	device = fw_device_from_devt(inode->i_rdev);
++	device = fw_device_get_by_devt(inode->i_rdev);
  	if (device == NULL)
  		return -ENODEV;
  
@@ -180,50 +211,887 @@
  	INIT_LIST_HEAD(&client->event_list);
  	INIT_LIST_HEAD(&client->resource_list);
  	spin_lock_init(&client->lock);
- 	init_waitqueue_head(&client->wait);
+Index: linux/drivers/firewire/fw-device.h
+===================================================================
+--- linux.orig/drivers/firewire/fw-device.h
++++ linux/drivers/firewire/fw-device.h
+@@ -77,13 +77,13 @@ fw_device_is_shutdown(struct fw_device *
+ }
+ 
+ struct fw_device *fw_device_get(struct fw_device *device);
++struct fw_device *fw_device_get_by_devt(dev_t devt);
+ void fw_device_put(struct fw_device *device);
+ int fw_device_enable_phys_dma(struct fw_device *device);
  
+ void fw_device_cdev_update(struct fw_device *device);
+ void fw_device_cdev_remove(struct fw_device *device);
+ 
+-struct fw_device *fw_device_from_devt(dev_t devt);
+ extern int fw_cdev_major;
+ 
+ struct fw_unit {
 
 -- 
 Stefan Richter
--=====-==--- ---= ==-==
+-=====-==--- --=- ---=-
 http://arcgraph.de/sr/
 
 
-Scenario:  Process A keeps the character device file of node N open.
-N is being unplugged.  File /dev/fwN won't be destroyed as long as A
-doesn't close it.  Now, process B opens /dev/fwN as well.  Previously
-it would succeed but be unable to do any IO on it of course.  With this
-patch, process B's open() will fail immediately with -ENODEV.
+This should help to interpret user reports.  E.g. one can look up the
+vendor OUI (first three bytes of the GUID) and thus tell what is what.
+
+Also simplifies the math in the GUID sysfs attribute.
 
 Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
-Signed-off-by: Jarod Wilson <jwilson at redhat.com>
 ---
- drivers/firewire/fw-device.c |    8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
+ drivers/firewire/fw-device.c |   28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
 
 Index: linux/drivers/firewire/fw-device.c
 ===================================================================
 --- linux.orig/drivers/firewire/fw-device.c
 +++ linux/drivers/firewire/fw-device.c
-@@ -616,8 +616,12 @@ struct fw_device *fw_device_from_devt(de
+@@ -358,12 +358,9 @@ static ssize_t
+ guid_show(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ 	struct fw_device *device = fw_device(dev);
+-	u64 guid;
  
- 	down_read(&idr_rwsem);
- 	device = idr_find(&fw_device_idr, MINOR(devt));
--	if (device)
--		fw_device_get(device);
-+	if (device) {
-+		if (fw_device_is_shutdown(device))
-+			device = NULL;
+-	guid = ((u64)device->config_rom[3] << 32) | device->config_rom[4];
+-
+-	return snprintf(buf, PAGE_SIZE, "0x%016llx\n",
+-			(unsigned long long)guid);
++	return snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
++			device->config_rom[3], device->config_rom[4]);
+ }
+ 
+ static struct device_attribute fw_device_attributes[] = {
+@@ -723,13 +720,22 @@ static void fw_device_init(struct work_s
+ 	 */
+ 	if (atomic_cmpxchg(&device->state,
+ 		    FW_DEVICE_INITIALIZING,
+-		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
++		    FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) {
+ 		fw_device_shutdown(&device->work.work);
+-	else
+-		fw_notify("created new fw device %s "
+-			  "(%d config rom retries, S%d00)\n",
+-			  device->device.bus_id, device->config_rom_retries,
+-			  1 << device->max_speed);
++	} else {
++		if (device->config_rom_retries)
++			fw_notify("created device %s: GUID %08x%08x, S%d00, "
++				  "%d config ROM retries\n",
++				  device->device.bus_id,
++				  device->config_rom[3], device->config_rom[4],
++				  1 << device->max_speed,
++				  device->config_rom_retries);
 +		else
-+			fw_device_get(device);
++			fw_notify("created device %s: GUID %08x%08x, S%d00\n",
++				  device->device.bus_id,
++				  device->config_rom[3], device->config_rom[4],
++				  1 << device->max_speed);
 +	}
- 	up_read(&idr_rwsem);
  
- 	return device;
+ 	/*
+ 	 * Reschedule the IRM work if we just finished reading the
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Several different SBP-2 bridges accept a login early while the IDE
+device is still powering up.  They are therefore unable to respond to
+SCSI INQUIRY immediately, and the SCSI core has to retry the INQUIRY.
+One of these retries is typically successful, and all is well.
+
+But in case of Momobay FX-3A, the INQUIRY retries tend to fail entirely.
+This can usually be avoided by waiting a little while after login before
+letting the SCSI core send the INQUIRY.  The old sbp2 driver handles
+this more gracefully for as yet unknown reasons (perhaps because it
+waits for fetch agent resets to complete, unlike fw-sbp2 which quickly
+proceeds after requesting the agent reset).  Therefore the workaround is
+not as necessary for sbp2.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |   15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -32,6 +32,7 @@
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/mod_devicetable.h>
++#include <linux/delay.h>
+ #include <linux/device.h>
+ #include <linux/scatterlist.h>
+ #include <linux/dma-mapping.h>
+@@ -82,6 +83,9 @@ MODULE_PARM_DESC(exclusive_login, "Exclu
+  *   Avoids access beyond actual disk limits on devices with an off-by-one bug.
+  *   Don't use this with devices which don't have this bug.
+  *
++ * - delay inquiry
++ *   Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
++ *
+  * - override internal blacklist
+  *   Instead of adding to the built-in blacklist, use only the workarounds
+  *   specified in the module load parameter.
+@@ -91,6 +95,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclu
+ #define SBP2_WORKAROUND_INQUIRY_36	0x2
+ #define SBP2_WORKAROUND_MODE_SENSE_8	0x4
+ #define SBP2_WORKAROUND_FIX_CAPACITY	0x8
++#define SBP2_WORKAROUND_DELAY_INQUIRY	0x10
++#define SBP2_INQUIRY_DELAY		12
+ #define SBP2_WORKAROUND_OVERRIDE	0x100
+ 
+ static int sbp2_param_workarounds;
+@@ -100,6 +106,7 @@ MODULE_PARM_DESC(workarounds, "Work arou
+ 	", 36 byte inquiry = "    __stringify(SBP2_WORKAROUND_INQUIRY_36)
+ 	", skip mode page 8 = "   __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
+ 	", fix capacity = "       __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
++	", delay inquiry = "      __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
+ 	", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
+ 	", or a combination)");
+ 
+@@ -303,6 +310,11 @@ static const struct {
+ 		.workarounds		= SBP2_WORKAROUND_INQUIRY_36 |
+ 					  SBP2_WORKAROUND_MODE_SENSE_8,
+ 	},
++	/* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
++		.firmware_revision	= 0x002800,
++		.model			= 0x000000,
++		.workarounds		= SBP2_WORKAROUND_DELAY_INQUIRY,
++	},
+ 	/* Initio bridges, actually only needed for some older ones */ {
+ 		.firmware_revision	= 0x000200,
+ 		.model			= ~0,
+@@ -712,6 +724,9 @@ static void sbp2_login(struct work_struc
+ 	PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+ 	sbp2_agent_reset(lu);
+ 
++	if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
++		ssleep(SBP2_INQUIRY_DELAY);
++
+ 	memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
+ 	eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff;
+ 	eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff;
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Add the same workaround as found in fw-sbp2 for feature parity and
+compatibility of the workarounds module parameter.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/ieee1394/sbp2.c |   12 ++++++++++++
+ drivers/ieee1394/sbp2.h |    2 ++
+ 2 files changed, 14 insertions(+)
+
+Index: linux/drivers/ieee1394/sbp2.c
+===================================================================
+--- linux.orig/drivers/ieee1394/sbp2.c
++++ linux/drivers/ieee1394/sbp2.c
+@@ -183,6 +183,9 @@ MODULE_PARM_DESC(exclusive_login, "Exclu
+  *   Avoids access beyond actual disk limits on devices with an off-by-one bug.
+  *   Don't use this with devices which don't have this bug.
+  *
++ * - delay inquiry
++ *   Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
++ *
+  * - override internal blacklist
+  *   Instead of adding to the built-in blacklist, use only the workarounds
+  *   specified in the module load parameter.
+@@ -195,6 +198,7 @@ MODULE_PARM_DESC(workarounds, "Work arou
+ 	", 36 byte inquiry = "    __stringify(SBP2_WORKAROUND_INQUIRY_36)
+ 	", skip mode page 8 = "   __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
+ 	", fix capacity = "       __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
++	", delay inquiry = "      __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
+ 	", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
+ 	", or a combination)");
+ 
+@@ -357,6 +361,11 @@ static const struct {
+ 		.workarounds		= SBP2_WORKAROUND_INQUIRY_36 |
+ 					  SBP2_WORKAROUND_MODE_SENSE_8,
+ 	},
++	/* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
++		.firmware_revision	= 0x002800,
++		.model_id		= 0x000000,
++		.workarounds		= SBP2_WORKAROUND_DELAY_INQUIRY,
++	},
+ 	/* Initio bridges, actually only needed for some older ones */ {
+ 		.firmware_revision	= 0x000200,
+ 		.model_id		= SBP2_ROM_VALUE_WILDCARD,
+@@ -914,6 +923,9 @@ static int sbp2_start_device(struct sbp2
+ 	sbp2_agent_reset(lu, 1);
+ 	sbp2_max_speed_and_size(lu);
+ 
++	if (lu->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
++		ssleep(SBP2_INQUIRY_DELAY);
++
+ 	error = scsi_add_device(lu->shost, 0, lu->ud->id, 0);
+ 	if (error) {
+ 		SBP2_ERR("scsi_add_device failed");
+Index: linux/drivers/ieee1394/sbp2.h
+===================================================================
+--- linux.orig/drivers/ieee1394/sbp2.h
++++ linux/drivers/ieee1394/sbp2.h
+@@ -343,6 +343,8 @@ enum sbp2lu_state_types {
+ #define SBP2_WORKAROUND_INQUIRY_36	0x2
+ #define SBP2_WORKAROUND_MODE_SENSE_8	0x4
+ #define SBP2_WORKAROUND_FIX_CAPACITY	0x8
++#define SBP2_WORKAROUND_DELAY_INQUIRY	0x10
++#define SBP2_INQUIRY_DELAY		12
+ #define SBP2_WORKAROUND_OVERRIDE	0x100
+ 
+ #endif /* SBP2_H */
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Like the old sbp2 driver, wait for the write transaction to the
+AGENT_RESET to complete before proceeding (after login, after reconnect,
+or in SCSI error handling).
+
+There is one occasion where AGENT_RESET is written to from atomic
+context when getting DEAD status for a command ORB.  There we still
+continue without waiting for the transaction to complete because this
+is more difficult to fix...
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |   39 ++++++++++++++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 11 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -603,29 +603,46 @@ sbp2_send_management_orb(struct sbp2_log
+ 
+ static void
+ complete_agent_reset_write(struct fw_card *card, int rcode,
+-			   void *payload, size_t length, void *data)
++			   void *payload, size_t length, void *done)
+ {
+-	struct fw_transaction *t = data;
++	complete(done);
++}
++
++static void sbp2_agent_reset(struct sbp2_logical_unit *lu)
++{
++	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
++	DECLARE_COMPLETION_ONSTACK(done);
++	struct fw_transaction t;
++	static u32 z;
+ 
+-	kfree(t);
++	fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST,
++			lu->tgt->node_id, lu->generation, device->max_speed,
++			lu->command_block_agent_address + SBP2_AGENT_RESET,
++			&z, sizeof(z), complete_agent_reset_write, &done);
++	wait_for_completion(&done);
+ }
+ 
+-static int sbp2_agent_reset(struct sbp2_logical_unit *lu)
++static void
++complete_agent_reset_write_no_wait(struct fw_card *card, int rcode,
++				   void *payload, size_t length, void *data)
++{
++	kfree(data);
++}
++
++static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu)
+ {
+ 	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+ 	struct fw_transaction *t;
+-	static u32 zero;
++	static u32 z;
+ 
+-	t = kzalloc(sizeof(*t), GFP_ATOMIC);
++	t = kmalloc(sizeof(*t), GFP_ATOMIC);
+ 	if (t == NULL)
+-		return -ENOMEM;
++		return;
+ 
+ 	fw_send_request(device->card, t, TCODE_WRITE_QUADLET_REQUEST,
+ 			lu->tgt->node_id, lu->generation, device->max_speed,
+ 			lu->command_block_agent_address + SBP2_AGENT_RESET,
+-			&zero, sizeof(zero), complete_agent_reset_write, t);
+-
+-	return 0;
++			&z, sizeof(z), complete_agent_reset_write_no_wait, t);
+ }
+ 
+ static void sbp2_release_target(struct kref *kref)
+@@ -1110,7 +1127,7 @@ complete_command_orb(struct sbp2_orb *ba
+ 
+ 	if (status != NULL) {
+ 		if (STATUS_GET_DEAD(*status))
+-			sbp2_agent_reset(orb->lu);
++			sbp2_agent_reset_no_wait(orb->lu);
+ 
+ 		switch (STATUS_GET_RESPONSE(*status)) {
+ 		case SBP2_STATUS_REQUEST_COMPLETE:
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+for more easily readable logs if more than one SBP-2 device is present.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |   66 ++++++++++++++++++-------------------
+ 1 file changed, 33 insertions(+), 33 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -149,6 +149,7 @@ struct sbp2_target {
+ 	struct kref kref;
+ 	struct fw_unit *unit;
+ 	struct list_head lu_list;
++	const char *bus_id;
+ 
+ 	u64 management_agent_address;
+ 	int directory_id;
+@@ -566,20 +567,20 @@ sbp2_send_management_orb(struct sbp2_log
+ 
+ 	retval = -EIO;
+ 	if (sbp2_cancel_orbs(lu) == 0) {
+-		fw_error("orb reply timed out, rcode=0x%02x\n",
+-			 orb->base.rcode);
++		fw_error("%s: orb reply timed out, rcode=0x%02x\n",
++			 lu->tgt->bus_id, orb->base.rcode);
+ 		goto out;
+ 	}
+ 
+ 	if (orb->base.rcode != RCODE_COMPLETE) {
+-		fw_error("management write failed, rcode 0x%02x\n",
+-			 orb->base.rcode);
++		fw_error("%s: management write failed, rcode 0x%02x\n",
++			 lu->tgt->bus_id, orb->base.rcode);
+ 		goto out;
+ 	}
+ 
+ 	if (STATUS_GET_RESPONSE(orb->status) != 0 ||
+ 	    STATUS_GET_SBP_STATUS(orb->status) != 0) {
+-		fw_error("error status: %d:%d\n",
++		fw_error("%s: error status: %d:%d\n", lu->tgt->bus_id,
+ 			 STATUS_GET_RESPONSE(orb->status),
+ 			 STATUS_GET_SBP_STATUS(orb->status));
+ 		goto out;
+@@ -664,7 +665,7 @@ static void sbp2_release_target(struct k
+ 		kfree(lu);
+ 	}
+ 	scsi_remove_host(shost);
+-	fw_notify("released %s\n", tgt->unit->device.bus_id);
++	fw_notify("released %s\n", tgt->bus_id);
+ 
+ 	put_device(&tgt->unit->device);
+ 	scsi_host_put(shost);
+@@ -693,12 +694,11 @@ static void sbp2_login(struct work_struc
+ {
+ 	struct sbp2_logical_unit *lu =
+ 		container_of(work, struct sbp2_logical_unit, work.work);
+-	struct Scsi_Host *shost =
+-		container_of((void *)lu->tgt, struct Scsi_Host, hostdata[0]);
++	struct sbp2_target *tgt = lu->tgt;
++	struct fw_device *device = fw_device(tgt->unit->device.parent);
++	struct Scsi_Host *shost;
+ 	struct scsi_device *sdev;
+ 	struct scsi_lun eight_bytes_lun;
+-	struct fw_unit *unit = lu->tgt->unit;
+-	struct fw_device *device = fw_device(unit->device.parent);
+ 	struct sbp2_login_response response;
+ 	int generation, node_id, local_node_id;
+ 
+@@ -715,14 +715,14 @@ static void sbp2_login(struct work_struc
+ 		if (lu->retries++ < 5)
+ 			sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
+ 		else
+-			fw_error("failed to login to %s LUN %04x\n",
+-				 unit->device.bus_id, lu->lun);
++			fw_error("%s: failed to login to LUN %04x\n",
++				 tgt->bus_id, lu->lun);
+ 		goto out;
+ 	}
+ 
+-	lu->generation        = generation;
+-	lu->tgt->node_id      = node_id;
+-	lu->tgt->address_high = local_node_id << 16;
++	lu->generation    = generation;
++	tgt->node_id	  = node_id;
++	tgt->address_high = local_node_id << 16;
+ 
+ 	/* Get command block agent offset and login id. */
+ 	lu->command_block_agent_address =
+@@ -730,8 +730,8 @@ static void sbp2_login(struct work_struc
+ 		response.command_block_agent.low;
+ 	lu->login_id = LOGIN_RESPONSE_GET_LOGIN_ID(response);
+ 
+-	fw_notify("logged in to %s LUN %04x (%d retries)\n",
+-		  unit->device.bus_id, lu->lun, lu->retries);
++	fw_notify("%s: logged in to LUN %04x (%d retries)\n",
++		  tgt->bus_id, lu->lun, lu->retries);
+ 
+ #if 0
+ 	/* FIXME: The linux1394 sbp2 does this last step. */
+@@ -747,6 +747,7 @@ static void sbp2_login(struct work_struc
+ 	memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
+ 	eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff;
+ 	eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff;
++	shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+ 
+ 	sdev = __scsi_add_device(shost, 0, 0,
+ 				 scsilun_to_int(&eight_bytes_lun), lu);
+@@ -791,7 +792,7 @@ static void sbp2_login(struct work_struc
+ 	 */
+ 	PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+  out:
+-	sbp2_target_put(lu->tgt);
++	sbp2_target_put(tgt);
+ }
+ 
+ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
+@@ -874,7 +875,7 @@ static int sbp2_scan_unit_dir(struct sbp
+ 			if (timeout > tgt->mgt_orb_timeout)
+ 				fw_notify("%s: config rom contains %ds "
+ 					  "management ORB timeout, limiting "
+-					  "to %ds\n", tgt->unit->device.bus_id,
++					  "to %ds\n", tgt->bus_id,
+ 					  timeout / 1000,
+ 					  tgt->mgt_orb_timeout / 1000);
+ 			break;
+@@ -902,7 +903,7 @@ static void sbp2_init_workarounds(struct
+ 	if (w)
+ 		fw_notify("Please notify linux1394-devel at lists.sourceforge.net "
+ 			  "if you need the workarounds parameter for %s\n",
+-			  tgt->unit->device.bus_id);
++			  tgt->bus_id);
+ 
+ 	if (w & SBP2_WORKAROUND_OVERRIDE)
+ 		goto out;
+@@ -924,8 +925,7 @@ static void sbp2_init_workarounds(struct
+ 	if (w)
+ 		fw_notify("Workarounds for %s: 0x%x "
+ 			  "(firmware_revision 0x%06x, model_id 0x%06x)\n",
+-			  tgt->unit->device.bus_id,
+-			  w, firmware_revision, model);
++			  tgt->bus_id, w, firmware_revision, model);
+ 	tgt->workarounds = w;
+ }
+ 
+@@ -949,6 +949,7 @@ static int sbp2_probe(struct device *dev
+ 	tgt->unit = unit;
+ 	kref_init(&tgt->kref);
+ 	INIT_LIST_HEAD(&tgt->lu_list);
++	tgt->bus_id = unit->device.bus_id;
+ 
+ 	if (fw_device_enable_phys_dma(device) < 0)
+ 		goto fail_shost_put;
+@@ -999,8 +1000,8 @@ static void sbp2_reconnect(struct work_s
+ {
+ 	struct sbp2_logical_unit *lu =
+ 		container_of(work, struct sbp2_logical_unit, work.work);
+-	struct fw_unit *unit = lu->tgt->unit;
+-	struct fw_device *device = fw_device(unit->device.parent);
++	struct sbp2_target *tgt = lu->tgt;
++	struct fw_device *device = fw_device(tgt->unit->device.parent);
+ 	int generation, node_id, local_node_id;
+ 
+ 	if (fw_device_is_shutdown(device))
+@@ -1015,8 +1016,7 @@ static void sbp2_reconnect(struct work_s
+ 				     SBP2_RECONNECT_REQUEST,
+ 				     lu->login_id, NULL) < 0) {
+ 		if (lu->retries++ >= 5) {
+-			fw_error("failed to reconnect to %s\n",
+-				 unit->device.bus_id);
++			fw_error("%s: failed to reconnect\n", tgt->bus_id);
+ 			/* Fall back and try to log in again. */
+ 			lu->retries = 0;
+ 			PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+@@ -1025,17 +1025,17 @@ static void sbp2_reconnect(struct work_s
+ 		goto out;
+ 	}
+ 
+-	lu->generation        = generation;
+-	lu->tgt->node_id      = node_id;
+-	lu->tgt->address_high = local_node_id << 16;
++	lu->generation    = generation;
++	tgt->node_id      = node_id;
++	tgt->address_high = local_node_id << 16;
+ 
+-	fw_notify("reconnected to %s LUN %04x (%d retries)\n",
+-		  unit->device.bus_id, lu->lun, lu->retries);
++	fw_notify("%s: reconnected to LUN %04x (%d retries)\n",
++		  tgt->bus_id, lu->lun, lu->retries);
+ 
+ 	sbp2_agent_reset(lu);
+ 	sbp2_cancel_orbs(lu);
+  out:
+-	sbp2_target_put(lu->tgt);
++	sbp2_target_put(tgt);
+ }
+ 
+ static void sbp2_update(struct fw_unit *unit)
+@@ -1377,7 +1377,7 @@ static int sbp2_scsi_abort(struct scsi_c
+ {
+ 	struct sbp2_logical_unit *lu = cmd->device->hostdata;
+ 
+-	fw_notify("sbp2_scsi_abort\n");
++	fw_notify("%s: sbp2_scsi_abort\n", lu->tgt->bus_id);
+ 	sbp2_agent_reset(lu);
+ 	sbp2_cancel_orbs(lu);
+ 
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+When a reconnect failed but re-login succeeded, __scsi_add_device was
+called again.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -741,6 +741,12 @@ static void sbp2_login(struct work_struc
+ 	PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+ 	sbp2_agent_reset(lu);
+ 
++	/* This was a re-login. */
++	if (lu->sdev) {
++		sbp2_cancel_orbs(lu);
++		goto out;
++	}
++
+ 	if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
+ 		ssleep(SBP2_INQUIRY_DELAY);
+ 
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+If fw-sbp2 was too late with requesting the reconnect, the target would
+reject this.  In this case, log out before attempting the reconnect.
+Else several firmwares will deny the re-login because they somehow
+didn't invalidate the old login.
+
+Also, don't retry reconnects in this situation.  The retries won't
+succeed either.
+
+These changes improve chances for successful re-login and shorten the
+period during which the logical unit is inaccessible.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |   17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -710,6 +710,11 @@ static void sbp2_login(struct work_struc
+ 	node_id       = device->node_id;
+ 	local_node_id = device->card->node_id;
+ 
++	/* If this is a re-login attempt, log out, or we might be rejected. */
++	if (lu->sdev)
++		sbp2_send_management_orb(lu, device->node_id, generation,
++				SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
++
+ 	if (sbp2_send_management_orb(lu, node_id, generation,
+ 				SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) {
+ 		if (lu->retries++ < 5)
+@@ -1021,9 +1026,17 @@ static void sbp2_reconnect(struct work_s
+ 	if (sbp2_send_management_orb(lu, node_id, generation,
+ 				     SBP2_RECONNECT_REQUEST,
+ 				     lu->login_id, NULL) < 0) {
+-		if (lu->retries++ >= 5) {
++		/*
++		 * If reconnect was impossible even though we are in the
++		 * current generation, fall back and try to log in again.
++		 *
++		 * We could check for "Function rejected" status, but
+		 * looking at the bus generation is simpler and more general.
++		 */
++		smp_rmb(); /* get current card generation */
++		if (generation == device->card->generation ||
++		    lu->retries++ >= 5) {
+ 			fw_error("%s: failed to reconnect\n", tgt->bus_id);
+-			/* Fall back and try to log in again. */
+ 			lu->retries = 0;
+ 			PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+ 		}
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -28,15 +28,15 @@
+  * and many others.
+  */
+ 
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/dma-mapping.h>
+ #include <linux/kernel.h>
++#include <linux/mod_devicetable.h>
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+-#include <linux/mod_devicetable.h>
+-#include <linux/delay.h>
+-#include <linux/device.h>
+ #include <linux/scatterlist.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/blkdev.h>
+ #include <linux/string.h>
+ #include <linux/stringify.h>
+ #include <linux/timer.h>
+@@ -48,9 +48,9 @@
+ #include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+ 
+-#include "fw-transaction.h"
+-#include "fw-topology.h"
+ #include "fw-device.h"
++#include "fw-topology.h"
++#include "fw-transaction.h"
+ 
+ /*
+  * So far only bridges from Oxford Semiconductor are known to support
 
 -- 
 Stefan Richter
--=====-==--- ---= ==-==
+-=====-==--- --=- ---==
 http://arcgraph.de/sr/
 
 
+While fw-sbp2 takes the necessary time to reconnect to a logical unit
+after bus reset, the SCSI core keeps sending new commands.  They are all
+immediately completed with host busy status, and application clients or
+filesystems will break quickly.  The SCSI device might even be taken
+offline:  http://bugzilla.kernel.org/show_bug.cgi?id=9734
+
+The only remedy seems to be to block the SCSI device until reconnect.
+Alas, the SCSI core has no useful API to block only one logical unit, i.e.
+the scsi_device; therefore we block the entire Scsi_Host.  This
+currently corresponds to an SBP-2 target.  In case of targets with
+multiple logical units, we need to satisfy the dependencies between
+logical units by carefully tracking the blocking state of the target and
+its units.  We block all logical units of a target as soon as one of
+them needs to be blocked, and keep them blocked until all of them are
+ready to be unblocked.
+
+Furthermore, as the history of the old sbp2 driver has shown, the
+scsi_block_requests() API is a minefield with high potential of
+deadlocks.  We therefore take extra measures to keep logical units
+unblocked during __scsi_add_device() and during shutdown.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c |   71 +++++++++++++++++++++++++++++++++++--
+ 1 file changed, 69 insertions(+), 2 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -41,6 +41,8 @@
+ #include <linux/stringify.h>
+ #include <linux/timer.h>
+ #include <linux/workqueue.h>
++#include <asm/atomic.h>
++#include <asm/system.h>
+ 
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+@@ -139,6 +140,7 @@ struct sbp2_logical_unit {
+ 	int generation;
+ 	int retries;
+ 	struct delayed_work work;
++	atomic_t blocked;
+ };
+ 
+ /*
+@@ -157,6 +159,9 @@ struct sbp2_target {
+ 	int address_high;
+ 	unsigned int workarounds;
+ 	unsigned int mgt_orb_timeout;
++
++	atomic_t dont_block;
++	atomic_t blocked;
+ };
+ 
+ /*
+@@ -646,6 +651,53 @@ static void sbp2_agent_reset_no_wait(str
+ 			&z, sizeof(z), complete_agent_reset_write_no_wait, t);
+ }
+ 
++/*
++ * Blocks lu->tgt if all of the following conditions are met:
++ *   - Login, INQUIRY, and high-level SCSI setup of all logical units of the
++ *     target have been successfully finished (indicated by dont_block == 0).
++ *   - The lu->generation is stale.  sbp2_reconnect will unblock lu later.
++ */
++static void sbp2_conditionally_block(struct sbp2_logical_unit *lu)
++{
++	struct fw_card *card = fw_device(lu->tgt->unit->device.parent)->card;
++
++	if (!atomic_read(&lu->tgt->dont_block) &&
++	    lu->generation != card->generation &&
++	    atomic_cmpxchg(&lu->blocked, 0, 1) == 0) {
++
++		/* raise the block count of the target */
++		if (atomic_inc_return(&lu->tgt->blocked) == 1) {
++			scsi_block_requests(lu->sdev->host);
++			fw_notify("blocked %s\n", lu->tgt->bus_id);
++		}
++	}
++}
++
++/* Unblocks lu->tgt as soon as all its logical units can be unblocked. */
++static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu)
++{
++	if (atomic_cmpxchg(&lu->blocked, 1, 0) == 1) {
++
++		/* lower the block count of the target */
++		if (atomic_dec_and_test(&lu->tgt->blocked)) {
++			scsi_unblock_requests(lu->sdev->host);
++			fw_notify("unblocked %s\n", lu->tgt->bus_id);
++		}
++	}
++}
++
++
++/* Prevents future blocking of tgt and then unblocks it. */
++static void sbp2_unblock(struct sbp2_target *tgt)
++{
++	struct Scsi_Host *shost =
++		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
++
++	atomic_inc(&tgt->dont_block);
++	smp_wmb();
++	scsi_unblock_requests(shost);
++}
++
+ static void sbp2_release_target(struct kref *kref)
+ {
+ 	struct sbp2_target *tgt = container_of(kref, struct sbp2_target, kref);
+@@ -653,6 +705,12 @@ static void sbp2_release_target(struct k
+ 	struct Scsi_Host *shost =
+ 		container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+ 
++	/*
++	 * Make sure that the target is unblocked and won't be blocked anymore
++	 * before scsi_remove_device() is called.  Else it will deadlock.
++	 */
++	sbp2_unblock(tgt);
++
+ 	list_for_each_entry_safe(lu, next, &tgt->lu_list, link) {
+ 		if (lu->sdev)
+ 			scsi_remove_device(lu->sdev);
+@@ -717,11 +775,14 @@ static void sbp2_login(struct work_struc
+ 
+ 	if (sbp2_send_management_orb(lu, node_id, generation,
+ 				SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) {
+-		if (lu->retries++ < 5)
++		if (lu->retries++ < 5) {
+ 			sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
+-		else
++		} else {
+ 			fw_error("%s: failed to login to LUN %04x\n",
+ 				 tgt->bus_id, lu->lun);
++			/* Let any waiting I/O fail from now on. */
++			sbp2_unblock(lu->tgt);
++		}
+ 		goto out;
+ 	}
+ 
+@@ -749,6 +810,7 @@ static void sbp2_login(struct work_struc
+ 	/* This was a re-login. */
+ 	if (lu->sdev) {
+ 		sbp2_cancel_orbs(lu);
++		sbp2_conditionally_unblock(lu);
+ 		goto out;
+ 	}
+ 
+@@ -786,6 +848,8 @@ static void sbp2_login(struct work_struc
+ 		 * Can you believe it?  Everything went well.
+ 		 */
+ 		lu->sdev = sdev;
++		smp_wmb();  /* We need lu->sdev when we want to block lu. */
++		atomic_dec(&lu->tgt->dont_block);
+ 		scsi_device_put(sdev);
+ 		goto out;
+ 	}
+@@ -828,6 +892,7 @@ static int sbp2_add_logical_unit(struct 
+ 	lu->sdev = NULL;
+ 	lu->lun  = lun_entry & 0xffff;
+ 	lu->retries = 0;
++	atomic_inc(&tgt->dont_block);
+ 	INIT_LIST_HEAD(&lu->orb_list);
+ 	INIT_DELAYED_WORK(&lu->work, sbp2_login);
+ 
+@@ -1053,6 +1118,7 @@ static void sbp2_reconnect(struct work_s
+ 
+ 	sbp2_agent_reset(lu);
+ 	sbp2_cancel_orbs(lu);
++	sbp2_conditionally_unblock(lu);
+  out:
+ 	sbp2_target_put(tgt);
+ }
+@@ -1172,6 +1238,7 @@ complete_command_orb(struct sbp2_orb *ba
+ 		 * or when sending the write (less likely).
+ 		 */
+ 		result = DID_BUS_BUSY << 16;
++		sbp2_conditionally_block(orb->lu);
+ 	}
+ 
+ 	dma_unmap_single(device->card->device, orb->base.request_bus,
+
+-- 
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+




More information about the fedora-extras-commits mailing list