rpms/kernel/devel kernel.spec, 1.405, 1.406 linux-2.6-firewire-git-pending.patch, 1.1, 1.2
Jarod Wilson (jwilson)
fedora-extras-commits at redhat.com
Wed Feb 6 05:24:09 UTC 2008
Author: jwilson
Update of /cvs/pkgs/rpms/kernel/devel
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv22464
Modified Files:
kernel.spec linux-2.6-firewire-git-pending.patch
Log Message:
* Tue Feb 05 2008 Jarod Wilson <jwilson at redhat.com>
- Make FireWire I/O survive bus resets and device
reconnections better
Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/kernel.spec,v
retrieving revision 1.405
retrieving revision 1.406
diff -u -r1.405 -r1.406
--- kernel.spec 5 Feb 2008 19:24:07 -0000 1.405
+++ kernel.spec 6 Feb 2008 05:23:33 -0000 1.406
@@ -1789,6 +1789,10 @@
%kernel_variant_files -a /%{image_install_path}/xen*-%{KVERREL} -e /etc/ld.so.conf.d/kernelcap-%{KVERREL}.conf %{with_xen} xen
%changelog
+* Tue Feb 05 2008 Jarod Wilson <jwilson at redhat.com>
+- Make FireWire I/O survive bus resets and device
+ reconnections better
+
* Tue Feb 05 2008 Chuck Ebbert <cebbert at redhat.com>
- atl2 network driver 2.0.4
- ASUS Eeepc ACPI hotkey driver
linux-2.6-firewire-git-pending.patch:
Index: linux-2.6-firewire-git-pending.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/devel/linux-2.6-firewire-git-pending.patch,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- linux-2.6-firewire-git-pending.patch 28 Jan 2008 22:37:22 -0000 1.1
+++ linux-2.6-firewire-git-pending.patch 6 Feb 2008 05:23:33 -0000 1.2
@@ -103,28 +103,35 @@
attempt to add a device with the same name of an already existing
device. http://bugzilla.kernel.org/show_bug.cgi?id=9828
-Impact of the bug: Happens rarely, forces the user to unplug and replug
-the new device to get it working.
-
-The fix moves deregistration of the minor number and device_unregister()
-into a common rw_sem protected section.
-
-We also move the ref count increment from fw_device_op_open into an
-rw_sem protected section with the lookup of the device, so that the
-device pointer can't become invalid between lookup and usage.
+Impact of the bug: Happens rarely (when shutdown of a device coincides
+with creation of another), forces the user to unplug and replug the new
+device to get it working.
+
+The fix is obvious: Free the minor number *after* instead of *before*
+device_unregister(). This requires taking an additional reference to
+the fw_device for as long as the IDR tree points to it.
+
+And while we are at it, we fix an additional race condition:
+fw_device_op_open() took its reference of the fw_device a little bit too
+late, hence was in danger of accessing an already invalid fw_device.
Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
-Signed-off-by: Jarod Wilson <jwilson at redhat.com>
---
- drivers/firewire/fw-cdev.c | 6 ++++--
- drivers/firewire/fw-device.c | 10 ++++++----
- 2 files changed, 10 insertions(+), 6 deletions(-)
+ drivers/firewire/fw-cdev.c | 8 +++++---
+ drivers/firewire/fw-device.c | 20 ++++++++++++++------
+ drivers/firewire/fw-device.h | 2 +-
+ 3 files changed, 20 insertions(+), 10 deletions(-)
Index: linux/drivers/firewire/fw-device.c
===================================================================
--- linux.orig/drivers/firewire/fw-device.c
+++ linux/drivers/firewire/fw-device.c
-@@ -614,10 +614,12 @@ struct fw_device *fw_device_from_devt(de
+@@ -610,12 +610,14 @@ static DECLARE_RWSEM(idr_rwsem);
+ static DEFINE_IDR(fw_device_idr);
+ int fw_cdev_major;
+
+-struct fw_device *fw_device_from_devt(dev_t devt)
++struct fw_device *fw_device_get_by_devt(dev_t devt)
{
struct fw_device *device;
@@ -135,11 +142,7 @@
up_read(&idr_rwsem);
return device;
- }
-
-@@ -625,17 +627,17 @@ static void fw_device_shutdown(struct wo
- {
- struct fw_device *device =
+@@ -627,13 +629,14 @@ static void fw_device_shutdown(struct wo
container_of(work, struct fw_device, work.work);
int minor = MINOR(device->device.devt);
@@ -149,22 +152,50 @@
-
fw_device_cdev_remove(device);
device_for_each_child(&device->device, NULL, shutdown_unit);
+ device_unregister(&device->device);
+
+ down_write(&idr_rwsem);
- device_unregister(&device->device);
+ idr_remove(&fw_device_idr, minor);
+ up_write(&idr_rwsem);
++ fw_device_put(device);
}
static struct device_type fw_device_type = {
- .release = fw_device_release,
- };
+@@ -682,10 +685,13 @@ static void fw_device_init(struct work_s
+ }
+
+ err = -ENOMEM;
++
++ fw_device_get(device);
+ down_write(&idr_rwsem);
+ if (idr_pre_get(&fw_device_idr, GFP_KERNEL))
+ err = idr_get_new(&fw_device_idr, device, &minor);
+ up_write(&idr_rwsem);
++
+ if (err < 0)
+ goto error;
+
+@@ -741,7 +747,9 @@ static void fw_device_init(struct work_s
+ idr_remove(&fw_device_idr, minor);
+ up_write(&idr_rwsem);
+ error:
+- put_device(&device->device);
++ fw_device_put(device); /* fw_device_idr's reference */
++
++ put_device(&device->device); /* our reference */
+ }
+
+ static int update_unit(struct device *dev, void *data)
Index: linux/drivers/firewire/fw-cdev.c
===================================================================
--- linux.orig/drivers/firewire/fw-cdev.c
+++ linux/drivers/firewire/fw-cdev.c
-@@ -112,14 +112,16 @@ static int fw_device_op_open(struct inod
- device = fw_device_from_devt(inode->i_rdev);
+@@ -109,15 +109,17 @@ static int fw_device_op_open(struct inod
+ struct client *client;
+ unsigned long flags;
+
+- device = fw_device_from_devt(inode->i_rdev);
++ device = fw_device_get_by_devt(inode->i_rdev);
if (device == NULL)
return -ENODEV;
@@ -180,50 +211,887 @@
INIT_LIST_HEAD(&client->event_list);
INIT_LIST_HEAD(&client->resource_list);
spin_lock_init(&client->lock);
- init_waitqueue_head(&client->wait);
+Index: linux/drivers/firewire/fw-device.h
+===================================================================
+--- linux.orig/drivers/firewire/fw-device.h
++++ linux/drivers/firewire/fw-device.h
+@@ -77,13 +77,13 @@ fw_device_is_shutdown(struct fw_device *
+ }
+
+ struct fw_device *fw_device_get(struct fw_device *device);
++struct fw_device *fw_device_get_by_devt(dev_t devt);
+ void fw_device_put(struct fw_device *device);
+ int fw_device_enable_phys_dma(struct fw_device *device);
+ void fw_device_cdev_update(struct fw_device *device);
+ void fw_device_cdev_remove(struct fw_device *device);
+
+-struct fw_device *fw_device_from_devt(dev_t devt);
+ extern int fw_cdev_major;
+
+ struct fw_unit {
--
Stefan Richter
--=====-==--- ---= ==-==
+-=====-==--- --=- ---=-
http://arcgraph.de/sr/
-Scenario: Process A keeps the character device file of node N open.
-N is being unplugged. File /dev/fwN won't be destroyed as long as A
-doesn't close it. Now, process B opens /dev/fwN as well. Previously
-it would succeed but be unable to do any IO on it of course. With this
-patch, process B's open() will fail immediately with -ENODEV.
+This should help to interpret user reports. E.g. one can look up the
+vendor OUI (first three bytes of the GUID) and thus tell what is what.
+
+Also simplifies the math in the GUID sysfs attribute.
Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
-Signed-off-by: Jarod Wilson <jwilson at redhat.com>
---
- drivers/firewire/fw-device.c | 8 ++++++--
- 1 file changed, 6 insertions(+), 2 deletions(-)
+ drivers/firewire/fw-device.c | 28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
Index: linux/drivers/firewire/fw-device.c
===================================================================
--- linux.orig/drivers/firewire/fw-device.c
+++ linux/drivers/firewire/fw-device.c
-@@ -616,8 +616,12 @@ struct fw_device *fw_device_from_devt(de
+@@ -358,12 +358,9 @@ static ssize_t
+ guid_show(struct device *dev, struct device_attribute *attr, char *buf)
+ {
+ struct fw_device *device = fw_device(dev);
+- u64 guid;
- down_read(&idr_rwsem);
- device = idr_find(&fw_device_idr, MINOR(devt));
-- if (device)
-- fw_device_get(device);
-+ if (device) {
-+ if (fw_device_is_shutdown(device))
-+ device = NULL;
+- guid = ((u64)device->config_rom[3] << 32) | device->config_rom[4];
+-
+- return snprintf(buf, PAGE_SIZE, "0x%016llx\n",
+- (unsigned long long)guid);
++ return snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
++ device->config_rom[3], device->config_rom[4]);
+ }
+
+ static struct device_attribute fw_device_attributes[] = {
+@@ -723,13 +720,22 @@ static void fw_device_init(struct work_s
+ */
+ if (atomic_cmpxchg(&device->state,
+ FW_DEVICE_INITIALIZING,
+- FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN)
++ FW_DEVICE_RUNNING) == FW_DEVICE_SHUTDOWN) {
+ fw_device_shutdown(&device->work.work);
+- else
+- fw_notify("created new fw device %s "
+- "(%d config rom retries, S%d00)\n",
+- device->device.bus_id, device->config_rom_retries,
+- 1 << device->max_speed);
++ } else {
++ if (device->config_rom_retries)
++ fw_notify("created device %s: GUID %08x%08x, S%d00, "
++ "%d config ROM retries\n",
++ device->device.bus_id,
++ device->config_rom[3], device->config_rom[4],
++ 1 << device->max_speed,
++ device->config_rom_retries);
+ else
-+ fw_device_get(device);
++ fw_notify("created device %s: GUID %08x%08x, S%d00\n",
++ device->device.bus_id,
++ device->config_rom[3], device->config_rom[4],
++ 1 << device->max_speed);
+ }
- up_read(&idr_rwsem);
- return device;
+ /*
+ * Reschedule the IRM work if we just finished reading the
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Several different SBP-2 bridges accept a login early while the IDE
+device is still powering up. They are therefore unable to respond to
+SCSI INQUIRY immediately, and the SCSI core has to retry the INQUIRY.
+One of these retries is typically successful, and all is well.
+
+But in case of Momobay FX-3A, the INQUIRY retries tend to fail entirely.
+This can usually be avoided by waiting a little while after login before
+letting the SCSI core send the INQUIRY. The old sbp2 driver handles
+this more gracefully for as yet unknown reasons (perhaps because it
+waits for fetch agent resets to complete, unlike fw-sbp2 which quickly
+proceeds after requesting the agent reset). Therefore the workaround is
+not as necessary for sbp2.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -32,6 +32,7 @@
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/mod_devicetable.h>
++#include <linux/delay.h>
+ #include <linux/device.h>
+ #include <linux/scatterlist.h>
+ #include <linux/dma-mapping.h>
+@@ -82,6 +83,9 @@ MODULE_PARM_DESC(exclusive_login, "Exclu
+ * Avoids access beyond actual disk limits on devices with an off-by-one bug.
+ * Don't use this with devices which don't have this bug.
+ *
++ * - delay inquiry
++ * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
++ *
+ * - override internal blacklist
+ * Instead of adding to the built-in blacklist, use only the workarounds
+ * specified in the module load parameter.
+@@ -91,6 +95,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclu
+ #define SBP2_WORKAROUND_INQUIRY_36 0x2
+ #define SBP2_WORKAROUND_MODE_SENSE_8 0x4
+ #define SBP2_WORKAROUND_FIX_CAPACITY 0x8
++#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10
++#define SBP2_INQUIRY_DELAY 12
+ #define SBP2_WORKAROUND_OVERRIDE 0x100
+
+ static int sbp2_param_workarounds;
+@@ -100,6 +106,7 @@ MODULE_PARM_DESC(workarounds, "Work arou
+ ", 36 byte inquiry = " __stringify(SBP2_WORKAROUND_INQUIRY_36)
+ ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
+ ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
++ ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
+ ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
+ ", or a combination)");
+
+@@ -303,6 +310,11 @@ static const struct {
+ .workarounds = SBP2_WORKAROUND_INQUIRY_36 |
+ SBP2_WORKAROUND_MODE_SENSE_8,
+ },
++ /* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
++ .firmware_revision = 0x002800,
++ .model = 0x000000,
++ .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY,
++ },
+ /* Initio bridges, actually only needed for some older ones */ {
+ .firmware_revision = 0x000200,
+ .model = ~0,
+@@ -712,6 +724,9 @@ static void sbp2_login(struct work_struc
+ PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+ sbp2_agent_reset(lu);
+
++ if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
++ ssleep(SBP2_INQUIRY_DELAY);
++
+ memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
+ eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff;
+ eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff;
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Add the same workaround as found in fw-sbp2 for feature parity and
+compatibility of the workarounds module parameter.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/ieee1394/sbp2.c | 12 ++++++++++++
+ drivers/ieee1394/sbp2.h | 2 ++
+ 2 files changed, 14 insertions(+)
+
+Index: linux/drivers/ieee1394/sbp2.c
+===================================================================
+--- linux.orig/drivers/ieee1394/sbp2.c
++++ linux/drivers/ieee1394/sbp2.c
+@@ -183,6 +183,9 @@ MODULE_PARM_DESC(exclusive_login, "Exclu
+ * Avoids access beyond actual disk limits on devices with an off-by-one bug.
+ * Don't use this with devices which don't have this bug.
+ *
++ * - delay inquiry
++ * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
++ *
+ * - override internal blacklist
+ * Instead of adding to the built-in blacklist, use only the workarounds
+ * specified in the module load parameter.
+@@ -195,6 +198,7 @@ MODULE_PARM_DESC(workarounds, "Work arou
+ ", 36 byte inquiry = " __stringify(SBP2_WORKAROUND_INQUIRY_36)
+ ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
+ ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
++ ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
+ ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
+ ", or a combination)");
+
+@@ -357,6 +361,11 @@ static const struct {
+ .workarounds = SBP2_WORKAROUND_INQUIRY_36 |
+ SBP2_WORKAROUND_MODE_SENSE_8,
+ },
++ /* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
++ .firmware_revision = 0x002800,
++ .model_id = 0x000000,
++ .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY,
++ },
+ /* Initio bridges, actually only needed for some older ones */ {
+ .firmware_revision = 0x000200,
+ .model_id = SBP2_ROM_VALUE_WILDCARD,
+@@ -914,6 +923,9 @@ static int sbp2_start_device(struct sbp2
+ sbp2_agent_reset(lu, 1);
+ sbp2_max_speed_and_size(lu);
+
++ if (lu->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
++ ssleep(SBP2_INQUIRY_DELAY);
++
+ error = scsi_add_device(lu->shost, 0, lu->ud->id, 0);
+ if (error) {
+ SBP2_ERR("scsi_add_device failed");
+Index: linux/drivers/ieee1394/sbp2.h
+===================================================================
+--- linux.orig/drivers/ieee1394/sbp2.h
++++ linux/drivers/ieee1394/sbp2.h
+@@ -343,6 +343,8 @@ enum sbp2lu_state_types {
+ #define SBP2_WORKAROUND_INQUIRY_36 0x2
+ #define SBP2_WORKAROUND_MODE_SENSE_8 0x4
+ #define SBP2_WORKAROUND_FIX_CAPACITY 0x8
++#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10
++#define SBP2_INQUIRY_DELAY 12
+ #define SBP2_WORKAROUND_OVERRIDE 0x100
+
+ #endif /* SBP2_H */
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Like the old sbp2 driver, wait for the write transaction to the
+AGENT_RESET to complete before proceeding (after login, after reconnect,
+or in SCSI error handling).
+
+There is one occasion where AGENT_RESET is written to from atomic
+context when getting DEAD status for a command ORB. There we still
+continue without waiting for the transaction to complete because this
+is more difficult to fix...
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 39 ++++++++++++++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 11 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -603,29 +603,46 @@ sbp2_send_management_orb(struct sbp2_log
+
+ static void
+ complete_agent_reset_write(struct fw_card *card, int rcode,
+- void *payload, size_t length, void *data)
++ void *payload, size_t length, void *done)
+ {
+- struct fw_transaction *t = data;
++ complete(done);
++}
++
++static void sbp2_agent_reset(struct sbp2_logical_unit *lu)
++{
++ struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
++ DECLARE_COMPLETION_ONSTACK(done);
++ struct fw_transaction t;
++ static u32 z;
+
+- kfree(t);
++ fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST,
++ lu->tgt->node_id, lu->generation, device->max_speed,
++ lu->command_block_agent_address + SBP2_AGENT_RESET,
++ &z, sizeof(z), complete_agent_reset_write, &done);
++ wait_for_completion(&done);
+ }
+
+-static int sbp2_agent_reset(struct sbp2_logical_unit *lu)
++static void
++complete_agent_reset_write_no_wait(struct fw_card *card, int rcode,
++ void *payload, size_t length, void *data)
++{
++ kfree(data);
++}
++
++static void sbp2_agent_reset_no_wait(struct sbp2_logical_unit *lu)
+ {
+ struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+ struct fw_transaction *t;
+- static u32 zero;
++ static u32 z;
+
+- t = kzalloc(sizeof(*t), GFP_ATOMIC);
++ t = kmalloc(sizeof(*t), GFP_ATOMIC);
+ if (t == NULL)
+- return -ENOMEM;
++ return;
+
+ fw_send_request(device->card, t, TCODE_WRITE_QUADLET_REQUEST,
+ lu->tgt->node_id, lu->generation, device->max_speed,
+ lu->command_block_agent_address + SBP2_AGENT_RESET,
+- &zero, sizeof(zero), complete_agent_reset_write, t);
+-
+- return 0;
++ &z, sizeof(z), complete_agent_reset_write_no_wait, t);
+ }
+
+ static void sbp2_release_target(struct kref *kref)
+@@ -1110,7 +1127,7 @@ complete_command_orb(struct sbp2_orb *ba
+
+ if (status != NULL) {
+ if (STATUS_GET_DEAD(*status))
+- sbp2_agent_reset(orb->lu);
++ sbp2_agent_reset_no_wait(orb->lu);
+
+ switch (STATUS_GET_RESPONSE(*status)) {
+ case SBP2_STATUS_REQUEST_COMPLETE:
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+for more easily readable logs if more than one SBP-2 device is present.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 66 ++++++++++++++++++-------------------
+ 1 file changed, 33 insertions(+), 33 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -149,6 +149,7 @@ struct sbp2_target {
+ struct kref kref;
+ struct fw_unit *unit;
+ struct list_head lu_list;
++ const char *bus_id;
+
+ u64 management_agent_address;
+ int directory_id;
+@@ -566,20 +567,20 @@ sbp2_send_management_orb(struct sbp2_log
+
+ retval = -EIO;
+ if (sbp2_cancel_orbs(lu) == 0) {
+- fw_error("orb reply timed out, rcode=0x%02x\n",
+- orb->base.rcode);
++ fw_error("%s: orb reply timed out, rcode=0x%02x\n",
++ lu->tgt->bus_id, orb->base.rcode);
+ goto out;
+ }
+
+ if (orb->base.rcode != RCODE_COMPLETE) {
+- fw_error("management write failed, rcode 0x%02x\n",
+- orb->base.rcode);
++ fw_error("%s: management write failed, rcode 0x%02x\n",
++ lu->tgt->bus_id, orb->base.rcode);
+ goto out;
+ }
+
+ if (STATUS_GET_RESPONSE(orb->status) != 0 ||
+ STATUS_GET_SBP_STATUS(orb->status) != 0) {
+- fw_error("error status: %d:%d\n",
++ fw_error("%s: error status: %d:%d\n", lu->tgt->bus_id,
+ STATUS_GET_RESPONSE(orb->status),
+ STATUS_GET_SBP_STATUS(orb->status));
+ goto out;
+@@ -664,7 +665,7 @@ static void sbp2_release_target(struct k
+ kfree(lu);
+ }
+ scsi_remove_host(shost);
+- fw_notify("released %s\n", tgt->unit->device.bus_id);
++ fw_notify("released %s\n", tgt->bus_id);
+
+ put_device(&tgt->unit->device);
+ scsi_host_put(shost);
+@@ -693,12 +694,11 @@ static void sbp2_login(struct work_struc
+ {
+ struct sbp2_logical_unit *lu =
+ container_of(work, struct sbp2_logical_unit, work.work);
+- struct Scsi_Host *shost =
+- container_of((void *)lu->tgt, struct Scsi_Host, hostdata[0]);
++ struct sbp2_target *tgt = lu->tgt;
++ struct fw_device *device = fw_device(tgt->unit->device.parent);
++ struct Scsi_Host *shost;
+ struct scsi_device *sdev;
+ struct scsi_lun eight_bytes_lun;
+- struct fw_unit *unit = lu->tgt->unit;
+- struct fw_device *device = fw_device(unit->device.parent);
+ struct sbp2_login_response response;
+ int generation, node_id, local_node_id;
+
+@@ -715,14 +715,14 @@ static void sbp2_login(struct work_struc
+ if (lu->retries++ < 5)
+ sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
+ else
+- fw_error("failed to login to %s LUN %04x\n",
+- unit->device.bus_id, lu->lun);
++ fw_error("%s: failed to login to LUN %04x\n",
++ tgt->bus_id, lu->lun);
+ goto out;
+ }
+
+- lu->generation = generation;
+- lu->tgt->node_id = node_id;
+- lu->tgt->address_high = local_node_id << 16;
++ lu->generation = generation;
++ tgt->node_id = node_id;
++ tgt->address_high = local_node_id << 16;
+
+ /* Get command block agent offset and login id. */
+ lu->command_block_agent_address =
+@@ -730,8 +730,8 @@ static void sbp2_login(struct work_struc
+ response.command_block_agent.low;
+ lu->login_id = LOGIN_RESPONSE_GET_LOGIN_ID(response);
+
+- fw_notify("logged in to %s LUN %04x (%d retries)\n",
+- unit->device.bus_id, lu->lun, lu->retries);
++ fw_notify("%s: logged in to LUN %04x (%d retries)\n",
++ tgt->bus_id, lu->lun, lu->retries);
+
+ #if 0
+ /* FIXME: The linux1394 sbp2 does this last step. */
+@@ -747,6 +747,7 @@ static void sbp2_login(struct work_struc
+ memset(&eight_bytes_lun, 0, sizeof(eight_bytes_lun));
+ eight_bytes_lun.scsi_lun[0] = (lu->lun >> 8) & 0xff;
+ eight_bytes_lun.scsi_lun[1] = lu->lun & 0xff;
++ shost = container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+
+ sdev = __scsi_add_device(shost, 0, 0,
+ scsilun_to_int(&eight_bytes_lun), lu);
+@@ -791,7 +792,7 @@ static void sbp2_login(struct work_struc
+ */
+ PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+ out:
+- sbp2_target_put(lu->tgt);
++ sbp2_target_put(tgt);
+ }
+
+ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry)
+@@ -874,7 +875,7 @@ static int sbp2_scan_unit_dir(struct sbp
+ if (timeout > tgt->mgt_orb_timeout)
+ fw_notify("%s: config rom contains %ds "
+ "management ORB timeout, limiting "
+- "to %ds\n", tgt->unit->device.bus_id,
++ "to %ds\n", tgt->bus_id,
+ timeout / 1000,
+ tgt->mgt_orb_timeout / 1000);
+ break;
+@@ -902,7 +903,7 @@ static void sbp2_init_workarounds(struct
+ if (w)
+ fw_notify("Please notify linux1394-devel at lists.sourceforge.net "
+ "if you need the workarounds parameter for %s\n",
+- tgt->unit->device.bus_id);
++ tgt->bus_id);
+
+ if (w & SBP2_WORKAROUND_OVERRIDE)
+ goto out;
+@@ -924,8 +925,7 @@ static void sbp2_init_workarounds(struct
+ if (w)
+ fw_notify("Workarounds for %s: 0x%x "
+ "(firmware_revision 0x%06x, model_id 0x%06x)\n",
+- tgt->unit->device.bus_id,
+- w, firmware_revision, model);
++ tgt->bus_id, w, firmware_revision, model);
+ tgt->workarounds = w;
+ }
+
+@@ -949,6 +949,7 @@ static int sbp2_probe(struct device *dev
+ tgt->unit = unit;
+ kref_init(&tgt->kref);
+ INIT_LIST_HEAD(&tgt->lu_list);
++ tgt->bus_id = unit->device.bus_id;
+
+ if (fw_device_enable_phys_dma(device) < 0)
+ goto fail_shost_put;
+@@ -999,8 +1000,8 @@ static void sbp2_reconnect(struct work_s
+ {
+ struct sbp2_logical_unit *lu =
+ container_of(work, struct sbp2_logical_unit, work.work);
+- struct fw_unit *unit = lu->tgt->unit;
+- struct fw_device *device = fw_device(unit->device.parent);
++ struct sbp2_target *tgt = lu->tgt;
++ struct fw_device *device = fw_device(tgt->unit->device.parent);
+ int generation, node_id, local_node_id;
+
+ if (fw_device_is_shutdown(device))
+@@ -1015,8 +1016,7 @@ static void sbp2_reconnect(struct work_s
+ SBP2_RECONNECT_REQUEST,
+ lu->login_id, NULL) < 0) {
+ if (lu->retries++ >= 5) {
+- fw_error("failed to reconnect to %s\n",
+- unit->device.bus_id);
++ fw_error("%s: failed to reconnect\n", tgt->bus_id);
+ /* Fall back and try to log in again. */
+ lu->retries = 0;
+ PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+@@ -1025,17 +1025,17 @@ static void sbp2_reconnect(struct work_s
+ goto out;
+ }
+
+- lu->generation = generation;
+- lu->tgt->node_id = node_id;
+- lu->tgt->address_high = local_node_id << 16;
++ lu->generation = generation;
++ tgt->node_id = node_id;
++ tgt->address_high = local_node_id << 16;
+
+- fw_notify("reconnected to %s LUN %04x (%d retries)\n",
+- unit->device.bus_id, lu->lun, lu->retries);
++ fw_notify("%s: reconnected to LUN %04x (%d retries)\n",
++ tgt->bus_id, lu->lun, lu->retries);
+
+ sbp2_agent_reset(lu);
+ sbp2_cancel_orbs(lu);
+ out:
+- sbp2_target_put(lu->tgt);
++ sbp2_target_put(tgt);
+ }
+
+ static void sbp2_update(struct fw_unit *unit)
+@@ -1377,7 +1377,7 @@ static int sbp2_scsi_abort(struct scsi_c
+ {
+ struct sbp2_logical_unit *lu = cmd->device->hostdata;
+
+- fw_notify("sbp2_scsi_abort\n");
++ fw_notify("%s: sbp2_scsi_abort\n", lu->tgt->bus_id);
+ sbp2_agent_reset(lu);
+ sbp2_cancel_orbs(lu);
+
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+When a reconnect failed but re-login succeeded, __scsi_add_device was
+called again.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -741,6 +741,12 @@ static void sbp2_login(struct work_struc
+ PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+ sbp2_agent_reset(lu);
+
++ /* This was a re-login. */
++ if (lu->sdev) {
++ sbp2_cancel_orbs(lu);
++ goto out;
++ }
++
+ if (lu->tgt->workarounds & SBP2_WORKAROUND_DELAY_INQUIRY)
+ ssleep(SBP2_INQUIRY_DELAY);
+
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+If fw-sbp2 was too late with requesting the reconnect, the target would
+reject this. In this case, log out before attempting the reconnect.
+Else several firmwares will deny the re-login because they somehow
+didn't invalidate the old login.
+
+Also, don't retry reconnects in this situation. The retries won't
+succeed either.
+
+These changes improve chances for successful re-login and shorten the
+period during which the logical unit is inaccessible.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -710,6 +710,11 @@ static void sbp2_login(struct work_struc
+ node_id = device->node_id;
+ local_node_id = device->card->node_id;
+
++ /* If this is a re-login attempt, log out, or we might be rejected. */
++ if (lu->sdev)
++ sbp2_send_management_orb(lu, device->node_id, generation,
++ SBP2_LOGOUT_REQUEST, lu->login_id, NULL);
++
+ if (sbp2_send_management_orb(lu, node_id, generation,
+ SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) {
+ if (lu->retries++ < 5)
+@@ -1021,9 +1026,17 @@ static void sbp2_reconnect(struct work_s
+ if (sbp2_send_management_orb(lu, node_id, generation,
+ SBP2_RECONNECT_REQUEST,
+ lu->login_id, NULL) < 0) {
+- if (lu->retries++ >= 5) {
++ /*
++ * If reconnect was impossible even though we are in the
++ * current generation, fall back and try to log in again.
++ *
++ * We could check for "Function rejected" status, but
+ * looking at the bus generation is simpler and more general.
++ */
++ smp_rmb(); /* get current card generation */
++ if (generation == device->card->generation ||
++ lu->retries++ >= 5) {
+ fw_error("%s: failed to reconnect\n", tgt->bus_id);
+- /* Fall back and try to log in again. */
+ lu->retries = 0;
+ PREPARE_DELAYED_WORK(&lu->work, sbp2_login);
+ }
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -28,15 +28,15 @@
+ * and many others.
+ */
+
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <linux/dma-mapping.h>
+ #include <linux/kernel.h>
++#include <linux/mod_devicetable.h>
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+-#include <linux/mod_devicetable.h>
+-#include <linux/delay.h>
+-#include <linux/device.h>
+ #include <linux/scatterlist.h>
+-#include <linux/dma-mapping.h>
+-#include <linux/blkdev.h>
+ #include <linux/string.h>
+ #include <linux/stringify.h>
+ #include <linux/timer.h>
+@@ -48,9 +48,9 @@
+ #include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+
+-#include "fw-transaction.h"
+-#include "fw-topology.h"
+ #include "fw-device.h"
++#include "fw-topology.h"
++#include "fw-transaction.h"
+
+ /*
+ * So far only bridges from Oxford Semiconductor are known to support
--
Stefan Richter
--=====-==--- ---= ==-==
+-=====-==--- --=- ---==
http://arcgraph.de/sr/
+While fw-sbp2 takes the necessary time to reconnect to a logical unit
+after bus reset, the SCSI core keeps sending new commands. They are all
+immediately completed with host busy status, and application clients or
+filesystems will break quickly. The SCSI device might even be taken
+offline: http://bugzilla.kernel.org/show_bug.cgi?id=9734
+
+The only remedy seems to be to block the SCSI device until reconnect.
+Alas, the SCSI core has no useful API to block only one logical unit, i.e.
+the scsi_device; therefore we block the entire Scsi_Host. This
+currently corresponds to an SBP-2 target. In case of targets with
+multiple logical units, we need to satisfy the dependencies between
+logical units by carefully tracking the blocking state of the target and
+its units. We block all logical units of a target as soon as one of
+them needs to be blocked, and keep them blocked until all of them are
+ready to be unblocked.
+
+Furthermore, as the history of the old sbp2 driver has shown, the
+scsi_block_requests() API is a minefield with high potential of
+deadlocks. We therefore take extra measures to keep logical units
+unblocked during __scsi_add_device() and during shutdown.
+
+Signed-off-by: Stefan Richter <stefanr at s5r6.in-berlin.de>
+---
+ drivers/firewire/fw-sbp2.c | 71 +++++++++++++++++++++++++++++++++++--
+ 1 file changed, 69 insertions(+), 2 deletions(-)
+
+Index: linux/drivers/firewire/fw-sbp2.c
+===================================================================
+--- linux.orig/drivers/firewire/fw-sbp2.c
++++ linux/drivers/firewire/fw-sbp2.c
+@@ -41,6 +41,8 @@
+ #include <linux/stringify.h>
+ #include <linux/timer.h>
+ #include <linux/workqueue.h>
++#include <asm/atomic.h>
++#include <asm/system.h>
+
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+@@ -139,6 +140,7 @@ struct sbp2_logical_unit {
+ int generation;
+ int retries;
+ struct delayed_work work;
++ atomic_t blocked;
+ };
+
+ /*
+@@ -157,6 +159,9 @@ struct sbp2_target {
+ int address_high;
+ unsigned int workarounds;
+ unsigned int mgt_orb_timeout;
++
++ atomic_t dont_block;
++ atomic_t blocked;
+ };
+
+ /*
+@@ -646,6 +651,53 @@ static void sbp2_agent_reset_no_wait(str
+ &z, sizeof(z), complete_agent_reset_write_no_wait, t);
+ }
+
++/*
++ * Blocks lu->tgt if all of the following conditions are met:
++ * - Login, INQUIRY, and high-level SCSI setup of all logical units of the
++ * target have been successfully finished (indicated by dont_block == 0).
++ * - The lu->generation is stale. sbp2_reconnect will unblock lu later.
++ */
++static void sbp2_conditionally_block(struct sbp2_logical_unit *lu)
++{
++ struct fw_card *card = fw_device(lu->tgt->unit->device.parent)->card;
++
++ if (!atomic_read(&lu->tgt->dont_block) &&
++ lu->generation != card->generation &&
++ atomic_cmpxchg(&lu->blocked, 0, 1) == 0) {
++
++ /* raise the block count of the target */
++ if (atomic_inc_return(&lu->tgt->blocked) == 1) {
++ scsi_block_requests(lu->sdev->host);
++ fw_notify("blocked %s\n", lu->tgt->bus_id);
++ }
++ }
++}
++
++/* Unblocks lu->tgt as soon as all its logical units can be unblocked. */
++static void sbp2_conditionally_unblock(struct sbp2_logical_unit *lu)
++{
++ if (atomic_cmpxchg(&lu->blocked, 1, 0) == 1) {
++
++ /* lower the block count of the target */
++ if (atomic_dec_and_test(&lu->tgt->blocked)) {
++ scsi_unblock_requests(lu->sdev->host);
++ fw_notify("unblocked %s\n", lu->tgt->bus_id);
++ }
++ }
++}
++
++
++/* Prevents future blocking of tgt and then unblocks it. */
++static void sbp2_unblock(struct sbp2_target *tgt)
++{
++ struct Scsi_Host *shost =
++ container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
++
++ atomic_inc(&tgt->dont_block);
++ smp_wmb();
++ scsi_unblock_requests(shost);
++}
++
+ static void sbp2_release_target(struct kref *kref)
+ {
+ struct sbp2_target *tgt = container_of(kref, struct sbp2_target, kref);
+@@ -653,6 +705,12 @@ static void sbp2_release_target(struct k
+ struct Scsi_Host *shost =
+ container_of((void *)tgt, struct Scsi_Host, hostdata[0]);
+
++ /*
++ * Make sure that the target is unblocked and won't be blocked anymore
++ * before scsi_remove_device() is called. Else it will deadlock.
++ */
++ sbp2_unblock(tgt);
++
+ list_for_each_entry_safe(lu, next, &tgt->lu_list, link) {
+ if (lu->sdev)
+ scsi_remove_device(lu->sdev);
+@@ -717,11 +775,14 @@ static void sbp2_login(struct work_struc
+
+ if (sbp2_send_management_orb(lu, node_id, generation,
+ SBP2_LOGIN_REQUEST, lu->lun, &response) < 0) {
+- if (lu->retries++ < 5)
++ if (lu->retries++ < 5) {
+ sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5));
+- else
++ } else {
+ fw_error("%s: failed to login to LUN %04x\n",
+ tgt->bus_id, lu->lun);
++ /* Let any waiting I/O fail from now on. */
++ sbp2_unblock(lu->tgt);
++ }
+ goto out;
+ }
+
+@@ -749,6 +810,7 @@ static void sbp2_login(struct work_struc
+ /* This was a re-login. */
+ if (lu->sdev) {
+ sbp2_cancel_orbs(lu);
++ sbp2_conditionally_unblock(lu);
+ goto out;
+ }
+
+@@ -786,6 +848,8 @@ static void sbp2_login(struct work_struc
+ * Can you believe it? Everything went well.
+ */
+ lu->sdev = sdev;
++ smp_wmb(); /* We need lu->sdev when we want to block lu. */
++ atomic_dec(&lu->tgt->dont_block);
+ scsi_device_put(sdev);
+ goto out;
+ }
+@@ -828,6 +892,7 @@ static int sbp2_add_logical_unit(struct
+ lu->sdev = NULL;
+ lu->lun = lun_entry & 0xffff;
+ lu->retries = 0;
++ atomic_inc(&tgt->dont_block);
+ INIT_LIST_HEAD(&lu->orb_list);
+ INIT_DELAYED_WORK(&lu->work, sbp2_login);
+
+@@ -1053,6 +1118,7 @@ static void sbp2_reconnect(struct work_s
+
+ sbp2_agent_reset(lu);
+ sbp2_cancel_orbs(lu);
++ sbp2_conditionally_unblock(lu);
+ out:
+ sbp2_target_put(tgt);
+ }
+@@ -1172,6 +1238,7 @@ complete_command_orb(struct sbp2_orb *ba
+ * or when sending the write (less likely).
+ */
+ result = DID_BUS_BUSY << 16;
++ sbp2_conditionally_block(orb->lu);
+ }
+
+ dma_unmap_single(device->card->device, orb->base.request_bus,
+
+--
+Stefan Richter
+-=====-==--- --=- ---==
+http://arcgraph.de/sr/
+
More information about the fedora-extras-commits
mailing list