rpms/kernel/F-8 kernel.spec, 1.368, 1.369 linux-2.6-firewire-git-pending.patch, 1.2, 1.3

Mon Mar 10 16:43:02 UTC 2008

Author: jwilson

Update of /cvs/pkgs/rpms/kernel/F-8
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv25681

Modified Files:
	kernel.spec linux-2.6-firewire-git-pending.patch 
Log Message:
* Mon Mar 10 2008 Jarod Wilson <jwilson at redhat.com> 2.6.24.3-23
- firewire-sbp2: improved ability to reconnect to devices
  following a bus reset
- firewire-sbp2: set proper I/O retry limits in SBP-2 devices



Index: kernel.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-8/kernel.spec,v
retrieving revision 1.368
retrieving revision 1.369
diff -u -r1.368 -r1.369

--- kernel.spec	6 Mar 2008 20:56:27 -0000	1.368
+++ kernel.spec	10 Mar 2008 16:42:26 -0000	1.369
@@ -1920,6 +1920,11 @@
 
 
 %changelog
+* Mon Mar 10 2008 Jarod Wilson <jwilson at redhat.com> 2.6.24.3-23
+- firewire-sbp2: improved ability to reconnect to devices
+  following a bus reset
+- firewire-sbp2: set proper I/O retry limits in SBP-2 devices
+
 * Thu Mar 06 2008 Chuck Ebbert <cebbert at redhat.com> 2.6.24.3-22
 - Fix libata DMA masking for HPT and SVW (possible fix for #433557)
 

linux-2.6-firewire-git-pending.patch:

Index: linux-2.6-firewire-git-pending.patch
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-8/linux-2.6-firewire-git-pending.patch,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- linux-2.6-firewire-git-pending.patch	4 Mar 2008 05:03:18 -0000	1.2
+++ linux-2.6-firewire-git-pending.patch	10 Mar 2008 16:42:26 -0000	1.3
@@ -485,3 +485,214 @@
 http://arcgraph.de/sr/
 
 
+Increase reconnect management orb timeout value.
+
+Signed-off-by: Jarod Wilson <jwilson at redhat.com>
+---
+
+ drivers/firewire/fw-sbp2.c |   15 +++++++++++++--
+ 1 files changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
+index d6d62c6..da315cf 100644
+--- a/drivers/firewire/fw-sbp2.c
++++ b/drivers/firewire/fw-sbp2.c
+@@ -170,6 +170,7 @@ struct sbp2_target {
+  */
+ #define SBP2_MIN_LOGIN_ORB_TIMEOUT	5000U	/* Timeout in ms */
+ #define SBP2_MAX_LOGIN_ORB_TIMEOUT	40000U	/* Timeout in ms */
++#define SBP2_MAX_RECONNECT_ORB_TIMEOUT	6000U	/* Timeout in ms */
+ #define SBP2_ORB_TIMEOUT		2000U	/* Timeout in ms */
+ #define SBP2_ORB_NULL			0x80000000
+ #define SBP2_MAX_SG_ELEMENT_LENGTH	0xf000
+@@ -538,14 +539,24 @@ sbp2_send_management_orb(struct sbp2_logical_unit *lu, int node_id,
+ 	orb->request.status_fifo.low  =
+ 		cpu_to_be32(lu->address_handler.offset);
+ 
+-	if (function == SBP2_LOGIN_REQUEST) {
++	switch (function) {
++
++	case SBP2_LOGIN_REQUEST:
+ 		/* Ask for 2^2 == 4 seconds reconnect grace period */
+ 		orb->request.misc |= cpu_to_be32(
+ 			MANAGEMENT_ORB_RECONNECT(2) |
+ 			MANAGEMENT_ORB_EXCLUSIVE(sbp2_param_exclusive_login));
+ 		timeout = lu->tgt->mgt_orb_timeout;
+-	} else {
++		break;
++
++	case SBP2_RECONNECT_REQUEST:
++		timeout = min(SBP2_MAX_RECONNECT_ORB_TIMEOUT,
++			      lu->tgt->mgt_orb_timeout);
++		break;
++
++	default:
+ 		timeout = SBP2_ORB_TIMEOUT;
++		break;
+ 	}
+ 
+ 	init_completion(&orb->done);
+
+
+
+Per the SBP-2 specification, all SBP-2 target devices must have a BUSY_TIMEOUT
+register. Per the 1394-1995 specification, the retry_limit portion of the
+register should be set to 0x0 initially, and set on the target by a logged in
+initiator (i.e., a Linux host w/firewire controller(s)).
+
+Well, as it turns out, lots of devices these days have actually moved on to
+starting to implement SBP-3 compliance, which says that retry_limit should
+default to 0xf instead (yes, SBP-3 stomps directly on 1394-1995, oops).
+
+Prior to this change, the firewire driver stack didn't touch retry_limit, and
+any SBP-3 compliant device worked fine, while SBP-2 compliant ones were unable
+to retransmit when the host returned an ack_busy_X, which resulted in stalled
+out I/O, eventually causing the SCSI layer to give up and offline the device.
+
+The simple fix is for us to set retry_limit to 0xf in the register for all
+devices (which actually matches what the old ieee1394 stack did).
+
+Prior to this change, a hard disk behind an SBP-2 Prolific PL-3507 bridge chip
+would routinely encounter buffer I/O errors and wind up offlined by the SCSI
+layer. With this change, I've encountered zero I/O failures moving tens of GB
+of data around.
+
+Signed-off-by: Jarod Wilson <jwilson at redhat.com>
+
+---
+
+Update: use CSR_BUSY_TIMEOUT from fw-transaction.h instead of needlessly
+        creating an SBP2_BUSY_TIMEOUT
+
+ drivers/firewire/fw-sbp2.c |   31 +++++++++++++++++++++++++++----
+ 1 files changed, 27 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
+index d6d62c6..fbf5b7a 100644
+--- a/drivers/firewire/fw-sbp2.c
++++ b/drivers/firewire/fw-sbp2.c
+@@ -173,6 +173,7 @@ struct sbp2_target {
+ #define SBP2_ORB_TIMEOUT		2000U	/* Timeout in ms */
+ #define SBP2_ORB_NULL			0x80000000
+ #define SBP2_MAX_SG_ELEMENT_LENGTH	0xf000
++#define SBP2_RETRY_LIMIT		0xf	/* 15 retries */
+ 
+ /* Unit directory keys */
+ #define SBP2_CSR_UNIT_CHARACTERISTICS	0x3a
+@@ -800,6 +801,30 @@ static void sbp2_target_put(struct sbp2_target *tgt)
+ 	kref_put(&tgt->kref, sbp2_release_target);
+ }
+ 
++static void
++complete_set_busy_timeout(struct fw_card *card, int rcode,
++			  void *payload, size_t length, void *done)
++{
++	complete(done);
++}
++
++static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu)
++{
++	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
++	DECLARE_COMPLETION_ONSTACK(done);
++	struct fw_transaction t;
++	static u32 busy_timeout;
++
++	/* FIXME: we should try to set dual-phase cycle_limit too */
++	busy_timeout = cpu_to_be32(SBP2_RETRY_LIMIT);
++
++	fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST,
++			lu->tgt->node_id, lu->generation, device->max_speed,
++			CSR_REGISTER_BASE + CSR_BUSY_TIMEOUT, &busy_timeout,
++			sizeof(busy_timeout), complete_set_busy_timeout, &done);
++	wait_for_completion(&done);
++}
++
+ static void sbp2_reconnect(struct work_struct *work);
+ 
+ static void sbp2_login(struct work_struct *work)
+@@ -851,10 +876,8 @@ static void sbp2_login(struct work_struct *work)
+ 	fw_notify("%s: logged in to LUN %04x (%d retries)\n",
+ 		  tgt->bus_id, lu->lun, lu->retries);
+ 
+-#if 0
+-	/* FIXME: The linux1394 sbp2 does this last step. */
+-	sbp2_set_busy_timeout(scsi_id);
+-#endif
++	/* set appropriate retry limit(s) in BUSY_TIMEOUT register */
++	sbp2_set_busy_timeout(lu);
+ 
+ 	PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect);
+ 	sbp2_agent_reset(lu);
+
+---
+
+Try to write dual-phase retry protocol limits to BUSY_TIMEOUT register.
+- The dual-phase retry protocol is optional to implement, and if not 
+  supported, writes to the dual-phase portion of the register will be
+  ignored. We try to write the original 1394-1995 default here.
+- In the case of devices that are also SBP-3-compliant, all writes are 
+  ignored, as the register is read-only, but contains single-phase retry of
+  15, which is what we're trying to set for all SBP-2 devices anyway, so this
+  write attempt is safe and yields more consistent behavior for all devices.
+
+See section 8.3.2.3.5 of the 1394-1995 spec, section 6.2 of the SBP-2 spec,
+and section 6.4 of the SBP-3 spec for further details.
+
+Signed-off-by: Jarod Wilson <jwilson at redhat.com>
+---
+
+Update: refresh for updated patch 1/2 using CSR_BUSY_TIMEOUT
+
+ drivers/firewire/fw-sbp2.c |   22 +++++++++++++++++++---
+ 1 files changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
+index fbf5b7a..763f653 100644
+--- a/drivers/firewire/fw-sbp2.c
++++ b/drivers/firewire/fw-sbp2.c
+@@ -173,7 +173,8 @@ struct sbp2_target {
+ #define SBP2_ORB_TIMEOUT		2000U	/* Timeout in ms */
+ #define SBP2_ORB_NULL			0x80000000
+ #define SBP2_MAX_SG_ELEMENT_LENGTH	0xf000
+-#define SBP2_RETRY_LIMIT		0xf	/* 15 retries */
++#define SBP2_RETRY_LIMIT		0xf		/* 15 retries */
++#define SBP2_CYCLE_LIMIT		(0xc8 << 12)	/* 200 125us cycles */
+ 
+ /* Unit directory keys */
+ #define SBP2_CSR_UNIT_CHARACTERISTICS	0x3a
+@@ -808,6 +809,22 @@ complete_set_busy_timeout(struct fw_card *card, int rcode,
+ 	complete(done);
+ }
+ 
++/*
++ * Write retransmit retry values into the BUSY_TIMEOUT register.
++ * - The single-phase retry protocol is supported by all SBP-2 devices, but the
++ *   default retry_limit value is 0 (i.e. never retry transmission). We write a
++ *   saner value after logging into the device.
++ * - The dual-phase retry protocol is optional to implement, and if not
++ *   supported, writes to the dual-phase portion of the register will be
++ *   ignored. We try to write the original 1394-1995 default here.
++ * - In the case of devices that are also SBP-3-compliant, all writes are
++ *   ignored, as the register is read-only, but contains single-phase retry of
++ *   15, which is what we're trying to set for all SBP-2 device anyway, so this
++ *   write attempt is safe and yields more consistent behavior for all devices.
++ *
++ * See section 8.3.2.3.5 of the 1394-1995 spec, section 6.2 of the SBP-2 spec,
++ * and section 6.4 of the SBP-3 spec for further details.
++ */
+ static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu)
+ {
+ 	struct fw_device *device = fw_device(lu->tgt->unit->device.parent);
+@@ -815,8 +832,7 @@ static void sbp2_set_busy_timeout(struct sbp2_logical_unit *lu)
+ 	struct fw_transaction t;
+ 	static u32 busy_timeout;
+ 
+-	/* FIXME: we should try to set dual-phase cycle_limit too */
+-	busy_timeout = cpu_to_be32(SBP2_RETRY_LIMIT);
++	busy_timeout = cpu_to_be32(SBP2_CYCLE_LIMIT | SBP2_RETRY_LIMIT);
+ 
+ 	fw_send_request(device->card, &t, TCODE_WRITE_QUADLET_REQUEST,
+ 			lu->tgt->node_id, lu->generation, device->max_speed,
+
+
+