rpms/kernel/F-7 linux-2.6-md-raid5-move-code-to-proper-place.patch, NONE, 1.1 linux-2.6-nfs-fix-writeback-race.patch, NONE, 1.1 kernel-2.6.spec, 1.3376, 1.3377

Fri Nov 9 23:18:55 UTC 2007

Author: cebbert

Update of /cvs/pkgs/rpms/kernel/F-7
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv27015

Modified Files:
	kernel-2.6.spec 
Added Files:
	linux-2.6-md-raid5-move-code-to-proper-place.patch 
	linux-2.6-nfs-fix-writeback-race.patch 
Log Message:
* Fri Nov 09 2007 Chuck Ebbert <cebbert at redhat.com>
- Fix the earlier misapplied raid5 patch.
- Fix race in NFS that can cause data corruption.


linux-2.6-md-raid5-move-code-to-proper-place.patch:

--- NEW FILE linux-2.6-md-raid5-move-code-to-proper-place.patch ---
From: Neil Brown <neilb at suse.de>
To: Justin Piszcz <jpiszcz at lucidpixels.com>
Date: 	Mon, 5 Nov 2007 08:49:55 +1100
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Message-ID: <18222.16003.92062.970530 at notabene.brown>
Cc: linux-kernel at vger.kernel.org, linux-raid at vger.kernel.org
Subject: Re: 2.6.23.1: mdadm/raid5 hung/d-state
In-Reply-To: message from Justin Piszcz on Sunday November 4
References: <Pine.LNX.4.64.0711040658180.30831 at p34.internal.lan>
X-Mailer: VM 7.19 under Emacs 21.4.1
X-face: 	[Gw_3E*Gng}4rRrKRYotwlE?.2|**#s9D<ml'fY1Vw+ at XfR[fRCsUoP?K6bt3YD\ui5Fh?f
	LONpR';(ql)VM_TQ/<l_^D3~B:z$\YC7gUCuC=sYm/80G=$tt"98mr8(l))QzVKCk$6~gldn~*FK9x
	8`;pM{3S8679sP+MbP,72<3_PIH-$I&iaiIb|hV1d%cYg))BmI)AZ
Sender: linux-kernel-owner at vger.kernel.org
Precedence: bulk
X-Mailing-List: 	linux-kernel at vger.kernel.org

On Sunday November 4, jpiszcz at lucidpixels.com wrote:
> # ps auxww | grep D
> USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
> root       273  0.0  0.0      0     0 ?        D    Oct21  14:40 [pdflush]
> root       274  0.0  0.0      0     0 ?        D    Oct21  13:00 [pdflush]
> 
> After several days/weeks, this is the second time this has happened, while 
> doing regular file I/O (decompressing a file), everything on the device 
> went into D-state.

At a guess (I haven't looked closely) I'd say it is the bug that was
meant to be fixed by

commit 4ae3f847e49e3787eca91bced31f8fd328d50496

except that patch applied badly and needed to be fixed with
the following patch (not in git yet).
These have been sent to stable@ and should be in the queue for 2.6.23.2


NeilBrown

Fix misapplied patch in raid5.c

commit 4ae3f847e49e3787eca91bced31f8fd328d50496 did not get applied
correctly, presumably due to substantial similarities between
handle_stripe5 and handle_stripe6.

This patch (with lots of context) moves the chunk of new code from
handle_stripe6 (where it isn't needed (yet)) to handle_stripe5.


Signed-off-by: Neil Brown <neilb at suse.de>

### Diffstat output
 ./drivers/md/raid5.c |   14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff .prev/drivers/md/raid5.c ./drivers/md/raid5.c
--- .prev/drivers/md/raid5.c	2007-11-02 12:10:49.000000000 +1100
+++ ./drivers/md/raid5.c	2007-11-02 12:25:31.000000000 +1100
@@ -2607,40 +2607,47 @@ static void handle_stripe5(struct stripe
 	struct bio *return_bi = NULL;
 	struct stripe_head_state s;
 	struct r5dev *dev;
 	unsigned long pending = 0;
 
 	memset(&s, 0, sizeof(s));
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
 		"ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
 		atomic_read(&sh->count), sh->pd_idx,
 		sh->ops.pending, sh->ops.ack, sh->ops.complete);
 
 	spin_lock(&sh->lock);
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
 	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
 	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
+	/* clean-up completed biofill operations */
+	if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) {
+		clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+		clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
+		clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
+	}
+
 	rcu_read_lock();
 	for (i=disks; i--; ) {
 		mdk_rdev_t *rdev;
 		struct r5dev *dev = &sh->dev[i];
 		clear_bit(R5_Insync, &dev->flags);
 
 		pr_debug("check %d: state 0x%lx toread %p read %p write %p "
 			"written %p\n",	i, dev->flags, dev->toread, dev->read,
 			dev->towrite, dev->written);
 
 		/* maybe we can request a biofill operation
 		 *
 		 * new wantfill requests are only permitted while
 		 * STRIPE_OP_BIOFILL is clear
 		 */
 		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
 			!test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
 			set_bit(R5_Wantfill, &dev->flags);
 
 		/* now count some things */
@@ -2880,47 +2887,40 @@ static void handle_stripe6(struct stripe
 	struct stripe_head_state s;
 	struct r6_state r6s;
 	struct r5dev *dev, *pdev, *qdev;
 
 	r6s.qd_idx = raid6_next_disk(pd_idx, disks);
 	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
 		"pd_idx=%d, qd_idx=%d\n",
 	       (unsigned long long)sh->sector, sh->state,
 	       atomic_read(&sh->count), pd_idx, r6s.qd_idx);
 	memset(&s, 0, sizeof(s));
 
 	spin_lock(&sh->lock);
 	clear_bit(STRIPE_HANDLE, &sh->state);
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
 	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
 	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
 	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
 	/* Now to look around and see what can be done */
 
-	/* clean-up completed biofill operations */
-	if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) {
-		clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
-		clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
-		clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete);
-	}
-
 	rcu_read_lock();
 	for (i=disks; i--; ) {
 		mdk_rdev_t *rdev;
 		dev = &sh->dev[i];
 		clear_bit(R5_Insync, &dev->flags);
 
 		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
 			i, dev->flags, dev->toread, dev->towrite, dev->written);
 		/* maybe we can reply to a read */
 		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
 			struct bio *rbi, *rbi2;
 			pr_debug("Return read for disc %d\n", i);
 			spin_lock_irq(&conf->device_lock);
 			rbi = dev->toread;
 			dev->toread = NULL;
 			if (test_and_clear_bit(R5_Overlap, &dev->flags))
 				wake_up(&conf->wait_for_overlap);
 			spin_unlock_irq(&conf->device_lock);
 			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
 				copy_data(0, rbi, dev->page, dev->sector);
-

linux-2.6-nfs-fix-writeback-race.patch:

--- NEW FILE linux-2.6-nfs-fix-writeback-race.patch ---
From: Trond Myklebust <Trond.Myklebust at netapp.com>
Date: Thu, 18 Oct 2007 21:08:05 +0000 (-0400)
Subject: NFS: Fix a writeback race...
X-Git-Tag: v2.6.24-rc1~133^2~3
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=61e930a904966cc37e0a3404276f0b73037e57ca

NFS: Fix a writeback race...

This patch fixes a regression that was introduced by commit
44dd151d5c21234cc534c47d7382f5c28c3143cd

We cannot zero the user page in nfs_mark_uptodate() any more, since

  a) We'd be modifying the page without holding the page lock
  b) We can race with other updates of the page, most notably
     because of the call to nfs_wb_page() in nfs_writepage_setup().

Instead, we do the zeroing in nfs_update_request() if we see that we're
creating a request that might potentially be marked as up to date.

Thanks to Olivier Paquet for reporting the bug and providing a test-case.

Signed-off-by: Trond Myklebust <Trond.Myklebust at netapp.com>
---

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0cf9d1c..89527a4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -174,8 +174,6 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
 		return;
 	if (count != nfs_page_length(page))
 		return;
-	if (count != PAGE_CACHE_SIZE)
-		zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0);
 	SetPageUptodate(page);
 }
 
@@ -627,7 +625,8 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				return ERR_PTR(error);
 			}
 			spin_unlock(&inode->i_lock);
-			return new;
+			req = new;
+			goto zero_page;
 		}
 		spin_unlock(&inode->i_lock);
 
@@ -655,13 +654,23 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 	if (offset < req->wb_offset) {
 		req->wb_offset = offset;
 		req->wb_pgbase = offset;
-		req->wb_bytes = rqend - req->wb_offset;
+		req->wb_bytes = max(end, rqend) - req->wb_offset;
+		goto zero_page;
 	}
 
 	if (end > rqend)
 		req->wb_bytes = end - req->wb_offset;
 
 	return req;
+zero_page:
+	/* If this page might potentially be marked as up to date,
+	 * then we need to zero any uninitialised data. */
+	if (req->wb_pgbase == 0 && req->wb_bytes != PAGE_CACHE_SIZE
+			&& !PageUptodate(req->wb_page))
+		zero_user_page(req->wb_page, req->wb_bytes,
+				PAGE_CACHE_SIZE - req->wb_bytes,
+				KM_USER0);
+	return req;
 }
 
 int nfs_flush_incompatible(struct file *file, struct page *page)


Index: kernel-2.6.spec
===================================================================
RCS file: /cvs/pkgs/rpms/kernel/F-7/kernel-2.6.spec,v
retrieving revision 1.3376
retrieving revision 1.3377
diff -u -r1.3376 -r1.3377
--- kernel-2.6.spec	9 Nov 2007 21:30:10 -0000	1.3376
+++ kernel-2.6.spec	9 Nov 2007 23:18:17 -0000	1.3377
@@ -563,8 +563,9 @@
 Patch420: linux-2.6-squashfs.patch
 Patch422: linux-2.6-gfs-locking-exports.patch
 Patch424: linux-2.6-cifs-fix-incomplete-rcv.patch
-Patch426: linux-2.6-cifs-typo-in-cifs_reconnect-fix.patch
-Patch428: linux-2.6-cifs-fix-bad-handling-of-EAGAIN.patch
+Patch425: linux-2.6-cifs-typo-in-cifs_reconnect-fix.patch
+Patch426: linux-2.6-cifs-fix-bad-handling-of-EAGAIN.patch
+Patch428: linux-2.6-nfs-fix-writeback-race.patch
 
 Patch430: linux-2.6-net-silence-noisy-printks.patch
 Patch431: linux-2.6-net-fix-panic-removing-teql-devices.patch
@@ -633,6 +634,7 @@
 Patch801: linux-2.6-wakeups.patch
 
 Patch900: linux-2.6-md-raid5-fix-clearing-of-biofill-operations.patch
+Patch901: linux-2.6-md-raid5-move-code-to-proper-place.patch
 
 # drm for bug 228414
 Patch950: linux-2.6-drm-fix-i915-allocation.patch
@@ -1222,6 +1224,8 @@
 ApplyPatch linux-2.6-cifs-fix-incomplete-rcv.patch
 ApplyPatch linux-2.6-cifs-typo-in-cifs_reconnect-fix.patch
 ApplyPatch linux-2.6-cifs-fix-bad-handling-of-EAGAIN.patch
+# fix NFS writeback race that can cause data corruption
+ApplyPatch linux-2.6-nfs-fix-writeback-race.patch
 
 # Networking
 # Disable easy to trigger printk's.
@@ -1347,6 +1351,8 @@
 # dm / md
 # raid5 data corruption fix
 ApplyPatch linux-2.6-md-raid5-fix-clearing-of-biofill-operations.patch
+# fix the misapplied fix above: move the biofill clean-up from handle_stripe6 to handle_stripe5
+ApplyPatch linux-2.6-md-raid5-move-code-to-proper-place.patch
 
 # drm patches should be in stable upstream soon
 ApplyPatch linux-2.6-drm-fix-i915-allocation.patch
@@ -2293,6 +2299,10 @@
 
 %changelog
 * Fri Nov 09 2007 Chuck Ebbert <cebbert at redhat.com>
+- Fix the earlier misapplied raid5 patch.
+- Fix race in NFS that can cause data corruption.
+
+* Fri Nov 09 2007 Chuck Ebbert <cebbert at redhat.com>
 - libata sata_sis:
    fix SCSI transfer lengths and SCR read function.