[Linux-cluster] Compilation problem with GFS/GNBD and kernel panics on stress.

Mathieu Avila mathieu.avila at seanodes.com
Mon Jun 19 11:48:56 UTC 2006


Hello all, 

(I've already posted this to cluster-devel at redhat.com, and it seems it 
wasn't the appropriate place, as I didn't get any answer. Sorry for the 
cross-posting.)

I have 2 problems:

1) I'm trying to use GFS with Fedora Core 4, upgraded to kernel 
2.6.16-1.2111_FC4smp. The RPM versions are:
GFS-kernel-smp-2.6.11.8-20050601.152643.FC4.25
GFS-6.1.0-3
GFS-kernheaders-2.6.11.8-20050601.152643.FC4.25
dlm-kernheaders-2.6.11.5-20050601.152643.FC4.22
dlm-kernel-smp-2.6.11.5-20050601.152643.FC4.22
dlm-1.0.0-3
gnbd-kernheaders-2.6.11.2-20050420.133124.FC4.58
gnbd-1.0.0-1


There were problems building the following packages against this kernel, 
and the following patches were necessary:

- GFS-kernel:


--- gfs-kernel-2.6.11.8-20050601.152643.FC4/src/gfs/ops_file.c.orig    2006-06-01 13:57:58.000000000 +0200
+++ gfs-kernel-2.6.11.8-20050601.152643.FC4/src/gfs/ops_file.c    2006-06-01 13:57:24.000000000 +0200
@@ -931,12 +931,12 @@
    if (!access_ok(VERIFY_READ, buf, size))
        return -EFAULT;

-    down(&inode->i_sem);
+    mutex_lock(&inode->i_mutex);
    if (file->f_flags & O_DIRECT)
        count = walk_vm(file, (char *)buf, size, offset, do_write_direct);
    else
        count = walk_vm(file, (char *)buf, size, offset, do_write_buf);
-    up(&inode->i_sem);
+    mutex_unlock(&inode->i_mutex);

    return count;
}
--- gfs-kernel-2.6.11.8-20050601.152643.FC4/src/gfs/ops_fstype.c.orig    2006-06-01 14:04:16.000000000 +0200
+++ gfs-kernel-2.6.11.8-20050601.152643.FC4/src/gfs/ops_fstype.c    2006-06-01 14:05:29.000000000 +0200
@@ -712,12 +712,12 @@
        goto out;
    } else {
        char buf[BDEVNAME_SIZE];
-
+        unsigned long bsize;
        sb->s_flags = flags;
        strlcpy(sb->s_id, bdevname(real, buf), sizeof(sb->s_id));
-        sb->s_old_blocksize = block_size(real);
-        sb_set_blocksize(sb, sb->s_old_blocksize);
-        set_blocksize(real, sb->s_old_blocksize);
+        bsize = block_size(real);
+        sb_set_blocksize(sb, bsize);
+        set_blocksize(real, bsize);
        error = fill_super(sb, data, (flags & MS_VERBOSE) ? 1 : 0);
        if (error) {
            up_write(&sb->s_umount);
@@ -748,7 +748,7 @@
{
    struct block_device *diaper = sb->s_bdev;
    struct block_device *real = gfs_diaper_2real(diaper);
-    unsigned long bsize = sb->s_old_blocksize;
+    unsigned long bsize = block_size(real);

    generic_shutdown_super(sb);
    set_blocksize(diaper, bsize);



I am quite confident about "ops_file.c", as it matches the latest 
version for 2.6.15:
http://sources.redhat.com/cgi-bin/cvsweb.cgi/cluster/gfs-kernel/src/gfs/ops_file.c?rev=1.16.6.2.2.4&content-type=text/x-cvsweb-markup&cvsroot=cluster&only_with_tag=gfs-kernel_2_6_15_2 
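
For context on that hunk: as far as I can tell, kernel 2.6.16 replaced 
the inode's "i_sem" semaphore with an "i_mutex" mutex, so every 
down()/up() pair on i_sem becomes mutex_lock()/mutex_unlock() on 
i_mutex. A minimal illustrative sketch (the function name is invented, 
not GFS code):

/*
 * Illustration only: the 2.6.16-era locking conversion that the
 * patch above applies.  inode->i_sem (struct semaphore) became
 * inode->i_mutex (struct mutex).
 */
#include <linux/fs.h>
#include <linux/mutex.h>

static void example_with_inode_locked(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);      /* was: down(&inode->i_sem); */
        /* ... touch state protected by the inode lock ... */
        mutex_unlock(&inode->i_mutex);    /* was: up(&inode->i_sem);   */
}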


For "ops_fstype.c", it should be ok, unless you see obvious errors.
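
If I read it correctly, this hunk is needed because struct super_block 
no longer has the s_old_blocksize member in this kernel. One caveat I'm 
not sure about: block_size(real) at unmount time returns whatever block 
size the mount path last set, not the pre-mount value, so the 
workaround no longer restores the device's original block size. If that 
matters, the old value could be kept in the filesystem's own per-mount 
structure instead; a hypothetical sketch (names invented, not the 
actual GFS code):

/*
 * Hypothetical alternative: remember the pre-mount block size in a
 * private per-mount structure instead of the removed
 * sb->s_old_blocksize field.
 */
#include <linux/fs.h>
#include <linux/blkdev.h>

struct example_sb_info {
        unsigned long old_blocksize;  /* device block size before mount */
};

static void example_mount_side(struct super_block *sb,
                               struct block_device *real,
                               struct example_sb_info *info)
{
        info->old_blocksize = block_size(real);    /* save pre-mount size */
        sb_set_blocksize(sb, info->old_blocksize);
        set_blocksize(real, info->old_blocksize);
}

static void example_umount_side(struct super_block *sb,
                                struct block_device *real,
                                struct example_sb_info *info)
{
        generic_shutdown_super(sb);
        set_blocksize(real, info->old_blocksize);  /* restore pre-mount size */
}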


- gnbd-kernel:

--- gnbd-kernel-2.6.11.2-20050420.133124/src/gnbd.c.orig    2006-06-01 13:46:35.000000000 +0200
+++ gnbd-kernel-2.6.11.2-20050420.133124/src/gnbd.c    2006-06-01 13:47:03.000000000 +0200
@@ -180,9 +180,9 @@
    set_capacity(dev->disk, size);
    bdev = bdget_disk(dev->disk, 0);
    if (bdev) {
-        down(&bdev->bd_inode->i_sem);
+        mutex_lock(&bdev->bd_inode->i_mutex);
        i_size_write(bdev->bd_inode, (loff_t)size << 9);
-        up(&bdev->bd_inode->i_sem);
+        mutex_unlock(&bdev->bd_inode->i_mutex);
        bdput(bdev);
    }
    up(&dev->do_it_lock);
@@ -281,7 +281,7 @@
    
    spin_lock_irqsave(q->queue_lock, flags);
    if (!end_that_request_first(req, uptodate, req->nr_sectors)) {
-        end_that_request_last(req);
+        end_that_request_last(req, 0);
    }
    spin_unlock_irqrestore(q->queue_lock, flags);
}


This one is quite straightforward.
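
For what it's worth, my understanding of the second hunk (hedged, I 
haven't checked the git history): in 2.6.16, end_that_request_last() 
grew an "uptodate" argument with the same meaning as the one passed to 
end_that_request_first() (positive = success, 0 or negative = failure), 
and it forwards an error code to req->end_io() when that value 
indicates failure. So passing the caller's real completion status 
through, rather than a hard-coded 0, may be more correct. A sketch of a 
completion helper in that style (illustrative, not the actual gnbd 
code):

/*
 * Illustrative 2.6.16-style request completion.  Passing "uptodate"
 * through to end_that_request_last() (instead of a constant 0) keeps
 * the success/error status visible to req->end_io(), if one is set.
 */
#include <linux/blkdev.h>

static void example_end_request(struct request *req, int uptodate)
{
        request_queue_t *q = req->q;
        unsigned long flags;

        spin_lock_irqsave(q->queue_lock, flags);
        if (!end_that_request_first(req, uptodate, req->nr_sectors))
                end_that_request_last(req, uptodate);
        spin_unlock_irqrestore(q->queue_lock, flags);
}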


2) Once everything compiles and runs, I have 1 node running GNBD and 
exporting one of its disks. The 3 other nodes run as GNBD clients, and 
I mount a GFS filesystem on them, while all 4 nodes participate in the 
GFS cluster. (Standard configuration: dlm, cman.)

I have tried to loop "bonnie++" 100 times in parallel on the 3 client 
nodes, with:
bonnie++ -u 0:0 -d /mnt/gfs -x 100

One of the nodes crashed before the 10th loop, with the following 
panic:

Unable to handle kernel paging request at 0000000000200220 RIP:
<ffffffff88351d6a>{:gfs:gfs_depend_add+430}
PGD 306d7067 PUD 37532067 PMD 0
Oops: 0000 [1] SMP
last sysfs file: /class/gnbd/gnbd0/waittime
CPU 1
Modules linked in: gnbd(U) lock_dlm(U) dlm(U) gfs(U) lock_harness(U) cman(U) ipv6 parport_pc lp parport autofs4 rfcomm l2cap bluetooth sunrpc pcmcia yenta_socket rsrc_nonstatic pcmcia_core dm_mod video button battery ac uhci_hcd ehci_hcd i2c_i801 i2c_core tg3 e1000 ext3 jbd ata_piix libata sd_mod scsi_mod
Pid: 5679, comm: bonnie++ Tainted: GF     2.6.16-1.2111_FC4smp #1
RIP: 0010:[<ffffffff88351d6a>] <ffffffff88351d6a>{:gfs:gfs_depend_add+430}
RSP: 0018:ffff81002bfddb38  EFLAGS: 00010206
RAX: ffff810037571200 RBX: 0000000000003a98 RCX: 0000000000000002
RDX: ffff810037571338 RSI: ffff81002bfddb08 RDI: ffff810001dd5c40
RBP: ffffc2001017a000 R08: ffffc2001017c650 R09: 0000000000000040
R10: 0000000000000040 R11: 0000000000040000 R12: 0000000000003a98
R13: 00000001002ac770 R14: 00000000002001f0 R15: ffffc2001017a258
FS:  00002aaaaaab8380(0000) GS:ffff8100021d9f40(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000200220 CR3: 0000000035b0b000 CR4: 00000000000006e0
Process bonnie++ (pid: 5679, threadinfo ffff81002bfdc000, task ffff81003ecd5860)
Stack: ffff810037571200 000000018832af2b 0000000000d633e7 ffff810006d384a8
       ffff810022a0d978 0000000000d633e8 ffffc2001017a000 0000000000000001
       ffff810009bd4490 ffffffff8832b99b
Call Trace: <ffffffff8832b99b>{:gfs:gfs_wipe_buffers+842}
       <ffffffff8833a292>{:gfs:gfs_inode_dealloc+1023} <ffffffff88356102>{:gfs:gfs_unlinked_limit+230}
       <ffffffff8834aaac>{:gfs:gfs_unlink+60} <ffffffff8834b183>{:gfs:gfs_permission+483}
       <ffffffff8019080f>{permission+114} <ffffffff80190a39>{vfs_unlink+203}
       <ffffffff8019312d>{do_unlinkat+184} <ffffffff8010d431>{syscall_trace_enter+181}
       <ffffffff8010ab11>{tracesys+113} <ffffffff8010ab71>{tracesys+209}

Code: 4d 8b 66 30 4c 89 ff e8 34 04 00 f8 8b 9d 94 02 00 00 4c 89
RIP <ffffffff88351d6a>{:gfs:gfs_depend_add+430} RSP <ffff81002bfddb38>
CR2: 0000000000200220
 <0>Kernel panic - not syncing: Oops

Call Trace: <ffffffff80134f76>{panic+133} <ffffffff803521fb>{_spin_unlock_irqrestore+11}
       <ffffffff8035293c>{oops_end+71} <ffffffff803543ba>{do_page_fault+1770}
       <ffffffff8017dfc1>{kmem_freepages+191} <ffffffff8017e2e7>{slab_destroy+151}
       <ffffffff8010b93d>{error_exit+0} <ffffffff88351d6a>{:gfs:gfs_depend_add+430}
       <ffffffff88351da4>{:gfs:gfs_depend_add+488} <ffffffff8832b99b>{:gfs:gfs_wipe_buffers+842}
       <ffffffff8833a292>{:gfs:gfs_inode_dealloc+1023} <ffffffff88356102>{:gfs:gfs_unlinked_limit+230}
       <ffffffff8834aaac>{:gfs:gfs_unlink+60} <ffffffff8834b183>{:gfs:gfs_permission+483}
       <ffffffff8019080f>{permission+114} <ffffffff80190a39>{vfs_unlink+203}
       <ffffffff8019312d>{do_unlinkat+184} <ffffffff8010d431>{syscall_trace_enter+181}
       <ffffffff8010ab11>{tracesys+113} <ffffffff8010ab71>{tracesys+209}



This is 100% reproducible.

Any thoughts on this? Maybe it has already been fixed in a more recent 
version?

-- 
Mathieu Avila



