[Linux-cluster] Re: GFS hanging on 3 node RHEL4 cluster
Shawn Hood
shawnlhood at gmail.com
Mon Oct 13 21:32:42 UTC 2008
As a heads up, I'm about to open a high-priority bug on this. It's
crippling us. Also, I meant to say it is a 4-node cluster, not a 3-node
one.
Please let me know if I can provide any more information. I will include
output from a time series of gfs_tool counters runs with the support
request, along the lines of the loop below.
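Something like this loop is what I plan to use to collect the counters;
the mount point, interval, and log path here are placeholders, not the
real values:

  # sample the GFS counters once a minute, timestamped, into a log file
  while true; do
      date
      gfs_tool counters /mnt/svn_users
      sleep 60
  done >> /tmp/gfs_counters.log 2>&1
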
Shawn
On Tue, Oct 7, 2008 at 1:40 PM, Shawn Hood <shawnlhood at gmail.com> wrote:
> More info:
>
> All filesystems mounted using noatime,nodiratime,noquota.
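>
> As an example, the mounts look roughly like the following (the mount
> point shown is a placeholder; the LV path follows from the hq-san VG
> described below):
>
>   mount -t gfs -o noatime,nodiratime,noquota /dev/hq-san/svn_users /mnt/svn_users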
>
> All filesystems report the same data from gfs_tool gettune:
>
> ilimit1 = 100
> ilimit1_tries = 3
> ilimit1_min = 1
> ilimit2 = 500
> ilimit2_tries = 10
> ilimit2_min = 3
> demote_secs = 300
> incore_log_blocks = 1024
> jindex_refresh_secs = 60
> depend_secs = 60
> scand_secs = 5
> recoverd_secs = 60
> logd_secs = 1
> quotad_secs = 5
> inoded_secs = 15
> glock_purge = 0
> quota_simul_sync = 64
> quota_warn_period = 10
> atime_quantum = 3600
> quota_quantum = 60
> quota_scale = 1.0000 (1, 1)
> quota_enforce = 0
> quota_account = 0
> new_files_jdata = 0
> new_files_directio = 0
> max_atomic_write = 4194304
> max_readahead = 262144
> lockdump_size = 131072
> stall_secs = 600
> complain_secs = 10
> reclaim_limit = 5000
> entries_per_readdir = 32
> prefetch_secs = 10
> statfs_slots = 64
> max_mhc = 10000
> greedy_default = 100
> greedy_quantum = 25
> greedy_max = 250
> rgrp_try_threshold = 100
> statfs_fast = 0
> seq_readahead = 0
>
>
> And data on the filesystems from gfs_tool counters (one block of output
> per mounted filesystem):
> locks 2948
> locks held 1352
> freeze count 0
> incore inodes 1347
> metadata buffers 0
> unlinked inodes 0
> quota IDs 0
> incore log buffers 0
> log space used 0.05%
> meta header cache entries 0
> glock dependencies 0
> glocks on reclaim list 0
> log wraps 2
> outstanding LM calls 0
> outstanding BIO calls 0
> fh2dentry misses 0
> glocks reclaimed 223287
> glock nq calls 1812286
> glock dq calls 1810926
> glock prefetch calls 101158
> lm_lock calls 198294
> lm_unlock calls 142643
> lm callbacks 341621
> address operations 502691
> dentry operations 395330
> export operations 0
> file operations 199243
> inode operations 984276
> super operations 1727082
> vm operations 0
> block I/O reads 520531
> block I/O writes 130315
>
> locks 171423
> locks held 85717
> freeze count 0
> incore inodes 85376
> metadata buffers 1474
> unlinked inodes 0
> quota IDs 0
> incore log buffers 24
> log space used 0.83%
> meta header cache entries 6621
> glock dependencies 2037
> glocks on reclaim list 0
> log wraps 428
> outstanding LM calls 0
> outstanding BIO calls 0
> fh2dentry misses 0
> glocks reclaimed 45784677
> glock nq calls 962822941
> glock dq calls 962595532
> glock prefetch calls 20215922
> lm_lock calls 40708633
> lm_unlock calls 23410498
> lm callbacks 64156052
> address operations 705464659
> dentry operations 19701522
> export operations 0
> file operations 364990733
> inode operations 98910127
> super operations 440061034
> vm operations 7
> block I/O reads 90394984
> block I/O writes 131199864
>
> locks 2916542
> locks held 1476005
> freeze count 0
> incore inodes 1454165
> metadata buffers 12539
> unlinked inodes 100
> quota IDs 0
> incore log buffers 11
> log space used 13.33%
> meta header cache entries 9928
> glock dependencies 110
> glocks on reclaim list 0
> log wraps 2393
> outstanding LM calls 25
> outstanding BIO calls 0
> fh2dentry misses 55546
> glocks reclaimed 127341056
> glock nq calls 867427
> glock dq calls 867430
> glock prefetch calls 36679316
> lm_lock calls 110179878
> lm_unlock calls 84588424
> lm callbacks 194863553
> address operations 250891447
> dentry operations 359537343
> export operations 390941288
> file operations 399156716
> inode operations 537830
> super operations 1093798409
> vm operations 774785
> block I/O reads 258044208
> block I/O writes 101585172
>
>
>
> On Tue, Oct 7, 2008 at 1:33 PM, Shawn Hood <shawnlhood at gmail.com> wrote:
>> Problem:
>> It seems that I/O on one machine in the cluster (not always the same
>> machine) will hang, and all processes accessing the clustered LVs will
>> block. The other machines follow suit shortly thereafter, until the
>> machine that first exhibited the problem is rebooted (manually, via
>> fence_drac). There are no messages in dmesg, syslog, etc. The
>> filesystems were recently fsck'd.
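>>
>> (By "via fence_drac manually" I mean invoking the agent by hand to
>> power-cycle the stuck node, roughly as below; the flags are from memory
>> of the RHEL4-era agent and may differ by version, and the DRAC address
>> and password are placeholders since they are redacted in the config
>> further down:)
>>
>>   fence_drac -a <drac-ip> -l root -p <password>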
>>
>> Hardware:
>> Four Dell 1950s, similar except for memory (three with 16GB RAM, one
>> with 8GB), all running RHEL4 ES U7.
>> Onboard gigabit NICs (the machines use little bandwidth, and all
>> network traffic, including DLM, shares the NICs)
>> QLogic 2462 PCI-Express dual channel FC HBAs
>> QLogic SANBox 5200 FC switch
>> Apple XRAID, which presents as two LUNs (~4.5TB raw aggregate)
>> Cisco Catalyst switch
>>
>> Simple four-machine RHEL4 U7 cluster running kernel 2.6.9-78.0.1.ELsmp
>> x86_64, with the following packages:
>> ccs-1.0.12-1
>> cman-1.0.24-1
>> cman-kernel-smp-2.6.9-55.13.el4_7.1
>> cman-kernheaders-2.6.9-55.13.el4_7.1
>> dlm-kernel-smp-2.6.9-54.11.el4_7.1
>> dlm-kernheaders-2.6.9-54.11.el4_7.1
>> fence-1.32.63-1.el4_7.1
>> GFS-6.1.18-1
>> GFS-kernel-smp-2.6.9-80.9.el4_7.1
>>
>> One clustered VG, striped across two physical volumes that correspond
>> to the two sides of the Apple XRAID.
>> Clustered volume group info:
>> --- Volume group ---
>> VG Name hq-san
>> System ID
>> Format lvm2
>> Metadata Areas 2
>> Metadata Sequence No 50
>> VG Access read/write
>> VG Status resizable
>> Clustered yes
>> Shared no
>> MAX LV 0
>> Cur LV 3
>> Open LV 3
>> Max PV 0
>> Cur PV 2
>> Act PV 2
>> VG Size 4.55 TB
>> PE Size 4.00 MB
>> Total PE 1192334
>> Alloc PE / Size 905216 / 3.45 TB
>> Free PE / Size 287118 / 1.10 TB
>> VG UUID hfeIhf-fzEq-clCf-b26M-cMy3-pphm-B6wmLv
>>
>> Logical volumes contained within the hq-san VG:
>> cam_development hq-san -wi-ao 500.00G
>> qa hq-san -wi-ao 1.07T
>> svn_users hq-san -wi-ao 1.89T
>>
>> All four machines mount svn_users, two machines mount qa, and one
>> mounts cam_development.
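>>
>> (For context, striped LVs like these would be created roughly as
>> follows; the two stripes match the two PVs, but the stripe size and
>> the sizes shown are illustrative, not the values actually used:)
>>
>>   lvcreate -i 2 -I 64 -L 1890G -n svn_users hq-san
>>   lvcreate -i 2 -I 64 -L 1070G -n qa hq-san
>>   lvcreate -i 2 -I 64 -L 500G -n cam_development hq-san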
>>
>> /etc/cluster/cluster.conf:
>>
>> <?xml version="1.0"?>
>> <cluster alias="tungsten" config_version="31" name="qualia">
>>     <fence_daemon post_fail_delay="0" post_join_delay="3"/>
>>     <clusternodes>
>>         <clusternode name="odin" votes="1">
>>             <fence>
>>                 <method name="1">
>>                     <device modulename="" name="odin-drac"/>
>>                 </method>
>>             </fence>
>>         </clusternode>
>>         <clusternode name="hugin" votes="1">
>>             <fence>
>>                 <method name="1">
>>                     <device modulename="" name="hugin-drac"/>
>>                 </method>
>>             </fence>
>>         </clusternode>
>>         <clusternode name="munin" votes="1">
>>             <fence>
>>                 <method name="1">
>>                     <device modulename="" name="munin-drac"/>
>>                 </method>
>>             </fence>
>>         </clusternode>
>>         <clusternode name="zeus" votes="1">
>>             <fence>
>>                 <method name="1">
>>                     <device modulename="" name="zeus-drac"/>
>>                 </method>
>>             </fence>
>>         </clusternode>
>>     </clusternodes>
>>     <cman expected_votes="1" two_node="0"/>
>>     <fencedevices>
>>         <resources/>
>>         <fencedevice name="odin-drac" agent="fence_drac"
>>             ipaddr="redacted" login="root" passwd="redacted"/>
>>         <fencedevice name="hugin-drac" agent="fence_drac"
>>             ipaddr="redacted" login="root" passwd="redacted"/>
>>         <fencedevice name="munin-drac" agent="fence_drac"
>>             ipaddr="redacted" login="root" passwd="redacted"/>
>>         <fencedevice name="zeus-drac" agent="fence_drac"
>>             ipaddr="redacted" login="root" passwd="redacted"/>
>>     </fencedevices>
>>     <rm>
>>         <failoverdomains/>
>>         <resources/>
>>     </rm>
>> </cluster>
>>
>>
>>
>>
>> --
>> Shawn Hood
>> 910.670.1819 m
>>
>
>
>
> --
> Shawn Hood
> 910.670.1819 m
>
--
Shawn Hood
910.670.1819 m