[Linux-cluster] Cluster environment issue
Srija
swap_project at yahoo.com
Mon May 30 19:17:07 UTC 2011
Hi,
I am very new to the Red Hat cluster. I need some help and suggestions for the cluster configuration.
We have sixteen node cluster of
OS : Linux Server release 5.5 (Tikanga)
kernel : 2.6.18-194.3.1.el5xen.
The problem is that sometimes the cluster gets broken. The only solution so far has been to reboot all
sixteen nodes; otherwise the nodes do not rejoin the cluster.
We are using clvm and not using any quorum disk. The quorum is by default.
When it gets broken, the clustat command shows everything offline except the node from which
the clustat command was executed. If we execute the vgs or lvs commands, they hang.
Here is at present the clustat report
-------------------------------------
[server1]# clustat
Cluster Status for newcluster @ Mon May 30 14:55:10 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
server1 1 Online
server2 2 Online, Local
server3 3 Online
server4 4 Online
server5 5 Online
server6 6 Online
server7 7 Online
server8 8 Online
server9 9 Online
server10 10 Online
server11 11 Online
server12 12 Online
server13 13 Online
server14 14 Online
server15 15 Online
server16 16 Online
Here the cman_tool status output from one server
--------------------------------------------------
[server1 ~]# cman_tool status
Version: 6.2.0
Config Version: 23
Cluster Name: newcluster
Cluster Id: 53322
Cluster Member: Yes
Cluster Generation: 11432
Membership state: Cluster-Member
Nodes: 16
Expected votes: 16
Total votes: 16
Quorum: 9
Active subsystems: 8
Flags: Dirty
Ports Bound: 0 11
Node name: server1
Node ID: 1
Multicast addresses: xxx.xxx.xxx.xx
Node addresses: 192.168.xxx.xx
Here is the cluster.conf file.
------------------------------
<?xml version="1.0"?>
<cluster alias="newcluster" config_version="23" name="newcluster">
<fence_daemon clean_start="1" post_fail_delay="0" post_join_delay="15"/>
<clusternodes>
<clusternode name="server1-priv" nodeid="1" votes="1">
<fence><method name="1">
<device name="ilo-server1r"/></method>
</fence>
</clusternode>
<clusternode name="server2-priv" nodeid="3" votes="1">
<fence><method name="1">
<device name="ilo-server2r"/></method>
</fence>
</clusternode>
<clusternode name="server3-priv" nodeid="2" votes="1">
<fence><method name="1">
<device name="ilo-server3r"/></method>
</fence>
</clusternode>
[ ... snip ... ]
<clusternode name="server16-priv" nodeid="16" votes="1">
<fence><method name="1">
<device name="ilo-server16r"/></method>
</fence>
</clusternode>
</clusternodes>
<cman/>
<dlm plock_ownership="1" plock_rate_limit="0"/>
<gfs_controld plock_rate_limit="0"/>
<fencedevices>
<fencedevice agent="fence_ilo" hostname="server1r" login="Admin" name="ilo-server1r" passwd="xxxxx"/>
..........
<fencedevice agent="fence_ilo" hostname="server16r" login="Admin" name="ilo-server16r" passwd="xxxxx"/>
</fencedevices>
<rm>
<failoverdomains/>
<resources/>
</rm></cluster>
Here is the lvm.conf file
--------------------------
devices {
dir = "/dev"
scan = [ "/dev" ]
preferred_names = [ ]
filter = [ "r/scsi.*/","r/pci.*/","r/sd.*/","a/.*/" ]
cache_dir = "/etc/lvm/cache"
cache_file_prefix = ""
write_cache_state = 1
sysfs_scan = 1
md_component_detection = 1
md_chunk_alignment = 1
data_alignment_detection = 1
data_alignment = 0
data_alignment_offset_detection = 1
ignore_suspended_devices = 0
}
log {
verbose = 0
syslog = 1
overwrite = 0
level = 0
indent = 1
command_names = 0
prefix = " "
}
backup {
backup = 1
backup_dir = "/etc/lvm/backup"
archive = 1
archive_dir = "/etc/lvm/archive"
retain_min = 10
retain_days = 30
}
shell {
history_size = 100
}
global {
library_dir = "/usr/lib64"
umask = 077
test = 0
units = "h"
si_unit_consistency = 0
activation = 1
proc = "/proc"
locking_type = 3
wait_for_locks = 1
fallback_to_clustered_locking = 1
fallback_to_local_locking = 1
locking_dir = "/var/lock/lvm"
prioritise_write_locks = 1
}
activation {
udev_sync = 1
missing_stripe_filler = "error"
reserved_stack = 256
reserved_memory = 8192
process_priority = -18
mirror_region_size = 512
readahead = "auto"
mirror_log_fault_policy = "allocate"
mirror_image_fault_policy = "remove"
}
dmeventd {
mirror_library = "libdevmapper-event-lvm2mirror.so"
snapshot_library = "libdevmapper-event-lvm2snapshot.so"
}
If you need more information, I can provide ...
Thanks for your help
Priya
More information about the Linux-cluster
mailing list