[Linux-cluster] fence_ipmilan issue
Stewart Stafford
scstafford at gmail.com
Fri Nov 2 18:07:58 UTC 2007
Hey all,
I have a 3-node cluster consisting of Sun X2200 M2s. I am using the
ELOM port for fencing the nodes. The problem I am having is that when I
manually fence a node by calling fence_ipmilan with the reboot option,
the service running on that node is stopped and never migrates to
another node. However, if I issue a reboot on the node running the
service, the service migrates successfully. Attached is the
cluster.conf, and below is a snippet of the messages log.
Thanks,
Stew
cluster.conf
===========================
<?xml version="1.0"?>
<!--
  cluster.conf for the three-node NFS cluster "nfs_cluster" (isc0, isc1, isc2).
  Every node is fenced through its own fence_ipmilan device, and two NFS
  services (nfstest, nfsices) run inside the "fotest" failover domain.

  config_version was raised from 78 to 79 because this revision changes the
  fencing configuration; the version must increase for the update to propagate.
-->
<cluster alias="ices_nfscluster" config_version="79" name="nfs_cluster">
  <fence_daemon post_fail_delay="20" post_join_delay="3"/>

  <!-- Each node references exactly one fence device, declared in <fencedevices>. -->
  <clusternodes>
    <clusternode name="isc0" nodeid="1" votes="1">
      <fence>
        <method name="1">
          <device lanplus="" name="iisc0"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="isc1" nodeid="2" votes="1">
      <fence>
        <method name="1">
          <device lanplus="" name="iisc1"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="isc2" nodeid="3" votes="1">
      <fence>
        <method name="1">
          <!-- lanplus added so isc2 matches isc0 and isc1; previously it was
               the only node without it. NOTE(review): confirm that the isc2
               management controller accepts lanplus sessions. -->
          <device lanplus="" name="iisc2"/>
        </method>
      </fence>
    </clusternode>
  </clusternodes>

  <cman/>

  <!-- IPMI endpoints used for fencing, one per node.
       NOTE(review): passwords are stored here in clear text; keep this file
       readable by root only. -->
  <fencedevices>
    <fencedevice agent="fence_ipmilan"
                 auth="none"
                 ipaddr="172.16.158.159"
                 login="root"
                 name="iisc0"
                 passwd="changeme"/>
    <fencedevice agent="fence_ipmilan"
                 auth="none"
                 ipaddr="172.16.158.160"
                 login="root"
                 name="iisc1"
                 passwd="changeme"/>
    <!-- First octet corrected from 171 to 172 so the address sits in the same
         172.16.158.x range as iisc0 and iisc1. -->
    <fencedevice agent="fence_ipmilan"
                 auth="none"
                 ipaddr="172.16.158.161"
                 login="root"
                 name="iisc2"
                 passwd="changeme"/>
  </fencedevices>

  <rm>
    <!-- isc0 and isc1 share priority 1; isc2 has priority 2. -->
    <failoverdomains>
      <failoverdomain name="fotest" ordered="1" restricted="1">
        <failoverdomainnode name="isc0" priority="1"/>
        <failoverdomainnode name="isc1" priority="1"/>
        <failoverdomainnode name="isc2" priority="2"/>
      </failoverdomain>
    </failoverdomains>

    <!-- Shared IP resources, referenced by the services below via <ip ref="..."/>. -->
    <resources>
      <ip address="172.16.127.15" monitor_link="1"/>
      <ip address="172.16.127.17" monitor_link="1"/>
    </resources>

    <!-- nfstest: exports /export/test on 172.16.127.15; recovery policy "restart". -->
    <service autostart="1" domain="fotest" name="nfstest" recovery="restart">
      <fs device="/dev/ices-fs/test"
          force_fsck="0"
          force_unmount="1"
          fsid="13584"
          fstype="ext3"
          mountpoint="/export/test"
          name="testfs"
          options=""
          self_fence="0"/>
      <nfsexport name="test_export">
        <nfsclient name="test_export"
                   options="async,rw,fsid=20"
                   path="/export/test"
                   target="128.83.68.0/24"/>
      </nfsexport>
      <ip ref="172.16.127.15"/>
    </service>

    <!-- nfsices: exports /export/cices on 172.16.127.17; recovery policy "relocate". -->
    <service autostart="1" domain="fotest" name="nfsices" recovery="relocate">
      <fs device="/dev/ices-fs/ices"
          force_fsck="0"
          force_unmount="1"
          fsid="44096"
          fstype="ext3"
          mountpoint="/export/cices"
          name="nfsfs"
          options=""
          self_fence="0"/>
      <nfsexport name="nfsexport">
        <nfsclient name="nfsclient"
                   options="async,fsid=25,rw"
                   path="/export/cices"
                   target="128.83.68.0/24"/>
      </nfsexport>
      <ip ref="172.16.127.17"/>
    </service>
  </rm>
</cluster>
Nov 2 13:05:48 isc1 fenced[2762]: fencing node "isc0"
Nov 2 13:05:48 isc1 openais[2743]: [CLM ] got nodejoin message 172.16.127.123
Nov 2 13:05:48 isc1 openais[2743]: [CLM ] got nodejoin message 172.16.127.124
Nov 2 13:05:48 isc1 openais[2743]: [CPG ] got joinlist message from node 2
Nov 2 13:05:48 isc1 openais[2743]: [CPG ] got joinlist message from node 3
Nov 2 13:06:21 isc1 fenced[2762]: agent "fence_ipmilan" reports:
Rebooting machine @ IPMI:172.16.158.159...ipmilan: Failed to connect
after 30 seconds Failed
Nov 2 13:06:21 isc1 fenced[2762]: fence "isc0" failed
Nov 2 13:06:26 isc1 fenced[2762]: fencing node "isc0"
Nov 2 13:06:26 isc1 ccsd[2736]: process_get: Invalid connection
descriptor received.
Nov 2 13:06:26 isc1 ccsd[2736]: Error while processing get: Invalid
request descriptor
More information about the Linux-cluster
mailing list