[Linux-cluster] Failure of one of two services sharing the same virtual IP tears down IP

Jochen Schneider jochen.schneider at gmail.com
Thu Nov 17 15:25:14 UTC 2011


Hi,

I'm creating a 2+1 failover cluster with 4 services of which 2 have to
run on the same node. One of the services on each node needs a (SAN)
disk, the other doesn't. I'm using HA-LVM. When I ifdown the two
interfaces connected to the SAN to simulate SAN failure, the service
needing the disk is disabled, the other keeps running. So far, this
is as expected. Unfortunately, the virtual IP address shared by the two
services on the same machine is also removed, rendering the
still-running service useless. This rather surprised me. How can I
configure the cluster to keep the IP address up?

Thanks,

  Jochen

P.S.: The cluster.conf:

<?xml version="1.0" ?>
<!--
  rgmanager cluster.conf for a 2+1 failover setup: node1 and node2 each run
  one disk-backed service and one disk-less service; node3 is the spare.
  NOTE(review): each virtual IP is declared once under <resources> and then
  referenced by TWO services via <ip ref>. Stopping either referencing
  service appears to tear the address down for both; presumably this is the
  behavior the poster observed. Confirm against rgmanager resource-tree
  semantics before reusing this layout.
-->
<cluster config_version="1" name="cluster">
  <!-- Fence daemon tuning: no fencing of pre-existing members at startup
       (clean_start=0), 3 s grace after a node joins before fencing. -->
  <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
  <!-- cman shutdown grace period; value is presumably milliseconds (10 s),
       TODO confirm units in the cman schema. -->
  <cman shutdown_timeout="10000"/>
  <!-- Three members, one vote each; every node has a single fence method
       pointing at its own iLO fence device below. -->
  <clusternodes>
    <clusternode name="node1" nodeid="1" votes="1">
      <fence>
        <method name="1">
          <device name="device1"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="node2" nodeid="2" votes="1">
      <fence>
        <method name="1">
          <device name="device2"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="node3" nodeid="3" votes="1">
      <fence>
        <method name="1">
          <device name="device3"/>
        </method>
      </fence>
    </clusternode>
  </clusternodes>
  <!-- One HP iLO power-fencing device per node (fence_ilo agent). -->
  <fencedevices>
    <fencedevice agent="fence_ilo" ipaddr="10.0.24.101" login="admin"
name="device1" passwd="password"/>
    <fencedevice agent="fence_ilo" ipaddr="10.0.24.102" login="admin"
name="device2" passwd="password"/>
    <fencedevice agent="fence_ilo" ipaddr="10.0.24.103" login="admin"
name="device3" passwd="password"/>
  </fencedevices>
  <rm>
    <!-- Each domain contains exactly one node, pinning its services to that
         node; nofailback=0 allows services to fail back when it returns.
         NOTE(review): the domains are not marked restricted or ordered here,
         so rgmanager's defaults apply. -->
    <failoverdomains>
      <failoverdomain name="domain1" nofailback="0">
	<failoverdomainnode name="node1" priority="1"/>
      </failoverdomain>
      <failoverdomain name="domain2" nofailback="0">
	<failoverdomainnode name="node2" priority="1"/>
      </failoverdomain>
    </failoverdomains>
    <!-- Globally declared virtual IPs with link monitoring; each address is
         referenced by two services below (the crux of the reported issue). -->
    <resources>
      <ip address="10.0.24.111" monitor_link="1"/>
      <ip address="10.0.24.112" monitor_link="1"/>
    </resources>
    <!-- disk1: SAN-backed service on node1. Init script plus an ext3
         filesystem on an HA-LVM volume; self_fence=1 reboots the node if the
         unmount fails. Shares 10.0.24.111 with the nodisk1 service. -->
    <service autostart="1" exclusive="0" name="disk1"
recovery="restart" domain="domain1">
      <ip ref="10.0.24.111"/>
      <script file="/etc/init.d/disk1" name="disk1"/>
      <fs device="/dev/VolGroup10/LogVol10" force_fsck="0"
force_unmount="1" fstype="ext3" mountpoint="/mnt/lun1" name="lun1"
self_fence="1"/>
      <lvm lv_name="LogVol10" name="VolGroup10/LogVol10" vg_name="VolGroup10"/>
    </service>
    <!-- nodisk1: disk-less service on node1; reuses 10.0.24.111. -->
    <service autostart="1" exclusive="0" name="nodisk1"
recovery="restart" domain="domain1">
      <ip ref="10.0.24.111"/>
      <script file="/etc/init.d/nodisk1" name="nodisk1"/>
    </service>
    <!-- disk2: SAN-backed service on node2; mirrors disk1's structure.
         Shares 10.0.24.112 with the nodisk2 service. -->
    <service autostart="1" exclusive="0" name="disk2"
recovery="restart" domain="domain2">
      <ip ref="10.0.24.112"/>
      <script file="/etc/init.d/disk2" name="disk2"/>
      <fs device="/dev/VolGroup20/LogVol20" force_fsck="0"
force_unmount="1" fstype="ext3" mountpoint="/mnt/lun2" name="lun2"
self_fence="1"/>
      <lvm lv_name="LogVol20" name="VolGroup20/LogVol20" vg_name="VolGroup20"/>
    </service>
    <!-- nodisk2: disk-less service on node2; reuses 10.0.24.112. -->
    <service autostart="1" exclusive="0" name="nodisk2"
recovery="restart" domain="domain2">
      <ip ref="10.0.24.112"/>
      <script file="/etc/init.d/nodisk2" name="nodisk2"/>
    </service>
  </rm>
</cluster>




More information about the Linux-cluster mailing list