[Linux-cluster] pvmove locking problem CLVM on RHEL 6

Jay Tingle yogsothoth at sinistar.org
Thu Jun 21 17:54:27 UTC 2012


Hi All, 
    I am having a problem using pvmove during some testing with Red Hat
Cluster using CLVM on RHEL 6.2.  I have 3 nodes which are ESXi 5u1 VMs with the
'multi-writer' flag set for the shared vmdk devices. I keep getting locking
errors during the pvmove.  Everything else seems to be working great as far as
CLVM goes.  Searching through the list archives and consulting the manuals, it
looks like all you need is to have cmirrord running.  The RHEL 6 manual
mentions cmirror-kmod, which doesn't seem to exist anymore.  Is there still a
kernel module on RHEL 6?  I am running standard CLVM with ext4 in an active/passive
cluster.  Does anyone know what I am doing wrong?  Below is my LVM config and my
cluster config.  Thanks in advance.

[root at rhc6esx1 ~]# rpm -qa|grep -i lvm
lvm2-libs-2.02.87-6.el6.x86_64
lvm2-2.02.87-6.el6.x86_64
lvm2-cluster-2.02.87-6.el6.x86_64
[root at rhc6esx1 ~]# rpm -q cman
cman-3.0.12.1-23.el6.x86_64
[root at rhc6esx1 ~]# rpm -q cmirror
cmirror-2.02.87-6.el6.x86_64

[root at rhc6esx1 ~]# ps -ef|grep cmirror
root     21253 20692  0 13:37 pts/1    00:00:00 grep cmirror
root     31858     1  0 13:18 ?        00:00:00 cmirrord
        
[root at rhc6esx1 ~]# pvs|grep cfq888dbvg
  /dev/sdf1  cfq888dbvg        lvm2 a--  20.00g     0
  /dev/sdi1  cfq888dbvg        lvm2 a--  20.00g     0
  /dev/sdj1  cfq888dbvg        lvm2 a--  20.00g     0
  /dev/sdk1  cfq888dbvg        lvm2 a--  80.00g 80.00g

[root at rhc6esx1 ~]# pvmove -v /dev/sdi1 /dev/sdk1
    Finding volume group "cfq888dbvg"
    Executing: /sbin/modprobe dm-log-userspace
    Archiving volume group "cfq888dbvg" metadata (seqno 7).
    Creating logical volume pvmove0
    Moving 5119 extents of logical volume cfq888dbvg/cfq888_db
  Error locking on node rhc6esx1-priv: Device or resource busy
  Error locking on node rhc6esx3-priv: Volume is busy on another node
  Error locking on node rhc6esx2-priv: Volume is busy on another node
  Failed to activate cfq888_db

[root at rhc6esx1 ~]# clustat
Cluster Status for rhc6 @ Thu Jun 21 13:35:49 2012
Member Status: Quorate

 Member Name                                              ID   Status
 ------ ----                                              ---- ------
 rhc6esx1-priv                                                1 Online, Local, rgmanager
 rhc6esx2-priv                                                2 Online, rgmanager
 rhc6esx3-priv                                                3 Online, rgmanager
 /dev/block/8:33                                              0 Online, Quorum Disk

 Service Name                                    Owner (Last)                                    State
 ------- ----                                    ----- ------                                    -----
 service:cfq888_grp                              rhc6esx1-priv                                   started

[root at rhc6esx1 ~]# lvm dumpconfig
  devices {
        dir="/dev"
        scan="/dev"
        obtain_device_list_from_udev=1
        preferred_names=["^/dev/mpath/", "^/dev/mapper/mpath", "^/dev/[hs]d"]
        filter="a/.*/"
        cache_dir="/etc/lvm/cache"
        cache_file_prefix=""
        write_cache_state=1
        sysfs_scan=1
        md_component_detection=1
        md_chunk_alignment=1
        data_alignment_detection=1
        data_alignment=0
        data_alignment_offset_detection=1
        ignore_suspended_devices=0
        disable_after_error_count=0
        require_restorefile_with_uuid=1
        pv_min_size=2048
        issue_discards=0
  }
  dmeventd {
        mirror_library="libdevmapper-event-lvm2mirror.so"
        snapshot_library="libdevmapper-event-lvm2snapshot.so"
  }
  activation {
        checks=0
        udev_sync=1
        udev_rules=1
        verify_udev_operations=0
        missing_stripe_filler="error"
        reserved_stack=256
        reserved_memory=8192
        process_priority=-18
        mirror_region_size=512
        readahead="auto"
        mirror_log_fault_policy="allocate"
        mirror_image_fault_policy="remove"
        snapshot_autoextend_threshold=100
        snapshot_autoextend_percent=20
        use_mlockall=0
        monitoring=1
        polling_interval=15
  }
  global {
        umask=63
        test=0
        units="h"
        si_unit_consistency=1
        activation=1
        proc="/proc"
        locking_type=3
        wait_for_locks=1
        fallback_to_clustered_locking=1
        fallback_to_local_locking=1
        locking_dir="/var/lock/lvm"
        prioritise_write_locks=1
        abort_on_internal_errors=0
        detect_internal_vg_cache_corruption=0
        metadata_read_only=0
        mirror_segtype_default="mirror"
  }
  shell {
        history_size=100
  }
  backup {
        backup=1
        backup_dir="/etc/lvm/backup"
        archive=1
        archive_dir="/etc/lvm/archive"
        retain_min=10
        retain_days=30
  }
  log {
        verbose=0
        syslog=1
        overwrite=0
        level=0
        indent=1
        command_names=0
        prefix="  "
  }


[root at rhc6esx1 ~]# ccs -h localhost --getconf
<cluster config_version="273" name="rhc6">
  <fence_daemon clean_start="0" post_fail_delay="20" post_join_delay="60"/>
  <clusternodes>
    <clusternode name="rhc6esx1-priv" nodeid="1">
      <fence>
        <method name="1">
          <device name="fence_vmware" uuid="422a2b6a-4093-2694-65e0-a01332ef54bd"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="rhc6esx2-priv" nodeid="2">
      <fence>
        <method name="1">
          <device name="fence_vmware" uuid="422a9c5d-f9e2-8150-340b-c84b834ba068"/>
        </method>
      </fence>
    </clusternode>
    <clusternode name="rhc6esx3-priv" nodeid="3">
      <fence>
        <method name="1">
          <device name="fence_vmware" uuid="422af24c-909f-187d-4e64-2a28cbe5d09d"/>
        </method>
      </fence>
    </clusternode>
  </clusternodes>
  <cman expected_votes="5"/>
  <fencedevices>
    <fencedevice agent="fence_vmware_soap" ipaddr="192.168.1.111" login="mrfence" name="fence_vmware" passwd="FenceM3" ssl="yes" verbose="yes"/>
  </fencedevices>
  <totem token="30000"/>
  <quorumd interval="1" label="rhc6esx-quorum" stop_cman="1" tko="10" votes="2"/>
  <logging logfile_priority="info" syslog_facility="daemon" syslog_priority="warning" to_logfile="yes" to_syslog="yes">
    <logging_daemon logfile="/var/log/cluster/qdiskd.log" name="qdiskd"/>
    <logging_daemon logfile="/var/log/cluster/fenced.log" name="fenced"/>
    <logging_daemon logfile="/var/log/cluster/dlm_controld.log" name="dlm_controld"/>
    <logging_daemon logfile="/var/log/cluster/gfs_controld.log" name="gfs_controld"/>
    <logging_daemon debug="on" logfile="/var/log/cluster/rgmanager.log" name="rgmanager"/>
    <logging_daemon name="corosync" to_logfile="no"/>
  </logging>
  <rm log_level="7">
    <failoverdomains>
      <failoverdomain name="rhc6esx3_home" nofailback="1" ordered="1" restricted="1">
        <failoverdomainnode name="rhc6esx3-priv" priority="1"/>
        <failoverdomainnode name="rhc6esx2-priv" priority="2"/>
        <failoverdomainnode name="rhc6esx1-priv" priority="3"/>
      </failoverdomain>
      <failoverdomain name="rhc6esx2_home" nofailback="1" ordered="1" restricted="1">
        <failoverdomainnode name="rhc6esx2-priv" priority="1"/>
        <failoverdomainnode name="rhc6esx1-priv" priority="2"/>
        <failoverdomainnode name="rhc6esx3-priv" priority="3"/>
      </failoverdomain>
      <failoverdomain name="rhc6esx1_home" nofailback="1" ordered="1" restricted="1">
        <failoverdomainnode name="rhc6esx1-priv" priority="1"/>
        <failoverdomainnode name="rhc6esx2-priv" priority="2"/>
        <failoverdomainnode name="rhc6esx3-priv" priority="3"/>
      </failoverdomain>
    </failoverdomains>
    <resources>
      <lvm name="cfq888vg_lvm" self_fence="1" vg_name="cfq888vg"/>
      <lvm name="cfq888bkpvg_lvm" self_fence="1" vg_name="cfq888bkpvg"/>
      <lvm name="cfq888dbvg_lvm" self_fence="1" vg_name="cfq888dbvg"/>
      <lvm name="cfq888revg_lvm" vg_name="cfq888revg"/>
      <lvm name="cfq888flashvg_lvm" self_fence="1" vg_name="cfq888flashvg"/>
      <ip address="192.168.1.31" monitor_link="1"/>
      <fs device="/dev/cfq888vg/cfq888" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888" name="cfq888_mnt" self_fence="0"/>
      <fs device="/dev/cfq888vg/cfq888_ar" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_ar" name="cfq888_ar_mnt" self_fence="0"/>
      <fs device="/dev/cfq888vg/cfq888_sw" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_sw" name="cfq888_sw_mnt" self_fence="0"/>
      <fs device="/dev/cfq888bkpvg/cfq888_dmp" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_dmp" name="cfq888_dmp_mnt" self_fence="0"/>
      <fs device="/dev/cfq888bkpvg/cfq888_bk" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_bk" name="cfq888_bk_mnt" self_fence="0"/>
      <fs device="/dev/cfq888dbvg/cfq888_db" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_db" name="cfq888_db_mnt" self_fence="0"/>
      <fs device="/dev/cfq888flashvg/cfq888_flash" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_bk/cfq888_flash" name="cfq888_flash_mnt" self_fence="0"/>
      <fs device="/dev/cfq888revg/cfq888_rd" force_fsck="0" force_unmount="1" fstype="ext4" mountpoint="/cfq888/cfq888_rd" name="cfq888_rd_mnt" self_fence="0"/>
      <oracledb home="/u01/app/oracle/product/11.2.0/dbhome_1" listener_name="cfq888_lsnr" name="cfq888" type="base" user="oracle"/>
    </resources>
    <service autostart="1" domain="rhc6esx1_home" exclusive="0" name="cfq888_grp" recovery="restart">
      <lvm ref="cfq888vg_lvm"/>
      <lvm ref="cfq888bkpvg_lvm"/>
      <lvm ref="cfq888dbvg_lvm"/>
      <lvm ref="cfq888revg_lvm"/>
      <lvm ref="cfq888flashvg_lvm"/>
      <fs ref="cfq888_mnt">
        <fs ref="cfq888_ar_mnt"/>
        <fs ref="cfq888_sw_mnt"/>
        <fs ref="cfq888_dmp_mnt"/>
        <fs ref="cfq888_bk_mnt">
          <fs ref="cfq888_flash_mnt"/>
        </fs>
        <fs ref="cfq888_db_mnt"/>
        <fs ref="cfq888_rd_mnt"/>
      </fs>
      <ip ref="192.168.1.31"/>
      <oracledb ref="cfq888"/>
    </service>
  </rm>
</cluster>


thanks,
--Jason






More information about the Linux-cluster mailing list