[libvirt] Bug 1227257

王松波 hack.coo at gmail.com
Tue Jun 2 12:27:01 UTC 2015


I have reported a bug [Bug 1227257]. The environment is libvirt-1.2.16.tar.gz
+ qemu-img version 2.1.2 + ceph version 0.94.1.
A libvirt pool becomes inactive when one client calls vol.delete while
another calls pool.refresh on the same pool simultaneously.

The reason is that rbd_list and rbd_open are not performed as one atomic
operation, but as two separate operations.
For example, suppose two clients are operating on the same pool at the same
time. One client calls rbd_list and gets 10 rbd images; meanwhile, the other
client deletes one of those rbd images. In this situation, when the first
client performs its next operation, such as rbd_open, the call may fail
because the rbd image has already been removed.

I wrote a test case in Python to reproduce this problem, as follows (I have
also put it in the attachment):
##################################
import libvirt
import sys
import time
import sys
#coding:utf-8

# Connection URI handed to libvirt.open() by create_vol() and destroy_vol().
QEMU_URL = 'qemu:///system'

# Volume XML skeleton. create_vol() fills the {pool}, {vol}, {cap_size} and
# {alloc_size} placeholders via str.format() and passes the result to
# pool.createXML().
VOL_TEMPLATE='''
<volume>
    <name>{vol}</name>
    <key>{pool}/{vol}</key>
    <source>
    </source>
    <capacity unit='MB'>{cap_size}</capacity>
    <allocation unit='MB'>{alloc_size}</allocation>
    <target>
        <path>rbd:{pool}/{vol}</path>
        <format type='unknown'/>
        <permissions>
            <mode>00</mode>
            <owner>0</owner>
            <group>0</group>
        </permissions>
    </target>
</volume>'''

def create_vol(pool_name, vol_name, cap_size, alloc_size):
    """Create one volume in a libvirt storage pool.

    Opens a connection to QEMU_URL, looks up the pool ``pool_name``,
    refreshes it, and creates a volume from VOL_TEMPLATE filled in with
    ``pool_name``, ``vol_name``, ``cap_size`` and ``alloc_size``.

    The process exits with status 1 if the connection cannot be opened or
    if libvirt reports an error while looking up, refreshing or creating.
    The connection is closed on every path once it has been opened.
    """
    conn = libvirt.open(QEMU_URL)
    if conn is None:
        print('Failed to open connection to the hypervisor')
        sys.exit(1)

    try:
        pool = conn.storagePoolLookupByName(pool_name)
        pool.refresh()
        template = VOL_TEMPLATE.format(pool=pool_name, vol=vol_name,
                                       cap_size=cap_size,
                                       alloc_size=alloc_size)
        pool.createXML(template, 0)
    except libvirt.libvirtError:
        # Only libvirt failures are reported here; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        print('Failed to open pool')
        sys.exit(1)
    finally:
        # conn cannot be None here: the None case exited above.
        conn.close()

def destroy_vol(pool_name, vol_name):
    """Delete one volume from a libvirt storage pool.

    Opens a connection to QEMU_URL, looks up the pool ``pool_name``,
    refreshes it, looks up ``vol_name`` and deletes it.

    The process exits with status 1 if the connection cannot be opened.
    Exceptions raised by the pool or volume calls propagate to the caller,
    but the connection is always closed first.
    """
    conn = libvirt.open(QEMU_URL)
    if conn is None:
        print('Failed to open connection to the hypervisor')
        sys.exit(1)

    try:
        pool = conn.storagePoolLookupByName(pool_name)
        pool.refresh(0)
        vol = pool.storageVolLookupByName(vol_name)
        if vol is not None:
            vol.delete(0)
    finally:
        # Close even when a lookup/refresh/delete raises, so a failing run
        # does not leak the hypervisor connection.
        conn.close()

# Command-line driver: ``python vol-test.py clientId OPER``.
# clientId is embedded in each volume name so that concurrent clients work on
# distinct volumes inside the shared 'capacity' pool; OPER selects whether
# the 19 volumes (indices 1..19) are created or destroyed.
# A missing OPER falls through to the usage text instead of an IndexError.
oper = sys.argv[2] if len(sys.argv) > 2 else None

if oper == 'create':
    for i in range(1, 20):
        volname = 'pool-down-test-%s-%d' % (sys.argv[1], i)
        print('create %s' % volname)
        create_vol('capacity', volname, '500', '500')
elif oper == 'destroy':
    for i in range(1, 20):
        volname = 'pool-down-test-%s-%d' % (sys.argv[1], i)
        print('destroy %s' % volname)
        destroy_vol('capacity', volname)
else:
    print('Usage: python vol-test.py clientId OPER')
    print('  ')
    print('where  clientId : a num/string used to distinguish different client')
    print('       OPER     : create/destroy')




The patch for this is as follows:

diff --git a/src/storage/storage_backend_rbd.c
b/src/storage/storage_backend_rbd.c
index ae4bcb3..24fbc84 100644
--- a/src/storage/storage_backend_rbd.c
+++ b/src/storage/storage_backend_rbd.c
@@ -266,6 +266,46 @@ static int
virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr)
     return ret;
 }

+static int volStorageBackendRBDVolIsExist(char *volname,
virStorageBackendRBDStatePtr ptr)
+{
+    int ret = -1;
+    char *name, *names = NULL;
+    size_t max_size = 1024;
+    int len = -1;
+
+    while (true) {
+        if (VIR_ALLOC_N(names, max_size) < 0)
+            goto cleanup;
+
+        len = rbd_list(ptr->ioctx, names, &max_size);
+        if (len >= 0)
+            break;
+        if (len != -ERANGE) {
+            VIR_WARN("%s", _("A problem occurred while listing RBD
images"));
+            goto cleanup;
+        }
+        VIR_FREE(names);
+    }
+
+    for (name = names; name < names + max_size;) {
+
+        if (STREQ(name, ""))
+            break;
+
+        name += strlen(name) + 1;
+        if (STREQ(volname, name)) {
+            VIR_ERROR("RBD images '%s' is exist, but cannot open it",
volname);
+            ret = -2;
+            break;
+        }
+    }
+    ret = 0;
+
+cleanup:
+    VIR_FREE(names);
+    return ret;
+}
+
 static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol,
                                               virStoragePoolObjPtr pool,
                                               virStorageBackendRBDStatePtr
ptr)
@@ -276,8 +316,15 @@ static int
volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol,

     r = rbd_open(ptr->ioctx, vol->name, &image, NULL);
     if (r < 0) {
-        virReportSystemError(-r, _("failed to open the RBD image '%s'"),
-                             vol->name);
+        VIR_DEBUG("failed to open RBD image '%s', check if it was still
exist in its pool",\
+                  vol->name);
+        if (volStorageBackendRBDVolIsExist(vol->name, ptr) == 0) {
+            VIR_DEBUG("vol '%s' may be removed by the other rbd client",
vol->name);
+            ret = 0;
+        } else {
+            virReportSystemError(-r, _("failed to open the RBD image
'%s'"),
+                    vol->name);
+        }
         return ret;
     }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://listman.redhat.com/archives/libvir-list/attachments/20150602/b6855a9b/attachment-0001.htm>


More information about the libvir-list mailing list