[lvm-devel] LVM2/daemons/lvmetad lvmetad-core.c
mornfall at sourceware.org
mornfall at sourceware.org
Wed Jul 20 21:23:44 UTC 2011
CVSROOT: /cvs/lvm2
Module name: LVM2
Changes by: mornfall at sourceware.org 2011-07-20 21:23:44
Modified files:
daemons/lvmetad: lvmetad-core.c
Log message:
First stab at making lvmetad-core threadsafe. The current design should allow
a very reasonable amount of parallel access, although the hash tables may become
a point of contention under heavy load. Nevertheless, there should be orders
of magnitude less contention on the hash table locks than we currently have on
block device scanning.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/daemons/lvmetad/lvmetad-core.c.diff?cvsroot=lvm2&r1=1.15&r2=1.16
--- LVM2/daemons/lvmetad/lvmetad-core.c 2011/07/20 18:45:32 1.15
+++ LVM2/daemons/lvmetad/lvmetad-core.c 2011/07/20 21:23:43 1.16
@@ -1,4 +1,5 @@
#include <assert.h>
+#include <pthread.h>
#include "libdevmapper.h"
#include <malloc.h>
@@ -10,15 +11,64 @@
struct dm_hash_table *pvs;
struct dm_hash_table *vgs;
struct dm_hash_table *pvid_map;
+ struct {
+ struct dm_hash_table *vg;
+ pthread_mutex_t pvs;
+ pthread_mutex_t vgs;
+ pthread_mutex_t pvid_map;
+ } lock;
} lvmetad_state;
+/*
+ * Write a printf-style debug message to stderr, prefixed with "[D <tid>] ",
+ * where <tid> is the value returned by pthread_self() for the calling thread.
+ *
+ * fmt and the variadic arguments are passed to vfprintf() unchanged.
+ */
+void debug(const char *fmt, ...) {
+	va_list ap;
+
+	/* pthread_t is not an unsigned int, so "%u" was a format/argument
+	 * mismatch. Cast explicitly to the type the conversion expects.
+	 * NOTE(review): assumes pthread_t is an arithmetic type, as on
+	 * Linux/glibc -- confirm for other targets. */
+	fprintf(stderr, "[D %lu] ", (unsigned long) pthread_self());
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+}
+
+/* Acquire the mutex that guards the s->pvs hash table. */
+void lock_pvs(lvmetad_state *s)
+{
+	pthread_mutex_lock(&s->lock.pvs);
+}
+
+/* Release the mutex taken by lock_pvs(). */
+void unlock_pvs(lvmetad_state *s)
+{
+	pthread_mutex_unlock(&s->lock.pvs);
+}
+
+/* Acquire the "big" vgs mutex; it is held around accesses to s->vgs and to
+ * the s->lock.vg table of per-VG mutexes. */
+void lock_vgs(lvmetad_state *s)
+{
+	pthread_mutex_lock(&s->lock.vgs);
+}
+
+/* Release the mutex taken by lock_vgs(). */
+void unlock_vgs(lvmetad_state *s)
+{
+	pthread_mutex_unlock(&s->lock.vgs);
+}
+
+/* Acquire the mutex that guards the s->pvid_map hash table. */
+void lock_pvid_map(lvmetad_state *s)
+{
+	pthread_mutex_lock(&s->lock.pvid_map);
+}
+
+/* Release the mutex taken by lock_pvid_map(). */
+void unlock_pvid_map(lvmetad_state *s)
+{
+	pthread_mutex_unlock(&s->lock.pvid_map);
+}
+
+/*
+ * Lock the per-VG mutex for the VG named by id and return its cached metadata.
+ *
+ * The mutex is created (recursive) and registered in s->lock.vg the first time
+ * a given id is locked. The lookup in s->vgs happens while the vgs lock is
+ * still held.
+ *
+ * Returns the config tree stored under id in s->vgs, or NULL if there is none.
+ * NULL is also returned, WITHOUT the per-VG lock being held, when the mutex
+ * could not be allocated or registered; unlock_vg() is a no-op in that case.
+ *
+ * NOTE(review): the per-VG mutex is acquired while the vgs lock is held, and
+ * unlock_vg() needs the vgs lock to release it. A thread blocked here on a
+ * contended per-VG mutex therefore appears able to keep the holder from
+ * unlocking -- verify the intended lock ordering.
+ */
+struct config_tree *lock_vg(lvmetad_state *s, const char *id) {
+	pthread_mutex_t *vg;
+	struct config_tree *cft;
+
+	lock_vgs(s);
+	vg = dm_hash_lookup(s->lock.vg, id);
+	if (!vg) {
+		pthread_mutexattr_t rec;
+
+		if (!(vg = malloc(sizeof(*vg)))) {
+			unlock_vgs(s);
+			return NULL;
+		}
+		pthread_mutexattr_init(&rec);
+		pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP);
+		pthread_mutex_init(vg, &rec);
+		/* The attribute object is no longer needed once the mutex exists. */
+		pthread_mutexattr_destroy(&rec);
+		if (!dm_hash_insert(s->lock.vg, id, vg)) {
+			/* Could not register the mutex: do not leak or lock it. */
+			pthread_mutex_destroy(vg);
+			free(vg);
+			unlock_vgs(s);
+			return NULL;
+		}
+	}
+	pthread_mutex_lock(vg);
+	cft = dm_hash_lookup(s->vgs, id);
+	unlock_vgs(s);
+	return cft;
+}
+
+/*
+ * Release the per-VG mutex previously taken by lock_vg() for id.
+ *
+ * Does nothing if no mutex is registered for id (for instance because
+ * lock_vg() failed to create one).
+ */
+void unlock_vg(lvmetad_state *s, const char *id) {
+	pthread_mutex_t *vg;
+
+	lock_vgs(s); /* someone might be changing the s->lock.vg structure right
+		      * now, so avoid stepping on each other's toes */
+	vg = dm_hash_lookup(s->lock.vg, id);
+	if (vg)
+		pthread_mutex_unlock(vg);
+	unlock_vgs(s);
+}
+
static response vg_by_uuid(lvmetad_state *s, request r)
{
const char *uuid = daemon_request_str(r, "uuid", "NONE");
- fprintf(stderr, "[D] vg_by_uuid: %s (vgs = %p)\n", uuid, s->vgs);
- struct config_tree *cft = dm_hash_lookup(s->vgs, uuid);
- if (!cft || !cft->root)
+ debug("vg_by_uuid: %s (vgs = %p)\n", uuid, s->vgs);
+ struct config_tree *cft = lock_vg(s, uuid);
+ if (!cft || !cft->root) {
+ unlock_vg(s, uuid);
return daemon_reply_simple("failed", "reason = %s", "uuid not found", NULL);
+ }
struct config_node *metadata = cft->root;
@@ -35,6 +85,8 @@
n = n->sib = clone_config_node(res.cft, metadata, 1);
n->parent = res.cft->root;
res.error = 0;
+ unlock_vg(s, uuid);
+
return res;
}
@@ -47,7 +99,7 @@
if (node)
value = node->v;
- while (value && strcmp(value->v.str, flag)) {
+ while (value && value->type != CFG_EMPTY_ARRAY && strcmp(value->v.str, flag)) {
pred = value;
value = value->next;
}
@@ -90,62 +142,49 @@
return pv;
}
-static void update_pv_status_in_vg(lvmetad_state *s, struct config_tree *vg)
+/* Either the "big" vgs lock, or a per-vg lock needs to be held before entering
+ * this function.
+ *
+ * Walks the PV nodes returned by pvs(vg) and, for each one, looks its "id" up
+ * in s->pvs and calls set_flag(vg, pv, "status", "MISSING", !found), i.e. the
+ * last argument is non-zero exactly when the PV is absent from s->pvs
+ * (presumably setting or clearing the MISSING status flag -- set_flag is not
+ * fully visible here). The pvs lock is taken on entry and held until return. */
+static void update_pv_status(lvmetad_state *s, struct config_tree *vg)
{
+ lock_pvs(s);
struct config_node *pv = pvs(vg);
while (pv) {
const char *uuid = find_config_str(pv->child, "id", "N/A");
const char *vgid = find_config_str(vg->root, "metadata/id", "N/A"); /* NOTE(review): vgid is not used below */
int found = dm_hash_lookup(s->pvs, uuid) ? 1 : 0;
+ // TODO: avoid the override here if MISSING came from the actual
+ // metadata, as opposed to our manipulation...
set_flag(vg, pv, "status", "MISSING", !found);
pv = pv->sib;
}
+ unlock_pvs(s);
}
+/* Report whether every PV listed in the cached metadata of VG vgid is present
+ * in s->pvs: returns 1 if all of them are, 0 as soon as one is not. The per-VG
+ * lock is held for the whole walk and the pvs lock around each lookup.
+ * NOTE(review): lock_vg() returns NULL for an unknown VG; pvs() is not shown
+ * here -- confirm it tolerates a NULL tree. */
static int vg_status(lvmetad_state *s, const char *vgid)
{
- struct config_tree *vg = dm_hash_lookup(s->vgs, vgid);
+ struct config_tree *vg = lock_vg(s, vgid);
struct config_node *pv = pvs(vg);
while (pv) {
- const char *uuid = find_config_str(pv->child, "id", "N/A");
- const char *vgid = find_config_str(vg->root, "metadata/id", "N/A");
+ const char *uuid = find_config_str(pv->child, "id", NULL);
+ if (!uuid) {
+ /* A PV entry without an id cannot be looked up, so skip it. Advance
+ * first: a bare continue would revisit the same node forever while
+ * the per-VG lock is held. FIXME? (should this count as missing?) */
+ pv = pv->sib;
+ continue;
+ }
+
+ lock_pvs(s);
int found = dm_hash_lookup(s->pvs, uuid) ? 1 : 0;
- if (!found)
+ unlock_pvs(s);
+ if (!found) {
+ unlock_vg(s, vgid);
return 0;
+ }
pv = pv->sib;
}
+ unlock_vg(s, vgid);
return 1;
}
-/*
- * Walk through metadata cache and update PV flags to reflect our current
- * picture of the PVs in the system. If pvid is non-NULL, this is used as a hint
- * as to which PV has changed state. Otherwise, all flags are recomputed from
- * authoritative data (the s->pvs hash).
- */
-static void update_pv_status(lvmetad_state *s, const char *pvid)
-{
- if (pvid) {
- const char *vgid = dm_hash_lookup(s->pvid_map, pvid);
- if (!vgid)
- return; /* nothing to update */
-
- struct config_tree *vg = dm_hash_lookup(s->vgs, vgid);
- assert(vg);
-
- update_pv_status_in_vg(s, vg);
- } else {
- struct dm_hash_node *n = dm_hash_get_first(s->vgs);
- while (n) {
- struct config_tree *vg = dm_hash_get_data(s->vgs, n);
- update_pv_status_in_vg(s, vg);
- n = dm_hash_get_next(s->vgs, n);
- }
- }
-}
-
+/* You need to be holding the pvid_map lock already to call this. */
int update_pvid_map(lvmetad_state *s, struct config_tree *vg, const char *vgid)
{
struct config_node *pv = pvs(vg);
@@ -162,9 +201,17 @@
return 1;
}
+/* No locks need to be held. The pointers are never used outside of the scope of
+ * this function, so they can be safely destroyed after update_metadata returns
+ * (anything that might have been retained is copied). */
static int update_metadata(lvmetad_state *s, const char *_vgid, struct config_node *metadata)
{
+ int retval = 0;
+ lock_vgs(s);
struct config_tree *old = dm_hash_lookup(s->vgs, _vgid);
+ lock_vg(s, _vgid);
+ unlock_vgs(s);
+
int seq = find_config_int(metadata, "metadata/seqno", -1);
int haveseq = -1;
@@ -172,17 +219,19 @@
haveseq = find_config_int(old->root, "metadata/seqno", -1);
if (seq < 0)
- return 0; /* bad */
+ goto out;
if (seq == haveseq) {
// TODO: compare old->root with metadata to ensure equality
- return 1;
+ retval = 1;
+ goto out;
}
if (seq < haveseq) {
// TODO: we may want to notify the client that their metadata is
// out of date?
- return 1;
+ retval = 1;
+ goto out;
}
struct config_tree *cft = create_config_tree(NULL, 0);
@@ -190,7 +239,9 @@
const char *vgid = find_config_str(cft->root, "metadata/id", NULL);
if (!vgid)
- return 0;
+ goto out;
+
+ lock_pvid_map(s);
if (haveseq >= 0 && haveseq < seq) {
/* temporarily orphan all of our PVs */
@@ -200,10 +251,17 @@
dm_hash_remove(s->vgs, vgid);
}
+ lock_vgs(s);
dm_hash_insert(s->vgs, vgid, cft);
+ unlock_vgs(s);
+
update_pvid_map(s, cft, vgid);
- return 1;
+ unlock_pvid_map(s);
+ retval = 1;
+out:
+ unlock_vg(s, _vgid);
+ return retval;
}
static response pv_add(lvmetad_state *s, request r)
@@ -215,7 +273,9 @@
if (!pvid)
return daemon_reply_simple("failed", "reason = %s", "need PV UUID", NULL);
+ lock_pvs(s);
dm_hash_insert(s->pvs, pvid, (void*)1);
+ unlock_pvs(s);
if (metadata) {
if (!vgid)
@@ -226,10 +286,18 @@
if (!update_metadata(s, vgid, metadata))
return daemon_reply_simple("failed", "reason = %s",
"metadata update failed", NULL);
- } else
+ } else {
+ lock_pvid_map(s);
vgid = dm_hash_lookup(s->pvid_map, pvid);
+ unlock_pvid_map(s);
+ }
+
+ if (vgid) {
+ struct config_tree *cft = lock_vg(s, vgid);
+ update_pv_status(s, cft);
+ unlock_vg(s, vgid);
+ }
- update_pv_status(s, pvid);
int complete = vgid ? vg_status(s, vgid) : 0;
return daemon_reply_simple("OK",
@@ -247,7 +315,7 @@
lvmetad_state *state = s.private;
const char *rq = daemon_request_str(r, "request", "NONE");
- fprintf(stderr, "[D] REQUEST: %s\n", rq);
+ debug("REQUEST: %s\n", rq);
if (!strcmp(rq, "pv_add"))
return pv_add(state, r);
@@ -266,7 +334,16 @@
ls->pvs = dm_hash_create(32);
ls->vgs = dm_hash_create(32);
ls->pvid_map = dm_hash_create(32);
- fprintf(stderr, "[D] initialised state: vgs = %p\n", ls->vgs);
+
+ ls->lock.vg = dm_hash_create(32);
+ pthread_mutexattr_t rec;
+ pthread_mutexattr_init(&rec);
+ pthread_mutexattr_settype(&rec, PTHREAD_MUTEX_RECURSIVE_NP);
+ pthread_mutex_init(&ls->lock.pvs, NULL);
+ pthread_mutex_init(&ls->lock.vgs, &rec);
+ pthread_mutex_init(&ls->lock.pvid_map, NULL);
+
+ debug("initialised state: vgs = %p\n", ls->vgs);
if (!ls->pvs || !ls->vgs)
return 0;
More information about the lvm-devel
mailing list