[Linux-cluster] gfs2, kvm setup
David Teigland
teigland at redhat.com
Fri Jun 27 18:41:17 UTC 2008
On Fri, Jun 27, 2008 at 01:28:56PM -0400, david m. richter wrote:
> i also have another setup in vmware; while i doubt it's
> substantively different than bruce's, i'm a ready and willing tester. is
> there a different branch (or repo, or just a stack of patches somewhere)
> that i should/could be using?
If on 2.6.25, then use
ftp://ftp%40openais%2Eorg:downloads@openais.org/downloads/openais-0.80.3/openais-0.80.3.tar.gz
ftp://sources.redhat.com/pub/cluster/releases/cluster-2.03.04.tar.gz
If on 2.6.26-rc, then you'll need to add the attached patch to cluster.
Dave
-------------- next part --------------
commit 0e25f89dc09cab05c8ce519f4a84bdbf0bff25aa
Author: David Teigland <teigland at redhat.com>
Date: Mon Apr 7 16:15:01 2008 -0500
gfs_controld: read plocks from dlm or lock_dlm
In kernels before 2.6.26, cluster posix lock ops are passed to user
space through the gfs-specific lock_dlm module. In 2.6.26, the same
ops are passed to user space through the dlm module. Update gfs_controld
to read the plock ops from either module, depending on the kernel.
Signed-off-by: David Teigland <teigland at redhat.com>
diff --git a/group/gfs_controld/lock_dlm.h b/group/gfs_controld/lock_dlm.h
index b57c1f3..4b132b0 100644
--- a/group/gfs_controld/lock_dlm.h
+++ b/group/gfs_controld/lock_dlm.h
@@ -110,6 +110,7 @@ struct mountpoint {
struct mountgroup {
struct list_head list;
uint32_t id;
+ uint32_t associated_ls_id;
struct list_head members;
struct list_head members_gone;
int memb_count;
diff --git a/group/gfs_controld/plock.c b/group/gfs_controld/plock.c
index c564300..5e4f56b 100644
--- a/group/gfs_controld/plock.c
+++ b/group/gfs_controld/plock.c
@@ -20,9 +20,10 @@
#include <netdb.h>
#include <limits.h>
#include <unistd.h>
+#include <dirent.h>
#include <openais/saAis.h>
#include <openais/saCkpt.h>
-#include <linux/lock_dlm_plock.h>
+#include <linux/dlm_plock.h>
#include "lock_dlm.h"
@@ -30,8 +31,9 @@
#define PROC_DEVICES "/proc/devices"
#define MISC_NAME "misc"
#define CONTROL_DIR "/dev/misc"
-#define CONTROL_NAME "lock_dlm_plock"
+#define CONTROL_NAME "dlm_plock"
+extern struct list_head mounts;
extern int our_nodeid;
extern int message_flow_control_on;
@@ -57,6 +59,7 @@ static SaCkptCallbacksT callbacks = { 0, 0 };
static SaVersionT version = { 'B', 1, 1 };
static char section_buf[1024 * 1024];
static uint32_t section_len;
+static int need_fsid_translation = 0;
struct pack_plock {
uint64_t start;
@@ -100,13 +103,13 @@ struct posix_lock {
struct lock_waiter {
struct list_head list;
uint32_t flags;
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
};
static void send_own(struct mountgroup *mg, struct resource *r, int owner);
static void save_pending_plock(struct mountgroup *mg, struct resource *r,
- struct gdlm_plock_info *in);
+ struct dlm_plock_info *in);
static int got_unown(struct resource *r)
@@ -114,7 +117,7 @@ static int got_unown(struct resource *r)
return !!(r->flags & R_GOT_UNOWN);
}
-static void info_bswap_out(struct gdlm_plock_info *i)
+static void info_bswap_out(struct dlm_plock_info *i)
{
i->version[0] = cpu_to_le32(i->version[0]);
i->version[1] = cpu_to_le32(i->version[1]);
@@ -129,7 +132,7 @@ static void info_bswap_out(struct gdlm_plock_info *i)
i->owner = cpu_to_le64(i->owner);
}
-static void info_bswap_in(struct gdlm_plock_info *i)
+static void info_bswap_in(struct dlm_plock_info *i)
{
i->version[0] = le32_to_cpu(i->version[0]);
i->version[1] = le32_to_cpu(i->version[1]);
@@ -147,11 +150,11 @@ static void info_bswap_in(struct gdlm_plock_info *i)
static char *op_str(int optype)
{
switch (optype) {
- case GDLM_PLOCK_OP_LOCK:
+ case DLM_PLOCK_OP_LOCK:
return "LK";
- case GDLM_PLOCK_OP_UNLOCK:
+ case DLM_PLOCK_OP_UNLOCK:
return "UN";
- case GDLM_PLOCK_OP_GET:
+ case DLM_PLOCK_OP_GET:
return "GET";
default:
return "??";
@@ -160,7 +163,7 @@ static char *op_str(int optype)
static char *ex_str(int optype, int ex)
{
- if (optype == GDLM_PLOCK_OP_UNLOCK || optype == GDLM_PLOCK_OP_GET)
+ if (optype == DLM_PLOCK_OP_UNLOCK || optype == DLM_PLOCK_OP_GET)
return "-";
if (ex)
return "WR";
@@ -195,10 +198,11 @@ static int get_proc_number(const char *file, const char *name, uint32_t *number)
return 0;
}
-static int control_device_number(uint32_t *major, uint32_t *minor)
+static int control_device_number(const char *plock_misc_name,
+ uint32_t *major, uint32_t *minor)
{
if (!get_proc_number(PROC_DEVICES, MISC_NAME, major) ||
- !get_proc_number(PROC_MISC, GDLM_PLOCK_MISC_NAME, minor)) {
+ !get_proc_number(PROC_MISC, plock_misc_name, minor)) {
*major = 0;
return 0;
}
@@ -265,7 +269,7 @@ static int create_control(const char *control, uint32_t major, uint32_t minor)
return 1;
}
-static int open_control(void)
+static int open_control(const char *control_name, const char *plock_misc_name)
{
char control[PATH_MAX];
uint32_t major = 0, minor = 0;
@@ -273,22 +277,20 @@ static int open_control(void)
if (control_fd != -1)
return 0;
- snprintf(control, sizeof(control), "%s/%s", CONTROL_DIR, CONTROL_NAME);
+ snprintf(control, sizeof(control), "%s/%s", CONTROL_DIR, control_name);
- if (!control_device_number(&major, &minor)) {
- log_error("Is dlm missing from kernel?");
+ if (!control_device_number(plock_misc_name, &major, &minor))
return -1;
- }
if (!control_exists(control, major, minor) &&
!create_control(control, major, minor)) {
- log_error("Failure to communicate with kernel lock_dlm");
+ log_error("Failure to create device file %s", control);
return -1;
}
control_fd = open(control, O_RDWR);
if (control_fd < 0) {
- log_error("Failure to communicate with kernel lock_dlm: %s",
+ log_error("Failure to open device %s: %s", control,
strerror(errno));
return -1;
}
@@ -296,6 +298,16 @@ static int open_control(void)
return 0;
}
+/*
+ * In kernels before 2.6.26, plocks came from gfs2's lock_dlm module.
+ * Reading plocks from there as well should allow us to use cluster3 on
+ * old (RHEL5) kernels. There the fsid in plock_info structs is already the
+ * mountgroup id; only the newer dlm interface needs ls id translation.
+ */
+
+#define OLD_CONTROL_NAME "lock_dlm_plock"
+#define OLD_PLOCK_MISC_NAME "lock_dlm_plock"
+
int setup_plocks(void)
{
SaAisErrorT err;
@@ -318,14 +330,29 @@ int setup_plocks(void)
log_error("ckpt init error %d - plocks unavailable", err);
control:
- rv = open_control();
- if (rv)
- return rv;
+ need_fsid_translation = 1;
+
+ rv = open_control(CONTROL_NAME, DLM_PLOCK_MISC_NAME);
+ if (rv) {
+ log_debug("setup_plocks trying old lock_dlm interface");
+ rv = open_control(OLD_CONTROL_NAME, OLD_PLOCK_MISC_NAME);
+ if (rv) {
+ log_error("Is dlm missing from kernel? No control device.");
+ return rv;
+ }
+
+ /* the fsid from the kernel is the mountgroup id in old
+ kernels, which we can use to look up the mg directly
+ without translation */
+
+ need_fsid_translation = 0;
+ }
log_debug("plocks %d", control_fd);
+ log_debug("plock need_fsid_translation %d", need_fsid_translation);
log_debug("plock cpg message size: %u bytes",
(unsigned int) (sizeof(struct gdlm_header) +
- sizeof(struct gdlm_plock_info)));
+ sizeof(struct dlm_plock_info)));
return control_fd;
}
@@ -517,7 +544,7 @@ static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end)
return shrink_range2(&po->start, &po->end, start, end);
}
-static int is_conflict(struct resource *r, struct gdlm_plock_info *in, int get)
+static int is_conflict(struct resource *r, struct dlm_plock_info *in, int get)
{
struct posix_lock *po;
@@ -566,7 +593,7 @@ static int add_lock(struct resource *r, uint32_t nodeid, uint64_t owner,
2. convert RE to RN range and mode */
static int lock_case1(struct posix_lock *po, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
uint64_t start2, end2;
int rv;
@@ -593,7 +620,7 @@ static int lock_case1(struct posix_lock *po, struct resource *r,
3. convert RE to RN range and mode */
static int lock_case2(struct posix_lock *po, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
int rv;
@@ -616,7 +643,7 @@ static int lock_case2(struct posix_lock *po, struct resource *r,
}
static int lock_internal(struct mountgroup *mg, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
struct posix_lock *po, *safe;
int rv = 0;
@@ -679,7 +706,7 @@ static int lock_internal(struct mountgroup *mg, struct resource *r,
}
static int unlock_internal(struct mountgroup *mg, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
struct posix_lock *po, *safe;
int rv = 0;
@@ -743,7 +770,7 @@ static int unlock_internal(struct mountgroup *mg, struct resource *r,
}
static int add_waiter(struct mountgroup *mg, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
struct lock_waiter *w;
@@ -751,14 +778,17 @@ static int add_waiter(struct mountgroup *mg, struct resource *r,
w = malloc(sizeof(struct lock_waiter));
if (!w)
return -ENOMEM;
- memcpy(&w->info, in, sizeof(struct gdlm_plock_info));
+ memcpy(&w->info, in, sizeof(struct dlm_plock_info));
list_add_tail(&w->list, &r->waiters);
return 0;
}
+/*
+ * Report the result of a plock op: store rv in the info struct and write
+ * the struct to control_fd. When fsid translation is in effect, fsid is
+ * first set to the mountgroup's associated_ls_id.
+ */
-static void write_result(struct mountgroup *mg, struct gdlm_plock_info *in,
+static void write_result(struct mountgroup *mg, struct dlm_plock_info *in,
 			 int rv)
 {
+	if (need_fsid_translation)
+		in->fsid = mg->associated_ls_id;
+
 	in->rv = rv;
-	write(control_fd, in, sizeof(struct gdlm_plock_info));
+	/* size must name the same struct type as the "in" parameter */
+	write(control_fd, in, sizeof(struct dlm_plock_info));
 }
@@ -766,7 +796,7 @@ static void write_result(struct mountgroup *mg, struct gdlm_plock_info *in,
static void do_waiters(struct mountgroup *mg, struct resource *r)
{
struct lock_waiter *w, *safe;
- struct gdlm_plock_info *in;
+ struct dlm_plock_info *in;
int rv;
list_for_each_entry_safe(w, safe, &r->waiters, list) {
@@ -792,7 +822,7 @@ static void do_waiters(struct mountgroup *mg, struct resource *r)
}
}
-static void do_lock(struct mountgroup *mg, struct gdlm_plock_info *in,
+static void do_lock(struct mountgroup *mg, struct dlm_plock_info *in,
struct resource *r)
{
int rv;
@@ -817,7 +847,7 @@ static void do_lock(struct mountgroup *mg, struct gdlm_plock_info *in,
put_resource(r);
}
-static void do_unlock(struct mountgroup *mg, struct gdlm_plock_info *in,
+static void do_unlock(struct mountgroup *mg, struct dlm_plock_info *in,
struct resource *r)
{
int rv;
@@ -833,7 +863,7 @@ static void do_unlock(struct mountgroup *mg, struct gdlm_plock_info *in,
/* we don't even get to this function if the getlk isn't from us */
-static void do_get(struct mountgroup *mg, struct gdlm_plock_info *in,
+static void do_get(struct mountgroup *mg, struct dlm_plock_info *in,
struct resource *r)
{
int rv;
@@ -846,19 +876,19 @@ static void do_get(struct mountgroup *mg, struct gdlm_plock_info *in,
write_result(mg, in, rv);
}
-static void __receive_plock(struct mountgroup *mg, struct gdlm_plock_info *in,
+static void __receive_plock(struct mountgroup *mg, struct dlm_plock_info *in,
int from, struct resource *r)
{
switch (in->optype) {
- case GDLM_PLOCK_OP_LOCK:
+ case DLM_PLOCK_OP_LOCK:
mg->last_plock_time = time(NULL);
do_lock(mg, in, r);
break;
- case GDLM_PLOCK_OP_UNLOCK:
+ case DLM_PLOCK_OP_UNLOCK:
mg->last_plock_time = time(NULL);
do_unlock(mg, in, r);
break;
- case GDLM_PLOCK_OP_GET:
+ case DLM_PLOCK_OP_GET:
do_get(mg, in, r);
break;
default:
@@ -880,7 +910,7 @@ static void __receive_plock(struct mountgroup *mg, struct gdlm_plock_info *in,
static void _receive_plock(struct mountgroup *mg, char *buf, int len, int from)
{
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
struct gdlm_header *hd = (struct gdlm_header *) buf;
struct resource *r = NULL;
struct timeval now;
@@ -907,7 +937,7 @@ static void _receive_plock(struct mountgroup *mg, char *buf, int len, int from)
plock_recv_time = now;
}
- if (info.optype == GDLM_PLOCK_OP_GET && from != our_nodeid)
+ if (info.optype == DLM_PLOCK_OP_GET && from != our_nodeid)
return;
if (from != hd->nodeid || from != info.nodeid) {
@@ -1013,14 +1043,14 @@ void receive_plock(struct mountgroup *mg, char *buf, int len, int from)
_receive_plock(mg, buf, len, from);
}
-static int send_struct_info(struct mountgroup *mg, struct gdlm_plock_info *in,
+static int send_struct_info(struct mountgroup *mg, struct dlm_plock_info *in,
int msg_type)
{
char *buf;
int rv, len;
struct gdlm_header *hd;
- len = sizeof(struct gdlm_header) + sizeof(struct gdlm_plock_info);
+ len = sizeof(struct gdlm_header) + sizeof(struct dlm_plock_info);
buf = malloc(len);
if (!buf) {
rv = -ENOMEM;
@@ -1047,14 +1077,14 @@ static int send_struct_info(struct mountgroup *mg, struct gdlm_plock_info *in,
}
static void send_plock(struct mountgroup *mg, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
send_struct_info(mg, in, MSG_PLOCK);
}
static void send_own(struct mountgroup *mg, struct resource *r, int owner)
{
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
/* if we've already sent an own message for this resource,
(pending list is not empty), then we shouldn't send another */
@@ -1074,7 +1104,7 @@ static void send_own(struct mountgroup *mg, struct resource *r, int owner)
static void send_syncs(struct mountgroup *mg, struct resource *r)
{
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
struct posix_lock *po;
struct lock_waiter *w;
int rv;
@@ -1111,7 +1141,7 @@ static void send_syncs(struct mountgroup *mg, struct resource *r)
static void send_drop(struct mountgroup *mg, struct resource *r)
{
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
memset(&info, 0, sizeof(info));
info.number = r->number;
@@ -1123,7 +1153,7 @@ static void send_drop(struct mountgroup *mg, struct resource *r)
so the op is saved on the pending list until the r owner is established */
static void save_pending_plock(struct mountgroup *mg, struct resource *r,
- struct gdlm_plock_info *in)
+ struct dlm_plock_info *in)
{
struct lock_waiter *w;
@@ -1132,7 +1162,7 @@ static void save_pending_plock(struct mountgroup *mg, struct resource *r,
log_error("save_pending_plock no mem");
return;
}
- memcpy(&w->info, in, sizeof(struct gdlm_plock_info));
+ memcpy(&w->info, in, sizeof(struct dlm_plock_info));
list_add_tail(&w->list, &r->pending);
}
@@ -1167,7 +1197,7 @@ static void send_pending_plocks(struct mountgroup *mg, struct resource *r)
static void _receive_own(struct mountgroup *mg, char *buf, int len, int from)
{
struct gdlm_header *hd = (struct gdlm_header *) buf;
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
struct resource *r;
int should_not_happen = 0;
int rv;
@@ -1294,7 +1324,7 @@ void receive_own(struct mountgroup *mg, char *buf, int len, int from)
_receive_own(mg, buf, len, from);
}
-static void clear_syncing_flag(struct resource *r, struct gdlm_plock_info *in)
+static void clear_syncing_flag(struct resource *r, struct dlm_plock_info *in)
{
struct posix_lock *po;
struct lock_waiter *w;
@@ -1333,7 +1363,7 @@ static void clear_syncing_flag(struct resource *r, struct gdlm_plock_info *in)
static void _receive_sync(struct mountgroup *mg, char *buf, int len, int from)
{
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
struct gdlm_header *hd = (struct gdlm_header *) buf;
struct resource *r;
int rv;
@@ -1379,7 +1409,7 @@ void receive_sync(struct mountgroup *mg, char *buf, int len, int from)
static void _receive_drop(struct mountgroup *mg, char *buf, int len, int from)
{
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
struct resource *r;
int rv;
@@ -1478,11 +1508,122 @@ static int drop_resources(struct mountgroup *mg)
return 0;
}
+/* iterate through directory names looking for matching id:
+ /sys/kernel/dlm/<name>/id */
+
+#define DLM_SYSFS_DIR "/sys/kernel/dlm"
+
+static char ls_name[256];
+
+/*
+ * Scan DLM_SYSFS_DIR/<name>/id for an entry whose id equals ls_id.
+ * On a match the directory name is copied into the file-scope ls_name
+ * buffer and 0 is returned. Returns -1 if the directory cannot be opened
+ * or no entry matches. Entries whose id file cannot be opened or parsed
+ * are logged and skipped.
+ */
+static int get_lockspace_name(uint32_t ls_id)
+{
+	char path[PATH_MAX];
+	DIR *d;
+	FILE *file;
+	struct dirent *de;
+	uint32_t id;
+	int rv, error;
+
+	d = opendir(DLM_SYSFS_DIR);
+	if (!d) {
+		/* path has not been filled in yet; log the directory name */
+		log_debug("%s: opendir failed: %d", DLM_SYSFS_DIR, errno);
+		return -1;
+	}
+
+	rv = -1;
+
+	while ((de = readdir(d))) {
+		/* skips ".", ".." and any other dot-prefixed entry */
+		if (de->d_name[0] == '.')
+			continue;
+
+		id = 0;
+		memset(path, 0, PATH_MAX);
+		snprintf(path, PATH_MAX, "%s/%s/id", DLM_SYSFS_DIR, de->d_name);
+
+		file = fopen(path, "r");
+		if (!file) {
+			log_error("can't open %s %d", path, errno);
+			continue;
+		}
+
+		error = fscanf(file, "%u", &id);
+		fclose(file);
+
+		if (error != 1) {
+			log_error("bad read %s %d", path, errno);
+			continue;
+		}
+		if (id != ls_id) {
+			log_debug("get_lockspace_name skip %x %s",
+				  id, de->d_name);
+			continue;
+		}
+
+		log_debug("get_lockspace_name found %x %s", id, de->d_name);
+		/* bounded copy that always leaves ls_name NUL-terminated */
+		snprintf(ls_name, sizeof(ls_name), "%s", de->d_name);
+		rv = 0;
+		break;
+	}
+
+	closedir(d);
+	return rv;
+}
+
+/* find the lockspace with "ls_id" in sysfs, get its name, then look for
+   the mg with the same name in mounts list, and save ls_id in that mg */
+
+/*
+ * Look up the name of the lockspace with id ls_id, then store ls_id as
+ * associated_ls_id in the mountgroup that find_mg() returns for that name.
+ * Logs an error and changes nothing if either lookup fails.
+ */
+static void set_associated_id(uint32_t ls_id)
+{
+	struct mountgroup *found;
+
+	log_debug("set_associated_id ls_id %x %d", ls_id, ls_id);
+
+	/* clear the shared name buffer before the lookup fills it in */
+	memset(&ls_name, 0, sizeof(ls_name));
+
+	if (get_lockspace_name(ls_id)) {
+		log_error("no lockspace found with id %x", ls_id);
+		return;
+	}
+
+	found = find_mg(ls_name);
+	if (found) {
+		log_debug("set_associated_id ls %x is mg %x", ls_id, found->id);
+		found->associated_ls_id = ls_id;
+		return;
+	}
+
+	log_error("no mountgroup found with name %s for ls_id %x",
+		  ls_name, ls_id);
+}
+
+/*
+ * Map fsid to the id of the mountgroup whose associated_ls_id equals it.
+ * If the first search of the mounts list finds nothing, call
+ * set_associated_id() once and search again. If that also finds nothing,
+ * fsid is returned unchanged.
+ */
+static uint32_t ls_to_mg_id(uint32_t fsid)
+{
+	struct mountgroup *cur;
+	int pass;
+
+	for (pass = 0; pass < 2; pass++) {
+		list_for_each_entry(cur, &mounts, list) {
+			if (cur->associated_ls_id == fsid)
+				return cur->id;
+		}
+
+		/* only the first miss triggers an attempt to set the id */
+		if (pass == 0)
+			set_associated_id(fsid);
+	}
+
+	return fsid;
+}
+
int process_plocks(void)
{
struct mountgroup *mg;
struct resource *r;
- struct gdlm_plock_info info;
+ struct dlm_plock_info info;
struct timeval now;
uint64_t usec;
int rv;
@@ -1526,6 +1667,9 @@ int process_plocks(void)
goto fail;
}
+ if (need_fsid_translation)
+ info.fsid = ls_to_mg_id(info.fsid);
+
mg = find_mg_id(info.fsid);
if (!mg) {
log_debug("process_plocks: no mg id %x", info.fsid);
More information about the Linux-cluster
mailing list