[lvm-devel] [PATCH 02/13] Replicator: add libdm support

Zdenek Kabelac zkabelac at redhat.com
Mon Oct 5 14:00:29 UTC 2009


Introducing dm_tree_node_add_replicator_target() and
dm_tree_node_add_replicator_dev_target()

Modify the dm_tree_suspend_children() API to support prioritized suspend
and update lvm activation accordingly.

Avoid preloading children with nonzero activation priority.

Signed-off-by: Zdenek Kabelac <zkabelac at redhat.com>
---
 lib/activate/dev_manager.c |    2 +-
 libdm/.exported_symbols    |    3 +
 libdm/libdevmapper.h       |   34 +++++-
 libdm/libdm-deptree.c      |  296 ++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 323 insertions(+), 12 deletions(-)

diff --git a/lib/activate/dev_manager.c b/lib/activate/dev_manager.c
index 13cec36..19deab8 100644
--- a/lib/activate/dev_manager.c
+++ b/lib/activate/dev_manager.c
@@ -1221,7 +1221,7 @@ static int _tree_action(struct dev_manager *dm, struct logical_volume *lv, actio
 		if (!dm->flush_required && (lv->status & MIRRORED) && !(lv->status & PVMOVE))
 			dm_tree_use_no_flush_suspend(root);
 	case SUSPEND_WITH_LOCKFS:
-		if (!dm_tree_suspend_children(root, dlid, ID_LEN + sizeof(UUID_PREFIX) - 1))
+		if (!dm_tree_suspend_children(root, dlid, ID_LEN + sizeof(UUID_PREFIX) - 1, 0))
 			goto_out;
 		break;
 	case PRELOAD:
diff --git a/libdm/.exported_symbols b/libdm/.exported_symbols
index c357131..5fd81ec 100644
--- a/libdm/.exported_symbols
+++ b/libdm/.exported_symbols
@@ -64,6 +64,7 @@ dm_tree_next_parent
 dm_tree_deactivate_children
 dm_tree_activate_children
 dm_tree_preload_children
+dm_tree_set_priority
 dm_tree_suspend_children
 dm_tree_children_use_uuid
 dm_tree_node_add_snapshot_origin_target
@@ -76,6 +77,8 @@ dm_tree_node_add_crypt_target
 dm_tree_node_add_mirror_target
 dm_tree_node_add_mirror_target_log
 dm_tree_node_add_target_area
+dm_tree_node_add_replicator_target
+dm_tree_node_add_replicator_dev_target
 dm_tree_node_set_read_ahead
 dm_tree_skip_lockfs
 dm_tree_use_no_flush_suspend
diff --git a/libdm/libdevmapper.h b/libdm/libdevmapper.h
index 204372a..4e0103a 100644
--- a/libdm/libdevmapper.h
+++ b/libdm/libdevmapper.h
@@ -341,8 +341,9 @@ int dm_tree_activate_children(struct dm_tree_node *dnode,
  * Ignores devices that don't have a uuid starting with uuid_prefix.
  */
 int dm_tree_suspend_children(struct dm_tree_node *dnode,
-				   const char *uuid_prefix,
-				   size_t uuid_prefix_len);
+			     const char *uuid_prefix,
+			     size_t uuid_prefix_len,
+			     int priority);
 
 /*
  * Skip the filesystem sync when suspending.
@@ -421,11 +422,40 @@ int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
 					  const char *log_uuid,
 					  unsigned area_count,
 					  uint32_t flags);
+
+int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
+				       uint64_t size,
+				       const char *rlog_uuid,	/* uuid of the replicator log node (looked up for site 0) */
+				       const char *rlog_type,	/* for site 0 only "ringbuffer" is accepted */
+				       unsigned site_index,	/* 0 = local site; it must be added before any other site */
+				       int async_action,	/* DM_REPLICATOR_SYNC or one of the async actions below */
+/* Replicator async action flags */
+#define DM_REPLICATOR_SYNC	0		/* use synchronous replication */
+#define DM_REPLICATOR_WARN	1		/* warn if replicator is slow */
+#define DM_REPLICATOR_STALL	2		/* stall replicator if not fast enough */
+#define DM_REPLICATOR_DROP	3		/* drop legs */
+#define DM_REPLICATOR_FAIL	4		/* fail replicator if slow */
+				       uint32_t async_timeout,	/* the three limits must all be 0 with DM_REPLICATOR_SYNC */
+				       uint64_t fall_behind_data,
+				       uint32_t fall_behind_ios);
+
+int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
+					   uint64_t size,
+					   const char *replog_uuid,	/* replicator-log device */
+					   uint64_t rdevice_index,
+					   const char *rdev_uuid,	/* device for this site: a name if it contains '/', otherwise a uuid */
+					   unsigned rsite_index,	/* 0 = local site; it creates the segment and must come first */
+					   const char *llog_uuid,	/* link log uuid; required unless llog_flags has DM_CORELOG */
+					   uint32_t llog_flags,		/* Mirror log flags */
+					   uint32_t llog_size);		/* stored as the area's link log region size */
+
 int dm_tree_node_add_target_area(struct dm_tree_node *node,
 				    const char *dev_name,
 				    const char *dlid,
 				    uint64_t offset);
 
+int dm_tree_set_priority(struct dm_tree *dtree, const char *uuid, int priority);	/* always returns 1; an unknown uuid is ignored */
+
 /*
  * Set readahead (in sectors) after loading the node.
  */
diff --git a/libdm/libdm-deptree.c b/libdm/libdm-deptree.c
index 75fb201..4fc625f 100644
--- a/libdm/libdm-deptree.c
+++ b/libdm/libdm-deptree.c
@@ -33,6 +33,8 @@ enum {
 	SEG_ERROR,
 	SEG_LINEAR,
 	SEG_MIRRORED,
+	SEG_REPLICATOR,
+	SEG_REPLICATOR_DEV,
 	SEG_SNAPSHOT,
 	SEG_SNAPSHOT_ORIGIN,
 	SEG_STRIPED,
@@ -49,6 +51,8 @@ struct {
 	{ SEG_ERROR, "error" },
 	{ SEG_LINEAR, "linear" },
 	{ SEG_MIRRORED, "mirror" },
+	{ SEG_REPLICATOR, "replicator" },
+	{ SEG_REPLICATOR_DEV, "replicator-dev" },
 	{ SEG_SNAPSHOT, "snapshot" },
 	{ SEG_SNAPSHOT_ORIGIN, "snapshot-origin" },
 	{ SEG_STRIPED, "striped" },
@@ -62,6 +66,23 @@ struct seg_area {
 	struct dm_tree_node *dev_node;
 
 	uint64_t offset;
+
+	unsigned rsite_index;		/* Replicator site index */
+	struct dm_tree_node *llog;	/* Replicator link log node */
+	uint64_t region_size;		/* Replicator link log size */
+	uint32_t flags;			/* Replicator link log flags */
+};
+
+/* Replicator-log has a list of sites */
+/* CHECKME: maybe move to seg_area too ?? */
+struct replicator_site {
+	struct dm_list list;		/* Links the site into load_segment.rsites */
+
+	unsigned rsite_index;		/* Site index; 0 is the local site */
+	int async_action;		/* DM_REPLICATOR_* value; emitted as "sync" for SYNC, else "async" */
+	uint32_t async_timeout;		/* Emitted as "timeout" when nonzero and both fall_behind limits are 0 */
+	uint32_t fall_behind_ios;	/* Emitted as "ios" when nonzero and fall_behind_data is 0 */
+	uint64_t fall_behind_data;	/* Emitted as "data" when nonzero */
 };
 
 /* Per-segment properties */
@@ -72,8 +93,8 @@ struct load_segment {
 
 	uint64_t size;
 
-	unsigned area_count;		/* Linear + Striped + Mirrored + Crypt */
-	struct dm_list areas;		/* Linear + Striped + Mirrored + Crypt */
+	unsigned area_count;		/* Linear + Striped + Mirrored + Crypt + Replicator */
+	struct dm_list areas;		/* Linear + Striped + Mirrored + Crypt + Replicator */
 
 	uint32_t stripe_size;		/* Striped */
 
@@ -82,7 +103,7 @@ struct load_segment {
 	struct dm_tree_node *cow;	/* Snapshot */
 	struct dm_tree_node *origin;	/* Snapshot + Snapshot origin */
 
-	struct dm_tree_node *log;	/* Mirror */
+	struct dm_tree_node *log;	/* Mirror + Replicator + Replicator-dev */
 	uint32_t region_size;		/* Mirror */
 	unsigned clustered;		/* Mirror */
 	unsigned mirror_area_count;	/* Mirror */
@@ -94,6 +115,12 @@ struct load_segment {
 	const char *iv;			/* Crypt */
 	uint64_t iv_offset;		/* Crypt */
 	const char *key;		/* Crypt */
+
+	const char *rlog_type;		/* Replicator */
+	struct dm_list rsites;		/* Replicator */
+	unsigned rsite_count;		/* Replicator */
+	unsigned rdevice_count;		/* Replicator */
+	uint64_t rdevice_index;		/* Replicator-dev */
 };
 
 /* Per-device properties */
@@ -625,6 +652,18 @@ void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
 	dnode->props.read_ahead_flags = read_ahead_flags;
 }
 
+/* Set the activation priority of the node with this uuid; an unknown uuid is ignored. */
+int dm_tree_set_priority(struct dm_tree *dtree, const char *uuid, int priority)
+{
+	struct dm_tree_node *node = dm_tree_find_node_by_uuid(dtree, uuid);
+
+	if (node) {
+		log_verbose("Setting activation priority %d for %s", priority, node->name);
+		node->activation_priority = priority;
+	}
+	return 1;	/* reported as success even when no node matched */
+}
+
 int dm_tree_add_dev(struct dm_tree *dtree, uint32_t major, uint32_t minor)
 {
 	return _add_dev(dtree, &dtree->root, major, minor) ? 1 : 0;
@@ -1036,8 +1075,9 @@ void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode)
 }
 
 int dm_tree_suspend_children(struct dm_tree_node *dnode,
-				   const char *uuid_prefix,
-				   size_t uuid_prefix_len)
+			     const char *uuid_prefix,
+			     size_t uuid_prefix_len,
+			     int priority)
 {
 	void *handle = NULL;
 	struct dm_tree_node *child = dnode;
@@ -1068,13 +1108,17 @@ int dm_tree_suspend_children(struct dm_tree_node *dnode,
 			continue;
 
 		/* Ensure immediate parents are already suspended */
-		if (!_children_suspended(child, 1, uuid_prefix, uuid_prefix_len))
+		if (priority == 0 &&
+		    !_children_suspended(child, 1, uuid_prefix, uuid_prefix_len))
 			continue;
 
 		if (!_info_by_dev(dinfo->major, dinfo->minor, 0, &info) ||
 		    !info.exists || info.suspended)
 			continue;
 
+		if (child->activation_priority != priority)
+			continue;
+
 		if (!_suspend_node(name, info.major, info.minor,
 				   child->dtree->skip_lockfs,
 				   child->dtree->no_flush, &newinfo)) {
@@ -1102,7 +1146,7 @@ int dm_tree_suspend_children(struct dm_tree_node *dnode,
 			continue;
 
 		if (dm_tree_node_num_children(child, 0))
-			dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len);
+			dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len, priority);
 	}
 
 	return 1;
@@ -1270,13 +1314,49 @@ static int _emit_areas_line(struct dm_task *dmt __attribute((unused)),
 	struct seg_area *area;
 	char devbuf[DM_FORMAT_DEV_BUFSIZE];
 	unsigned first_time = 1;
+	const char *logtype;
+	unsigned log_parm_count;
 
 	dm_list_iterate_items(area, &seg->areas) {
 		if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
 			return_0;
 
-		EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ",
-			    devbuf, area->offset);
+		switch (seg->type) {
+		case SEG_REPLICATOR_DEV:
+			EMIT_PARAMS(*pos, " %d 1 %s", area->rsite_index, devbuf);
+			if (!first_time) {
+				/* remote devices */
+				log_parm_count = (area->flags & (DM_NOSYNC | DM_FORCESYNC)) ? 2 : 1;
+
+				if (!area->llog) {
+					devbuf[0] = 0;		/* only core log parameters */
+					logtype = "core";
+				} else {
+					devbuf[0] = ' ';	/* extra space before device name */
+					if (!_build_dev_string(devbuf + 1, sizeof(devbuf) - 1,
+							       area->llog))
+						return_0;
+					logtype = "disk";
+					log_parm_count++;	/* extra link log device name parameter */
+				}
+
+				EMIT_PARAMS(*pos, " %s %u%s %" PRIu64, logtype,
+					    log_parm_count, devbuf, area->region_size);
+
+				logtype = (area->flags & DM_NOSYNC) ?
+					" nosync" : (area->flags & DM_FORCESYNC) ?
+					" sync" : NULL;
+
+                                if (logtype)
+					EMIT_PARAMS(*pos, logtype);
+			} else
+				EMIT_PARAMS(*pos, " nolog 0");
+
+			break;
+		default:
+			EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ",
+				    devbuf, area->offset);
+		}
 
 		first_time = 0;
 	}
@@ -1284,6 +1364,42 @@ static int _emit_areas_line(struct dm_task *dmt __attribute((unused)),
 	return 1;
 }
 
+/* Emit "replicator" target parameters: the log specification, then one "blockdev" group per site. */
+static int _replicator_emit_segment_line(const struct load_segment *seg, char *params,
+					 size_t paramsize, int *pos)
+{
+	const struct load_segment *rlog_seg;
+	const struct replicator_site *rsite;
+	char rlogbuf[DM_FORMAT_DEV_BUFSIZE];
+	unsigned parm_count;
+
+	if (!seg->log || !_build_dev_string(rlogbuf, sizeof(rlogbuf), seg->log))
+		return_0;
+	if (!seg->log->props.segment_count) {	/* the log size is read from its last segment */
+		log_error("Internal error: Replicator log device has no segment.");
+		return 0;
+	}
+	rlog_seg = dm_list_item(dm_list_last(&seg->log->props.segs),
+				struct load_segment);
+	EMIT_PARAMS(*pos, "%s 4 %s 0 auto %" PRIu64,
+		    seg->rlog_type, rlogbuf, rlog_seg->size);
+
+	dm_list_iterate_items(rsite, &seg->rsites) {
+		/* A site has 2 parameters, plus 2 for the single async limit emitted below */
+		parm_count = (rsite->fall_behind_data || rsite->fall_behind_ios ||
+			      rsite->async_timeout) ? 4 : 2;
+		EMIT_PARAMS(*pos, " blockdev %u %u %s", parm_count, rsite->rsite_index,
+			    (rsite->async_action == DM_REPLICATOR_SYNC) ? "sync" : "async");
+		if (rsite->fall_behind_data)
+			EMIT_PARAMS(*pos, " data %" PRIu64, rsite->fall_behind_data);
+		else if (rsite->fall_behind_ios)
+			EMIT_PARAMS(*pos, " ios %" PRIu32, rsite->fall_behind_ios);
+		else if (rsite->async_timeout)
+			EMIT_PARAMS(*pos, " timeout %" PRIu32, rsite->async_timeout);
+	}
+	return 1;
+}
+
 /*
  * Returns: 1 on success, 0 on failure
  */
@@ -1424,6 +1540,19 @@ static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
 		if (!r)
 			return_0;
 		break;
+	case SEG_REPLICATOR:
+		if ((r = _replicator_emit_segment_line(seg, params, paramsize, &pos))
+		    <= 0) {
+			stack;
+			return r;
+		}
+		break;
+	case SEG_REPLICATOR_DEV:
+		if (!seg->log || !_build_dev_string(originbuf, sizeof(originbuf), seg->log))
+			return_0;
+
+		EMIT_PARAMS(pos, "%s %" PRIu64, originbuf, seg->rdevice_index);
+		break;
 	case SEG_SNAPSHOT:
 		if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
 			return_0;
@@ -1451,12 +1580,14 @@ static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
 
 	switch(seg->type) {
 	case SEG_ERROR:
+	case SEG_REPLICATOR:
 	case SEG_SNAPSHOT:
 	case SEG_SNAPSHOT_ORIGIN:
 	case SEG_ZERO:
 		break;
 	case SEG_CRYPT:
 	case SEG_LINEAR:
+	case SEG_REPLICATOR_DEV:
 	case SEG_STRIPED:
 		if ((r = _emit_areas_line(dmt, seg, params, paramsize, &pos)) <= 0) {
 			stack;
@@ -1609,6 +1740,9 @@ int dm_tree_preload_children(struct dm_tree_node *dnode,
 			}
 		}
 
+		if (child->activation_priority != 0)
+			continue;
+
 		/* Propagate device size change change */
 		if (child->props.size_changed)
 			dnode->props.size_changed = 1;
@@ -1871,6 +2005,150 @@ int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
 	return 1;
 }
 
+/*
+ * Add a replicator site to node.  Site 0 (local) must be added first: it creates
+ * the replicator segment, links the rlog node and sets activation priority to 1.
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_tree_node_add_replicator_target(struct dm_tree_node *node, uint64_t size,
+				       const char *rlog_uuid, const char *rlog_type,
+				       unsigned rsite_index, int async_action, uint32_t async_timeout,
+				       uint64_t fall_behind_data, uint32_t fall_behind_ios)
+{
+	struct load_segment *rseg;
+	struct replicator_site *rsite;
+
+	/* Validate arguments before the tree is modified */
+	if (async_action == DM_REPLICATOR_SYNC
+	    && (async_timeout || fall_behind_ios || fall_behind_data)) {
+		log_error("Unsupported combination of sync options passed.");
+		return 0;
+	}
+
+	if (rsite_index == 0) {
+		if (!rlog_type || strcmp(rlog_type, "ringbuffer") != 0) {
+			log_error("Unsupported rlog type %s.", rlog_type ? rlog_type : "(null)");
+			return 0;
+		}
+		/* local site0 - add replog segment and set rlog device */
+		if (!(rseg = _add_segment(node, SEG_REPLICATOR, size)))
+			return_0;
+		dm_list_init(&rseg->rsites);	/* keep the list valid if a later step fails */
+
+		if (!(rseg->log = dm_tree_find_node_by_uuid(node->dtree, rlog_uuid))) {
+			log_error("Missing replicator log uuid %s.", rlog_uuid);
+			return 0;
+		}
+		if (!_link_tree_nodes(node, rseg->log))
+			return_0;
+		if (!(rseg->rlog_type = dm_pool_strdup(node->dtree->mem, rlog_type)))
+			return_0;
+		rseg->rdevice_count = 0;
+		node->activation_priority = 1;
+	}
+
+	if (!node->props.segment_count) {
+		log_error("Internal error: Attempt to add remote site area before replog.");
+		return 0;
+	}
+
+	/* The site list is only initialised for replicator segments */
+	rseg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
+	if (rseg->type != SEG_REPLICATOR) {
+		log_error("Internal error: Attempt to add replicator site to non-replicator segment.");
+		return 0;
+	}
+
+	if (!(rsite = dm_pool_zalloc(node->dtree->mem, sizeof (*rsite)))) {
+		log_error("Failed to allocate remote site segment.");
+		return 0;
+	}
+	dm_list_add(&rseg->rsites, &rsite->list);
+	rseg->rsite_count++;
+	rsite->async_action = async_action;
+	rsite->async_timeout = async_timeout;
+	rsite->fall_behind_data = fall_behind_data;
+	rsite->fall_behind_ios = fall_behind_ios;
+	rsite->rsite_index = rsite_index;
+
+	return 1;
+}
+
+/*
+ * Append a device to a replicator-dev node.  Site 0 (local) must be added first:
+ * it creates the replicator-dev segment and links the replicator log node found
+ * by replog_uuid, which must already have a segment.  Every call adds one area
+ * for rdev_uuid (a device name if it contains '/', otherwise a uuid) and records
+ * its link log settings.  Returns 1 on success, 0 on failure.
+ */
+int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node, uint64_t size,
+					   const char *replog_uuid, uint64_t rdevice_index,
+					   const char *rdev_uuid, unsigned rsite_index,
+					   const char *llog_uuid, uint32_t llog_flags, uint32_t llog_size)
+{
+	struct seg_area *area;
+	struct load_segment *rseg;
+	int is_uuid = (rdev_uuid) ? (strchr(rdev_uuid, '/') == NULL) : 0;
+
+	/* Validate arguments before the tree is modified */
+	if (!(llog_flags & DM_CORELOG) && !llog_uuid) {
+		log_error("Unspecified link log uuid.");
+		return 0;
+	}
+
+	if (rsite_index == 0) {
+		/* site index for local target */
+		if (!(rseg = _add_segment(node, SEG_REPLICATOR_DEV, size)))
+			return_0;
+		if (!(rseg->log = dm_tree_find_node_by_uuid(node->dtree, replog_uuid))) {
+			log_error("Missing replicator log uuid %s.", replog_uuid);
+			return 0;
+		}
+
+		if (!rseg->log->props.segment_count) {
+			/* local slink 0 for replicator must be always initialized first */
+			log_error("Internal error: Attempt to use empty replog segment.");
+			return 0;
+		}
+		dm_list_item(dm_list_last(&rseg->log->props.segs),
+			     struct load_segment)->rdevice_count++;
+
+		if (!_link_tree_nodes(node, rseg->log))
+			return_0;
+		rseg->rdevice_index = rdevice_index;
+	} else {
+		if (!node->props.segment_count) {
+			/* local slink 0 for replicator must be always initialized first */
+			log_error("Internal error: Attempt to add incorrect remote target segment.");
+			return 0;
+		}
+		rseg = dm_list_item(dm_list_last(&node->props.segs), struct load_segment);
+		if (rseg->type != SEG_REPLICATOR_DEV) {
+			log_error("Internal error: Attempt to add remote target to non replicator-dev segment.");
+			return 0;
+		}
+	}
+
+	if (!dm_tree_node_add_target_area(node, (is_uuid) ? NULL : rdev_uuid,
+					  (is_uuid) ? rdev_uuid :  NULL, 0))
+		return_0;
+	area = dm_list_item(dm_list_last(&rseg->areas), struct seg_area);
+
+	if (!(llog_flags & DM_CORELOG)) {
+		if (!(area->llog = dm_tree_find_node_by_uuid(node->dtree, llog_uuid))) {
+			log_error("Couldn't find link log uuid %s.", llog_uuid);
+			return 0;
+		}
+		if (!_link_tree_nodes(node, area->llog))
+			return_0;
+	}
+
+	area->flags = llog_flags;
+	area->region_size = llog_size;
+	area->rsite_index = rsite_index;
+	return 1;
+}
+
 static int _add_area(struct dm_tree_node *node, struct load_segment *seg, struct dm_tree_node *dev_node, uint64_t offset)
 {
 	struct seg_area *area;
-- 
1.6.5.rc2




More information about the lvm-devel mailing list