[dm-devel] [PATCH 06/14] dm-multisnap-mikulas-btree

Mike Snitzer snitzer at redhat.com
Tue Mar 2 00:23:50 UTC 2010


From: Mikulas Patocka <mpatocka at redhat.com>

B+tree operations.

Adding and deleting entries and walking the whole b+tree.

Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
---
 drivers/md/dm-multisnap-btree.c |  838 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 838 insertions(+), 0 deletions(-)
 create mode 100644 drivers/md/dm-multisnap-btree.c

diff --git a/drivers/md/dm-multisnap-btree.c b/drivers/md/dm-multisnap-btree.c
new file mode 100644
index 0000000..a7e3b60
--- /dev/null
+++ b/drivers/md/dm-multisnap-btree.c
@@ -0,0 +1,838 @@
+/*
+ * Copyright (C) 2009 Red Hat Czech, s.r.o.
+ *
+ * Mikulas Patocka <mpatocka at redhat.com>
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-multisnap-mikulas.h"
+
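+/*
+ * Layout as used by this file: each btree node occupies one chunk and
+ * consists of a dm_multisnap_bt_node header (signature, n_entries) followed
+ * directly by an array of dm_multisnap_bt_entry structures. An entry holds
+ * a 48-bit orig_chunk, a snapshot-id range (snap_from..snap_to) and a 48-bit
+ * new_chunk; in internal nodes new_chunk is the block number of the child
+ * node, in leaves it is the chunk the key is remapped to. Entries are kept
+ * sorted, the last entry of each node carries the same orig_chunk/snap_to as
+ * the entry referencing the node in its parent (walk_btree verifies this),
+ * and the rightmost nodes end with the DM_CHUNK_T_LAST/DM_SNAPID_T_LAST
+ * sentinel created by dm_multisnap_create_btree.
+ */
+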
+/*
+ * Read one btree node and do basic consistency checks.
+ * Any btree access should be done with this function.
+ */
+static struct dm_multisnap_bt_node *
+dm_multisnap_read_btnode(struct dm_exception_store *s, int depth,
+			 chunk_t block, unsigned want_entries, struct dm_buffer **bp)
+{
+	struct dm_multisnap_bt_node *node;
+
+	BUG_ON((unsigned)depth >= s->bt_depth);
+
+	node = dm_multisnap_read_block(s, block, bp);
+	if (unlikely(!node))
+		return NULL;
+
+	if (unlikely(node->signature != BT_SIGNATURE)) {
+		dm_bufio_release(*bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_read_btnode: bad signature on btree node %llx",
+					(unsigned long long)block));
+		return NULL;
+	}
+
+	if (unlikely((unsigned)(le32_to_cpu(node->n_entries) - 1) >= s->btree_entries) ||
+	    (want_entries && unlikely(le32_to_cpu(node->n_entries) != want_entries))) {
+		dm_bufio_release(*bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_read_btnode: bad number of entries in btree node "
+					"%llx: %x, wanted %x",
+					(unsigned long long)block,
+					le32_to_cpu(node->n_entries),
+					want_entries));
+		return NULL;
+	}
+
+	return node;
+}
+
+/*
+ * Write orig_chunk entry.
+ *
+ * If we are compiled with a 32-bit chunk_t, we must write the special
+ * fields with bits 32-47 set, so that the store can also be read on a
+ * system with a 64-bit chunk_t.
+ */
+static void write_orig_chunk(struct dm_multisnap_bt_entry *be, chunk_t n)
+{
+	write_48(be, orig_chunk, n);
+	if (sizeof(chunk_t) == 4 && unlikely(n > DM_CHUNK_T_MAX))
+		be->orig_chunk2 = cpu_to_le16(0xffff);
+}
+
+/*
+ * Add an entry (key, new_chunk) at the given index in the btree node.
+ * The existing entries at and after the index are moved up to make room.
+ */
+static void add_at_idx(struct dm_multisnap_bt_node *node, unsigned index,
+		       struct bt_key *key, chunk_t new_chunk)
+{
+	memmove(&node->entries[index + 1], &node->entries[index],
+		(le32_to_cpu(node->n_entries) - index) * sizeof(struct dm_multisnap_bt_entry));
+	write_orig_chunk(&node->entries[index], key->chunk);
+	write_48(&node->entries[index], new_chunk, new_chunk);
+	node->entries[index].snap_from = cpu_to_mikulas_snapid(key->snap_from);
+	node->entries[index].snap_to = cpu_to_mikulas_snapid(key->snap_to);
+	node->entries[index].flags = cpu_to_le32(0);
+	node->n_entries = cpu_to_le32(le32_to_cpu(node->n_entries) + 1);
+}
+
+/*
+ * Create an initial btree.
+ * (*writing_block) is updated to point after the btree.
+ */
+void dm_multisnap_create_btree(struct dm_exception_store *s, chunk_t *writing_block)
+{
+	struct dm_buffer *bp;
+	struct dm_multisnap_bt_node *node;
+	struct bt_key new_key;
+
+	while (dm_multisnap_is_commit_block(s, *writing_block))
+		(*writing_block)++;
+
+	if (*writing_block >= s->dev_size) {
+		DM_MULTISNAP_SET_ERROR(s->dm, -ENOSPC,
+				       ("dm_multisnap_create_btree: device is too small"));
+		return;
+	}
+
+	node = dm_bufio_new(s->bufio, *writing_block, &bp);
+	if (IS_ERR(node)) {
+		DM_MULTISNAP_SET_ERROR(s->dm, PTR_ERR(node),
+				       ("dm_multisnap_create_btree: can't create b+-tree root at %llx",
+					(unsigned long long)*writing_block));
+		return;
+	}
+	memset(node, 0, s->chunk_size);
+	node->signature = BT_SIGNATURE;
+	node->n_entries = cpu_to_le32(0);
+
+	/*
+	 * A btree node must have at least one entry --- so create this empty
+	 * one
+	 */
+	new_key.snap_from = new_key.snap_to = DM_SNAPID_T_LAST;
+	new_key.chunk = DM_CHUNK_T_LAST;
+	add_at_idx(node, 0, &new_key, 0);
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+	s->bt_root = *writing_block;
+	s->bt_depth = 1;
+	(*writing_block)++;
+}
+
+/*
+ * Compare btree entry and a search key. Returns:
+ *	-1: the entry is lower than the key
+ *	1: the entry is higher than the key
+ *	0: the entry matches the key (both entry and key have ranges, a match
+ *		is returned when the ranges overlap)
+ */
+static int compare_key(struct dm_multisnap_bt_entry *e, struct bt_key *key)
+{
+	chunk_t orig_chunk = read_48(e, orig_chunk);
+	if (orig_chunk < key->chunk)
+		return -1;
+	if (orig_chunk > key->chunk)
+		return 1;
+
+	if (mikulas_snapid_to_cpu(e->snap_to) < key->snap_from)
+		return -1;
+	if (mikulas_snapid_to_cpu(e->snap_from) > key->snap_to)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Perform binary search on the btree node.
+ * Returns: 1 - found, 0 - not found
+ * 	*result - if found, then the first entry in the requested range
+ *		- if not found, then the first entry after the requested range
+ */
+static int binary_search(struct dm_multisnap_bt_node *node, struct bt_key *key,
+			 unsigned *result)
+{
+	int c;
+	int first = 0;
+	int last = le32_to_cpu(node->n_entries) - 1;
+
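+	/*
+	 * Entries are sorted; narrow [first, last] down to the leftmost entry
+	 * that is not lower than the key. If no such entry exists, the search
+	 * ends on the last entry with c still negative, so 0 is returned.
+	 */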
+	while (1) {
+		int middle = (first + last) >> 1;
+		struct dm_multisnap_bt_entry *e = &node->entries[middle];
+
+		c = compare_key(e, key);
+
+		if (first == last)
+			break;
+
+		if (c < 0)
+			first = middle + 1;
+		else
+			last = middle;
+
+		cond_resched();
+	}
+
+	*result = first;
+	return !c;
+}
+
+/*
+ * Find a given key in the btree.
+ *
+ * Returns: 1 - found, 0 - not found, -1 - error
+ *	If there is no error (0 or 1 is returned), the node and a held buffer
+ *	for it are returned (the buffer must be released with
+ *	dm_bufio_release), and path is filled with s->bt_depth entries.
+ */
+static int walk_btree(struct dm_exception_store *s, struct bt_key *key,
+		      struct dm_multisnap_bt_node **nodep, struct dm_buffer **bp,
+		      struct path_element path[MAX_BT_DEPTH])
+{
+#define		node (*nodep)
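+	/*
+	 * "node" writes through nodep, so the caller always receives the node
+	 * that was read last, together with its buffer in *bp.
+	 */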
+	int r;
+	chunk_t block = s->bt_root;
+	unsigned d = 0;
+
+	/*
+	 * These four are purely to check tree consistency.
+	 * They could be commented out. But it's safer to leave them there.
+	 */
+	chunk_t want_last_chunk = DM_CHUNK_T_LAST;
+	mikulas_snapid_t want_last_snapid = DM_SNAPID_T_LAST;
+	chunk_t last_chunk;
+	mikulas_snapid_t last_snapid;
+
+	while (1) {
+		path[d].block = block;
+		node = dm_multisnap_read_btnode(s, d, block, 0, bp);
+		if (!node)
+			return -1;
+		path[d].n_entries = le32_to_cpu(node->n_entries);
+
+		/* Check consistency (can be commented out) */
+		last_chunk = read_48(&node->entries[path[d].n_entries - 1], orig_chunk);
+		last_snapid = mikulas_snapid_to_cpu(node->entries[path[d].n_entries - 1].snap_to);
+		if (unlikely(last_chunk != want_last_chunk) ||
+		    unlikely(last_snapid != want_last_snapid)) {
+			dm_bufio_release(*bp);
+			DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+					       ("walk_btree: invalid last entry in node %llx/%llx: "
+						"last_chunk %llx, want_last_chunk %llx, last_snapid: %llx, "
+						"want_last_snapid: %llx, searching for %llx, %llx-%llx",
+						(unsigned long long)block,
+						(unsigned long long)dm_multisnap_remap_block(s, block),
+						(unsigned long long)last_chunk,
+						(unsigned long long)want_last_chunk,
+						(unsigned long long)last_snapid,
+						(unsigned long long)want_last_snapid,
+						(unsigned long long)key->chunk,
+						(unsigned long long)key->snap_from,
+						(unsigned long long)key->snap_to));
+			return -1;
+		}
+
+		r = binary_search(node, key, &path[d].idx);
+
+		want_last_chunk = read_48(&node->entries[path[d].idx], orig_chunk);
+		want_last_snapid = mikulas_snapid_to_cpu(node->entries[path[d].idx].snap_to);
+
+		block = read_48(&node->entries[path[d].idx], new_chunk);
+		if (++d == s->bt_depth)
+			break;
+		dm_bufio_release(*bp);
+	}
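+	/*
+	 * If the key is greater than all entries in the leaf, binary_search
+	 * left the index at the last entry; advance it so that it points at
+	 * the position where the key would be inserted.
+	 */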
+	if (unlikely(compare_key(&node->entries[path[s->bt_depth - 1].idx], key) < 0))
+		path[s->bt_depth - 1].idx++;
+	return r;
+#undef node
+}
+
+/*
+ * Find a given key in the btree.
+ *
+ * Returns: 1 - found, 0 - not found, -1 - error
+ *	If the key is found, *key is updated to the full range of the matching
+ *	entry and *result contains the resulting chunk.
+ */
+int dm_multisnap_find_in_btree(struct dm_exception_store *s, struct bt_key *key,
+			       chunk_t *result)
+{
+	struct dm_multisnap_bt_node *node;
+	struct path_element path[MAX_BT_DEPTH];
+	struct dm_buffer *bp;
+
+	int r = walk_btree(s, key, &node, &bp, path);
+	if (unlikely(r < 0))
+		return r;
+
+	if (r) {
+		struct dm_multisnap_bt_entry *entry = &node->entries[path[s->bt_depth - 1].idx];
+		*result = read_48(entry, new_chunk);
+		key->chunk = read_48(entry, orig_chunk);
+		key->snap_from = mikulas_snapid_to_cpu(entry->snap_from);
+		key->snap_to = mikulas_snapid_to_cpu(entry->snap_to);
+	}
+	dm_bufio_release(bp);
+
+	return r;
+}
+
+/*
+ * Scan the btree sequentially.
+ * Start at the given key and perform "call" on each btree entry. When call
+ * returns nonzero, terminate the scan and return the value returned from call.
+ * When the whole tree is scanned, return 0.
+ * On error, return -1.
+ */
+int dm_multisnap_list_btree(struct dm_exception_store *s, struct bt_key *key,
+			    int (*call)(struct dm_exception_store *, struct dm_multisnap_bt_node *,
+					struct dm_multisnap_bt_entry *, void *),
+			    void *cookie)
+{
+	struct dm_multisnap_bt_node *node;
+	struct path_element path[MAX_BT_DEPTH];
+	struct dm_buffer *bp;
+	int depth;
+	int i;
+	int r;
+
+	r = walk_btree(s, key, &node, &bp, path);
+	if (unlikely(r < 0))
+		return r;
+
+list_next_node:
+	for (i = path[s->bt_depth - 1].idx; i < le32_to_cpu(node->n_entries); i++) {
+		cond_resched();
+		r = call(s, node, &node->entries[i], cookie);
+		if (unlikely(r)) {
+			dm_bufio_release(bp);
+			return r;
+		}
+	}
+	dm_bufio_release(bp);
+
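+	/*
+	 * The current leaf is exhausted. Walk up the saved path to the first
+	 * ancestor that has a following entry, then descend through that
+	 * entry to the leftmost leaf of the next subtree and continue there.
+	 */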
+	for (depth = s->bt_depth - 2; depth >= 0; depth--) {
+		int idx;
+		node = dm_multisnap_read_btnode(s, depth, path[depth].block,
+						path[depth].n_entries, &bp);
+		if (!node)
+			return -1;
+		idx = path[depth].idx + 1;
+		if (idx < path[depth].n_entries) {
+			r = compare_key(&node->entries[idx], key);
+			if (unlikely(r <= 0)) {
+				dm_bufio_release(bp);
+				DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+						       ("dm_multisnap_list_btree: non-monotonic btree: node "
+							"%llx, index %x",
+							(unsigned long long)path[depth].block, idx));
+				return 0;
+			}
+			path[depth].idx = idx;
+			do {
+				depth++;
+				path[depth].block = read_48(&node->entries[path[depth - 1].idx], new_chunk);
+				path[depth].idx = 0;
+				dm_bufio_release(bp);
+				node = dm_multisnap_read_btnode(s, depth, path[depth].block, 0, &bp);
+				if (!node)
+					return -1;
+				path[depth].n_entries = le32_to_cpu(node->n_entries);
+			} while (depth < s->bt_depth - 1);
+			goto list_next_node;
+		}
+		dm_bufio_release(bp);
+	}
+
+	return 0;
+}
+
+/*
+ * Add a key and chunk to the btree.
+ * The key must not overlap with any existing btree entry.
+ */
+
+void dm_multisnap_add_to_btree(struct dm_exception_store *s, struct bt_key *key, chunk_t new_chunk)
+{
+	struct dm_multisnap_bt_node *node;
+	struct dm_buffer *bp;
+	struct path_element path[MAX_BT_DEPTH];
+	int depth;
+
+	unsigned split_entries, split_index, split_offset, split_size;
+	struct bt_key new_key;
+	struct dm_multisnap_bt_entry *last_one;
+	chunk_t new_root;
+
+	int r = walk_btree(s, key, &node, &bp, path);
+
+	if (unlikely(r)) {
+		if (r > 0) {
+			dm_bufio_release(bp);
+			DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+					       ("dm_multisnap_add_to_btree: adding key that already exists: "
+						"%llx, %llx-%llx",
+						(unsigned long long)key->chunk,
+						(unsigned long long)key->snap_from,
+						(unsigned long long)key->snap_to));
+		}
+		return;
+	}
+
+	depth = s->bt_depth - 1;
+
+go_up:
+	node = dm_multisnap_alloc_duplicate_block(s, path[depth].block, &bp, node);
+	if (unlikely(!node))
+		return;
+
+	if (likely(le32_to_cpu(node->n_entries) < s->btree_entries)) {
+		add_at_idx(node, path[depth].idx, key, new_chunk);
+		dm_bufio_mark_buffer_dirty(bp);
+		dm_bufio_release(bp);
+		return;
+	}
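+
+	/*
+	 * The node is full: insert the new entry into a scratch copy
+	 * (s->tmp_chunk) and split it. The upper half of the entries stays in
+	 * the existing block, so the key under which the parent references it
+	 * is unchanged; the lower half is written to a newly allocated block
+	 * whose last key is then inserted into the parent (go_up).
+	 */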
+	cond_resched();
+	memcpy(s->tmp_chunk, node, s->chunk_size);
+	cond_resched();
+	add_at_idx(s->tmp_chunk, path[depth].idx, key, new_chunk);
+
+	split_entries = le32_to_cpu(((struct dm_multisnap_bt_node *)s->tmp_chunk)->n_entries);
+	split_index = split_entries / 2;
+	split_offset = sizeof(struct dm_multisnap_bt_node) + split_index * sizeof(struct dm_multisnap_bt_entry);
+	split_size = sizeof(struct dm_multisnap_bt_node) + split_entries * sizeof(struct dm_multisnap_bt_entry);
+	cond_resched();
+	memcpy(node, s->tmp_chunk, sizeof(struct dm_multisnap_bt_node));
+	cond_resched();
+	memcpy((char *)node + sizeof(struct dm_multisnap_bt_node),
+	       (char *)s->tmp_chunk + split_offset, split_size - split_offset);
+	cond_resched();
+	memset((char *)node + sizeof(struct dm_multisnap_bt_node) + split_size - split_offset, 0,
+	       s->chunk_size - (sizeof(struct dm_multisnap_bt_node) + split_size - split_offset));
+	cond_resched();
+	node->n_entries = cpu_to_le32(split_entries - split_index);
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+
+	node = dm_multisnap_alloc_make_block(s, &new_chunk, &bp);
+	if (unlikely(!node))
+		return;
+
+	cond_resched();
+	memcpy(node, s->tmp_chunk, split_offset);
+	cond_resched();
+	memset((char *)node + split_offset, 0, s->chunk_size - split_offset);
+	cond_resched();
+	node->n_entries = cpu_to_le32(split_index);
+
+	last_one = &node->entries[split_index - 1];
+	new_key.chunk = read_48(last_one, orig_chunk);
+	new_key.snap_from = mikulas_snapid_to_cpu(last_one->snap_to);
+	new_key.snap_to = mikulas_snapid_to_cpu(last_one->snap_to);
+
+	key = &new_key;
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+
+	if (depth--) {
+		node = dm_multisnap_read_btnode(s, depth, path[depth].block,
+						path[depth].n_entries, &bp);
+		if (unlikely(!node))
+			return;
+		goto go_up;
+	}
+
+	if (s->bt_depth >= MAX_BT_DEPTH) {
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_add_to_btree: max b+-tree depth reached"));
+		return;
+	}
+
+	node = dm_multisnap_alloc_make_block(s, &new_root, &bp);
+	if (unlikely(!node))
+		return;
+
+	cond_resched();
+	memset(node, 0, s->chunk_size);
+	cond_resched();
+	node->signature = BT_SIGNATURE;
+	node->n_entries = cpu_to_le32(0);
+	add_at_idx(node, 0, &new_key, new_chunk);
+	new_key.snap_from = new_key.snap_to = DM_SNAPID_T_LAST;
+	new_key.chunk = DM_CHUNK_T_LAST;
+	add_at_idx(node, 1, &new_key, path[0].block);
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+
+	s->bt_root = new_root;
+	s->bt_depth++;
+}
+
+/*
+ * Change the key of the last entry from old_chunk/old_snapid to
+ * new_chunk/new_snapid in the nodes above the given depth, going upward
+ * toward the root for as long as the modified entry is the last one in its node.
+ */
+static void dm_multisnap_fixup_backlimits(struct dm_exception_store *s,
+					  struct path_element path[MAX_BT_DEPTH], int depth,
+					  chunk_t old_chunk, mikulas_snapid_t old_snapid,
+					  chunk_t new_chunk, mikulas_snapid_t new_snapid)
+{
+	int idx;
+	struct dm_multisnap_bt_node *node;
+	struct dm_buffer *bp;
+
+	if (old_chunk == new_chunk && old_snapid == new_snapid)
+		return;
+
+	for (depth--; depth >= 0; depth--) {
+		node = dm_multisnap_read_btnode(s, depth, path[depth].block,
+						path[depth].n_entries, &bp);
+		if (unlikely(!node))
+			return;
+
+		node = dm_multisnap_alloc_duplicate_block(s, path[depth].block, &bp, node);
+		if (unlikely(!node))
+			return;
+
+		idx = path[depth].idx;
+
+		if (unlikely(read_48(&node->entries[idx], orig_chunk) != old_chunk) ||
+		    unlikely(mikulas_snapid_to_cpu(node->entries[idx].snap_from) != old_snapid) ||
+		    unlikely(mikulas_snapid_to_cpu(node->entries[idx].snap_to) != old_snapid)) {
+			dm_bufio_release(bp);
+			DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+					       ("dm_multisnap_fixup_backlimits: btree limit does not match, block "
+						"%llx, idx %x, orig_chunk %llx, snap_from %llx, snap_to "
+						"%llx, want %llx, %llx",
+						(unsigned long long)path[depth].block,
+						idx,
+						(unsigned long long)read_48(&node->entries[idx], orig_chunk),
+						(unsigned long long)mikulas_snapid_to_cpu(node->entries[idx].snap_from),
+						(unsigned long long)mikulas_snapid_to_cpu(node->entries[idx].snap_to),
+						(unsigned long long)old_chunk,
+						(unsigned long long)old_snapid));
+			return;
+		}
+		write_48(&node->entries[idx], orig_chunk, new_chunk);
+		node->entries[idx].snap_from = node->entries[idx].snap_to = cpu_to_mikulas_snapid(new_snapid);
+
+		dm_bufio_mark_buffer_dirty(bp);
+		dm_bufio_release(bp);
+
+		if (path[depth].idx != path[depth].n_entries - 1)
+			return;
+	}
+	DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+			       ("dm_multisnap_fixup_backlimits: the last entry modified, %llx/%llx -> %llx/%llx",
+				(unsigned long long)old_chunk,
+				(unsigned long long)old_snapid,
+				(unsigned long long)new_chunk,
+				(unsigned long long)new_snapid));
+}
+
+/*
+ * Restrict the range of an existing btree entry.
+ * The key must have the same beginning or the same end as an existing entry
+ * (but not both); the key's range is then excluded from that entry.
+ */
+void dm_multisnap_restrict_btree_entry(struct dm_exception_store *s, struct bt_key *key)
+{
+	struct dm_multisnap_bt_node *node;
+	struct path_element path[MAX_BT_DEPTH];
+	struct dm_buffer *bp;
+	int idx;
+	struct dm_multisnap_bt_entry *entry;
+	mikulas_snapid_t from, to, new_to;
+
+	int r = walk_btree(s, key, &node, &bp, path);
+	if (unlikely(r < 0))
+		return;
+
+	if (!r) {
+		dm_bufio_release(bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_restrict_btree_entry: unknown key: %llx, %llx-%llx",
+					(unsigned long long)key->chunk,
+					(unsigned long long)key->snap_from,
+					(unsigned long long)key->snap_to));
+		return;
+	}
+
+	node = dm_multisnap_alloc_duplicate_block(s, path[s->bt_depth - 1].block, &bp, node);
+	if (unlikely(!node))
+		return;
+
+	idx = path[s->bt_depth - 1].idx;
+	entry = &node->entries[idx];
+	from = mikulas_snapid_to_cpu(entry->snap_from);
+	to = new_to = mikulas_snapid_to_cpu(entry->snap_to);
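+	/*
+	 * Cut the key's range off either the beginning or the end of the
+	 * entry. If the end of the entry changes and it is the last entry of
+	 * the node, the keys in the parent nodes are fixed up below.
+	 */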
+	if (key->snap_from == from && key->snap_to < to) {
+		entry->snap_from = cpu_to_mikulas_snapid(key->snap_to + 1);
+	} else if (key->snap_from > from && key->snap_to == to) {
+		new_to = key->snap_from - 1;
+		entry->snap_to = cpu_to_mikulas_snapid(new_to);
+	} else {
+		dm_bufio_release(bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_restrict_btree_entry: invalid range to restrict: "
+					"%llx, %llx-%llx %llx-%llx",
+					(unsigned long long)key->chunk,
+					(unsigned long long)from,
+					(unsigned long long)to,
+					(unsigned long long)key->snap_from,
+					(unsigned long long)key->snap_to));
+		return;
+	}
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+
+	if (unlikely(idx == path[s->bt_depth - 1].n_entries - 1))
+		dm_multisnap_fixup_backlimits(s, path, s->bt_depth - 1,
+					      key->chunk, to, key->chunk, new_to);
+}
+
+/*
+ * Expand the range of an existing btree entry.
+ * The key represents the whole new range (including both the old and the new parts).
+ */
+void dm_multisnap_extend_btree_entry(struct dm_exception_store *s, struct bt_key *key)
+{
+	struct dm_multisnap_bt_node *node;
+	struct path_element path[MAX_BT_DEPTH];
+	struct dm_buffer *bp;
+	int idx;
+	struct dm_multisnap_bt_entry *entry;
+	mikulas_snapid_t from, to, new_to;
+
+	int r = walk_btree(s, key, &node, &bp, path);
+	if (unlikely(r < 0))
+		return;
+
+	if (!r) {
+		dm_bufio_release(bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_extend_btree_entry: unknown key: "
+					"%llx, %llx-%llx",
+					(unsigned long long)key->chunk,
+					(unsigned long long)key->snap_from,
+					(unsigned long long)key->snap_to));
+		return;
+	}
+
+	node = dm_multisnap_alloc_duplicate_block(s, path[s->bt_depth - 1].block,
+						  &bp, node);
+	if (unlikely(!node))
+		return;
+
+	idx = path[s->bt_depth - 1].idx;
+	entry = &node->entries[idx];
+	from = mikulas_snapid_to_cpu(entry->snap_from);
+	to = new_to = mikulas_snapid_to_cpu(entry->snap_to);
+	if (key->snap_from < from)
+		entry->snap_from = cpu_to_mikulas_snapid(key->snap_from);
+	if (key->snap_to > to) {
+		new_to = key->snap_to;
+		entry->snap_to = cpu_to_mikulas_snapid(new_to);
+	}
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+
+	if (unlikely(idx == path[s->bt_depth - 1].n_entries - 1))
+		dm_multisnap_fixup_backlimits(s, path, s->bt_depth - 1,
+					      key->chunk, to, key->chunk, new_to);
+}
+
+/*
+ * Delete an entry from the btree.
+ */
+void dm_multisnap_delete_from_btree(struct dm_exception_store *s, struct bt_key *key)
+{
+	struct dm_multisnap_bt_node *node;
+	struct path_element path[MAX_BT_DEPTH];
+	struct dm_buffer *bp;
+	int idx;
+	struct dm_multisnap_bt_entry *entry;
+	mikulas_snapid_t from, to;
+	int depth, n_entries;
+
+	struct dm_multisnap_bt_entry *last_one;
+	chunk_t last_one_chunk;
+	mikulas_snapid_t last_one_snap_to;
+
+	int r = walk_btree(s, key, &node, &bp, path);
+	if (unlikely(r < 0))
+		return;
+
+	if (unlikely(!r)) {
+		dm_bufio_release(bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_delete_from_btree: unknown key: %llx, %llx-%llx",
+					(unsigned long long)key->chunk,
+					(unsigned long long)key->snap_from,
+					(unsigned long long)key->snap_to));
+		return;
+	}
+
+	depth = s->bt_depth - 1;
+
+	idx = path[depth].idx;
+	entry = &node->entries[idx];
+	from = mikulas_snapid_to_cpu(entry->snap_from);
+	to = mikulas_snapid_to_cpu(entry->snap_to);
+	if (unlikely(from != key->snap_from) || unlikely(to != key->snap_to)) {
+		dm_bufio_release(bp);
+		DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+				       ("dm_multisnap_delete_from_btree: invalid range to delete: "
+					"%llx, %llx-%llx %llx-%llx",
+					(unsigned long long)key->chunk,
+					(unsigned long long)from,
+					(unsigned long long)to,
+					(unsigned long long)key->snap_from,
+					(unsigned long long)key->snap_to));
+		return;
+	}
+
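+	/*
+	 * If the node holds only this one entry, free it (and any further
+	 * single-entry ancestors) and remove the entry that referenced the
+	 * freed subtree from the nearest ancestor with more than one entry.
+	 */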
+	while (unlikely((n_entries = le32_to_cpu(node->n_entries)) == 1)) {
+		dm_bufio_release(bp);
+		if (unlikely(!depth)) {
+			DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+					       ("dm_multisnap_delete_from_btree: b-tree is empty"));
+			return;
+		}
+		dm_multisnap_free_block_and_duplicates(s, path[depth].block);
+		depth--;
+		node = dm_multisnap_read_btnode(s, depth, path[depth].block,
+						path[depth].n_entries, &bp);
+		if (!node)
+			return;
+	}
+
+	node = dm_multisnap_alloc_duplicate_block(s, path[depth].block, &bp, node);
+	if (unlikely(!node))
+		return;
+
+	idx = path[depth].idx;
+
+	cond_resched();
+	memmove(node->entries + idx, node->entries + idx + 1,
+		(n_entries - idx - 1) * sizeof(struct dm_multisnap_bt_entry));
+	cond_resched();
+	n_entries--;
+	memset(node->entries + n_entries, 0, sizeof(struct dm_multisnap_bt_entry));
+
+	node->n_entries = cpu_to_le32(n_entries);
+
+	last_one = &node->entries[n_entries - 1];
+	last_one_chunk = read_48(last_one, orig_chunk);
+	last_one_snap_to = mikulas_snapid_to_cpu(last_one->snap_to);
+
+	dm_bufio_mark_buffer_dirty(bp);
+	dm_bufio_release(bp);
+
+	if (unlikely(idx == n_entries))
+		dm_multisnap_fixup_backlimits(s, path, depth, key->chunk,
+					      key->snap_to, last_one_chunk,
+					      last_one_snap_to);
+}
+
+/*
+ * Process a btree tmp remap.
+ * Find the path from the root to the remapped node and rewrite the nodes
+ * along that path so that they reference the node's new location.
+ */
+void dm_multisnap_bt_finalize_tmp_remap(struct dm_exception_store *s,
+					struct tmp_remap *tmp_remap)
+{
+	struct dm_buffer *bp;
+	struct dm_multisnap_bt_node *node;
+	struct bt_key key;
+	struct path_element path[MAX_BT_DEPTH];
+	int results_ptr;
+
+	chunk_t new_blockn;
+	int r;
+	int i;
+
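+	/*
+	 * Remapping may require rewriting every node on the path from the
+	 * root, so make sure one block per btree level is preallocated.
+	 */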
+	if (s->n_preallocated_blocks < s->bt_depth) {
+		if (dm_multisnap_alloc_blocks(s, s->preallocated_blocks + s->n_preallocated_blocks,
+					      s->bt_depth - s->n_preallocated_blocks, 0) < 0)
+			return;
+		s->n_preallocated_blocks = s->bt_depth;
+	}
+	results_ptr = 0;
+
+	/*
+	 * Read the key from this node --- we'll walk the btree according
+	 * to this key to find a path from the root.
+	 */
+	node = dm_multisnap_read_btnode(s, s->bt_depth - 1, tmp_remap->new, 0, &bp);
+	if (!node)
+		return;
+	key.chunk = read_48(&node->entries[0], orig_chunk);
+	key.snap_from = key.snap_to = mikulas_snapid_to_cpu(node->entries[0].snap_from);
+	dm_bufio_release(bp);
+
+	r = walk_btree(s, &key, &node, &bp, path);
+	if (r < 0)
+		return;
+
+	dm_bufio_release(bp);
+
+	for (i = s->bt_depth - 1; i >= 0; i--)
+		if (path[i].block == tmp_remap->old)
+			goto found;
+
+	DMERR("block %llx/%llx was not found in btree when searching for %llx/%llx",
+	      (unsigned long long)tmp_remap->old,
+	      (unsigned long long)tmp_remap->new,
+	      (unsigned long long)key.chunk,
+	      (unsigned long long)key.snap_from);
+	for (i = 0; i < s->bt_depth; i++)
+		DMERR("path[%d]: %llx/%x", i, (unsigned long long)path[i].block, path[i].idx);
+	dm_multisnap_set_error(s->dm, -EFSERROR);
+	return;
+
+found:
+	dm_multisnap_free_block(s, tmp_remap->old, 0);
+
+	new_blockn = tmp_remap->new;
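+	/*
+	 * Propagate the new block number up the saved path. Nodes that are
+	 * already uncommitted are updated in place and the propagation stops
+	 * there; committed nodes are moved to a preallocated block, so their
+	 * parent must be updated as well. If the loop walks past the root,
+	 * the root itself has moved.
+	 */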
+	for (i--; i >= 0; i--) {
+		int remapped = 0;
+		node = dm_multisnap_read_btnode(s, i, path[i].block, path[i].n_entries, &bp);
+		if (!node)
+			return;
+		if (!dm_multisnap_block_is_uncommitted(s, path[i].block)) {
+			remapped = 1;
+			dm_bufio_release_move(bp, s->preallocated_blocks[results_ptr]);
+			dm_multisnap_free_block_and_duplicates(s, path[i].block);
+			node = dm_multisnap_read_btnode(s, i, s->preallocated_blocks[results_ptr],
+							path[i].n_entries, &bp);
+			if (!node)
+				return;
+			dm_multisnap_block_set_uncommitted(s, s->preallocated_blocks[results_ptr]);
+		}
+		write_48(&node->entries[path[i].idx], new_chunk, new_blockn);
+		dm_bufio_mark_buffer_dirty(bp);
+		dm_bufio_release(bp);
+
+		if (!remapped)
+			goto skip_it;
+		new_blockn = s->preallocated_blocks[results_ptr];
+		results_ptr++;
+	}
+
+	s->bt_root = new_blockn;
+
+skip_it:
+	memmove(s->preallocated_blocks, s->preallocated_blocks + results_ptr,
+		(s->n_preallocated_blocks -= results_ptr) * sizeof(chunk_t));
+}
-- 
1.6.5.2



