[dm-devel] [PATCH 04/14] dm-multisnap-mikulas-alloc
Mike Snitzer
snitzer at redhat.com
Tue Mar 2 00:23:48 UTC 2010
From: Mikulas Patocka <mpatocka at redhat.com>
Management of allocation bitmaps.
Creating the bitmaps.
Allocating and freeing blocks.
Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
---
drivers/md/dm-multisnap-alloc.c | 590 +++++++++++++++++++++++++++++++++++++++
1 files changed, 590 insertions(+), 0 deletions(-)
create mode 100644 drivers/md/dm-multisnap-alloc.c
diff --git a/drivers/md/dm-multisnap-alloc.c b/drivers/md/dm-multisnap-alloc.c
new file mode 100644
index 0000000..02f89be
--- /dev/null
+++ b/drivers/md/dm-multisnap-alloc.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2009 Red Hat Czech, s.r.o.
+ *
+ * Mikulas Patocka <mpatocka at redhat.com>
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-multisnap-mikulas.h"
+
+#define rshift_roundup(val, bits) (((val) + ((chunk_t)1 << (bits)) - 1) >> (bits))
+
+#define BITS_PER_BYTE_SHIFT 3
+#define BYTES_PER_POINTER_SHIFT 3
+
+/*
+ * Initialize the root bitmap, write it at the position "writing block".
+ */
+void dm_multisnap_create_bitmaps(struct dm_exception_store *s, chunk_t *writing_block)
+{
+ struct dm_buffer *bp;
+ unsigned i;
+ void *bmp;
+
+ while (dm_multisnap_is_commit_block(s, *writing_block))
+ (*writing_block)++;
+
+ if (*writing_block >= s->dev_size) {
+ DM_MULTISNAP_SET_ERROR(s->dm, -ENOSPC,
+ ("dm_multisnap_create_bitmaps: device is too small"));
+ return;
+ }
+
+ if (*writing_block >= s->chunk_size << BITS_PER_BYTE_SHIFT) {
+ DM_MULTISNAP_SET_ERROR(s->dm, -ENOSPC,
+ ("dm_multisnap_create_bitmaps: invalid block to write: %llx",
+ (unsigned long long)*writing_block));
+ return;
+ }
+
+ bmp = dm_bufio_new(s->bufio, *writing_block, &bp);
+ if (IS_ERR(bmp)) {
+ DM_MULTISNAP_SET_ERROR(s->dm, PTR_ERR(bmp),
+ ("dm_multisnap_create_bitmaps: can't create direct bitmap block at %llx",
+ (unsigned long long)*writing_block));
+ return;
+ }
+ cond_resched();
+ memset(bmp, 0, s->chunk_size);
+ for (i = 0; i <= *writing_block; i++) {
+ generic___set_le_bit(i, bmp);
+ dm_multisnap_status_lock(s->dm);
+ s->total_allocated++;
+ dm_multisnap_status_unlock(s->dm);
+ cond_resched();
+ }
+ s->bitmap_root = *writing_block;
+ s->bitmap_depth = 0;
+
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+
+ (*writing_block)++;
+}
+
+static void dm_multisnap_add_bitmap(struct dm_exception_store *s);
+
+/*
+ * Extend bitmaps to cover "new_size" area.
+ *
+ * While we extend bitmaps we increase s->dev_size so that the newly mapped
+ * space can be used to hold further bitmaps.
+ */
+void dm_multisnap_extend_bitmaps(struct dm_exception_store *s, chunk_t new_size)
+{
+ while (s->dev_size < new_size) {
+ struct dm_buffer *bp;
+ void *bmp;
+ bitmap_t bitmap_no = s->dev_size >> (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+ unsigned i = s->dev_size & ((1 << (s->chunk_shift + BITS_PER_BYTE_SHIFT)) - 1);
+ chunk_t c = s->dev_size;
+ if (!i) {
+ dm_multisnap_add_bitmap(s);
+ if (unlikely(dm_multisnap_has_error(s->dm)))
+ return;
+ }
+ bmp = dm_multisnap_map_bitmap(s, bitmap_no, &bp, NULL, NULL);
+ if (unlikely(!bmp))
+ return;
+ for (; i < s->chunk_size << BITS_PER_BYTE_SHIFT; i++, c++) {
+ if (unlikely(dm_multisnap_is_commit_block(s, c)))
+ generic___set_le_bit(i, bmp);
+ else
+ generic___clear_le_bit(i, bmp);
+ }
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+
+ s->dev_size = ((chunk_t)bitmap_no + 1) << (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+ if (s->dev_size > new_size)
+ s->dev_size = new_size;
+ }
+}
+
+/*
+ * Add one bitmap after the last bitmap. A helper function for
+ * dm_multisnap_extend_bitmaps
+ */
+static void dm_multisnap_add_bitmap(struct dm_exception_store *s)
+{
+ struct path_element path[MAX_BITMAP_DEPTH];
+ struct dm_buffer *bp;
+ int d;
+ __u64 *bmpp;
+ unsigned i;
+ chunk_t c, bitmap_blk, new_blk;
+ bitmap_t bitmap_no = s->dev_size >> (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+ void *bmp = dm_multisnap_alloc_make_block(s, &bitmap_blk, &bp);
+ if (!bmp)
+ return;
+ c = (chunk_t)bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+ for (i = 0; i < s->chunk_size << BITS_PER_BYTE_SHIFT; i++, c++) {
+ if (unlikely(dm_multisnap_is_commit_block(s, c)))
+ generic___set_le_bit(i, bmp);
+ else
+ generic___clear_le_bit(i, bmp);
+ }
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+
+ /* just get the path to the last block */
+ bmp = dm_multisnap_map_bitmap(s, bitmap_no - 1, &bp, NULL, path);
+ if (unlikely(!bmp))
+ return;
+ dm_bufio_release(bp);
+
+ for (d = s->bitmap_depth - 1; d >= 0; d--) {
+ if (path[d].idx + 1 < path[d].n_entries) {
+ bmpp = dm_multisnap_read_block(s, path[d].block, &bp);
+ if (!bmpp)
+ return;
+ bmpp[path[d].idx + 1] = cpu_to_le64(bitmap_blk);
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+ return;
+ } else {
+ bmpp = dm_multisnap_alloc_make_block(s, &new_blk, &bp);
+ if (!bmpp)
+ return;
+ memset(bmpp, 0, s->chunk_size);
+ bmpp[0] = cpu_to_le64(bitmap_blk);
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+ bitmap_blk = new_blk;
+ }
+ }
+
+ /* make new root */
+ bmpp = dm_multisnap_alloc_make_block(s, &new_blk, &bp);
+ if (!bmpp)
+ return;
+ memset(bmpp, 0, s->chunk_size);
+ bmpp[0] = cpu_to_le64(s->bitmap_root);
+ bmpp[1] = cpu_to_le64(bitmap_blk);
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+ s->bitmap_root = new_blk;
+ s->bitmap_depth++;
+}
+
+/*
+ * Read a leaf bitmap node with index "bitmap".
+ * Return the pointer to the data, store the held buffer to bl.
+ * Return the block in block and path in path.
+ */
+void *dm_multisnap_map_bitmap(struct dm_exception_store *s, bitmap_t bitmap,
+ struct dm_buffer **bp, chunk_t *block, struct path_element *path)
+{
+ __u64 *bmp;
+ unsigned idx;
+ unsigned d = s->bitmap_depth;
+ chunk_t blk = s->bitmap_root;
+ chunk_t parent = 0;
+
+ while (1) {
+ bmp = dm_multisnap_read_block(s, blk, bp);
+ if (unlikely(!bmp)) {
+ /* error is already set in dm_multisnap_read_block */
+ DMERR("dm_multisnap_map_bitmap: can't read bitmap at "
+ "%llx (%llx), pointed to by %llx (%llx), depth %d/%d, index %llx",
+ (unsigned long long)blk,
+ (unsigned long long)dm_multisnap_remap_block(s, blk),
+ (unsigned long long)parent,
+ (unsigned long long)dm_multisnap_remap_block(s, parent),
+ s->bitmap_depth - d,
+ s->bitmap_depth,
+ (unsigned long long)bitmap);
+ return NULL;
+ }
+ if (!d) {
+ if (block)
+ *block = blk;
+ return bmp;
+ }
+
+ idx = (bitmap >> ((d - 1) * (s->chunk_shift - BYTES_PER_POINTER_SHIFT))) &
+ ((s->chunk_size - 1) >> BYTES_PER_POINTER_SHIFT);
+
+ if (unlikely(path != NULL)) {
+ path[s->bitmap_depth - d].block = blk;
+ path[s->bitmap_depth - d].idx = idx;
+ path[s->bitmap_depth - d].n_entries = s->chunk_size >> BYTES_PER_POINTER_SHIFT;
+ }
+
+ parent = blk;
+ blk = le64_to_cpu(bmp[idx]);
+
+ dm_bufio_release(*bp);
+
+ d--;
+ }
+}
+
+/*
+ * Find a free bit from "start" to "end" (in bits).
+ * If wide_search is nonzero, search for the whole free byte first.
+ */
+static int find_bit(const void *bmp, unsigned start, unsigned end, int wide_search)
+{
+ const void *p;
+ unsigned bit;
+ if (unlikely(start >= end))
+ return -ENOSPC;
+ if (likely(!generic_test_le_bit(start, bmp)))
+ return start;
+ cond_resched();
+ if (likely(wide_search)) {
+ p = memchr((const char *)bmp + (start >> 3), 0, (end >> 3) - (start >> 3));
+ cond_resched();
+ if (p) {
+ bit = (((const __u8 *)p - (const __u8 *)bmp) << 3) | (start & 7);
+ while (bit > start && !generic_test_le_bit(bit - 1, bmp))
+ bit--;
+ goto ret_bit;
+ }
+ }
+ bit = generic_find_next_zero_le_bit(bmp, end, start);
+ cond_resched();
+
+ret_bit:
+ if (unlikely(bit >= end))
+ return -ENOSPC;
+ return bit;
+}
+
+/*
+ * Find the bitmap limit in bits.
+ *
+ * All the bitmaps hold s->chunk_size << BITS_PER_BYTE_SHIFT bits, except
+ * the last one where we must use s->dev_size modulo number of bits in bitmap
+ * to find the valid number of bits. Note that bits past s->dev_size are
+ * undefined, there can be anything, so we must not scan past this limit.
+ */
+static unsigned bitmap_limit(struct dm_exception_store *s, bitmap_t bmp)
+{
+ if (bmp == (bitmap_t)(s->dev_size >> (s->chunk_shift + BITS_PER_BYTE_SHIFT)))
+ return (unsigned)s->dev_size & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1);
+ return s->chunk_size << BITS_PER_BYTE_SHIFT;
+}
+
+/*
+ * The central allocation routine.
+ *
+ * Allocation strategy:
+ *
+ * We maintain s->alloc_rover to point past the last allocated blocks (and wraps
+ * around back to 0 if it would point past the device end).
+ *
+ * We attempt to allocate at the rover and increment the rover, this will
+ * minimize seek times for writes. I assume that this snapshot driver will be
+ * mostly loaded with write requests (it happens when writing to the origin),
+ * so I attempt to optimize for writes.
+ *
+ * If there is no space at the rover, find the whole free byte (8 bits) in the
+ * current chunk. If there is not any free byte, we search the individual bits.
+ * If we don't find any bit, continue with the next bitmap. If we scan the whole
+ * device linearly and still don't find anything, abort with a failure.
+ *
+ * This is similar to what ext[23] does, so I suppose it is tuned well enough
+ * that it won't fragment too much.
+ */
+int dm_multisnap_alloc_blocks(struct dm_exception_store *s, chunk_t *results,
+ unsigned n_blocks, int flags)
+{
+ void *bmp;
+ struct dm_buffer *bp;
+ chunk_t block;
+ int wrap_around = 0;
+ int start_bit;
+ int wide_search;
+ int i;
+ bitmap_t bitmap_no;
+ int c;
+ int bit;
+ chunk_t to_free = 0;
+
+ bitmap_no = s->alloc_rover >> (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+next_bitmap:
+ bmp = dm_multisnap_map_bitmap(s, bitmap_no, &bp, &block, NULL);
+ if (unlikely(!bmp))
+ return -1;
+
+ wide_search = 1;
+find_again:
+ start_bit = s->alloc_rover & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1);
+
+ for (i = 0; i < n_blocks; i++) {
+find_another_bit:
+ bit = find_bit(bmp, start_bit, bitmap_limit(s, bitmap_no), wide_search);
+ if (unlikely(bit < 0)) {
+bit_find_failed:
+ if (wide_search) {
+ wide_search = 0;
+ goto find_again;
+ }
+ dm_bufio_release(bp);
+ s->alloc_rover = (chunk_t) ++bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+ if (unlikely(s->alloc_rover >= s->dev_size)) {
+ s->alloc_rover = 0;
+ bitmap_no = 0;
+ wrap_around++;
+ if (wrap_around >= 2) {
+ DM_MULTISNAP_SET_ERROR(s->dm, -ENOSPC, ("snapshot overflow"));
+ return -1;
+ }
+ }
+ goto next_bitmap;
+ }
+ results[i] = ((chunk_t)bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT)) | bit;
+ start_bit = bit + 1;
+ dm_bufio_release(bp);
+
+ c = dm_multisnap_check_allocated_block(s, results[i]);
+ if (dm_multisnap_has_error(s->dm))
+ return -1;
+
+ bmp = dm_multisnap_read_block(s, block, &bp);
+ if (unlikely(!bmp))
+ return -1;
+
+ if (unlikely(c))
+ goto find_another_bit;
+ }
+
+ if (flags & ALLOC_DRY)
+ goto bp_release_return;
+
+ if (!dm_multisnap_block_is_uncommitted(s, block)) {
+ chunk_t new_block;
+find_another_bit_for_bitmap:
+ bit = find_bit(bmp, start_bit, bitmap_limit(s, bitmap_no), wide_search);
+ if (unlikely(bit < 0))
+ goto bit_find_failed;
+
+ new_block = ((chunk_t)bitmap_no << (s->chunk_shift + BITS_PER_BYTE_SHIFT)) | bit;
+ start_bit = bit + 1;
+
+ dm_bufio_release(bp);
+ c = dm_multisnap_check_allocated_block(s, new_block);
+ if (dm_multisnap_has_error(s->dm))
+ return -1;
+
+ bmp = dm_multisnap_read_block(s, block, &bp);
+ if (unlikely(!bmp))
+ return -1;
+
+ if (unlikely(c))
+ goto find_another_bit_for_bitmap;
+
+ /*
+ * Warning: record the address of a block to free in a special
+ * variable.
+ *
+ * If we freed it here, that could recurse back to
+ * dm_multisnap_alloc_blocks and corrupt allocations. Free it
+ * later when we are done with the allocation and all the
+ * allocated blocks are marked in the bitmap.
+ */
+ bmp = dm_multisnap_duplicate_block(s, block, new_block, bitmap_no, &bp, &to_free);
+ if (unlikely(!bmp))
+ return -1;
+
+ generic___set_le_bit(bit, bmp);
+ dm_multisnap_status_lock(s->dm);
+ s->total_allocated++;
+ dm_multisnap_status_unlock(s->dm);
+ }
+
+ for (i = 0; i < n_blocks; i++)
+ generic___set_le_bit(results[i] & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1), bmp);
+ dm_multisnap_status_lock(s->dm);
+ s->total_allocated += n_blocks;
+ dm_multisnap_status_unlock(s->dm);
+
+ dm_bufio_mark_buffer_dirty(bp);
+
+bp_release_return:
+ dm_bufio_release(bp);
+
+ s->alloc_rover = (s->alloc_rover & ~(chunk_t)((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1)) + start_bit;
+ if (unlikely(s->alloc_rover >= s->dev_size))
+ s->alloc_rover = 0;
+
+ if (unlikely(to_free != 0))
+ dm_multisnap_free_block(s, to_free, 0);
+
+ return 0;
+}
+
+/*
+ * This function gets a valid block number (block), buffer for this block (bp)
+ * and data in this buffer (ptr) and returns new writeable block. It possibly
+ * moves the data to another buffer and updates *bp.
+ *
+ * Note that to maintain log-structured storage, we must not write to the block,
+ * but we must allocate new block and copy the data there.
+ *
+ * The only case where we can write to the provided block directly is if the
+ * block was created since last commit.
+ */
+
+void *dm_multisnap_alloc_duplicate_block(struct dm_exception_store *s, chunk_t block,
+ struct dm_buffer **bp, void *ptr)
+{
+ int r;
+ chunk_t new_chunk;
+ void *data;
+
+ if (dm_multisnap_block_is_uncommitted(s, block))
+ return ptr;
+
+ dm_bufio_release(*bp);
+
+ r = dm_multisnap_alloc_blocks(s, &new_chunk, 1, 0);
+ if (r)
+ return NULL;
+
+ data = dm_multisnap_read_block(s, block, bp);
+ if (!data)
+ return NULL;
+
+ return dm_multisnap_duplicate_block(s, block, new_chunk,
+ CB_BITMAP_IDX_NONE, bp, NULL);
+}
+
+/*
+ * Allocate a new block and return its data. Return the block number in *result
+ * and buffer pointer in *bp.
+ */
+void *dm_multisnap_alloc_make_block(struct dm_exception_store *s, chunk_t *result,
+ struct dm_buffer **bp)
+{
+ int r = dm_multisnap_alloc_blocks(s, result, 1, 0);
+ if (unlikely(r < 0))
+ return NULL;
+
+ return dm_multisnap_make_block(s, *result, bp);
+}
+
+/*
+ * Free the block immediately. You must be careful with this function because
+ * it doesn't follow log-structured protocol.
+ *
+ * It may be used only if
+ * - the blocks to free were allocated since last transactions.
+ * - or from freelist management, which means the blocks were already recorded in
+ * a freelist (thus it would be freed again in case of machine crash).
+ */
+void dm_multisnap_free_blocks_immediate(struct dm_exception_store *s, chunk_t block,
+ unsigned n_blocks)
+{
+ void *bmp;
+ struct dm_buffer *bp;
+
+ if (!n_blocks)
+ return;
+
+ if (unlikely(block + n_blocks > s->dev_size)) {
+ DM_MULTISNAP_SET_ERROR(s->dm, -EFSERROR,
+ ("dm_multisnap_free_block_immediate: freeing invalid blocks %llx, %x",
+ (unsigned long long)block, n_blocks));
+ return;
+ }
+
+ if (block + n_blocks == s->alloc_rover)
+ s->alloc_rover = block;
+
+ do {
+ bitmap_t bitmap_no = block >> (s->chunk_shift + BITS_PER_BYTE_SHIFT);
+
+ bmp = dm_multisnap_map_bitmap(s, bitmap_no, &bp, NULL, NULL);
+ if (!bmp)
+ return;
+
+ do {
+ generic___clear_le_bit(block & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1), bmp);
+ dm_multisnap_status_lock(s->dm);
+ s->total_allocated--;
+ dm_multisnap_status_unlock(s->dm);
+ n_blocks--;
+ block++;
+ cond_resched();
+ } while (n_blocks && (block & ((s->chunk_size << BITS_PER_BYTE_SHIFT) - 1)));
+
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+ } while (unlikely(n_blocks != 0));
+}
+
+/*
+ * Flush tmp_remaps for bitmaps. Write the path from modified bitmaps to the
+ * root.
+ */
+void dm_multisnap_bitmap_finalize_tmp_remap(struct dm_exception_store *s, struct tmp_remap *tmp_remap)
+{
+ chunk_t block;
+ struct dm_buffer *bp;
+ __u64 *new_block;
+ struct path_element path[MAX_BITMAP_DEPTH];
+ int results_ptr;
+
+ chunk_t new_blockn;
+ int i;
+
+ /*
+ * Preallocate twice the required amount of blocks, so that resolving
+ * the next tmp_remap (created here, in dm_multisnap_alloc_blocks)
+ * doesn't have to allocate anything.
+ */
+ if (s->n_preallocated_blocks < s->bitmap_depth) {
+ if (unlikely(dm_multisnap_alloc_blocks(s, s->preallocated_blocks + s->n_preallocated_blocks,
+ s->bitmap_depth * 2 - s->n_preallocated_blocks, 0) < 0))
+ return;
+ s->n_preallocated_blocks = s->bitmap_depth * 2;
+ }
+ results_ptr = 0;
+
+ new_block = dm_multisnap_map_bitmap(s, tmp_remap->bitmap_idx, &bp, &block, path);
+ if (unlikely(!new_block))
+ return;
+
+ dm_bufio_release(bp);
+
+ new_blockn = tmp_remap->new;
+ for (i = s->bitmap_depth - 1; i >= 0; i--) {
+ chunk_t block_to_free;
+ int remapped = 0;
+ __u64 *bmp = dm_multisnap_read_block(s, path[i].block, &bp);
+ if (unlikely(!bmp))
+ return;
+
+ if (!dm_multisnap_block_is_uncommitted(s, path[i].block)) {
+ remapped = 1;
+ dm_bufio_release_move(bp, s->preallocated_blocks[results_ptr]);
+ bmp = dm_multisnap_read_block(s, s->preallocated_blocks[results_ptr], &bp);
+ if (!bmp)
+ return;
+ dm_multisnap_block_set_uncommitted(s, s->preallocated_blocks[results_ptr]);
+ }
+
+ block_to_free = le64_to_cpu(bmp[path[i].idx]);
+ bmp[path[i].idx] = cpu_to_le64(new_blockn);
+ dm_bufio_mark_buffer_dirty(bp);
+ dm_bufio_release(bp);
+
+ dm_multisnap_free_block(s, block_to_free, 0);
+
+ if (!remapped)
+ goto skip_it;
+ new_blockn = s->preallocated_blocks[results_ptr];
+ results_ptr++;
+ }
+
+ dm_multisnap_free_block(s, s->bitmap_root, 0);
+ s->bitmap_root = new_blockn;
+
+skip_it:
+ memmove(s->preallocated_blocks, s->preallocated_blocks + results_ptr,
+ (s->n_preallocated_blocks -= results_ptr) * sizeof(chunk_t));
+}
--
1.6.5.2
More information about the dm-devel
mailing list