[dm-devel] [PATCH] 2.6.0-t6-mm1-dm2: 7/7: Build snapshot as part of dm-mod

Kevin Corry kevcorry at us.ibm.com
Tue Oct 28 11:58:02 UTC 2003


On Monday 20 October 2003 13:26, Kevin Corry wrote:
> Building snapshot as its own kernel module currently creates circular
> module dependencies.  Build dm-snapshot as part of dm-mod (just like
> dm-linear and dm-stripe) to avoid this problem.
>
> If we really want to be able to build snapshot as its own module, then the
> other option (as I mentioned in an email last week) is to rename
> dm-snapshot.c to dm-snap.c (or something like that) and make some changes
> to the Makefile. If you'd prefer this method, let me know and I'll send a
> different patch.

New 7/7 patch. Please discard the previous patch 7 I sent last week. This
patch also assumes that patches 4, 5, and 6 from last week have already
been applied.

In order to properly build snapshot as its own kernel module, dm-snapshot.c
and dm-snapshot.h must be renamed to dm-snap.c and dm-snap.h. The dm_vcalloc
function must also be exported from dm-table.c so the snapshot module can
find it.

diff -Naur linux-2.6.0-test9a/drivers/md/Makefile linux-2.6.0-test9b/drivers/md/Makefile
--- linux-2.6.0-test9a/drivers/md/Makefile	2003-10-27 11:20:08.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/Makefile	2003-10-27 11:24:38.000000000 -0600
@@ -5,6 +5,8 @@
 dm-mod-objs	:= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
 		   dm-ioctl.o dm-io.o kcopyd.o dm-daemon.o
 
+dm-snapshot-objs := dm-snap.o dm-exception-store.o
+
 dm-mirror-objs	:= dm-log.o dm-raid1.o
 
 # Note: link order is important.  All raid personalities
@@ -19,5 +21,5 @@
 obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
 obj-$(CONFIG_BLK_DEV_MD)	+= md.o
 obj-$(CONFIG_BLK_DEV_DM)	+= dm-mod.o
-obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o dm-exception-store.o
+obj-$(CONFIG_DM_SNAPSHOT)	+= dm-snapshot.o
 obj-$(CONFIG_DM_MIRROR)		+= dm-mirror.o
diff -Naur linux-2.6.0-test9a/drivers/md/dm-exception-store.c linux-2.6.0-test9b/drivers/md/dm-exception-store.c
--- linux-2.6.0-test9a/drivers/md/dm-exception-store.c	2003-10-27 11:20:09.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-exception-store.c	2003-10-27 11:25:09.000000000 -0600
@@ -7,7 +7,7 @@
  */
 
 #include "dm.h"
-#include "dm-snapshot.h"
+#include "dm-snap.h"
 #include "dm-io.h"
 #include "kcopyd.h"
 
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snap.c linux-2.6.0-test9b/drivers/md/dm-snap.c
--- linux-2.6.0-test9a/drivers/md/dm-snap.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snap.c	2003-10-27 11:25:03.000000000 -0600
@@ -0,0 +1,1298 @@
+/*
+ * dm-snap.c
+ *
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "dm-snap.h"
+#include "kcopyd.h"
+
+/*
+ * FIXME: Remove this before release.
+ */
+#if 0
+#define DMDEBUG DMWARN
+#else
+#define DMDEBUG(x...)
+#endif
+
+/*
+ * The percentage increment we will wake up users at
+ */
+#define WAKE_UP_PERCENT 5
+
+/*
+ * kcopyd priority of snapshot operations
+ */
+#define SNAPSHOT_COPY_PRIORITY 2
+
+/*
+ * Each snapshot reserves this many pages for io
+ * FIXME: calculate this
+ */
+#define SNAPSHOT_PAGES 256
+
+struct pending_exception {
+	struct exception e;	/* hashed in the snapshot's pending table */
+
+	/*
+	 * bios waiting for this exception to complete, chained
+	 * through bi_next: origin writes and snapshot writes.
+	 */
+	struct bio *origin_bios;
+	struct bio *snapshot_bios;
+
+	/*
+	 * Other pending_exceptions that are processing this
+	 * chunk.  When this list is empty, we know we can
+	 * complete the origins.
+	 */
+	struct list_head siblings;
+
+	/* Pointer back to snapshot context */
+	struct dm_snapshot *snap;
+
+	/*
+	 * 1 indicates the exception has already been sent to
+	 * kcopyd.
+	 */
+	int started;
+};
+
+/*
+ * Slab caches for completed and pending exceptions, and the
+ * mempool that pending exceptions are allocated from
+ */
+static kmem_cache_t *exception_cache;
+static kmem_cache_t *pending_cache;
+static mempool_t *pending_pool;
+
+/*
+ * One of these per registered origin, held in the snapshot_origins hash
+ */
+struct origin {
+	/* The origin device */
+	struct block_device *bdev;
+
+	struct list_head hash_list;
+
+	/* List of snapshots for this origin */
+	struct list_head snapshots;
+};
+
+/*
+ * Size of the hash table for origin volumes. If we make this
+ * the size of the minors list then it should be nearly perfect
+ */
+#define ORIGIN_HASH_SIZE 256
+#define ORIGIN_MASK      0xFF
+static struct list_head *_origins;
+static struct rw_semaphore _origins_lock;
+
+static int init_origin_hash(void)
+{
+	int i;
+
+	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
+			   GFP_KERNEL);
+	if (!_origins) {
+		DMERR("Device mapper: Snapshot: unable to allocate memory");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
+		INIT_LIST_HEAD(_origins + i);
+	init_rwsem(&_origins_lock);
+
+	return 0;
+}
+
+static void exit_origin_hash(void)
+{
+	kfree(_origins);
+}
+
+static inline unsigned int origin_hash(struct block_device *bdev)
+{
+	return bdev->bd_dev & ORIGIN_MASK;
+}
+
+static struct origin *__lookup_origin(struct block_device *origin)
+{
+	struct list_head *slist;
+	struct list_head *ol;
+	struct origin *o;
+
+	ol = &_origins[origin_hash(origin)];
+	list_for_each(slist, ol) {
+		o = list_entry(slist, struct origin, hash_list);
+
+		if (bdev_equal(o->bdev, origin))
+			return o;
+	}
+
+	return NULL;
+}
+
+static void __insert_origin(struct origin *o)
+{
+	struct list_head *sl = &_origins[origin_hash(o->bdev)];
+	list_add_tail(&o->hash_list, sl);
+}
+
+/*
+ * Make a note of the snapshot and its origin so we can look it
+ * up when the origin has a write on it.
+ */
+static int register_snapshot(struct dm_snapshot *snap)
+{
+	struct origin *o;
+	struct block_device *bdev = snap->origin->bdev;
+
+	down_write(&_origins_lock);
+	o = __lookup_origin(bdev);
+
+	if (!o) {
+		/* New origin */
+		o = kmalloc(sizeof(*o), GFP_KERNEL);
+		if (!o) {
+			up_write(&_origins_lock);
+			return -ENOMEM;
+		}
+
+		/* Initialise the struct */
+		INIT_LIST_HEAD(&o->snapshots);
+		o->bdev = bdev;
+
+		__insert_origin(o);
+	}
+
+	list_add_tail(&snap->list, &o->snapshots);
+
+	up_write(&_origins_lock);
+	return 0;
+}
+
+static void unregister_snapshot(struct dm_snapshot *s)
+{
+	struct origin *o;
+
+	down_write(&_origins_lock);
+	o = __lookup_origin(s->origin->bdev);
+
+	list_del(&s->list);
+	if (list_empty(&o->snapshots)) {
+		list_del(&o->hash_list);
+		kfree(o);
+	}
+
+	up_write(&_origins_lock);
+}
+
+/*
+ * Implementation of the exception hash tables.
+ */
+static int init_exception_table(struct exception_table *et, uint32_t size)
+{
+	unsigned int i;
+
+	et->hash_mask = size - 1;
+	et->table = dm_vcalloc(size, sizeof(struct list_head));
+	if (!et->table)
+		return -ENOMEM;
+
+	for (i = 0; i < size; i++)
+		INIT_LIST_HEAD(et->table + i);
+
+	return 0;
+}
+
+static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
+{
+	struct list_head *slot, *entry, *temp;
+	struct exception *ex;
+	int i, size;
+
+	size = et->hash_mask + 1;
+	for (i = 0; i < size; i++) {
+		slot = et->table + i;
+
+		list_for_each_safe(entry, temp, slot) {
+			ex = list_entry(entry, struct exception, hash_list);
+			kmem_cache_free(mem, ex);
+		}
+	}
+
+	vfree(et->table);
+}
+
+/*
+ * FIXME: check how this hash fn is performing.
+ */
+static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
+{
+	return chunk & et->hash_mask;
+}
+
+static void insert_exception(struct exception_table *eh, struct exception *e)
+{
+	struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
+	list_add(&e->hash_list, l);
+}
+
+static inline void remove_exception(struct exception *e)
+{
+	list_del(&e->hash_list);
+}
+
+/*
+ * Return the exception data for a chunk, or NULL if not
+ * remapped.
+ */
+static struct exception *lookup_exception(struct exception_table *et,
+					  chunk_t chunk)
+{
+	struct list_head *slot, *el;
+	struct exception *e;
+
+	slot = &et->table[exception_hash(et, chunk)];
+	list_for_each(el, slot) {
+		e = list_entry(el, struct exception, hash_list);
+		if (e->old_chunk == chunk)
+			return e;
+	}
+
+	return NULL;
+}
+
+static inline struct exception *alloc_exception(void)
+{
+	struct exception *e;
+
+	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
+	if (!e)
+		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
+
+	return e;
+}
+
+static inline void free_exception(struct exception *e)
+{
+	kmem_cache_free(exception_cache, e);
+}
+
+static inline struct pending_exception *alloc_pending_exception(void)
+{
+	return mempool_alloc(pending_pool, GFP_NOIO);
+}
+
+static inline void free_pending_exception(struct pending_exception *pe)
+{
+	mempool_free(pe, pending_pool);
+}
+
+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
+{
+	struct exception *e;
+
+	e = alloc_exception();
+	if (!e)
+		return -ENOMEM;
+
+	e->old_chunk = old;
+	e->new_chunk = new;
+	insert_exception(&s->complete, e);
+	return 0;
+}
+
+/*
+ * Hard coded magic.
+ */
+static int calc_max_buckets(void)
+{
+	unsigned long mem;
+
+	mem = num_physpages << PAGE_SHIFT;
+	mem /= 50;
+	mem /= sizeof(struct list_head);
+
+	return mem;
+}
+
+/*
+ * Rounds a number down to a power of 2.
+ */
+static inline uint32_t round_down(uint32_t n)
+{
+	while (n & (n - 1))
+		n &= (n - 1);
+	return n;
+}
+
+/*
+ * Allocate room for a suitable hash table.
+ */
+static int init_hash_tables(struct dm_snapshot *s)
+{
+	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+
+	/*
+	 * Calculate based on the size of the original volume or
+	 * the COW volume...
+	 */
+	cow_dev_size = get_dev_size(s->cow->bdev);
+	origin_dev_size = get_dev_size(s->origin->bdev);
+	max_buckets = calc_max_buckets();
+
+	hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+	hash_size = min(hash_size, max_buckets);
+
+	/* Round it down to a power of 2 */
+	hash_size = round_down(hash_size);
+	if (init_exception_table(&s->complete, hash_size))
+		return -ENOMEM;
+
+	/*
+	 * Allocate hash table for in-flight exceptions
+	 * Make this smaller than the real hash table
+	 */
+	hash_size >>= 3;
+	if (!hash_size)
+		hash_size = 64;
+
+	if (init_exception_table(&s->pending, hash_size)) {
+		exit_exception_table(&s->complete, exception_cache);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Round a number up to the nearest 'size' boundary.  size must
+ * be a power of 2.
+ */
+static inline ulong round_up(ulong n, ulong size)
+{
+	size--;
+	return (n + size) & ~size;
+}
+
+/*
+ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
+ */
+static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	struct dm_snapshot *s;
+	unsigned long chunk_size;
+	int r = -EINVAL;
+	char persistent;
+	char *origin_path;
+	char *cow_path;
+	char *value;
+	int blocksize;
+
+	if (argc != 4) {
+		ti->error = "dm-snapshot: requires exactly 4 arguments";
+		r = -EINVAL;
+		goto bad1;
+	}
+
+	origin_path = argv[0];
+	cow_path = argv[1];
+	persistent = toupper(*argv[2]);
+
+	if (persistent != 'P' && persistent != 'N') {
+		ti->error = "Persistent flag is not P or N";
+		r = -EINVAL;
+		goto bad1;
+	}
+
+	chunk_size = simple_strtoul(argv[3], &value, 10);
+	if (chunk_size == 0 || *value != '\0') {	/* no trailing junk */
+		ti->error = "Invalid chunk size";
+		r = -EINVAL;
+		goto bad1;
+	}
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (s == NULL) {
+		ti->error = "Cannot allocate snapshot context private "
+		    "structure";
+		r = -ENOMEM;
+		goto bad1;
+	}
+
+	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
+	if (r) {
+		ti->error = "Cannot get origin device";
+		goto bad2;
+	}
+
+	/* FIXME: get cow length */
+	r = dm_get_device(ti, cow_path, 0, 0,
+			  FMODE_READ | FMODE_WRITE, &s->cow);
+	if (r) {
+		dm_put_device(ti, s->origin);
+		ti->error = "Cannot get COW device";
+		goto bad2;
+	}
+
+	/*
+	 * Chunk size must be multiple of page size.  Silently
+	 * round up if it's not.
+	 */
+	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
+
+	/* Validate the chunk size against the device block size */
+	/* FIXME: check this, also ugly */
+	blocksize = s->cow->bdev->bd_disk->queue->hardsect_size;
+	if (chunk_size % (blocksize >> 9)) {
+		ti->error = "Chunk size is not a multiple of device blocksize";
+		r = -EINVAL;
+		goto bad3;
+	}
+
+	/* Check chunk_size is a power of 2 */
+	if (chunk_size & (chunk_size - 1)) {
+		ti->error = "Chunk size is not a power of 2";
+		r = -EINVAL;
+		goto bad3;
+	}
+
+	s->chunk_size = chunk_size;
+	s->chunk_mask = chunk_size - 1;
+	s->type = persistent;
+	for (s->chunk_shift = 0; chunk_size;
+	     s->chunk_shift++, chunk_size >>= 1)
+		;
+	s->chunk_shift--;	/* the loop counts one bit too many */
+
+	s->valid = 1;
+	s->have_metadata = 0;
+	s->last_percent = 0;
+	init_rwsem(&s->lock);
+	s->table = ti->table;
+
+	/* Allocate hash table for COW data */
+	if (init_hash_tables(s)) {
+		ti->error = "Unable to allocate hash table space";
+		r = -ENOMEM;
+		goto bad3;
+	}
+
+	/*
+	 * Check the persistent flag - done here because we need the iobuf
+	 * to check the LV header
+	 */
+	s->store.snap = s;
+
+	if (persistent == 'P')
+		r = dm_create_persistent(&s->store, s->chunk_size);
+	else
+		r = dm_create_transient(&s->store, s, blocksize);
+
+	if (r) {
+		ti->error = "Couldn't create exception store";
+		r = -EINVAL;
+		goto bad4;
+	}
+
+	r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
+	if (r) {
+		ti->error = "Could not create kcopyd client";
+		goto bad5;
+	}
+
+	/* Add snapshot to the list of snapshots for this origin */
+	if (register_snapshot(s)) {
+		r = -EINVAL;
+		ti->error = "Cannot register snapshot origin";
+		goto bad6;
+	}
+
+	ti->private = s;
+	ti->split_io = s->chunk_size;	/* local chunk_size is 0 by now */
+
+	return 0;
+
+ bad6:
+	kcopyd_client_destroy(s->kcopyd_client);
+
+ bad5:
+	s->store.destroy(&s->store);
+
+ bad4:
+	exit_exception_table(&s->pending, pending_cache);
+	exit_exception_table(&s->complete, exception_cache);
+
+ bad3:
+	dm_put_device(ti, s->cow);
+	dm_put_device(ti, s->origin);
+
+ bad2:
+	kfree(s);
+
+ bad1:
+	return r;
+}
+
+static void snapshot_dtr(struct dm_target *ti)
+{
+	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+
+	dm_table_event(ti->table);
+
+	unregister_snapshot(s);
+
+	exit_exception_table(&s->pending, pending_cache);
+	exit_exception_table(&s->complete, exception_cache);
+
+	/* Deallocate memory used */
+	s->store.destroy(&s->store);
+
+	dm_put_device(ti, s->origin);
+	dm_put_device(ti, s->cow);
+	kcopyd_client_destroy(s->kcopyd_client);
+	kfree(s);
+}
+
+/*
+ * We hold lists of bios, using the bi_next field.
+ */
+static void queue_bio(struct bio **queue, struct bio *bio)
+{
+	bio->bi_next = *queue;
+	*queue = bio;
+}
+
+/*
+ * FIXME: inefficient.
+ */
+static void queue_bios(struct bio **queue, struct bio *bios)
+{
+	while (*queue)
+		queue = &((*queue)->bi_next);
+
+	*queue = bios;
+}
+
+/*
+ * Flush a list of buffers.
+ */
+static void flush_bios(struct bio *bio)
+{
+	struct bio *n;
+
+	DMDEBUG("begin flush");
+	while (bio) {
+		n = bio->bi_next;
+		bio->bi_next = NULL;
+		DMDEBUG("flushing %p", bio);
+		generic_make_request(bio);
+		bio = n;
+	}
+
+	blk_run_queues();
+}
+
+/*
+ * Error a list of buffers.
+ */
+static void error_bios(struct bio *bio)
+{
+	struct bio *n;
+
+	while (bio) {
+		n = bio->bi_next;
+		bio->bi_next = NULL;
+		bio_io_error(bio, bio->bi_size);
+		bio = n;
+	}
+}
+
+static struct bio *__flush_bios(struct pending_exception *pe)
+{
+	struct pending_exception *sibling;
+
+	if (list_empty(&pe->siblings))
+		return pe->origin_bios;
+
+	sibling = list_entry(pe->siblings.next,
+			     struct pending_exception, siblings);
+
+	list_del(&pe->siblings);
+
+	/* FIXME: I think there's a race on SMP machines here, add spin lock */
+	queue_bios(&sibling->origin_bios, pe->origin_bios);
+
+	return NULL;
+}
+
+static void check_free_space(struct dm_snapshot *s)
+{
+#if 0
+	sector_t numerator, denominator;
+	double n, d;
+	unsigned pc;
+
+	if (!s->store.fraction_full)
+		return;
+
+	s->store.fraction_full(&s->store, &numerator, &denominator);
+	n = (double) numerator;
+	d = (double) denominator;
+
+	pc = (int) (n / d);
+
+	if (pc >= s->last_percent + WAKE_UP_PERCENT) {
+		dm_table_event(s->table);
+		s->last_percent = pc - pc % WAKE_UP_PERCENT;
+	}
+#endif
+}
+
+/*
+ * Finish a pending exception and free it.  On success it is recorded in the
+ * completed table; otherwise the snapshot is invalidated and its bios errored.
+ */
+static void pending_complete(struct pending_exception *pe, int success)
+{
+	struct exception *e;
+	struct dm_snapshot *s = pe->snap;
+	struct bio *flush = NULL;
+
+	if (success) {
+		e = alloc_exception();
+		if (!e) {
+			DMWARN("Unable to allocate exception.");
+			down_write(&s->lock);
+			s->store.drop_snapshot(&s->store);
+			s->valid = 0;
+			remove_exception(&pe->e);	/* pe is freed below */
+			flush = __flush_bios(pe);
+			up_write(&s->lock);
+			error_bios(pe->snapshot_bios);
+			goto out;
+		}
+		memcpy(e, &pe->e, sizeof(*e));
+		/*
+		 * Add a proper exception, and remove the
+		 * in-flight exception from the list.
+		 */
+		down_write(&s->lock);
+		insert_exception(&s->complete, e);
+		remove_exception(&pe->e);
+		flush = __flush_bios(pe);
+		up_write(&s->lock);
+
+		/* Submit the snapshot writes that were waiting on this chunk */
+		flush_bios(pe->snapshot_bios);
+		DMDEBUG("Exception completed successfully.");
+
+		/* Notify any interested parties */
+		//check_free_space(s);
+	} else {
+		/* Read/write error - snapshot is unusable */
+		down_write(&s->lock);
+		if (s->valid)
+			DMERR("Error reading/writing snapshot");
+		s->store.drop_snapshot(&s->store);
+		s->valid = 0;
+		remove_exception(&pe->e);
+		flush = __flush_bios(pe);
+		up_write(&s->lock);
+
+		error_bios(pe->snapshot_bios);
+		dm_table_event(s->table);
+		DMDEBUG("Exception failed.");
+	}
+
+ out:
+	free_pending_exception(pe);
+
+	if (flush)
+		flush_bios(flush);
+}
+
+static void commit_callback(void *context, int success)
+{
+	struct pending_exception *pe = (struct pending_exception *) context;
+	pending_complete(pe, success);
+}
+
+/*
+ * Called when the copy I/O has finished.  kcopyd actually runs
+ * this code so don't block.
+ */
+static void copy_callback(int read_err, unsigned int write_err, void *context)
+{
+	struct pending_exception *pe = (struct pending_exception *) context;
+	struct dm_snapshot *s = pe->snap;
+
+	if (read_err || write_err)
+		pending_complete(pe, 0);
+
+	else
+		/* Update the metadata if we are persistent */
+		s->store.commit_exception(&s->store, &pe->e, commit_callback,
+					  pe);
+}
+
+/*
+ * Dispatches the copy operation to kcopyd.
+ */
+static inline void start_copy(struct pending_exception *pe)
+{
+	struct dm_snapshot *s = pe->snap;
+	struct io_region src, dest;
+	struct block_device *bdev = s->origin->bdev;
+	sector_t dev_size;
+
+	dev_size = get_dev_size(bdev);
+
+	src.bdev = bdev;
+	src.sector = chunk_to_sector(s, pe->e.old_chunk);
+	src.count = min(s->chunk_size, dev_size - src.sector);
+
+	dest.bdev = s->cow->bdev;
+	dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+	dest.count = src.count;
+
+	/* Hand over to kcopyd */
+	DMDEBUG("starting exception copy");
+	kcopyd_copy(s->kcopyd_client,
+		    &src, 1, &dest, 0, copy_callback, pe);
+}
+
+/*
+ * Looks to see if this snapshot already has a pending exception
+ * for this chunk, otherwise it allocates a new one and inserts
+ * it into the pending table.
+ *
+ * NOTE: a write lock must be held on snap->lock before calling
+ * this.
+ */
+static struct pending_exception *
+__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
+{
+	struct exception *e;
+	struct pending_exception *pe;
+	chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
+
+	/*
+	 * Is there a pending exception for this already ?
+	 */
+	e = lookup_exception(&s->pending, chunk);
+	if (e) {
+		/* cast the exception to a pending exception */
+		pe = list_entry(e, struct pending_exception, e);
+
+	} else {
+		/*
+		 * Create a new pending exception, we don't want
+		 * to hold the lock while we do this.
+		 */
+		up_write(&s->lock);
+		pe = alloc_pending_exception();
+		down_write(&s->lock);
+
+		e = lookup_exception(&s->pending, chunk);
+		if (e) {
+			free_pending_exception(pe);
+			pe = list_entry(e, struct pending_exception, e);
+		} else {
+			pe->e.old_chunk = chunk;
+			pe->origin_bios = pe->snapshot_bios = NULL;
+			INIT_LIST_HEAD(&pe->siblings);
+			pe->snap = s;
+			pe->started = 0;
+
+			if (s->store.prepare_exception(&s->store, &pe->e)) {
+				free_pending_exception(pe);
+				s->valid = 0;
+				return NULL;
+			}
+
+			insert_exception(&s->pending, &pe->e);
+		}
+	}
+
+	return pe;
+}
+
+static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
+				   struct bio *bio)
+{
+	bio->bi_bdev = s->cow->bdev;
+	bio->bi_sector = chunk_to_sector(s, e->new_chunk) +
+		(bio->bi_sector & s->chunk_mask);
+}
+
+static int snapshot_map(struct dm_target *ti, struct bio *bio,
+			union map_info *map_context)
+{
+	struct exception *e;
+	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+	int r = 1;
+	chunk_t chunk;
+	struct pending_exception *pe;
+
+	chunk = sector_to_chunk(s, bio->bi_sector);
+
+	/* Full snapshots are not usable */
+	if (!s->valid)
+		return -1;
+
+	/*
+	 * Write to snapshot - higher level takes care of RW/RO
+	 * flags so we should only get this if we are
+	 * writeable.
+	 */
+	if (bio_rw(bio) == WRITE) {
+
+		/* FIXME: should only take write lock if we need
+		 * to copy an exception */
+		down_write(&s->lock);
+
+		/* If the block is already remapped - use that, else remap it */
+		e = lookup_exception(&s->complete, chunk);
+		if (e) {
+			remap_exception(s, e, bio);
+			up_write(&s->lock);
+
+		} else {
+			pe = __find_pending_exception(s, bio);
+
+			if (!pe) {
+				s->store.drop_snapshot(&s->store);
+				s->valid = 0;
+				r = -EIO;
+				up_write(&s->lock);
+			} else {
+				remap_exception(s, &pe->e, bio);
+				queue_bio(&pe->snapshot_bios, bio);
+
+				if (!pe->started) {
+					/* this is protected by snap->lock */
+					pe->started = 1;
+					up_write(&s->lock);
+					start_copy(pe);
+				} else
+					up_write(&s->lock);
+				r = 0;
+			}
+		}
+
+	} else {
+		/*
+		 * FIXME: this read path scares me because we
+		 * always use the origin when we have a pending
+		 * exception.  However I can't think of a
+		 * situation where this is wrong - ejt.
+		 */
+
+		/* Do reads */
+		down_read(&s->lock);
+
+		/* See if it it has been remapped */
+		e = lookup_exception(&s->complete, chunk);
+		if (e)
+			remap_exception(s, e, bio);
+		else
+			bio->bi_bdev = s->origin->bdev;
+
+		up_read(&s->lock);
+	}
+
+	return r;
+}
+
+void snapshot_resume(struct dm_target *ti)
+{
+	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+
+	if (s->have_metadata)
+		return;
+
+	if (s->store.read_metadata(&s->store)) {
+		down_write(&s->lock);
+		s->valid = 0;
+		up_write(&s->lock);
+	}
+
+	s->have_metadata = 1;
+}
+
+/*
+ * Fill result (at most maxlen bytes) with the snapshot's state for
+ * STATUSTYPE_INFO or its constructor arguments for STATUSTYPE_TABLE.
+ */
+static int snapshot_status(struct dm_target *ti, status_type_t type,
+			   char *result, unsigned int maxlen)
+{
+	struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
+	char cow[32];
+	char org[32];
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		if (!snap->valid)
+			snprintf(result, maxlen, "Invalid");
+		else {
+			if (snap->store.fraction_full) {
+				sector_t numerator, denominator;
+				snap->store.fraction_full(&snap->store,
+							  &numerator,
+							  &denominator);
+				snprintf(result, maxlen,
+					 SECTOR_FORMAT "/" SECTOR_FORMAT,
+					 numerator, denominator);
+			}
+			else
+				snprintf(result, maxlen, "Unknown");
+		}
+		break;
+
+	case STATUSTYPE_TABLE:
+		/* chunk_size is handled as a sector_t elsewhere: print it as one */
+		format_dev_t(cow, snap->cow->bdev->bd_dev);
+		format_dev_t(org, snap->origin->bdev->bd_dev);
+		snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT, org, cow,
+			 snap->type, snap->chunk_size);
+		break;
+	}
+
+	return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Origin methods
+ *---------------------------------------------------------------*/
+static void list_merge(struct list_head *l1, struct list_head *l2)
+{
+	struct list_head *l1_n, *l2_p;
+
+	l1_n = l1->next;
+	l2_p = l2->prev;
+
+	l1->next = l2;
+	l2->prev = l1;
+
+	l2_p->next = l1_n;
+	l1_n->prev = l2_p;
+}
+
+static int __origin_write(struct list_head *snapshots, struct bio *bio)
+{
+	int r = 1, first = 1;
+	struct list_head *sl;
+	struct dm_snapshot *snap;
+	struct exception *e;
+	struct pending_exception *pe, *last = NULL;
+	chunk_t chunk;
+
+	/* Do all the snapshots on this origin */
+	list_for_each(sl, snapshots) {
+		snap = list_entry(sl, struct dm_snapshot, list);
+
+		/* Only deal with valid snapshots */
+		if (!snap->valid)
+			continue;
+
+		down_write(&snap->lock);
+
+		/*
+		 * Remember, different snapshots can have
+		 * different chunk sizes.
+		 */
+		chunk = sector_to_chunk(snap, bio->bi_sector);
+
+		/*
+		 * Check exception table to see if block
+		 * is already remapped in this snapshot
+		 * and trigger an exception if not.
+		 */
+		e = lookup_exception(&snap->complete, chunk);
+		if (!e) {
+			pe = __find_pending_exception(snap, bio);
+			if (!pe) {
+				snap->store.drop_snapshot(&snap->store);
+				snap->valid = 0;
+
+			} else {
+				if (last)
+					list_merge(&pe->siblings,
+						   &last->siblings);
+
+				last = pe;
+				r = 0;
+			}
+		}
+
+		up_write(&snap->lock);
+	}
+
+	/*
+	 * Now that we have a complete pe list we can start the copying.
+	 */
+	if (last) {
+		pe = last;
+		do {
+			down_write(&pe->snap->lock);
+			if (first)
+				queue_bio(&pe->origin_bios, bio);
+			if (!pe->started) {
+				pe->started = 1;
+				up_write(&pe->snap->lock);
+				start_copy(pe);
+			} else
+				up_write(&pe->snap->lock);
+			first = 0;
+			pe = list_entry(pe->siblings.next,
+					struct pending_exception, siblings);
+
+		} while (pe != last);
+	}
+
+	return r;
+}
+
+/*
+ * Called on a write from the origin driver.
+ */
+int do_origin(struct dm_dev *origin, struct bio *bio)
+{
+	struct origin *o;
+	int r;
+
+	down_read(&_origins_lock);
+	o = __lookup_origin(origin->bdev);
+	if (!o)
+		BUG();
+
+	r = __origin_write(&o->snapshots, bio);
+	up_read(&_origins_lock);
+
+	return r;
+}
+
+/*
+ * Origin: maps a linear range of a device, with hooks for snapshotting.
+ */
+
+/*
+ * Construct an origin mapping: <dev_path>
+ * The context for an origin is merely a 'struct dm_dev *'
+ * pointing to the real device.
+ */
+static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+	int r;
+	struct dm_dev *dev;
+
+	if (argc != 1) {
+		ti->error = "dm-origin: incorrect number of arguments";
+		return -EINVAL;
+	}
+
+	r = dm_get_device(ti, argv[0], 0, ti->len,
+			  dm_table_get_mode(ti->table), &dev);
+	if (r) {
+		ti->error = "Cannot get target device";
+		return r;
+	}
+
+	ti->private = dev;
+	return 0;
+}
+
+static void origin_dtr(struct dm_target *ti)
+{
+	struct dm_dev *dev = (struct dm_dev *) ti->private;
+	dm_put_device(ti, dev);
+}
+
+static int origin_map(struct dm_target *ti, struct bio *bio,
+		      union map_info *map_context)
+{
+	struct dm_dev *dev = (struct dm_dev *) ti->private;
+	bio->bi_bdev = dev->bdev;
+
+	/* Only tell snapshots if this is a write */
+	return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1;
+}
+
+/* Smaller of l and r, ignoring an operand that is 0; may evaluate l and r twice */
+#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
+/*
+ * Set the target "split_io" field to the minimum of all the snapshots'
+ * chunk sizes.
+ */
+static void origin_resume(struct dm_target *ti)
+{
+	struct dm_dev *dev = (struct dm_dev *) ti->private;
+	struct dm_snapshot *snap;
+	struct origin *o;
+	struct list_head *sl;
+	chunk_t chunk_size = 0;
+
+	down_read(&_origins_lock);
+	o = __lookup_origin(dev->bdev);
+	if (o) {
+		list_for_each(sl, &o->snapshots) {
+			snap = list_entry(sl, struct dm_snapshot, list);
+			chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+		}
+	}
+	up_read(&_origins_lock);
+
+	ti->split_io = chunk_size;
+}
+
+static int origin_status(struct dm_target *ti, status_type_t type, char *result,
+			 unsigned int maxlen)
+{
+	struct dm_dev *dev = (struct dm_dev *) ti->private;
+	char buffer[32];
+
+	switch (type) {
+	case STATUSTYPE_INFO:
+		result[0] = '\0';
+		break;
+
+	case STATUSTYPE_TABLE:
+		format_dev_t(buffer, dev->bdev->bd_dev);
+		snprintf(result, maxlen, "%s", buffer);
+		break;
+	}
+
+	return 0;
+}
+
+static struct target_type origin_target = {	/* hooks writes to the origin */
+	.name	= "snapshot-origin",
+	.module	= THIS_MODULE,
+	.ctr	= origin_ctr,
+	.dtr	= origin_dtr,
+	.map	= origin_map,
+	.resume	= origin_resume,
+	.status	= origin_status,
+};
+
+static struct target_type snapshot_target = {	/* the snapshot device itself */
+	.name	= "snapshot",
+	.module	= THIS_MODULE,
+	.ctr	= snapshot_ctr,
+	.dtr	= snapshot_dtr,
+	.map	= snapshot_map,
+	.resume	= snapshot_resume,
+	.status	= snapshot_status,
+};
+
+static int __init dm_snapshot_init(void)
+{
+	int r;
+
+	r = dm_register_target(&snapshot_target);
+	if (r) {
+		DMERR("snapshot target register failed %d", r);
+		return r;
+	}
+
+	r = dm_register_target(&origin_target);
+	if (r < 0) {
+		DMERR("Device mapper: Origin: register failed %d\n", r);
+		goto bad1;
+	}
+
+	r = init_origin_hash();
+	if (r) {
+		DMERR("init_origin_hash failed.");
+		goto bad2;
+	}
+
+	exception_cache = kmem_cache_create("dm-snapshot-ex",
+					    sizeof(struct exception),
+					    __alignof__(struct exception),
+					    0, NULL, NULL);
+	if (!exception_cache) {
+		DMERR("Couldn't create exception cache.");
+		r = -ENOMEM;
+		goto bad3;
+	}
+
+	pending_cache =
+	    kmem_cache_create("dm-snapshot-in",
+			      sizeof(struct pending_exception),
+			      __alignof__(struct pending_exception),
+			      0, NULL, NULL);
+	if (!pending_cache) {
+		DMERR("Couldn't create pending cache.");
+		r = -ENOMEM;
+		goto bad4;
+	}
+
+	pending_pool = mempool_create(128, mempool_alloc_slab,
+				      mempool_free_slab, pending_cache);
+	if (!pending_pool) {
+		DMERR("Couldn't create pending pool.");
+		r = -ENOMEM;
+		goto bad5;
+	}
+
+	return 0;
+
+      bad5:
+	kmem_cache_destroy(pending_cache);
+      bad4:
+	kmem_cache_destroy(exception_cache);
+      bad3:
+	exit_origin_hash();
+      bad2:
+	dm_unregister_target(&origin_target);
+      bad1:
+	dm_unregister_target(&snapshot_target);
+	return r;
+}
+
+static void __exit dm_snapshot_exit(void)
+{
+	int r;
+
+	r = dm_unregister_target(&snapshot_target);
+	if (r)
+		DMERR("snapshot unregister failed %d", r);
+
+	r = dm_unregister_target(&origin_target);
+	if (r)
+		DMERR("origin unregister failed %d", r);
+
+	exit_origin_hash();
+	mempool_destroy(pending_pool);
+	kmem_cache_destroy(pending_cache);
+	kmem_cache_destroy(exception_cache);
+}
+
+/* Module hooks */
+module_init(dm_snapshot_init);
+module_exit(dm_snapshot_exit);
+
+MODULE_DESCRIPTION(DM_NAME " snapshot target");
+MODULE_AUTHOR("Joe Thornber");
+MODULE_LICENSE("GPL");
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snap.h linux-2.6.0-test9b/drivers/md/dm-snap.h
--- linux-2.6.0-test9a/drivers/md/dm-snap.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snap.h	2003-10-27 11:20:21.000000000 -0600
@@ -0,0 +1,161 @@
+/*
+ * dm-snap.h
+ *
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_SNAPSHOT_H
+#define DM_SNAPSHOT_H
+
+#include "dm.h"
+#include <linux/blkdev.h>
+
+struct exception_table {
+	uint32_t hash_mask;
+	struct list_head *table;
+};
+
+/*
+ * The snapshot code deals with largish chunks of the disk at a
+ * time. Typically 64k - 256k.
+ */
+/* FIXME: can we get away with limiting these to a uint32_t ? */
+typedef sector_t chunk_t;
+
+/*
+ * An exception is used where an old chunk of data has been
+ * replaced by a new one.
+ */
+struct exception {
+	struct list_head hash_list;
+
+	chunk_t old_chunk;
+	chunk_t new_chunk;
+};
+
+/*
+ * Abstraction to handle the meta/layout of exception stores (the
+ * COW device).
+ */
+struct exception_store {
+
+	/*
+	 * Destroys this object when you've finished with it.
+	 */
+	void (*destroy) (struct exception_store *store);
+
+	/*
+	 * The target shouldn't read the COW device until this is
+	 * called.
+	 */
+	int (*read_metadata) (struct exception_store *store);
+
+	/*
+	 * Find somewhere to store the next exception.
+	 */
+	int (*prepare_exception) (struct exception_store *store,
+				  struct exception *e);
+
+	/*
+	 * Update the metadata with this exception.
+	 */
+	void (*commit_exception) (struct exception_store *store,
+				  struct exception *e,
+				  void (*callback) (void *, int success),
+				  void *callback_context);
+
+	/*
+	 * The snapshot is invalid, note this in the metadata.
+	 */
+	void (*drop_snapshot) (struct exception_store *store);
+
+	/*
+	 * Return how full the snapshot is.
+	 */
+	void (*fraction_full) (struct exception_store *store,
+			       sector_t *numerator,
+			       sector_t *denominator);
+
+	struct dm_snapshot *snap;
+	void *context;
+};
+
+struct dm_snapshot {
+	struct rw_semaphore lock;
+	struct dm_table *table;
+
+	struct dm_dev *origin;
+	struct dm_dev *cow;
+
+	/* List of snapshots per Origin */
+	struct list_head list;
+
+	/* Size of data blocks saved - must be a power of 2 */
+	chunk_t chunk_size;
+	chunk_t chunk_mask;
+	chunk_t chunk_shift;
+
+	/* You can't use a snapshot if this is 0 (e.g. if full) */
+	int valid;
+	int have_metadata;
+
+	/* Used for display of table */
+	char type;
+
+	/* The last percentage we notified */
+	int last_percent;
+
+	struct exception_table pending;
+	struct exception_table complete;
+
+	/* The on disk metadata handler */
+	struct exception_store store;
+
+	struct kcopyd_client *kcopyd_client;
+};
+
+/*
+ * Used by the exception stores to load exceptions when
+ * initialising.
+ */
+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
+
+/*
+ * Constructors for the persistent and transient exception
+ * stores; a store is torn down via its destroy() method.
+ */
+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size);
+
+int dm_create_transient(struct exception_store *store,
+			struct dm_snapshot *s, int blocksize);
+
+/*
+ * Return the number of sectors in the device.
+ */
+static inline sector_t get_dev_size(struct block_device *bdev)
+{
+	return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+}
+
+static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
+{
+	return (sector & ~s->chunk_mask) >> s->chunk_shift;
+}
+
+static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
+{
+	return chunk << s->chunk_shift;
+}
+
+static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+	/*
+	 * There is only ever one instance of a particular block
+	 * device so we can compare pointers safely.
+	 */
+	return lhs == rhs;
+}
+
+#endif
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snapshot.c linux-2.6.0-test9b/drivers/md/dm-snapshot.c
--- linux-2.6.0-test9a/drivers/md/dm-snapshot.c	2003-10-27 11:20:09.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snapshot.c	1969-12-31 18:00:00.000000000 -0600
@@ -1,1269 +0,0 @@
-/*
- * dm-snapshot.c
- *
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/ctype.h>
-#include <linux/device-mapper.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kdev_t.h>
-#include <linux/list.h>
-#include <linux/mempool.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "dm-snapshot.h"
-#include "kcopyd.h"
-
-/*
- * FIXME: Remove this before release.
- */
-#if 0
-#define DMDEBUG DMWARN
-#else
-#define DMDEBUG(x...)
-#endif
-
-/*
- * The percentage increment we will wake up users at
- */
-#define WAKE_UP_PERCENT 5
-
-/*
- * kcopyd priority of snapshot operations
- */
-#define SNAPSHOT_COPY_PRIORITY 2
-
-/*
- * Each snapshot reserves this many pages for io
- * FIXME: calculate this
- */
-#define SNAPSHOT_PAGES 256
-
-struct pending_exception {
-	struct exception e;
-
-	/*
-	 * Origin buffers waiting for this to complete are held
-	 * in a list (using b_reqnext).
-	 */
-	struct bio *origin_bios;
-	struct bio *snapshot_bios;
-
-	/*
-	 * Other pending_exceptions that are processing this
-	 * chunk.  When this list is empty, we know we can
-	 * complete the origins.
-	 */
-	struct list_head siblings;
-
-	/* Pointer back to snapshot context */
-	struct dm_snapshot *snap;
-
-	/*
-	 * 1 indicates the exception has already been sent to
-	 * kcopyd.
-	 */
-	int started;
-};
-
-/*
- * Hash table mapping origin volumes to lists of snapshots and
- * a lock to protect it
- */
-static kmem_cache_t *exception_cache;
-static kmem_cache_t *pending_cache;
-static mempool_t *pending_pool;
-
-/*
- * One of these per registered origin, held in the snapshot_origins hash
- */
-struct origin {
-	/* The origin device */
-	struct block_device *bdev;
-
-	struct list_head hash_list;
-
-	/* List of snapshots for this origin */
-	struct list_head snapshots;
-};
-
-/*
- * Size of the hash table for origin volumes. If we make this
- * the size of the minors list then it should be nearly perfect
- */
-#define ORIGIN_HASH_SIZE 256
-#define ORIGIN_MASK      0xFF
-static struct list_head *_origins;
-static struct rw_semaphore _origins_lock;
-
-static int init_origin_hash(void)
-{
-	int i;
-
-	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
-			   GFP_KERNEL);
-	if (!_origins) {
-		DMERR("Device mapper: Snapshot: unable to allocate memory");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
-		INIT_LIST_HEAD(_origins + i);
-	init_rwsem(&_origins_lock);
-
-	return 0;
-}
-
-static void exit_origin_hash(void)
-{
-	kfree(_origins);
-}
-
-static inline unsigned int origin_hash(struct block_device *bdev)
-{
-	return bdev->bd_dev & ORIGIN_MASK;
-}
-
-static struct origin *__lookup_origin(struct block_device *origin)
-{
-	struct list_head *slist;
-	struct list_head *ol;
-	struct origin *o;
-
-	ol = &_origins[origin_hash(origin)];
-	list_for_each(slist, ol) {
-		o = list_entry(slist, struct origin, hash_list);
-
-		if (bdev_equal(o->bdev, origin))
-			return o;
-	}
-
-	return NULL;
-}
-
-static void __insert_origin(struct origin *o)
-{
-	struct list_head *sl = &_origins[origin_hash(o->bdev)];
-	list_add_tail(&o->hash_list, sl);
-}
-
-/*
- * Make a note of the snapshot and its origin so we can look it
- * up when the origin has a write on it.
- */
-static int register_snapshot(struct dm_snapshot *snap)
-{
-	struct origin *o;
-	struct block_device *bdev = snap->origin->bdev;
-
-	down_write(&_origins_lock);
-	o = __lookup_origin(bdev);
-
-	if (!o) {
-		/* New origin */
-		o = kmalloc(sizeof(*o), GFP_KERNEL);
-		if (!o) {
-			up_write(&_origins_lock);
-			return -ENOMEM;
-		}
-
-		/* Initialise the struct */
-		INIT_LIST_HEAD(&o->snapshots);
-		o->bdev = bdev;
-
-		__insert_origin(o);
-	}
-
-	list_add_tail(&snap->list, &o->snapshots);
-
-	up_write(&_origins_lock);
-	return 0;
-}
-
-static void unregister_snapshot(struct dm_snapshot *s)
-{
-	struct origin *o;
-
-	down_write(&_origins_lock);
-	o = __lookup_origin(s->origin->bdev);
-
-	list_del(&s->list);
-	if (list_empty(&o->snapshots)) {
-		list_del(&o->hash_list);
-		kfree(o);
-	}
-
-	up_write(&_origins_lock);
-}
-
-/*
- * Implementation of the exception hash tables.
- */
-static int init_exception_table(struct exception_table *et, uint32_t size)
-{
-	unsigned int i;
-
-	et->hash_mask = size - 1;
-	et->table = dm_vcalloc(size, sizeof(struct list_head));
-	if (!et->table)
-		return -ENOMEM;
-
-	for (i = 0; i < size; i++)
-		INIT_LIST_HEAD(et->table + i);
-
-	return 0;
-}
-
-static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
-{
-	struct list_head *slot, *entry, *temp;
-	struct exception *ex;
-	int i, size;
-
-	size = et->hash_mask + 1;
-	for (i = 0; i < size; i++) {
-		slot = et->table + i;
-
-		list_for_each_safe(entry, temp, slot) {
-			ex = list_entry(entry, struct exception, hash_list);
-			kmem_cache_free(mem, ex);
-		}
-	}
-
-	vfree(et->table);
-}
-
-/*
- * FIXME: check how this hash fn is performing.
- */
-static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
-{
-	return chunk & et->hash_mask;
-}
-
-static void insert_exception(struct exception_table *eh, struct exception *e)
-{
-	struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
-	list_add(&e->hash_list, l);
-}
-
-static inline void remove_exception(struct exception *e)
-{
-	list_del(&e->hash_list);
-}
-
-/*
- * Return the exception data for a sector, or NULL if not
- * remapped.
- */
-static struct exception *lookup_exception(struct exception_table *et,
-					  chunk_t chunk)
-{
-	struct list_head *slot, *el;
-	struct exception *e;
-
-	slot = &et->table[exception_hash(et, chunk)];
-	list_for_each(el, slot) {
-		e = list_entry(el, struct exception, hash_list);
-		if (e->old_chunk == chunk)
-			return e;
-	}
-
-	return NULL;
-}
-
-static inline struct exception *alloc_exception(void)
-{
-	struct exception *e;
-
-	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
-	if (!e)
-		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
-
-	return e;
-}
-
-static inline void free_exception(struct exception *e)
-{
-	kmem_cache_free(exception_cache, e);
-}
-
-static inline struct pending_exception *alloc_pending_exception(void)
-{
-	return mempool_alloc(pending_pool, GFP_NOIO);
-}
-
-static inline void free_pending_exception(struct pending_exception *pe)
-{
-	mempool_free(pe, pending_pool);
-}
-
-int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
-{
-	struct exception *e;
-
-	e = alloc_exception();
-	if (!e)
-		return -ENOMEM;
-
-	e->old_chunk = old;
-	e->new_chunk = new;
-	insert_exception(&s->complete, e);
-	return 0;
-}
-
-/*
- * Hard coded magic.
- */
-static int calc_max_buckets(void)
-{
-	unsigned long mem;
-
-	mem = num_physpages << PAGE_SHIFT;
-	mem /= 50;
-	mem /= sizeof(struct list_head);
-
-	return mem;
-}
-
-/*
- * Rounds a number down to a power of 2.
- */
-static inline uint32_t round_down(uint32_t n)
-{
-	while (n & (n - 1))
-		n &= (n - 1);
-	return n;
-}
-
-/*
- * Allocate room for a suitable hash table.
- */
-static int init_hash_tables(struct dm_snapshot *s)
-{
-	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
-
-	/*
-	 * Calculate based on the size of the original volume or
-	 * the COW volume...
-	 */
-	cow_dev_size = get_dev_size(s->cow->bdev);
-	origin_dev_size = get_dev_size(s->origin->bdev);
-	max_buckets = calc_max_buckets();
-
-	hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
-	hash_size = min(hash_size, max_buckets);
-
-	/* Round it down to a power of 2 */
-	hash_size = round_down(hash_size);
-	if (init_exception_table(&s->complete, hash_size))
-		return -ENOMEM;
-
-	/*
-	 * Allocate hash table for in-flight exceptions
-	 * Make this smaller than the real hash table
-	 */
-	hash_size >>= 3;
-	if (!hash_size)
-		hash_size = 64;
-
-	if (init_exception_table(&s->pending, hash_size)) {
-		exit_exception_table(&s->complete, exception_cache);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-/*
- * Round a number up to the nearest 'size' boundary.  size must
- * be a power of 2.
- */
-static inline ulong round_up(ulong n, ulong size)
-{
-	size--;
-	return (n + size) & ~size;
-}
-
-/*
- * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
- */
-static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
-{
-	struct dm_snapshot *s;
-	unsigned long chunk_size;
-	int r = -EINVAL;
-	char persistent;
-	char *origin_path;
-	char *cow_path;
-	char *value;
-	int blocksize;
-
-	if (argc < 4) {
-		ti->error = "dm-snapshot: requires exactly 4 arguments";
-		r = -EINVAL;
-		goto bad1;
-	}
-
-	origin_path = argv[0];
-	cow_path = argv[1];
-	persistent = toupper(*argv[2]);
-
-	if (persistent != 'P' && persistent != 'N') {
-		ti->error = "Persistent flag is not P or N";
-		r = -EINVAL;
-		goto bad1;
-	}
-
-	chunk_size = simple_strtoul(argv[3], &value, 10);
-	if (chunk_size == 0 || value == NULL) {
-		ti->error = "Invalid chunk size";
-		r = -EINVAL;
-		goto bad1;
-	}
-
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
-	if (s == NULL) {
-		ti->error = "Cannot allocate snapshot context private "
-		    "structure";
-		r = -ENOMEM;
-		goto bad1;
-	}
-
-	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
-	if (r) {
-		ti->error = "Cannot get origin device";
-		goto bad2;
-	}
-
-	/* FIXME: get cow length */
-	r = dm_get_device(ti, cow_path, 0, 0,
-			  FMODE_READ | FMODE_WRITE, &s->cow);
-	if (r) {
-		dm_put_device(ti, s->origin);
-		ti->error = "Cannot get COW device";
-		goto bad2;
-	}
-
-	/*
-	 * Chunk size must be multiple of page size.  Silently
-	 * round up if it's not.
-	 */
-	chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
-	/* Validate the chunk size against the device block size */
-	/* FIXME: check this, also ugly */
-	blocksize = s->cow->bdev->bd_disk->queue->hardsect_size;
-	if (chunk_size % (blocksize >> 9)) {
-		ti->error = "Chunk size is not a multiple of device blocksize";
-		r = -EINVAL;
-		goto bad3;
-	}
-
-	/* Check chunk_size is a power of 2 */
-	if (chunk_size & (chunk_size - 1)) {
-		ti->error = "Chunk size is not a power of 2";
-		r = -EINVAL;
-		goto bad3;
-	}
-
-	s->chunk_size = chunk_size;
-	s->chunk_mask = chunk_size - 1;
-	s->type = persistent;
-	for (s->chunk_shift = 0; chunk_size;
-	     s->chunk_shift++, chunk_size >>= 1)
-		;
-	s->chunk_shift--;
-
-	s->valid = 1;
-	s->have_metadata = 0;
-	s->last_percent = 0;
-	init_rwsem(&s->lock);
-	s->table = ti->table;
-
-	/* Allocate hash table for COW data */
-	if (init_hash_tables(s)) {
-		ti->error = "Unable to allocate hash table space";
-		r = -ENOMEM;
-		goto bad3;
-	}
-
-	/*
-	 * Check the persistent flag - done here because we need the iobuf
-	 * to check the LV header
-	 */
-	s->store.snap = s;
-
-	if (persistent == 'P')
-		r = dm_create_persistent(&s->store, s->chunk_size);
-	else
-		r = dm_create_transient(&s->store, s, blocksize);
-
-	if (r) {
-		ti->error = "Couldn't create exception store";
-		r = -EINVAL;
-		goto bad4;
-	}
-
-	r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
-	if (r) {
-		ti->error = "Could not create kcopyd client";
-		goto bad5;
-	}
-
-	/* Add snapshot to the list of snapshots for this origin */
-	if (register_snapshot(s)) {
-		r = -EINVAL;
-		ti->error = "Cannot register snapshot origin";
-		goto bad6;
-	}
-
-	ti->private = s;
-	return 0;
-
- bad6:
-	kcopyd_client_destroy(s->kcopyd_client);
-
- bad5:
-	s->store.destroy(&s->store);
-
- bad4:
-	exit_exception_table(&s->pending, pending_cache);
-	exit_exception_table(&s->complete, exception_cache);
-
- bad3:
-	dm_put_device(ti, s->cow);
-	dm_put_device(ti, s->origin);
-
- bad2:
-	kfree(s);
-
- bad1:
-	return r;
-}
-
-static void snapshot_dtr(struct dm_target *ti)
-{
-	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
-
-	dm_table_event(ti->table);
-
-	unregister_snapshot(s);
-
-	exit_exception_table(&s->pending, pending_cache);
-	exit_exception_table(&s->complete, exception_cache);
-
-	/* Deallocate memory used */
-	s->store.destroy(&s->store);
-
-	dm_put_device(ti, s->origin);
-	dm_put_device(ti, s->cow);
-	kcopyd_client_destroy(s->kcopyd_client);
-	kfree(s);
-}
-
-/*
- * We hold lists of bios, using the bi_next field.
- */
-static void queue_bio(struct bio **queue, struct bio *bio)
-{
-	bio->bi_next = *queue;
-	*queue = bio;
-}
-
-/*
- * FIXME: inefficient.
- */
-static void queue_bios(struct bio **queue, struct bio *bios)
-{
-	while (*queue)
-		queue = &((*queue)->bi_next);
-
-	*queue = bios;
-}
-
-/*
- * Flush a list of buffers.
- */
-static void flush_bios(struct bio *bio)
-{
-	struct bio *n;
-
-	DMDEBUG("begin flush");
-	while (bio) {
-		n = bio->bi_next;
-		bio->bi_next = NULL;
-		DMDEBUG("flushing %p", bio);
-		generic_make_request(bio);
-		bio = n;
-	}
-
-	blk_run_queues();
-}
-
-/*
- * Error a list of buffers.
- */
-static void error_bios(struct bio *bio)
-{
-	struct bio *n;
-
-	while (bio) {
-		n = bio->bi_next;
-		bio->bi_next = NULL;
-		bio_io_error(bio, bio->bi_size);
-		bio = n;
-	}
-}
-
-static struct bio *__flush_bios(struct pending_exception *pe)
-{
-	struct pending_exception *sibling;
-
-	if (list_empty(&pe->siblings))
-		return pe->origin_bios;
-
-	sibling = list_entry(pe->siblings.next,
-			     struct pending_exception, siblings);
-
-	list_del(&pe->siblings);
-
-	/* FIXME: I think there's a race on SMP machines here, add spin lock */
-	queue_bios(&sibling->origin_bios, pe->origin_bios);
-
-	return NULL;
-}
-
-static void check_free_space(struct dm_snapshot *s)
-{
-#if 0
-	sector_t numerator, denominator;
-	double n, d;
-	unsigned pc;
-
-	if (!s->store.fraction_full)
-		return;
-
-	s->store.fraction_full(&s->store, &numerator, &denominator);
-	n = (double) numerator;
-	d = (double) denominator;
-
-	pc = (int) (n / d);
-
-	if (pc >= s->last_percent + WAKE_UP_PERCENT) {
-		dm_table_event(s->table);
-		s->last_percent = pc - pc % WAKE_UP_PERCENT;
-	}
-#endif
-}
-
-static void pending_complete(struct pending_exception *pe, int success)
-{
-	struct exception *e;
-	struct dm_snapshot *s = pe->snap;
-	struct bio *flush = NULL;
-
-	if (success) {
-		e = alloc_exception();
-		if (!e) {
-			DMWARN("Unable to allocate exception.");
-			down_write(&s->lock);
-			s->store.drop_snapshot(&s->store);
-			s->valid = 0;
-			flush = __flush_bios(pe);
-			up_write(&s->lock);
-
-			error_bios(pe->snapshot_bios);
-			goto out;
-		}
-		memcpy(e, &pe->e, sizeof(*e));
-
-		/*
-		 * Add a proper exception, and remove the
-		 * in-flight exception from the list.
-		 */
-		down_write(&s->lock);
-		insert_exception(&s->complete, e);
-		remove_exception(&pe->e);
-		flush = __flush_bios(pe);
-
-		/* Submit any pending write BHs */
-		up_write(&s->lock);
-
-		flush_bios(pe->snapshot_bios);
-		DMDEBUG("Exception completed successfully.");
-
-		/* Notify any interested parties */
-		//check_free_space(s);
-
-	} else {
-		/* Read/write error - snapshot is unusable */
-		down_write(&s->lock);
-		if (s->valid)
-			DMERR("Error reading/writing snapshot");
-		s->store.drop_snapshot(&s->store);
-		s->valid = 0;
-		remove_exception(&pe->e);
-		flush = __flush_bios(pe);
-		up_write(&s->lock);
-
-		error_bios(pe->snapshot_bios);
-
-		dm_table_event(s->table);
-		DMDEBUG("Exception failed.");
-	}
-
- out:
-	free_pending_exception(pe);
-
-	if (flush)
-		flush_bios(flush);
-}
-
-static void commit_callback(void *context, int success)
-{
-	struct pending_exception *pe = (struct pending_exception *) context;
-	pending_complete(pe, success);
-}
-
-/*
- * Called when the copy I/O has finished.  kcopyd actually runs
- * this code so don't block.
- */
-static void copy_callback(int read_err, unsigned int write_err, void *context)
-{
-	struct pending_exception *pe = (struct pending_exception *) context;
-	struct dm_snapshot *s = pe->snap;
-
-	if (read_err || write_err)
-		pending_complete(pe, 0);
-
-	else
-		/* Update the metadata if we are persistent */
-		s->store.commit_exception(&s->store, &pe->e, commit_callback,
-					  pe);
-}
-
-/*
- * Dispatches the copy operation to kcopyd.
- */
-static inline void start_copy(struct pending_exception *pe)
-{
-	struct dm_snapshot *s = pe->snap;
-	struct io_region src, dest;
-	struct block_device *bdev = s->origin->bdev;
-	sector_t dev_size;
-
-	dev_size = get_dev_size(bdev);
-
-	src.bdev = bdev;
-	src.sector = chunk_to_sector(s, pe->e.old_chunk);
-	src.count = min(s->chunk_size, dev_size - src.sector);
-
-	dest.bdev = s->cow->bdev;
-	dest.sector = chunk_to_sector(s, pe->e.new_chunk);
-	dest.count = src.count;
-
-	/* Hand over to kcopyd */
-	DMDEBUG("starting exception copy");
-	kcopyd_copy(s->kcopyd_client,
-		    &src, 1, &dest, 0, copy_callback, pe);
-}
-
-/*
- * Looks to see if this snapshot already has a pending exception
- * for this chunk, otherwise it allocates a new one and inserts
- * it into the pending table.
- *
- * NOTE: a write lock must be held on snap->lock before calling
- * this.
- */
-static struct pending_exception *
-__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
-{
-	struct exception *e;
-	struct pending_exception *pe;
-	chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
-
-	/*
-	 * Is there a pending exception for this already ?
-	 */
-	e = lookup_exception(&s->pending, chunk);
-	if (e) {
-		/* cast the exception to a pending exception */
-		pe = list_entry(e, struct pending_exception, e);
-
-	} else {
-		/*
-		 * Create a new pending exception, we don't want
-		 * to hold the lock while we do this.
-		 */
-		up_write(&s->lock);
-
-		pe = alloc_pending_exception();
-		pe->e.old_chunk = chunk;
-		pe->origin_bios = pe->snapshot_bios = NULL;
-		INIT_LIST_HEAD(&pe->siblings);
-		pe->snap = s;
-		pe->started = 0;
-
-		down_write(&s->lock);
-		if (s->store.prepare_exception(&s->store, &pe->e)) {
-			free_pending_exception(pe);
-			s->valid = 0;
-			return NULL;
-		}
-
-		insert_exception(&s->pending, &pe->e);
-	}
-
-	return pe;
-}
-
-static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
-				   struct bio *bio)
-{
-	bio->bi_bdev = s->cow->bdev;
-	bio->bi_sector = chunk_to_sector(s, e->new_chunk) +
-		(bio->bi_sector & s->chunk_mask);
-}
-
-static int snapshot_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
-{
-	struct exception *e;
-	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
-	int r = 1;
-	chunk_t chunk;
-	struct pending_exception *pe;
-
-	chunk = sector_to_chunk(s, bio->bi_sector);
-
-	/* Full snapshots are not usable */
-	if (!s->valid)
-		return -1;
-
-	/*
-	 * Write to snapshot - higher level takes care of RW/RO
-	 * flags so we should only get this if we are
-	 * writeable.
-	 */
-	if (bio_rw(bio) == WRITE) {
-
-		/* FIXME: should only take write lock if we need
-		 * to copy an exception */
-		down_write(&s->lock);
-
-		/* If the block is already remapped - use that, else remap it */
-		e = lookup_exception(&s->complete, chunk);
-		if (e) {
-			remap_exception(s, e, bio);
-			up_write(&s->lock);
-
-		} else {
-			pe = __find_pending_exception(s, bio);
-
-			if (!pe) {
-				s->store.drop_snapshot(&s->store);
-				s->valid = 0;
-				r = -EIO;
-				up_write(&s->lock);
-			} else {
-				remap_exception(s, &pe->e, bio);
-				queue_bio(&pe->snapshot_bios, bio);
-
-				if (!pe->started) {
-					/* this is protected by snap->lock */
-					pe->started = 1;
-					up_write(&s->lock);
-					start_copy(pe);
-				} else
-					up_write(&s->lock);
-				r = 0;
-			}
-		}
-
-	} else {
-		/*
-		 * FIXME: this read path scares me because we
-		 * always use the origin when we have a pending
-		 * exception.  However I can't think of a
-		 * situation where this is wrong - ejt.
-		 */
-
-		/* Do reads */
-		down_read(&s->lock);
-
-		/* See if it it has been remapped */
-		e = lookup_exception(&s->complete, chunk);
-		if (e)
-			remap_exception(s, e, bio);
-		else
-			bio->bi_bdev = s->origin->bdev;
-
-		up_read(&s->lock);
-	}
-
-	return r;
-}
-
-void snapshot_resume(struct dm_target *ti)
-{
-	struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
-
-	if (s->have_metadata)
-		return;
-
-	if (s->store.read_metadata(&s->store)) {
-		down_write(&s->lock);
-		s->valid = 0;
-		up_write(&s->lock);
-	}
-
-	s->have_metadata = 1;
-}
-
-static int snapshot_status(struct dm_target *ti, status_type_t type,
-			   char *result, unsigned int maxlen)
-{
-	struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
-	char cow[32];
-	char org[32];
-
-	switch (type) {
-	case STATUSTYPE_INFO:
-		if (!snap->valid)
-			snprintf(result, maxlen, "Invalid");
-		else {
-			if (snap->store.fraction_full) {
-				sector_t numerator, denominator;
-				snap->store.fraction_full(&snap->store,
-							  &numerator,
-							  &denominator);
-				snprintf(result, maxlen,
-					 SECTOR_FORMAT "/" SECTOR_FORMAT,
-					 numerator, denominator);
-			}
-			else
-				snprintf(result, maxlen, "Unknown");
-		}
-		break;
-
-	case STATUSTYPE_TABLE:
-		/*
-		 * kdevname returns a static pointer so we need
-		 * to make private copies if the output is to
-		 * make sense.
-		 */
-		format_dev_t(cow, snap->cow->bdev->bd_dev);
-		format_dev_t(org, snap->origin->bdev->bd_dev);
-		snprintf(result, maxlen, "%s %s %c %lld", org, cow,
-			 snap->type, snap->chunk_size);
-		break;
-	}
-
-	return 0;
-}
-
-/*-----------------------------------------------------------------
- * Origin methods
- *---------------------------------------------------------------*/
-static void list_merge(struct list_head *l1, struct list_head *l2)
-{
-	struct list_head *l1_n, *l2_p;
-
-	l1_n = l1->next;
-	l2_p = l2->prev;
-
-	l1->next = l2;
-	l2->prev = l1;
-
-	l2_p->next = l1_n;
-	l1_n->prev = l2_p;
-}
-
-static int __origin_write(struct list_head *snapshots, struct bio *bio)
-{
-	int r = 1, first = 1;
-	struct list_head *sl;
-	struct dm_snapshot *snap;
-	struct exception *e;
-	struct pending_exception *pe, *last = NULL;
-	chunk_t chunk;
-
-	/* Do all the snapshots on this origin */
-	list_for_each(sl, snapshots) {
-		snap = list_entry(sl, struct dm_snapshot, list);
-
-		/* Only deal with valid snapshots */
-		if (!snap->valid)
-			continue;
-
-		down_write(&snap->lock);
-
-		/*
-		 * Remember, different snapshots can have
-		 * different chunk sizes.
-		 */
-		chunk = sector_to_chunk(snap, bio->bi_sector);
-
-		/*
-		 * Check exception table to see if block
-		 * is already remapped in this snapshot
-		 * and trigger an exception if not.
-		 */
-		e = lookup_exception(&snap->complete, chunk);
-		if (!e) {
-			pe = __find_pending_exception(snap, bio);
-			if (!pe) {
-				snap->store.drop_snapshot(&snap->store);
-				snap->valid = 0;
-
-			} else {
-				if (last)
-					list_merge(&pe->siblings,
-						   &last->siblings);
-
-				last = pe;
-				r = 0;
-			}
-		}
-
-		up_write(&snap->lock);
-	}
-
-	/*
-	 * Now that we have a complete pe list we can start the copying.
-	 */
-	if (last) {
-		pe = last;
-		do {
-			down_write(&pe->snap->lock);
-			if (first)
-				queue_bio(&pe->origin_bios, bio);
-
-#if 0
-			if (!pe->started) {
-				pe->started = 1;
-				up_write(&pe->snap->lock);
-				start_copy(pe);
-			} else
-				up_write(&pe->snap->lock);
-#else
-			pe->started = 1;
-			up_write(&pe->snap->lock);
-			start_copy(pe);
-#endif
-			first = 0;
-			pe = list_entry(pe->siblings.next,
-					struct pending_exception, siblings);
-
-		} while (pe != last);
-	}
-
-	return r;
-}
-
-/*
- * Called on a write from the origin driver.
- */
-int do_origin(struct dm_dev *origin, struct bio *bio)
-{
-	struct origin *o;
-	int r;
-
-	down_read(&_origins_lock);
-	o = __lookup_origin(origin->bdev);
-	if (!o)
-		BUG();
-
-	r = __origin_write(&o->snapshots, bio);
-	up_read(&_origins_lock);
-
-	return r;
-}
-
-/*
- * Origin: maps a linear range of a device, with hooks for snapshotting.
- */
-
-/*
- * Construct an origin mapping: <dev_path>
- * The context for an origin is merely a 'struct dm_dev *'
- * pointing to the real device.
- */
-static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
-{
-	int r;
-	struct dm_dev *dev;
-
-	if (argc != 1) {
-		ti->error = "dm-origin: incorrect number of arguments";
-		return -EINVAL;
-	}
-
-	r = dm_get_device(ti, argv[0], 0, ti->len,
-			  dm_table_get_mode(ti->table), &dev);
-	if (r) {
-		ti->error = "Cannot get target device";
-		return r;
-	}
-
-	ti->private = dev;
-	return 0;
-}
-
-static void origin_dtr(struct dm_target *ti)
-{
-	struct dm_dev *dev = (struct dm_dev *) ti->private;
-	dm_put_device(ti, dev);
-}
-
-static int origin_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
-{
-	struct dm_dev *dev = (struct dm_dev *) ti->private;
-	bio->bi_bdev = dev->bdev;
-
-	/* Only tell snapshots if this is a write */
-	return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1;
-}
-
-static int origin_status(struct dm_target *ti, status_type_t type, char *result,
-			 unsigned int maxlen)
-{
-	struct dm_dev *dev = (struct dm_dev *) ti->private;
-	char buffer[32];
-
-	switch (type) {
-	case STATUSTYPE_INFO:
-		result[0] = '\0';
-		break;
-
-	case STATUSTYPE_TABLE:
-		format_dev_t(buffer, dev->bdev->bd_dev);
-		snprintf(result, maxlen, "%s", buffer);
-		break;
-	}
-
-	return 0;
-}
-
-static struct target_type origin_target = {
-	name:	"snapshot-origin",
-	module:	THIS_MODULE,
-	ctr:	origin_ctr,
-	dtr:	origin_dtr,
-	map:	origin_map,
-	status:	origin_status,
-};
-
-static struct target_type snapshot_target = {
-	name:	"snapshot",
-	module:	THIS_MODULE,
-	ctr:	snapshot_ctr,
-	dtr:	snapshot_dtr,
-	map:	snapshot_map,
-	resume: snapshot_resume,
-	status:	snapshot_status,
-};
-
-static int __init dm_snapshot_init(void)
-{
-	int r;
-
-	r = dm_register_target(&snapshot_target);
-	if (r) {
-		DMERR("snapshot target register failed %d", r);
-		return r;
-	}
-
-	r = dm_register_target(&origin_target);
-	if (r < 0) {
-		DMERR("Device mapper: Origin: register failed %d\n", r);
-		goto bad1;
-	}
-
-	r = init_origin_hash();
-	if (r) {
-		DMERR("init_origin_hash failed.");
-		goto bad2;
-	}
-
-	exception_cache = kmem_cache_create("dm-snapshot-ex",
-					    sizeof(struct exception),
-					    __alignof__(struct exception),
-					    0, NULL, NULL);
-	if (!exception_cache) {
-		DMERR("Couldn't create exception cache.");
-		r = -ENOMEM;
-		goto bad3;
-	}
-
-	pending_cache =
-	    kmem_cache_create("dm-snapshot-in",
-			      sizeof(struct pending_exception),
-			      __alignof__(struct pending_exception),
-			      0, NULL, NULL);
-	if (!pending_cache) {
-		DMERR("Couldn't create pending cache.");
-		r = -ENOMEM;
-		goto bad4;
-	}
-
-	pending_pool = mempool_create(128, mempool_alloc_slab,
-				      mempool_free_slab, pending_cache);
-	if (!pending_pool) {
-		DMERR("Couldn't create pending pool.");
-		r = -ENOMEM;
-		goto bad5;
-	}
-
-	return 0;
-
-      bad5:
-	kmem_cache_destroy(pending_cache);
-      bad4:
-	kmem_cache_destroy(exception_cache);
-      bad3:
-	exit_origin_hash();
-      bad2:
-	dm_unregister_target(&origin_target);
-      bad1:
-	dm_unregister_target(&snapshot_target);
-	return r;
-}
-
-static void __exit dm_snapshot_exit(void)
-{
-	int r;
-
-	r = dm_unregister_target(&snapshot_target);
-	if (r)
-		DMERR("snapshot unregister failed %d", r);
-
-	r = dm_unregister_target(&origin_target);
-	if (r)
-		DMERR("origin unregister failed %d", r);
-
-	exit_origin_hash();
-	mempool_destroy(pending_pool);
-	kmem_cache_destroy(pending_cache);
-	kmem_cache_destroy(exception_cache);
-}
-
-/* Module hooks */
-module_init(dm_snapshot_init);
-module_exit(dm_snapshot_exit);
-
-MODULE_DESCRIPTION(DM_NAME " snapshot target");
-MODULE_AUTHOR("Joe Thornber");
-MODULE_LICENSE("GPL");
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snapshot.h linux-2.6.0-test9b/drivers/md/dm-snapshot.h
--- linux-2.6.0-test9a/drivers/md/dm-snapshot.h	2003-10-27 11:20:09.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snapshot.h	1969-12-31 18:00:00.000000000 -0600
@@ -1,161 +0,0 @@
-/*
- * dm-snapshot.c
- *
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_SNAPSHOT_H
-#define DM_SNAPSHOT_H
-
-#include "dm.h"
-#include <linux/blkdev.h>
-
-struct exception_table {
-	uint32_t hash_mask;
-	struct list_head *table;
-};
-
-/*
- * The snapshot code deals with largish chunks of the disk at a
- * time. Typically 64k - 256k.
- */
-/* FIXME: can we get away with limiting these to a uint32_t ? */
-typedef sector_t chunk_t;
-
-/*
- * An exception is used where an old chunk of data has been
- * replaced by a new one.
- */
-struct exception {
-	struct list_head hash_list;
-
-	chunk_t old_chunk;
-	chunk_t new_chunk;
-};
-
-/*
- * Abstraction to handle the meta/layout of exception stores (the
- * COW device).
- */
-struct exception_store {
-
-	/*
-	 * Destroys this object when you've finished with it.
-	 */
-	void (*destroy) (struct exception_store *store);
-
-	/*
-	 * The target shouldn't read the COW device until this is
-	 * called.
-	 */
-	int (*read_metadata) (struct exception_store *store);
-
-	/*
-	 * Find somewhere to store the next exception.
-	 */
-	int (*prepare_exception) (struct exception_store *store,
-				  struct exception *e);
-
-	/*
-	 * Update the metadata with this exception.
-	 */
-	void (*commit_exception) (struct exception_store *store,
-				  struct exception *e,
-				  void (*callback) (void *, int success),
-				  void *callback_context);
-
-	/*
-	 * The snapshot is invalid, note this in the metadata.
-	 */
-	void (*drop_snapshot) (struct exception_store *store);
-
-	/*
-	 * Return how full the snapshot is.
-	 */
-	void (*fraction_full) (struct exception_store *store,
-			       sector_t *numerator,
-			       sector_t *denominator);
-
-	struct dm_snapshot *snap;
-	void *context;
-};
-
-struct dm_snapshot {
-	struct rw_semaphore lock;
-	struct dm_table *table;
-
-	struct dm_dev *origin;
-	struct dm_dev *cow;
-
-	/* List of snapshots per Origin */
-	struct list_head list;
-
-	/* Size of data blocks saved - must be a power of 2 */
-	chunk_t chunk_size;
-	chunk_t chunk_mask;
-	chunk_t chunk_shift;
-
-	/* You can't use a snapshot if this is 0 (e.g. if full) */
-	int valid;
-	int have_metadata;
-
-	/* Used for display of table */
-	char type;
-
-	/* The last percentage we notified */
-	int last_percent;
-
-	struct exception_table pending;
-	struct exception_table complete;
-
-	/* The on disk metadata handler */
-	struct exception_store store;
-
-	struct kcopyd_client *kcopyd_client;
-};
-
-/*
- * Used by the exception stores to load exceptions hen
- * initialising.
- */
-int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
-
-/*
- * Constructor and destructor for the default persistent
- * store.
- */
-int dm_create_persistent(struct exception_store *store, uint32_t chunk_size);
-
-int dm_create_transient(struct exception_store *store,
-			struct dm_snapshot *s, int blocksize);
-
-/*
- * Return the number of sectors in the device.
- */
-static inline sector_t get_dev_size(struct block_device *bdev)
-{
-	return bdev->bd_inode->i_size >> SECTOR_SHIFT;
-}
-
-static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
-{
-	return (sector & ~s->chunk_mask) >> s->chunk_shift;
-}
-
-static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
-{
-	return chunk << s->chunk_shift;
-}
-
-static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
-{
-	/*
-	 * There is only ever one instance of a particular block
-	 * device so we can compare pointers safely.
-	 */
-	return lhs == rhs;
-}
-
-#endif
diff -Naur linux-2.6.0-test9a/drivers/md/dm-table.c linux-2.6.0-test9b/drivers/md/dm-table.c
--- linux-2.6.0-test9a/drivers/md/dm-table.c	2003-10-27 11:20:08.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-table.c	2003-10-27 11:25:22.000000000 -0600
@@ -824,6 +824,7 @@
 }
 
 
+EXPORT_SYMBOL(dm_vcalloc);
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
 EXPORT_SYMBOL(dm_table_event);





More information about the dm-devel mailing list