[dm-devel] [PATCH] 2.6.0-t6-mm1-dm2: 7/7: Build snapshot as part of dm-mod
Kevin Corry
kevcorry at us.ibm.com
Tue Oct 28 11:58:02 UTC 2003
On Monday 20 October 2003 13:26, Kevin Corry wrote:
> Building snapshot as its own kernel module currently creates circular
> module dependencies. Build dm-snapshot as part of dm-mod (just like
> dm-linear and dm-stripe) to avoid this problem.
>
> If we really want to be able to build snapshot as its own module, then the
> other option (as I mentioned in an email last week) is to rename
> dm-snapshot.c to dm-snap.c (or something like that) and make some changes
> to the Makefile. If you'd prefer this method, let me know and I'll send a
> different patch.
New 7/7 patch. Please discard the previous patch 7 I sent last week. This
patch also assumes that patches 4, 5, and 6 from last week have already
been applied.
In order to properly build snapshot as its own kernel module, dm-snapshot.c
and dm-snapshot.h must be renamed to dm-snap.c and dm-snap.h. The dm_vcalloc
function must also be exported from dm-table.c so the snapshot module can
find it.
diff -Naur linux-2.6.0-test9a/drivers/md/Makefile linux-2.6.0-test9b/drivers/md/Makefile
--- linux-2.6.0-test9a/drivers/md/Makefile 2003-10-27 11:20:08.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/Makefile 2003-10-27 11:24:38.000000000 -0600
@@ -5,6 +5,8 @@
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o kcopyd.o dm-daemon.o
+dm-snapshot-objs := dm-snap.o dm-exception-store.o
+
dm-mirror-objs := dm-log.o dm-raid1.o
# Note: link order is important. All raid personalities
@@ -19,5 +21,5 @@
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_BLK_DEV_MD) += md.o
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
-obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o dm-exception-store.o
+obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
diff -Naur linux-2.6.0-test9a/drivers/md/dm-exception-store.c linux-2.6.0-test9b/drivers/md/dm-exception-store.c
--- linux-2.6.0-test9a/drivers/md/dm-exception-store.c 2003-10-27 11:20:09.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-exception-store.c 2003-10-27 11:25:09.000000000 -0600
@@ -7,7 +7,7 @@
*/
#include "dm.h"
-#include "dm-snapshot.h"
+#include "dm-snap.h"
#include "dm-io.h"
#include "kcopyd.h"
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snap.c linux-2.6.0-test9b/drivers/md/dm-snap.c
--- linux-2.6.0-test9a/drivers/md/dm-snap.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snap.c 2003-10-27 11:25:03.000000000 -0600
@@ -0,0 +1,1298 @@
+/*
+ * dm-snap.c
+ *
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include "dm-snap.h"
+#include "kcopyd.h"
+
+/*
+ * FIXME: Remove this before release.
+ */
+#if 0
+#define DMDEBUG DMWARN
+#else
+#define DMDEBUG(x...)
+#endif
+
+/*
+ * The percentage increment we will wake up users at
+ */
+#define WAKE_UP_PERCENT 5
+
+/*
+ * kcopyd priority of snapshot operations
+ */
+#define SNAPSHOT_COPY_PRIORITY 2
+
+/*
+ * Each snapshot reserves this many pages for io
+ * FIXME: calculate this
+ */
+#define SNAPSHOT_PAGES 256
+
+struct pending_exception {
+ struct exception e;
+
+ /*
+ * Origin bios waiting for this to complete are held
+ * in a list (linked through bi_next).
+ */
+ struct bio *origin_bios;
+ struct bio *snapshot_bios;
+
+ /*
+ * Other pending_exceptions that are processing this
+ * chunk. When this list is empty, we know we can
+ * complete the origins.
+ */
+ struct list_head siblings;
+
+ /* Pointer back to snapshot context */
+ struct dm_snapshot *snap;
+
+ /*
+ * 1 indicates the exception has already been sent to
+ * kcopyd.
+ */
+ int started;
+};
+
+/*
+ * Slab caches and mempool used to allocate exceptions. (The hash table
+ * mapping origin volumes to lists of snapshots, and its lock, are below.)
+ */
+static kmem_cache_t *exception_cache;
+static kmem_cache_t *pending_cache;
+static mempool_t *pending_pool;
+
+/*
+ * One of these per registered origin, held in the _origins hash
+ */
+struct origin {
+ /* The origin device */
+ struct block_device *bdev;
+
+ struct list_head hash_list;
+
+ /* List of snapshots for this origin */
+ struct list_head snapshots;
+};
+
+/*
+ * Size of the hash table for origin volumes. If we make this
+ * the size of the minors list then it should be nearly perfect
+ */
+#define ORIGIN_HASH_SIZE 256
+#define ORIGIN_MASK 0xFF
+static struct list_head *_origins;
+static struct rw_semaphore _origins_lock;
+
+static int init_origin_hash(void)
+{
+ int i;
+
+ _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
+ GFP_KERNEL);
+ if (!_origins) {
+ DMERR("Device mapper: Snapshot: unable to allocate memory");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ORIGIN_HASH_SIZE; i++)
+ INIT_LIST_HEAD(_origins + i);
+ init_rwsem(&_origins_lock);
+
+ return 0;
+}
+
+static void exit_origin_hash(void)
+{
+ kfree(_origins);
+}
+
+static inline unsigned int origin_hash(struct block_device *bdev)
+{
+ return bdev->bd_dev & ORIGIN_MASK;
+}
+
+static struct origin *__lookup_origin(struct block_device *origin)
+{
+ struct list_head *slist;
+ struct list_head *ol;
+ struct origin *o;
+
+ ol = &_origins[origin_hash(origin)];
+ list_for_each(slist, ol) {
+ o = list_entry(slist, struct origin, hash_list);
+
+ if (bdev_equal(o->bdev, origin))
+ return o;
+ }
+
+ return NULL;
+}
+
+static void __insert_origin(struct origin *o)
+{
+ struct list_head *sl = &_origins[origin_hash(o->bdev)];
+ list_add_tail(&o->hash_list, sl);
+}
+
+/*
+ * Make a note of the snapshot and its origin so we can look it
+ * up when the origin has a write on it.
+ */
+static int register_snapshot(struct dm_snapshot *snap)
+{
+ struct origin *o;
+ struct block_device *bdev = snap->origin->bdev;
+
+ down_write(&_origins_lock);
+ o = __lookup_origin(bdev);
+
+ if (!o) {
+ /* New origin */
+ o = kmalloc(sizeof(*o), GFP_KERNEL);
+ if (!o) {
+ up_write(&_origins_lock);
+ return -ENOMEM;
+ }
+
+ /* Initialise the struct */
+ INIT_LIST_HEAD(&o->snapshots);
+ o->bdev = bdev;
+
+ __insert_origin(o);
+ }
+
+ list_add_tail(&snap->list, &o->snapshots);
+
+ up_write(&_origins_lock);
+ return 0;
+}
+
+static void unregister_snapshot(struct dm_snapshot *s)
+{
+ struct origin *o;
+
+ down_write(&_origins_lock);
+ o = __lookup_origin(s->origin->bdev);
+
+ list_del(&s->list);
+ if (list_empty(&o->snapshots)) {
+ list_del(&o->hash_list);
+ kfree(o);
+ }
+
+ up_write(&_origins_lock);
+}
+
+/*
+ * Implementation of the exception hash tables.
+ */
+static int init_exception_table(struct exception_table *et, uint32_t size)
+{
+ unsigned int i;
+
+ et->hash_mask = size - 1;
+ et->table = dm_vcalloc(size, sizeof(struct list_head));
+ if (!et->table)
+ return -ENOMEM;
+
+ for (i = 0; i < size; i++)
+ INIT_LIST_HEAD(et->table + i);
+
+ return 0;
+}
+
+static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
+{
+ struct list_head *slot, *entry, *temp;
+ struct exception *ex;
+ int i, size;
+
+ size = et->hash_mask + 1;
+ for (i = 0; i < size; i++) {
+ slot = et->table + i;
+
+ list_for_each_safe(entry, temp, slot) {
+ ex = list_entry(entry, struct exception, hash_list);
+ kmem_cache_free(mem, ex);
+ }
+ }
+
+ vfree(et->table);
+}
+
+/*
+ * FIXME: check how this hash fn is performing.
+ */
+static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
+{
+ return chunk & et->hash_mask;
+}
+
+static void insert_exception(struct exception_table *eh, struct exception *e)
+{
+ struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
+ list_add(&e->hash_list, l);
+}
+
+static inline void remove_exception(struct exception *e)
+{
+ list_del(&e->hash_list);
+}
+
+/*
+ * Return the exception data for a chunk, or NULL if the chunk
+ * is not remapped.
+ */
+static struct exception *lookup_exception(struct exception_table *et,
+ chunk_t chunk)
+{
+ struct list_head *slot, *el;
+ struct exception *e;
+
+ slot = &et->table[exception_hash(et, chunk)];
+ list_for_each(el, slot) {
+ e = list_entry(el, struct exception, hash_list);
+ if (e->old_chunk == chunk)
+ return e;
+ }
+
+ return NULL;
+}
+
+static inline struct exception *alloc_exception(void)
+{
+ struct exception *e;
+
+ e = kmem_cache_alloc(exception_cache, GFP_NOIO);
+ if (!e)
+ e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
+
+ return e;
+}
+
+static inline void free_exception(struct exception *e)
+{
+ kmem_cache_free(exception_cache, e);
+}
+
+static inline struct pending_exception *alloc_pending_exception(void)
+{
+ return mempool_alloc(pending_pool, GFP_NOIO);
+}
+
+static inline void free_pending_exception(struct pending_exception *pe)
+{
+ mempool_free(pe, pending_pool);
+}
+
+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
+{
+ struct exception *e;
+
+ e = alloc_exception();
+ if (!e)
+ return -ENOMEM;
+
+ e->old_chunk = old;
+ e->new_chunk = new;
+ insert_exception(&s->complete, e);
+ return 0;
+}
+
+/*
+ * Hard coded magic.
+ */
+static int calc_max_buckets(void)
+{
+ unsigned long mem;
+
+ mem = num_physpages << PAGE_SHIFT;
+ mem /= 50;
+ mem /= sizeof(struct list_head);
+
+ return mem;
+}
+
+/*
+ * Rounds a number down to a power of 2.
+ */
+static inline uint32_t round_down(uint32_t n)
+{
+ while (n & (n - 1))
+ n &= (n - 1);
+ return n;
+}
+
+/*
+ * Allocate room for a suitable hash table.
+ */
+static int init_hash_tables(struct dm_snapshot *s)
+{
+ sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
+
+ /*
+ * Calculate based on the size of the original volume or
+ * the COW volume...
+ */
+ cow_dev_size = get_dev_size(s->cow->bdev);
+ origin_dev_size = get_dev_size(s->origin->bdev);
+ max_buckets = calc_max_buckets();
+
+ hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
+ hash_size = min(hash_size, max_buckets);
+
+ /* Round it down to a power of 2 */
+ hash_size = round_down(hash_size);
+ if (init_exception_table(&s->complete, hash_size))
+ return -ENOMEM;
+
+ /*
+ * Allocate hash table for in-flight exceptions
+ * Make this smaller than the real hash table
+ */
+ hash_size >>= 3;
+ if (!hash_size)
+ hash_size = 64;
+
+ if (init_exception_table(&s->pending, hash_size)) {
+ exit_exception_table(&s->complete, exception_cache);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Round a number up to the nearest 'size' boundary. size must
+ * be a power of 2.
+ */
+static inline ulong round_up(ulong n, ulong size)
+{
+ size--;
+ return (n + size) & ~size;
+}
+
+/*
+ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
+ */
+static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ struct dm_snapshot *s;
+ unsigned long chunk_size;
+ int r = -EINVAL;
+ char persistent;
+ char *origin_path;
+ char *cow_path;
+ char *value;
+ int blocksize;
+
+ if (argc != 4) { /* reject extra arguments too: "exactly 4" */
+ ti->error = "dm-snapshot: requires exactly 4 arguments";
+ r = -EINVAL;
+ goto bad1;
+ }
+
+ origin_path = argv[0];
+ cow_path = argv[1];
+ persistent = toupper(*argv[2]);
+
+ if (persistent != 'P' && persistent != 'N') {
+ ti->error = "Persistent flag is not P or N";
+ r = -EINVAL;
+ goto bad1;
+ }
+
+ chunk_size = simple_strtoul(argv[3], &value, 10);
+ if (chunk_size == 0 || value == NULL) {
+ ti->error = "Invalid chunk size";
+ r = -EINVAL;
+ goto bad1;
+ }
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (s == NULL) {
+ ti->error = "Cannot allocate snapshot context private "
+ "structure";
+ r = -ENOMEM;
+ goto bad1;
+ }
+
+ r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
+ if (r) {
+ ti->error = "Cannot get origin device";
+ goto bad2;
+ }
+
+ /* FIXME: get cow length */
+ r = dm_get_device(ti, cow_path, 0, 0,
+ FMODE_READ | FMODE_WRITE, &s->cow);
+ if (r) {
+ dm_put_device(ti, s->origin);
+ ti->error = "Cannot get COW device";
+ goto bad2;
+ }
+
+ /*
+ * Chunk size must be multiple of page size. Silently
+ * round up if it's not.
+ */
+ chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
+
+ /* Validate the chunk size against the device block size */
+ /* FIXME: check this, also ugly */
+ blocksize = s->cow->bdev->bd_disk->queue->hardsect_size;
+ if (chunk_size % (blocksize >> 9)) {
+ ti->error = "Chunk size is not a multiple of device blocksize";
+ r = -EINVAL;
+ goto bad3;
+ }
+
+ /* Check chunk_size is a power of 2 */
+ if (chunk_size & (chunk_size - 1)) {
+ ti->error = "Chunk size is not a power of 2";
+ r = -EINVAL;
+ goto bad3;
+ }
+
+ s->chunk_size = chunk_size;
+ s->chunk_mask = chunk_size - 1;
+ s->type = persistent;
+ for (s->chunk_shift = 0; chunk_size; /* consumes local chunk_size */
+ s->chunk_shift++, chunk_size >>= 1)
+ ;
+ s->chunk_shift--;
+
+ s->valid = 1;
+ s->have_metadata = 0;
+ s->last_percent = 0;
+ init_rwsem(&s->lock);
+ s->table = ti->table;
+
+ /* Allocate hash table for COW data */
+ if (init_hash_tables(s)) {
+ ti->error = "Unable to allocate hash table space";
+ r = -ENOMEM;
+ goto bad3;
+ }
+
+ /*
+ * Check the persistent flag - done here because we need the iobuf
+ * to check the LV header
+ */
+ s->store.snap = s;
+
+ if (persistent == 'P')
+ r = dm_create_persistent(&s->store, s->chunk_size);
+ else
+ r = dm_create_transient(&s->store, s, blocksize);
+
+ if (r) {
+ ti->error = "Couldn't create exception store";
+ r = -EINVAL;
+ goto bad4;
+ }
+
+ r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
+ if (r) {
+ ti->error = "Could not create kcopyd client";
+ goto bad5;
+ }
+
+ /* Add snapshot to the list of snapshots for this origin */
+ if (register_snapshot(s)) {
+ r = -EINVAL;
+ ti->error = "Cannot register snapshot origin";
+ goto bad6;
+ }
+
+ ti->private = s;
+ ti->split_io = s->chunk_size; /* local chunk_size is 0 after the shift loop */
+
+ return 0;
+
+ bad6:
+ kcopyd_client_destroy(s->kcopyd_client);
+
+ bad5:
+ s->store.destroy(&s->store);
+
+ bad4:
+ exit_exception_table(&s->pending, pending_cache);
+ exit_exception_table(&s->complete, exception_cache);
+
+ bad3:
+ dm_put_device(ti, s->cow);
+ dm_put_device(ti, s->origin);
+
+ bad2:
+ kfree(s);
+
+ bad1:
+ return r;
+}
+
+static void snapshot_dtr(struct dm_target *ti)
+{
+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+
+ dm_table_event(ti->table);
+
+ unregister_snapshot(s);
+
+ exit_exception_table(&s->pending, pending_cache);
+ exit_exception_table(&s->complete, exception_cache);
+
+ /* Deallocate memory used */
+ s->store.destroy(&s->store);
+
+ dm_put_device(ti, s->origin);
+ dm_put_device(ti, s->cow);
+ kcopyd_client_destroy(s->kcopyd_client);
+ kfree(s);
+}
+
+/*
+ * We hold lists of bios, using the bi_next field.
+ */
+static void queue_bio(struct bio **queue, struct bio *bio)
+{
+ bio->bi_next = *queue;
+ *queue = bio;
+}
+
+/*
+ * FIXME: inefficient.
+ */
+static void queue_bios(struct bio **queue, struct bio *bios)
+{
+ while (*queue)
+ queue = &((*queue)->bi_next);
+
+ *queue = bios;
+}
+
+/*
+ * Flush a list of buffers.
+ */
+static void flush_bios(struct bio *bio)
+{
+ struct bio *n;
+
+ DMDEBUG("begin flush");
+ while (bio) {
+ n = bio->bi_next;
+ bio->bi_next = NULL;
+ DMDEBUG("flushing %p", bio);
+ generic_make_request(bio);
+ bio = n;
+ }
+
+ blk_run_queues();
+}
+
+/*
+ * Error a list of buffers.
+ */
+static void error_bios(struct bio *bio)
+{
+ struct bio *n;
+
+ while (bio) {
+ n = bio->bi_next;
+ bio->bi_next = NULL;
+ bio_io_error(bio, bio->bi_size);
+ bio = n;
+ }
+}
+
+static struct bio *__flush_bios(struct pending_exception *pe)
+{
+ struct pending_exception *sibling;
+
+ if (list_empty(&pe->siblings))
+ return pe->origin_bios;
+
+ sibling = list_entry(pe->siblings.next,
+ struct pending_exception, siblings);
+
+ list_del(&pe->siblings);
+
+ /* FIXME: I think there's a race on SMP machines here, add spin lock */
+ queue_bios(&sibling->origin_bios, pe->origin_bios);
+
+ return NULL;
+}
+
+static void check_free_space(struct dm_snapshot *s)
+{
+#if 0
+ sector_t numerator, denominator;
+ double n, d;
+ unsigned pc;
+
+ if (!s->store.fraction_full)
+ return;
+
+ s->store.fraction_full(&s->store, &numerator, &denominator);
+ n = (double) numerator;
+ d = (double) denominator;
+
+ pc = (int) (n / d);
+
+ if (pc >= s->last_percent + WAKE_UP_PERCENT) {
+ dm_table_event(s->table);
+ s->last_percent = pc - pc % WAKE_UP_PERCENT;
+ }
+#endif
+}
+
+static void pending_complete(struct pending_exception *pe, int success)
+{
+ struct exception *e;
+ struct dm_snapshot *s = pe->snap;
+ struct bio *flush = NULL;
+
+ if (success) {
+ e = alloc_exception();
+ if (!e) {
+ DMWARN("Unable to allocate exception.");
+ down_write(&s->lock);
+ s->store.drop_snapshot(&s->store);
+ s->valid = 0;
+ remove_exception(&pe->e); /* pe is freed at out: - unhash it first */
+ flush = __flush_bios(pe);
+ up_write(&s->lock);
+ error_bios(pe->snapshot_bios);
+ goto out;
+ }
+ memcpy(e, &pe->e, sizeof(*e));
+
+ /*
+ * Add a proper exception, and remove the
+ * in-flight exception from the list.
+ */
+ down_write(&s->lock);
+ insert_exception(&s->complete, e);
+ remove_exception(&pe->e);
+ flush = __flush_bios(pe);
+
+ /* Submit any pending write bios */
+ up_write(&s->lock);
+
+ flush_bios(pe->snapshot_bios);
+ DMDEBUG("Exception completed successfully.");
+
+ /* Notify any interested parties */
+ //check_free_space(s);
+
+ } else {
+ /* Read/write error - snapshot is unusable */
+ down_write(&s->lock);
+ if (s->valid)
+ DMERR("Error reading/writing snapshot");
+ s->store.drop_snapshot(&s->store);
+ s->valid = 0;
+ remove_exception(&pe->e);
+ flush = __flush_bios(pe);
+ up_write(&s->lock);
+
+ error_bios(pe->snapshot_bios);
+
+ dm_table_event(s->table);
+ DMDEBUG("Exception failed.");
+ }
+
+ out:
+ free_pending_exception(pe);
+
+ if (flush)
+ flush_bios(flush);
+}
+
+static void commit_callback(void *context, int success)
+{
+ struct pending_exception *pe = (struct pending_exception *) context;
+ pending_complete(pe, success);
+}
+
+/*
+ * Called when the copy I/O has finished. kcopyd actually runs
+ * this code so don't block.
+ */
+static void copy_callback(int read_err, unsigned int write_err, void *context)
+{
+ struct pending_exception *pe = (struct pending_exception *) context;
+ struct dm_snapshot *s = pe->snap;
+
+ if (read_err || write_err)
+ pending_complete(pe, 0);
+
+ else
+ /* Update the metadata if we are persistent */
+ s->store.commit_exception(&s->store, &pe->e, commit_callback,
+ pe);
+}
+
+/*
+ * Dispatches the copy operation to kcopyd.
+ */
+static inline void start_copy(struct pending_exception *pe)
+{
+ struct dm_snapshot *s = pe->snap;
+ struct io_region src, dest;
+ struct block_device *bdev = s->origin->bdev;
+ sector_t dev_size;
+
+ dev_size = get_dev_size(bdev);
+
+ src.bdev = bdev;
+ src.sector = chunk_to_sector(s, pe->e.old_chunk);
+ src.count = min(s->chunk_size, dev_size - src.sector);
+
+ dest.bdev = s->cow->bdev;
+ dest.sector = chunk_to_sector(s, pe->e.new_chunk);
+ dest.count = src.count;
+
+ /* Hand over to kcopyd */
+ DMDEBUG("starting exception copy");
+ kcopyd_copy(s->kcopyd_client,
+ &src, 1, &dest, 0, copy_callback, pe);
+}
+
+/*
+ * Looks to see if this snapshot already has a pending exception
+ * for this chunk, otherwise it allocates a new one and inserts
+ * it into the pending table.
+ *
+ * NOTE: a write lock must be held on snap->lock before calling
+ * this.
+ */
+static struct pending_exception *
+__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
+{
+ struct exception *e;
+ struct pending_exception *pe;
+ chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
+
+ /*
+ * Is there a pending exception for this already ?
+ */
+ e = lookup_exception(&s->pending, chunk);
+ if (e) {
+ /* cast the exception to a pending exception */
+ pe = list_entry(e, struct pending_exception, e);
+
+ } else {
+ /*
+ * Create a new pending exception, we don't want
+ * to hold the lock while we do this.
+ */
+ up_write(&s->lock);
+ pe = alloc_pending_exception();
+ down_write(&s->lock);
+
+ e = lookup_exception(&s->pending, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ pe = list_entry(e, struct pending_exception, e);
+ } else {
+ pe->e.old_chunk = chunk;
+ pe->origin_bios = pe->snapshot_bios = NULL;
+ INIT_LIST_HEAD(&pe->siblings);
+ pe->snap = s;
+ pe->started = 0;
+
+ if (s->store.prepare_exception(&s->store, &pe->e)) {
+ free_pending_exception(pe);
+ s->valid = 0;
+ return NULL;
+ }
+
+ insert_exception(&s->pending, &pe->e);
+ }
+ }
+
+ return pe;
+}
+
+static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
+ struct bio *bio)
+{
+ bio->bi_bdev = s->cow->bdev;
+ bio->bi_sector = chunk_to_sector(s, e->new_chunk) +
+ (bio->bi_sector & s->chunk_mask);
+}
+
+static int snapshot_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ struct exception *e;
+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+ int r = 1;
+ chunk_t chunk;
+ struct pending_exception *pe;
+
+ chunk = sector_to_chunk(s, bio->bi_sector);
+
+ /* Full snapshots are not usable */
+ if (!s->valid)
+ return -1;
+
+ /*
+ * Write to snapshot - higher level takes care of RW/RO
+ * flags so we should only get this if we are
+ * writeable.
+ */
+ if (bio_rw(bio) == WRITE) {
+
+ /* FIXME: should only take write lock if we need
+ * to copy an exception */
+ down_write(&s->lock);
+
+ /* If the block is already remapped - use that, else remap it */
+ e = lookup_exception(&s->complete, chunk);
+ if (e) {
+ remap_exception(s, e, bio);
+ up_write(&s->lock);
+
+ } else {
+ pe = __find_pending_exception(s, bio);
+
+ if (!pe) {
+ s->store.drop_snapshot(&s->store);
+ s->valid = 0;
+ r = -EIO;
+ up_write(&s->lock);
+ } else {
+ remap_exception(s, &pe->e, bio);
+ queue_bio(&pe->snapshot_bios, bio);
+
+ if (!pe->started) {
+ /* this is protected by snap->lock */
+ pe->started = 1;
+ up_write(&s->lock);
+ start_copy(pe);
+ } else
+ up_write(&s->lock);
+ r = 0;
+ }
+ }
+
+ } else {
+ /*
+ * FIXME: this read path scares me because we
+ * always use the origin when we have a pending
+ * exception. However I can't think of a
+ * situation where this is wrong - ejt.
+ */
+
+ /* Do reads */
+ down_read(&s->lock);
+
+ /* See if it it has been remapped */
+ e = lookup_exception(&s->complete, chunk);
+ if (e)
+ remap_exception(s, e, bio);
+ else
+ bio->bi_bdev = s->origin->bdev;
+
+ up_read(&s->lock);
+ }
+
+ return r;
+}
+
+void snapshot_resume(struct dm_target *ti)
+{
+ struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+
+ if (s->have_metadata)
+ return;
+
+ if (s->store.read_metadata(&s->store)) {
+ down_write(&s->lock);
+ s->valid = 0;
+ up_write(&s->lock);
+ }
+
+ s->have_metadata = 1;
+}
+
+static int snapshot_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned int maxlen)
+{
+ struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
+ char cow[32];
+ char org[32];
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ if (!snap->valid)
+ snprintf(result, maxlen, "Invalid");
+ else {
+ if (snap->store.fraction_full) {
+ sector_t numerator, denominator;
+ snap->store.fraction_full(&snap->store,
+ &numerator,
+ &denominator);
+ snprintf(result, maxlen,
+ SECTOR_FORMAT "/" SECTOR_FORMAT,
+ numerator, denominator);
+ }
+ else
+ snprintf(result, maxlen, "Unknown");
+ }
+ break;
+
+ case STATUSTYPE_TABLE:
+ /*
+ * Format each device number into its own local
+ * buffer. NOTE(review): "%lld" assumes chunk_size
+ * is long long - confirm its declared type.
+ */
+ format_dev_t(cow, snap->cow->bdev->bd_dev);
+ format_dev_t(org, snap->origin->bdev->bd_dev);
+ snprintf(result, maxlen, "%s %s %c %lld", org, cow,
+ snap->type, snap->chunk_size);
+ break;
+ }
+
+ return 0;
+}
+
+/*-----------------------------------------------------------------
+ * Origin methods
+ *---------------------------------------------------------------*/
+static void list_merge(struct list_head *l1, struct list_head *l2)
+{
+ struct list_head *l1_n, *l2_p;
+
+ l1_n = l1->next;
+ l2_p = l2->prev;
+
+ l1->next = l2;
+ l2->prev = l1;
+
+ l2_p->next = l1_n;
+ l1_n->prev = l2_p;
+}
+
+static int __origin_write(struct list_head *snapshots, struct bio *bio)
+{
+ int r = 1, first = 1;
+ struct list_head *sl;
+ struct dm_snapshot *snap;
+ struct exception *e;
+ struct pending_exception *pe, *last = NULL;
+ chunk_t chunk;
+
+ /* Do all the snapshots on this origin */
+ list_for_each(sl, snapshots) {
+ snap = list_entry(sl, struct dm_snapshot, list);
+
+ /* Only deal with valid snapshots */
+ if (!snap->valid)
+ continue;
+
+ down_write(&snap->lock);
+
+ /*
+ * Remember, different snapshots can have
+ * different chunk sizes.
+ */
+ chunk = sector_to_chunk(snap, bio->bi_sector);
+
+ /*
+ * Check exception table to see if block
+ * is already remapped in this snapshot
+ * and trigger an exception if not.
+ */
+ e = lookup_exception(&snap->complete, chunk);
+ if (!e) {
+ pe = __find_pending_exception(snap, bio);
+ if (!pe) {
+ snap->store.drop_snapshot(&snap->store);
+ snap->valid = 0;
+
+ } else {
+ if (last)
+ list_merge(&pe->siblings,
+ &last->siblings);
+
+ last = pe;
+ r = 0;
+ }
+ }
+
+ up_write(&snap->lock);
+ }
+
+ /*
+ * Now that we have a complete pe list we can start the copying.
+ */
+ if (last) {
+ pe = last;
+ do {
+ down_write(&pe->snap->lock);
+ if (first)
+ queue_bio(&pe->origin_bios, bio);
+ if (!pe->started) {
+ pe->started = 1;
+ up_write(&pe->snap->lock);
+ start_copy(pe);
+ } else
+ up_write(&pe->snap->lock);
+ first = 0;
+ pe = list_entry(pe->siblings.next,
+ struct pending_exception, siblings);
+
+ } while (pe != last);
+ }
+
+ return r;
+}
+
+/*
+ * Called on a write from the origin driver.
+ */
+int do_origin(struct dm_dev *origin, struct bio *bio)
+{
+ struct origin *o;
+ int r;
+
+ down_read(&_origins_lock);
+ o = __lookup_origin(origin->bdev);
+ if (!o)
+ BUG();
+
+ r = __origin_write(&o->snapshots, bio);
+ up_read(&_origins_lock);
+
+ return r;
+}
+
+/*
+ * Origin: maps a linear range of a device, with hooks for snapshotting.
+ */
+
+/*
+ * Construct an origin mapping: <dev_path>
+ * The context for an origin is merely a 'struct dm_dev *'
+ * pointing to the real device.
+ */
+static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ int r;
+ struct dm_dev *dev;
+
+ if (argc != 1) {
+ ti->error = "dm-origin: incorrect number of arguments";
+ return -EINVAL;
+ }
+
+ r = dm_get_device(ti, argv[0], 0, ti->len,
+ dm_table_get_mode(ti->table), &dev);
+ if (r) {
+ ti->error = "Cannot get target device";
+ return r;
+ }
+
+ ti->private = dev;
+ return 0;
+}
+
+static void origin_dtr(struct dm_target *ti)
+{
+ struct dm_dev *dev = (struct dm_dev *) ti->private;
+ dm_put_device(ti, dev);
+}
+
+static int origin_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ struct dm_dev *dev = (struct dm_dev *) ti->private;
+ bio->bi_bdev = dev->bdev;
+
+ /* Only tell snapshots if this is a write */
+ return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1;
+}
+
+#define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r)))) /* fully parenthesized; args still evaluated more than once */
+
+/*
+ * Set the target "split_io" field to the minimum of all the snapshots'
+ * chunk sizes.
+ */
+static void origin_resume(struct dm_target *ti)
+{
+ struct dm_dev *dev = (struct dm_dev *) ti->private;
+ struct dm_snapshot *snap;
+ struct origin *o;
+ struct list_head *sl;
+ chunk_t chunk_size = 0;
+
+ down_read(&_origins_lock);
+ o = __lookup_origin(dev->bdev);
+ if (o) {
+ list_for_each(sl, &o->snapshots) {
+ snap = list_entry(sl, struct dm_snapshot, list);
+ chunk_size = min_not_zero(chunk_size, snap->chunk_size);
+ }
+ }
+ up_read(&_origins_lock);
+
+ ti->split_io = chunk_size;
+}
+
+static int origin_status(struct dm_target *ti, status_type_t type, char *result,
+ unsigned int maxlen)
+{
+ struct dm_dev *dev = (struct dm_dev *) ti->private;
+ char buffer[32];
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ result[0] = '\0';
+ break;
+
+ case STATUSTYPE_TABLE:
+ format_dev_t(buffer, dev->bdev->bd_dev);
+ snprintf(result, maxlen, "%s", buffer);
+ break;
+ }
+
+ return 0;
+}
+
+static struct target_type origin_target = {
+ name: "snapshot-origin",
+ module: THIS_MODULE,
+ ctr: origin_ctr,
+ dtr: origin_dtr,
+ map: origin_map,
+ resume: origin_resume,
+ status: origin_status,
+};
+
+static struct target_type snapshot_target = {
+ name: "snapshot",
+ module: THIS_MODULE,
+ ctr: snapshot_ctr,
+ dtr: snapshot_dtr,
+ map: snapshot_map,
+ resume: snapshot_resume,
+ status: snapshot_status,
+};
+
+static int __init dm_snapshot_init(void)
+{
+ int r;
+
+ r = dm_register_target(&snapshot_target);
+ if (r) {
+ DMERR("snapshot target register failed %d", r);
+ return r;
+ }
+
+ r = dm_register_target(&origin_target);
+ if (r < 0) {
+ DMERR("Device mapper: Origin: register failed %d\n", r);
+ goto bad1;
+ }
+
+ r = init_origin_hash();
+ if (r) {
+ DMERR("init_origin_hash failed.");
+ goto bad2;
+ }
+
+ exception_cache = kmem_cache_create("dm-snapshot-ex",
+ sizeof(struct exception),
+ __alignof__(struct exception),
+ 0, NULL, NULL);
+ if (!exception_cache) {
+ DMERR("Couldn't create exception cache.");
+ r = -ENOMEM;
+ goto bad3;
+ }
+
+ pending_cache =
+ kmem_cache_create("dm-snapshot-in",
+ sizeof(struct pending_exception),
+ __alignof__(struct pending_exception),
+ 0, NULL, NULL);
+ if (!pending_cache) {
+ DMERR("Couldn't create pending cache.");
+ r = -ENOMEM;
+ goto bad4;
+ }
+
+ pending_pool = mempool_create(128, mempool_alloc_slab,
+ mempool_free_slab, pending_cache);
+ if (!pending_pool) {
+ DMERR("Couldn't create pending pool.");
+ r = -ENOMEM;
+ goto bad5;
+ }
+
+ return 0;
+
+ bad5:
+ kmem_cache_destroy(pending_cache);
+ bad4:
+ kmem_cache_destroy(exception_cache);
+ bad3:
+ exit_origin_hash();
+ bad2:
+ dm_unregister_target(&origin_target);
+ bad1:
+ dm_unregister_target(&snapshot_target);
+ return r;
+}
+
+static void __exit dm_snapshot_exit(void)
+{
+ int r;
+
+ r = dm_unregister_target(&snapshot_target);
+ if (r)
+ DMERR("snapshot unregister failed %d", r);
+
+ r = dm_unregister_target(&origin_target);
+ if (r)
+ DMERR("origin unregister failed %d", r);
+
+ exit_origin_hash();
+ mempool_destroy(pending_pool);
+ kmem_cache_destroy(pending_cache);
+ kmem_cache_destroy(exception_cache);
+}
+
+/* Module hooks */
+module_init(dm_snapshot_init);
+module_exit(dm_snapshot_exit);
+
+MODULE_DESCRIPTION(DM_NAME " snapshot target");
+MODULE_AUTHOR("Joe Thornber");
+MODULE_LICENSE("GPL");
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snap.h linux-2.6.0-test9b/drivers/md/dm-snap.h
--- linux-2.6.0-test9a/drivers/md/dm-snap.h 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snap.h 2003-10-27 11:20:21.000000000 -0600
@@ -0,0 +1,161 @@
+/*
+ * dm-snap.h
+ *
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_SNAPSHOT_H
+#define DM_SNAPSHOT_H
+
+#include "dm.h"
+#include <linux/blkdev.h>
+
+struct exception_table {
+ uint32_t hash_mask;
+ struct list_head *table;
+};
+
+/*
+ * The snapshot code deals with largish chunks of the disk at a
+ * time. Typically 64k - 256k.
+ */
+/* FIXME: can we get away with limiting these to a uint32_t ? */
+typedef sector_t chunk_t;
+
+/*
+ * An exception is used where an old chunk of data has been
+ * replaced by a new one.
+ */
+struct exception {
+ struct list_head hash_list;
+
+ chunk_t old_chunk;
+ chunk_t new_chunk;
+};
+
+/*
+ * Abstraction to handle the meta/layout of exception stores (the
+ * COW device).
+ */
+struct exception_store {
+
+ /*
+ * Destroys this object when you've finished with it.
+ */
+ void (*destroy) (struct exception_store *store);
+
+ /*
+ * The target shouldn't read the COW device until this is
+ * called.
+ */
+ int (*read_metadata) (struct exception_store *store);
+
+ /*
+ * Find somewhere to store the next exception.
+ */
+ int (*prepare_exception) (struct exception_store *store,
+ struct exception *e);
+
+ /*
+ * Update the metadata with this exception.
+ */
+ void (*commit_exception) (struct exception_store *store,
+ struct exception *e,
+ void (*callback) (void *, int success),
+ void *callback_context);
+
+ /*
+ * The snapshot is invalid, note this in the metadata.
+ */
+ void (*drop_snapshot) (struct exception_store *store);
+
+ /*
+ * Return how full the snapshot is.
+ */
+ void (*fraction_full) (struct exception_store *store,
+ sector_t *numerator,
+ sector_t *denominator);
+
+ struct dm_snapshot *snap;
+ void *context;
+};
+
+struct dm_snapshot {
+ struct rw_semaphore lock;
+ struct dm_table *table;
+
+ struct dm_dev *origin;
+ struct dm_dev *cow;
+
+ /* List of snapshots per Origin */
+ struct list_head list;
+
+ /* Size of data blocks saved - must be a power of 2 */
+ chunk_t chunk_size;
+ chunk_t chunk_mask;
+ chunk_t chunk_shift;
+
+ /* You can't use a snapshot if this is 0 (e.g. if full) */
+ int valid;
+ int have_metadata;
+
+ /* Used for display of table */
+ char type;
+
+ /* The last percentage we notified */
+ int last_percent;
+
+ struct exception_table pending;
+ struct exception_table complete;
+
+ /* The on disk metadata handler */
+ struct exception_store store;
+
+ struct kcopyd_client *kcopyd_client;
+};
+
+/*
+ * Used by the exception stores to load exceptions when
+ * initialising.
+ */
+int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
+
+/*
+ * Constructors for the persistent and transient exception
+ * stores; each is torn down through its destroy() method.
+ */
+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size);
+
+int dm_create_transient(struct exception_store *store,
+ struct dm_snapshot *s, int blocksize);
+
+/*
+ * Return the number of sectors in the device.
+ */
+static inline sector_t get_dev_size(struct block_device *bdev)
+{
+ return bdev->bd_inode->i_size >> SECTOR_SHIFT;
+}
+
+static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
+{
+ return (sector & ~s->chunk_mask) >> s->chunk_shift;
+}
+
+static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
+{
+ return chunk << s->chunk_shift;
+}
+
+static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
+{
+ /*
+ * There is only ever one instance of a particular block
+ * device so we can compare pointers safely.
+ */
+ return lhs == rhs;
+}
+
+#endif
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snapshot.c linux-2.6.0-test9b/drivers/md/dm-snapshot.c
--- linux-2.6.0-test9a/drivers/md/dm-snapshot.c 2003-10-27 11:20:09.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snapshot.c 1969-12-31 18:00:00.000000000 -0600
@@ -1,1269 +0,0 @@
-/*
- * dm-snapshot.c
- *
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#include <linux/blkdev.h>
-#include <linux/config.h>
-#include <linux/ctype.h>
-#include <linux/device-mapper.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kdev_t.h>
-#include <linux/list.h>
-#include <linux/mempool.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "dm-snapshot.h"
-#include "kcopyd.h"
-
-/*
- * FIXME: Remove this before release.
- */
-#if 0
-#define DMDEBUG DMWARN
-#else
-#define DMDEBUG(x...)
-#endif
-
-/*
- * The percentage increment we will wake up users at
- */
-#define WAKE_UP_PERCENT 5
-
-/*
- * kcopyd priority of snapshot operations
- */
-#define SNAPSHOT_COPY_PRIORITY 2
-
-/*
- * Each snapshot reserves this many pages for io
- * FIXME: calculate this
- */
-#define SNAPSHOT_PAGES 256
-
-struct pending_exception {
- struct exception e;
-
- /*
- * Origin buffers waiting for this to complete are held
- * in a list (using b_reqnext).
- */
- struct bio *origin_bios;
- struct bio *snapshot_bios;
-
- /*
- * Other pending_exceptions that are processing this
- * chunk. When this list is empty, we know we can
- * complete the origins.
- */
- struct list_head siblings;
-
- /* Pointer back to snapshot context */
- struct dm_snapshot *snap;
-
- /*
- * 1 indicates the exception has already been sent to
- * kcopyd.
- */
- int started;
-};
-
-/*
- * Hash table mapping origin volumes to lists of snapshots and
- * a lock to protect it
- */
-static kmem_cache_t *exception_cache;
-static kmem_cache_t *pending_cache;
-static mempool_t *pending_pool;
-
-/*
- * One of these per registered origin, held in the snapshot_origins hash
- */
-struct origin {
- /* The origin device */
- struct block_device *bdev;
-
- struct list_head hash_list;
-
- /* List of snapshots for this origin */
- struct list_head snapshots;
-};
-
-/*
- * Size of the hash table for origin volumes. If we make this
- * the size of the minors list then it should be nearly perfect
- */
-#define ORIGIN_HASH_SIZE 256
-#define ORIGIN_MASK 0xFF
-static struct list_head *_origins;
-static struct rw_semaphore _origins_lock;
-
-static int init_origin_hash(void)
-{
- int i;
-
- _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
- GFP_KERNEL);
- if (!_origins) {
- DMERR("Device mapper: Snapshot: unable to allocate memory");
- return -ENOMEM;
- }
-
- for (i = 0; i < ORIGIN_HASH_SIZE; i++)
- INIT_LIST_HEAD(_origins + i);
- init_rwsem(&_origins_lock);
-
- return 0;
-}
-
-static void exit_origin_hash(void)
-{
- kfree(_origins);
-}
-
-static inline unsigned int origin_hash(struct block_device *bdev)
-{
- return bdev->bd_dev & ORIGIN_MASK;
-}
-
-static struct origin *__lookup_origin(struct block_device *origin)
-{
- struct list_head *slist;
- struct list_head *ol;
- struct origin *o;
-
- ol = &_origins[origin_hash(origin)];
- list_for_each(slist, ol) {
- o = list_entry(slist, struct origin, hash_list);
-
- if (bdev_equal(o->bdev, origin))
- return o;
- }
-
- return NULL;
-}
-
-static void __insert_origin(struct origin *o)
-{
- struct list_head *sl = &_origins[origin_hash(o->bdev)];
- list_add_tail(&o->hash_list, sl);
-}
-
-/*
- * Make a note of the snapshot and its origin so we can look it
- * up when the origin has a write on it.
- */
-static int register_snapshot(struct dm_snapshot *snap)
-{
- struct origin *o;
- struct block_device *bdev = snap->origin->bdev;
-
- down_write(&_origins_lock);
- o = __lookup_origin(bdev);
-
- if (!o) {
- /* New origin */
- o = kmalloc(sizeof(*o), GFP_KERNEL);
- if (!o) {
- up_write(&_origins_lock);
- return -ENOMEM;
- }
-
- /* Initialise the struct */
- INIT_LIST_HEAD(&o->snapshots);
- o->bdev = bdev;
-
- __insert_origin(o);
- }
-
- list_add_tail(&snap->list, &o->snapshots);
-
- up_write(&_origins_lock);
- return 0;
-}
-
-static void unregister_snapshot(struct dm_snapshot *s)
-{
- struct origin *o;
-
- down_write(&_origins_lock);
- o = __lookup_origin(s->origin->bdev);
-
- list_del(&s->list);
- if (list_empty(&o->snapshots)) {
- list_del(&o->hash_list);
- kfree(o);
- }
-
- up_write(&_origins_lock);
-}
-
-/*
- * Implementation of the exception hash tables.
- */
-static int init_exception_table(struct exception_table *et, uint32_t size)
-{
- unsigned int i;
-
- et->hash_mask = size - 1;
- et->table = dm_vcalloc(size, sizeof(struct list_head));
- if (!et->table)
- return -ENOMEM;
-
- for (i = 0; i < size; i++)
- INIT_LIST_HEAD(et->table + i);
-
- return 0;
-}
-
-static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
-{
- struct list_head *slot, *entry, *temp;
- struct exception *ex;
- int i, size;
-
- size = et->hash_mask + 1;
- for (i = 0; i < size; i++) {
- slot = et->table + i;
-
- list_for_each_safe(entry, temp, slot) {
- ex = list_entry(entry, struct exception, hash_list);
- kmem_cache_free(mem, ex);
- }
- }
-
- vfree(et->table);
-}
-
-/*
- * FIXME: check how this hash fn is performing.
- */
-static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
-{
- return chunk & et->hash_mask;
-}
-
-static void insert_exception(struct exception_table *eh, struct exception *e)
-{
- struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
- list_add(&e->hash_list, l);
-}
-
-static inline void remove_exception(struct exception *e)
-{
- list_del(&e->hash_list);
-}
-
-/*
- * Return the exception data for a sector, or NULL if not
- * remapped.
- */
-static struct exception *lookup_exception(struct exception_table *et,
- chunk_t chunk)
-{
- struct list_head *slot, *el;
- struct exception *e;
-
- slot = &et->table[exception_hash(et, chunk)];
- list_for_each(el, slot) {
- e = list_entry(el, struct exception, hash_list);
- if (e->old_chunk == chunk)
- return e;
- }
-
- return NULL;
-}
-
-static inline struct exception *alloc_exception(void)
-{
- struct exception *e;
-
- e = kmem_cache_alloc(exception_cache, GFP_NOIO);
- if (!e)
- e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
-
- return e;
-}
-
-static inline void free_exception(struct exception *e)
-{
- kmem_cache_free(exception_cache, e);
-}
-
-static inline struct pending_exception *alloc_pending_exception(void)
-{
- return mempool_alloc(pending_pool, GFP_NOIO);
-}
-
-static inline void free_pending_exception(struct pending_exception *pe)
-{
- mempool_free(pe, pending_pool);
-}
-
-int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
-{
- struct exception *e;
-
- e = alloc_exception();
- if (!e)
- return -ENOMEM;
-
- e->old_chunk = old;
- e->new_chunk = new;
- insert_exception(&s->complete, e);
- return 0;
-}
-
-/*
- * Hard coded magic.
- */
-static int calc_max_buckets(void)
-{
- unsigned long mem;
-
- mem = num_physpages << PAGE_SHIFT;
- mem /= 50;
- mem /= sizeof(struct list_head);
-
- return mem;
-}
-
-/*
- * Rounds a number down to a power of 2.
- */
-static inline uint32_t round_down(uint32_t n)
-{
- while (n & (n - 1))
- n &= (n - 1);
- return n;
-}
-
-/*
- * Allocate room for a suitable hash table.
- */
-static int init_hash_tables(struct dm_snapshot *s)
-{
- sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
-
- /*
- * Calculate based on the size of the original volume or
- * the COW volume...
- */
- cow_dev_size = get_dev_size(s->cow->bdev);
- origin_dev_size = get_dev_size(s->origin->bdev);
- max_buckets = calc_max_buckets();
-
- hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift;
- hash_size = min(hash_size, max_buckets);
-
- /* Round it down to a power of 2 */
- hash_size = round_down(hash_size);
- if (init_exception_table(&s->complete, hash_size))
- return -ENOMEM;
-
- /*
- * Allocate hash table for in-flight exceptions
- * Make this smaller than the real hash table
- */
- hash_size >>= 3;
- if (!hash_size)
- hash_size = 64;
-
- if (init_exception_table(&s->pending, hash_size)) {
- exit_exception_table(&s->complete, exception_cache);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/*
- * Round a number up to the nearest 'size' boundary. size must
- * be a power of 2.
- */
-static inline ulong round_up(ulong n, ulong size)
-{
- size--;
- return (n + size) & ~size;
-}
-
-/*
- * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
- */
-static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
-{
- struct dm_snapshot *s;
- unsigned long chunk_size;
- int r = -EINVAL;
- char persistent;
- char *origin_path;
- char *cow_path;
- char *value;
- int blocksize;
-
- if (argc < 4) {
- ti->error = "dm-snapshot: requires exactly 4 arguments";
- r = -EINVAL;
- goto bad1;
- }
-
- origin_path = argv[0];
- cow_path = argv[1];
- persistent = toupper(*argv[2]);
-
- if (persistent != 'P' && persistent != 'N') {
- ti->error = "Persistent flag is not P or N";
- r = -EINVAL;
- goto bad1;
- }
-
- chunk_size = simple_strtoul(argv[3], &value, 10);
- if (chunk_size == 0 || value == NULL) {
- ti->error = "Invalid chunk size";
- r = -EINVAL;
- goto bad1;
- }
-
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (s == NULL) {
- ti->error = "Cannot allocate snapshot context private "
- "structure";
- r = -ENOMEM;
- goto bad1;
- }
-
- r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
- if (r) {
- ti->error = "Cannot get origin device";
- goto bad2;
- }
-
- /* FIXME: get cow length */
- r = dm_get_device(ti, cow_path, 0, 0,
- FMODE_READ | FMODE_WRITE, &s->cow);
- if (r) {
- dm_put_device(ti, s->origin);
- ti->error = "Cannot get COW device";
- goto bad2;
- }
-
- /*
- * Chunk size must be multiple of page size. Silently
- * round up if it's not.
- */
- chunk_size = round_up(chunk_size, PAGE_SIZE >> 9);
-
- /* Validate the chunk size against the device block size */
- /* FIXME: check this, also ugly */
- blocksize = s->cow->bdev->bd_disk->queue->hardsect_size;
- if (chunk_size % (blocksize >> 9)) {
- ti->error = "Chunk size is not a multiple of device blocksize";
- r = -EINVAL;
- goto bad3;
- }
-
- /* Check chunk_size is a power of 2 */
- if (chunk_size & (chunk_size - 1)) {
- ti->error = "Chunk size is not a power of 2";
- r = -EINVAL;
- goto bad3;
- }
-
- s->chunk_size = chunk_size;
- s->chunk_mask = chunk_size - 1;
- s->type = persistent;
- for (s->chunk_shift = 0; chunk_size;
- s->chunk_shift++, chunk_size >>= 1)
- ;
- s->chunk_shift--;
-
- s->valid = 1;
- s->have_metadata = 0;
- s->last_percent = 0;
- init_rwsem(&s->lock);
- s->table = ti->table;
-
- /* Allocate hash table for COW data */
- if (init_hash_tables(s)) {
- ti->error = "Unable to allocate hash table space";
- r = -ENOMEM;
- goto bad3;
- }
-
- /*
- * Check the persistent flag - done here because we need the iobuf
- * to check the LV header
- */
- s->store.snap = s;
-
- if (persistent == 'P')
- r = dm_create_persistent(&s->store, s->chunk_size);
- else
- r = dm_create_transient(&s->store, s, blocksize);
-
- if (r) {
- ti->error = "Couldn't create exception store";
- r = -EINVAL;
- goto bad4;
- }
-
- r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
- if (r) {
- ti->error = "Could not create kcopyd client";
- goto bad5;
- }
-
- /* Add snapshot to the list of snapshots for this origin */
- if (register_snapshot(s)) {
- r = -EINVAL;
- ti->error = "Cannot register snapshot origin";
- goto bad6;
- }
-
- ti->private = s;
- return 0;
-
- bad6:
- kcopyd_client_destroy(s->kcopyd_client);
-
- bad5:
- s->store.destroy(&s->store);
-
- bad4:
- exit_exception_table(&s->pending, pending_cache);
- exit_exception_table(&s->complete, exception_cache);
-
- bad3:
- dm_put_device(ti, s->cow);
- dm_put_device(ti, s->origin);
-
- bad2:
- kfree(s);
-
- bad1:
- return r;
-}
-
-static void snapshot_dtr(struct dm_target *ti)
-{
- struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
-
- dm_table_event(ti->table);
-
- unregister_snapshot(s);
-
- exit_exception_table(&s->pending, pending_cache);
- exit_exception_table(&s->complete, exception_cache);
-
- /* Deallocate memory used */
- s->store.destroy(&s->store);
-
- dm_put_device(ti, s->origin);
- dm_put_device(ti, s->cow);
- kcopyd_client_destroy(s->kcopyd_client);
- kfree(s);
-}
-
-/*
- * We hold lists of bios, using the bi_next field.
- */
-static void queue_bio(struct bio **queue, struct bio *bio)
-{
- bio->bi_next = *queue;
- *queue = bio;
-}
-
-/*
- * FIXME: inefficient.
- */
-static void queue_bios(struct bio **queue, struct bio *bios)
-{
- while (*queue)
- queue = &((*queue)->bi_next);
-
- *queue = bios;
-}
-
-/*
- * Flush a list of buffers.
- */
-static void flush_bios(struct bio *bio)
-{
- struct bio *n;
-
- DMDEBUG("begin flush");
- while (bio) {
- n = bio->bi_next;
- bio->bi_next = NULL;
- DMDEBUG("flushing %p", bio);
- generic_make_request(bio);
- bio = n;
- }
-
- blk_run_queues();
-}
-
-/*
- * Error a list of buffers.
- */
-static void error_bios(struct bio *bio)
-{
- struct bio *n;
-
- while (bio) {
- n = bio->bi_next;
- bio->bi_next = NULL;
- bio_io_error(bio, bio->bi_size);
- bio = n;
- }
-}
-
-static struct bio *__flush_bios(struct pending_exception *pe)
-{
- struct pending_exception *sibling;
-
- if (list_empty(&pe->siblings))
- return pe->origin_bios;
-
- sibling = list_entry(pe->siblings.next,
- struct pending_exception, siblings);
-
- list_del(&pe->siblings);
-
- /* FIXME: I think there's a race on SMP machines here, add spin lock */
- queue_bios(&sibling->origin_bios, pe->origin_bios);
-
- return NULL;
-}
-
-static void check_free_space(struct dm_snapshot *s)
-{
-#if 0
- sector_t numerator, denominator;
- double n, d;
- unsigned pc;
-
- if (!s->store.fraction_full)
- return;
-
- s->store.fraction_full(&s->store, &numerator, &denominator);
- n = (double) numerator;
- d = (double) denominator;
-
- pc = (int) (n / d);
-
- if (pc >= s->last_percent + WAKE_UP_PERCENT) {
- dm_table_event(s->table);
- s->last_percent = pc - pc % WAKE_UP_PERCENT;
- }
-#endif
-}
-
-static void pending_complete(struct pending_exception *pe, int success)
-{
- struct exception *e;
- struct dm_snapshot *s = pe->snap;
- struct bio *flush = NULL;
-
- if (success) {
- e = alloc_exception();
- if (!e) {
- DMWARN("Unable to allocate exception.");
- down_write(&s->lock);
- s->store.drop_snapshot(&s->store);
- s->valid = 0;
- flush = __flush_bios(pe);
- up_write(&s->lock);
-
- error_bios(pe->snapshot_bios);
- goto out;
- }
- memcpy(e, &pe->e, sizeof(*e));
-
- /*
- * Add a proper exception, and remove the
- * in-flight exception from the list.
- */
- down_write(&s->lock);
- insert_exception(&s->complete, e);
- remove_exception(&pe->e);
- flush = __flush_bios(pe);
-
- /* Submit any pending write BHs */
- up_write(&s->lock);
-
- flush_bios(pe->snapshot_bios);
- DMDEBUG("Exception completed successfully.");
-
- /* Notify any interested parties */
- //check_free_space(s);
-
- } else {
- /* Read/write error - snapshot is unusable */
- down_write(&s->lock);
- if (s->valid)
- DMERR("Error reading/writing snapshot");
- s->store.drop_snapshot(&s->store);
- s->valid = 0;
- remove_exception(&pe->e);
- flush = __flush_bios(pe);
- up_write(&s->lock);
-
- error_bios(pe->snapshot_bios);
-
- dm_table_event(s->table);
- DMDEBUG("Exception failed.");
- }
-
- out:
- free_pending_exception(pe);
-
- if (flush)
- flush_bios(flush);
-}
-
-static void commit_callback(void *context, int success)
-{
- struct pending_exception *pe = (struct pending_exception *) context;
- pending_complete(pe, success);
-}
-
-/*
- * Called when the copy I/O has finished. kcopyd actually runs
- * this code so don't block.
- */
-static void copy_callback(int read_err, unsigned int write_err, void *context)
-{
- struct pending_exception *pe = (struct pending_exception *) context;
- struct dm_snapshot *s = pe->snap;
-
- if (read_err || write_err)
- pending_complete(pe, 0);
-
- else
- /* Update the metadata if we are persistent */
- s->store.commit_exception(&s->store, &pe->e, commit_callback,
- pe);
-}
-
-/*
- * Dispatches the copy operation to kcopyd.
- */
-static inline void start_copy(struct pending_exception *pe)
-{
- struct dm_snapshot *s = pe->snap;
- struct io_region src, dest;
- struct block_device *bdev = s->origin->bdev;
- sector_t dev_size;
-
- dev_size = get_dev_size(bdev);
-
- src.bdev = bdev;
- src.sector = chunk_to_sector(s, pe->e.old_chunk);
- src.count = min(s->chunk_size, dev_size - src.sector);
-
- dest.bdev = s->cow->bdev;
- dest.sector = chunk_to_sector(s, pe->e.new_chunk);
- dest.count = src.count;
-
- /* Hand over to kcopyd */
- DMDEBUG("starting exception copy");
- kcopyd_copy(s->kcopyd_client,
- &src, 1, &dest, 0, copy_callback, pe);
-}
-
-/*
- * Looks to see if this snapshot already has a pending exception
- * for this chunk, otherwise it allocates a new one and inserts
- * it into the pending table.
- *
- * NOTE: a write lock must be held on snap->lock before calling
- * this.
- */
-static struct pending_exception *
-__find_pending_exception(struct dm_snapshot *s, struct bio *bio)
-{
- struct exception *e;
- struct pending_exception *pe;
- chunk_t chunk = sector_to_chunk(s, bio->bi_sector);
-
- /*
- * Is there a pending exception for this already ?
- */
- e = lookup_exception(&s->pending, chunk);
- if (e) {
- /* cast the exception to a pending exception */
- pe = list_entry(e, struct pending_exception, e);
-
- } else {
- /*
- * Create a new pending exception, we don't want
- * to hold the lock while we do this.
- */
- up_write(&s->lock);
-
- pe = alloc_pending_exception();
- pe->e.old_chunk = chunk;
- pe->origin_bios = pe->snapshot_bios = NULL;
- INIT_LIST_HEAD(&pe->siblings);
- pe->snap = s;
- pe->started = 0;
-
- down_write(&s->lock);
- if (s->store.prepare_exception(&s->store, &pe->e)) {
- free_pending_exception(pe);
- s->valid = 0;
- return NULL;
- }
-
- insert_exception(&s->pending, &pe->e);
- }
-
- return pe;
-}
-
-static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
- struct bio *bio)
-{
- bio->bi_bdev = s->cow->bdev;
- bio->bi_sector = chunk_to_sector(s, e->new_chunk) +
- (bio->bi_sector & s->chunk_mask);
-}
-
-static int snapshot_map(struct dm_target *ti, struct bio *bio,
- union map_info *map_context)
-{
- struct exception *e;
- struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
- int r = 1;
- chunk_t chunk;
- struct pending_exception *pe;
-
- chunk = sector_to_chunk(s, bio->bi_sector);
-
- /* Full snapshots are not usable */
- if (!s->valid)
- return -1;
-
- /*
- * Write to snapshot - higher level takes care of RW/RO
- * flags so we should only get this if we are
- * writeable.
- */
- if (bio_rw(bio) == WRITE) {
-
- /* FIXME: should only take write lock if we need
- * to copy an exception */
- down_write(&s->lock);
-
- /* If the block is already remapped - use that, else remap it */
- e = lookup_exception(&s->complete, chunk);
- if (e) {
- remap_exception(s, e, bio);
- up_write(&s->lock);
-
- } else {
- pe = __find_pending_exception(s, bio);
-
- if (!pe) {
- s->store.drop_snapshot(&s->store);
- s->valid = 0;
- r = -EIO;
- up_write(&s->lock);
- } else {
- remap_exception(s, &pe->e, bio);
- queue_bio(&pe->snapshot_bios, bio);
-
- if (!pe->started) {
- /* this is protected by snap->lock */
- pe->started = 1;
- up_write(&s->lock);
- start_copy(pe);
- } else
- up_write(&s->lock);
- r = 0;
- }
- }
-
- } else {
- /*
- * FIXME: this read path scares me because we
- * always use the origin when we have a pending
- * exception. However I can't think of a
- * situation where this is wrong - ejt.
- */
-
- /* Do reads */
- down_read(&s->lock);
-
- /* See if it it has been remapped */
- e = lookup_exception(&s->complete, chunk);
- if (e)
- remap_exception(s, e, bio);
- else
- bio->bi_bdev = s->origin->bdev;
-
- up_read(&s->lock);
- }
-
- return r;
-}
-
-void snapshot_resume(struct dm_target *ti)
-{
- struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
-
- if (s->have_metadata)
- return;
-
- if (s->store.read_metadata(&s->store)) {
- down_write(&s->lock);
- s->valid = 0;
- up_write(&s->lock);
- }
-
- s->have_metadata = 1;
-}
-
-static int snapshot_status(struct dm_target *ti, status_type_t type,
- char *result, unsigned int maxlen)
-{
- struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
- char cow[32];
- char org[32];
-
- switch (type) {
- case STATUSTYPE_INFO:
- if (!snap->valid)
- snprintf(result, maxlen, "Invalid");
- else {
- if (snap->store.fraction_full) {
- sector_t numerator, denominator;
- snap->store.fraction_full(&snap->store,
- &numerator,
- &denominator);
- snprintf(result, maxlen,
- SECTOR_FORMAT "/" SECTOR_FORMAT,
- numerator, denominator);
- }
- else
- snprintf(result, maxlen, "Unknown");
- }
- break;
-
- case STATUSTYPE_TABLE:
- /*
- * kdevname returns a static pointer so we need
- * to make private copies if the output is to
- * make sense.
- */
- format_dev_t(cow, snap->cow->bdev->bd_dev);
- format_dev_t(org, snap->origin->bdev->bd_dev);
- snprintf(result, maxlen, "%s %s %c %lld", org, cow,
- snap->type, snap->chunk_size);
- break;
- }
-
- return 0;
-}
-
-/*-----------------------------------------------------------------
- * Origin methods
- *---------------------------------------------------------------*/
-static void list_merge(struct list_head *l1, struct list_head *l2)
-{
- struct list_head *l1_n, *l2_p;
-
- l1_n = l1->next;
- l2_p = l2->prev;
-
- l1->next = l2;
- l2->prev = l1;
-
- l2_p->next = l1_n;
- l1_n->prev = l2_p;
-}
-
-static int __origin_write(struct list_head *snapshots, struct bio *bio)
-{
- int r = 1, first = 1;
- struct list_head *sl;
- struct dm_snapshot *snap;
- struct exception *e;
- struct pending_exception *pe, *last = NULL;
- chunk_t chunk;
-
- /* Do all the snapshots on this origin */
- list_for_each(sl, snapshots) {
- snap = list_entry(sl, struct dm_snapshot, list);
-
- /* Only deal with valid snapshots */
- if (!snap->valid)
- continue;
-
- down_write(&snap->lock);
-
- /*
- * Remember, different snapshots can have
- * different chunk sizes.
- */
- chunk = sector_to_chunk(snap, bio->bi_sector);
-
- /*
- * Check exception table to see if block
- * is already remapped in this snapshot
- * and trigger an exception if not.
- */
- e = lookup_exception(&snap->complete, chunk);
- if (!e) {
- pe = __find_pending_exception(snap, bio);
- if (!pe) {
- snap->store.drop_snapshot(&snap->store);
- snap->valid = 0;
-
- } else {
- if (last)
- list_merge(&pe->siblings,
- &last->siblings);
-
- last = pe;
- r = 0;
- }
- }
-
- up_write(&snap->lock);
- }
-
- /*
- * Now that we have a complete pe list we can start the copying.
- */
- if (last) {
- pe = last;
- do {
- down_write(&pe->snap->lock);
- if (first)
- queue_bio(&pe->origin_bios, bio);
-
-#if 0
- if (!pe->started) {
- pe->started = 1;
- up_write(&pe->snap->lock);
- start_copy(pe);
- } else
- up_write(&pe->snap->lock);
-#else
- pe->started = 1;
- up_write(&pe->snap->lock);
- start_copy(pe);
-#endif
- first = 0;
- pe = list_entry(pe->siblings.next,
- struct pending_exception, siblings);
-
- } while (pe != last);
- }
-
- return r;
-}
-
-/*
- * Called on a write from the origin driver.
- */
-int do_origin(struct dm_dev *origin, struct bio *bio)
-{
- struct origin *o;
- int r;
-
- down_read(&_origins_lock);
- o = __lookup_origin(origin->bdev);
- if (!o)
- BUG();
-
- r = __origin_write(&o->snapshots, bio);
- up_read(&_origins_lock);
-
- return r;
-}
-
-/*
- * Origin: maps a linear range of a device, with hooks for snapshotting.
- */
-
-/*
- * Construct an origin mapping: <dev_path>
- * The context for an origin is merely a 'struct dm_dev *'
- * pointing to the real device.
- */
-static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
-{
- int r;
- struct dm_dev *dev;
-
- if (argc != 1) {
- ti->error = "dm-origin: incorrect number of arguments";
- return -EINVAL;
- }
-
- r = dm_get_device(ti, argv[0], 0, ti->len,
- dm_table_get_mode(ti->table), &dev);
- if (r) {
- ti->error = "Cannot get target device";
- return r;
- }
-
- ti->private = dev;
- return 0;
-}
-
-static void origin_dtr(struct dm_target *ti)
-{
- struct dm_dev *dev = (struct dm_dev *) ti->private;
- dm_put_device(ti, dev);
-}
-
-static int origin_map(struct dm_target *ti, struct bio *bio,
- union map_info *map_context)
-{
- struct dm_dev *dev = (struct dm_dev *) ti->private;
- bio->bi_bdev = dev->bdev;
-
- /* Only tell snapshots if this is a write */
- return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1;
-}
-
-static int origin_status(struct dm_target *ti, status_type_t type, char *result,
- unsigned int maxlen)
-{
- struct dm_dev *dev = (struct dm_dev *) ti->private;
- char buffer[32];
-
- switch (type) {
- case STATUSTYPE_INFO:
- result[0] = '\0';
- break;
-
- case STATUSTYPE_TABLE:
- format_dev_t(buffer, dev->bdev->bd_dev);
- snprintf(result, maxlen, "%s", buffer);
- break;
- }
-
- return 0;
-}
-
-static struct target_type origin_target = {
- name: "snapshot-origin",
- module: THIS_MODULE,
- ctr: origin_ctr,
- dtr: origin_dtr,
- map: origin_map,
- status: origin_status,
-};
-
-static struct target_type snapshot_target = {
- name: "snapshot",
- module: THIS_MODULE,
- ctr: snapshot_ctr,
- dtr: snapshot_dtr,
- map: snapshot_map,
- resume: snapshot_resume,
- status: snapshot_status,
-};
-
-static int __init dm_snapshot_init(void)
-{
- int r;
-
- r = dm_register_target(&snapshot_target);
- if (r) {
- DMERR("snapshot target register failed %d", r);
- return r;
- }
-
- r = dm_register_target(&origin_target);
- if (r < 0) {
- DMERR("Device mapper: Origin: register failed %d\n", r);
- goto bad1;
- }
-
- r = init_origin_hash();
- if (r) {
- DMERR("init_origin_hash failed.");
- goto bad2;
- }
-
- exception_cache = kmem_cache_create("dm-snapshot-ex",
- sizeof(struct exception),
- __alignof__(struct exception),
- 0, NULL, NULL);
- if (!exception_cache) {
- DMERR("Couldn't create exception cache.");
- r = -ENOMEM;
- goto bad3;
- }
-
- pending_cache =
- kmem_cache_create("dm-snapshot-in",
- sizeof(struct pending_exception),
- __alignof__(struct pending_exception),
- 0, NULL, NULL);
- if (!pending_cache) {
- DMERR("Couldn't create pending cache.");
- r = -ENOMEM;
- goto bad4;
- }
-
- pending_pool = mempool_create(128, mempool_alloc_slab,
- mempool_free_slab, pending_cache);
- if (!pending_pool) {
- DMERR("Couldn't create pending pool.");
- r = -ENOMEM;
- goto bad5;
- }
-
- return 0;
-
- bad5:
- kmem_cache_destroy(pending_cache);
- bad4:
- kmem_cache_destroy(exception_cache);
- bad3:
- exit_origin_hash();
- bad2:
- dm_unregister_target(&origin_target);
- bad1:
- dm_unregister_target(&snapshot_target);
- return r;
-}
-
-static void __exit dm_snapshot_exit(void)
-{
- int r;
-
- r = dm_unregister_target(&snapshot_target);
- if (r)
- DMERR("snapshot unregister failed %d", r);
-
- r = dm_unregister_target(&origin_target);
- if (r)
- DMERR("origin unregister failed %d", r);
-
- exit_origin_hash();
- mempool_destroy(pending_pool);
- kmem_cache_destroy(pending_cache);
- kmem_cache_destroy(exception_cache);
-}
-
-/* Module hooks */
-module_init(dm_snapshot_init);
-module_exit(dm_snapshot_exit);
-
-MODULE_DESCRIPTION(DM_NAME " snapshot target");
-MODULE_AUTHOR("Joe Thornber");
-MODULE_LICENSE("GPL");
diff -Naur linux-2.6.0-test9a/drivers/md/dm-snapshot.h linux-2.6.0-test9b/drivers/md/dm-snapshot.h
--- linux-2.6.0-test9a/drivers/md/dm-snapshot.h 2003-10-27 11:20:09.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-snapshot.h 1969-12-31 18:00:00.000000000 -0600
@@ -1,161 +0,0 @@
-/*
- * dm-snapshot.c
- *
- * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
- *
- * This file is released under the GPL.
- */
-
-#ifndef DM_SNAPSHOT_H
-#define DM_SNAPSHOT_H
-
-#include "dm.h"
-#include <linux/blkdev.h>
-
-struct exception_table {
- uint32_t hash_mask;
- struct list_head *table;
-};
-
-/*
- * The snapshot code deals with largish chunks of the disk at a
- * time. Typically 64k - 256k.
- */
-/* FIXME: can we get away with limiting these to a uint32_t ? */
-typedef sector_t chunk_t;
-
-/*
- * An exception is used where an old chunk of data has been
- * replaced by a new one.
- */
-struct exception {
- struct list_head hash_list;
-
- chunk_t old_chunk;
- chunk_t new_chunk;
-};
-
-/*
- * Abstraction to handle the meta/layout of exception stores (the
- * COW device).
- */
-struct exception_store {
-
- /*
- * Destroys this object when you've finished with it.
- */
- void (*destroy) (struct exception_store *store);
-
- /*
- * The target shouldn't read the COW device until this is
- * called.
- */
- int (*read_metadata) (struct exception_store *store);
-
- /*
- * Find somewhere to store the next exception.
- */
- int (*prepare_exception) (struct exception_store *store,
- struct exception *e);
-
- /*
- * Update the metadata with this exception.
- */
- void (*commit_exception) (struct exception_store *store,
- struct exception *e,
- void (*callback) (void *, int success),
- void *callback_context);
-
- /*
- * The snapshot is invalid, note this in the metadata.
- */
- void (*drop_snapshot) (struct exception_store *store);
-
- /*
- * Return how full the snapshot is.
- */
- void (*fraction_full) (struct exception_store *store,
- sector_t *numerator,
- sector_t *denominator);
-
- struct dm_snapshot *snap;
- void *context;
-};
-
-struct dm_snapshot {
- struct rw_semaphore lock;
- struct dm_table *table;
-
- struct dm_dev *origin;
- struct dm_dev *cow;
-
- /* List of snapshots per Origin */
- struct list_head list;
-
- /* Size of data blocks saved - must be a power of 2 */
- chunk_t chunk_size;
- chunk_t chunk_mask;
- chunk_t chunk_shift;
-
- /* You can't use a snapshot if this is 0 (e.g. if full) */
- int valid;
- int have_metadata;
-
- /* Used for display of table */
- char type;
-
- /* The last percentage we notified */
- int last_percent;
-
- struct exception_table pending;
- struct exception_table complete;
-
- /* The on disk metadata handler */
- struct exception_store store;
-
- struct kcopyd_client *kcopyd_client;
-};
-
-/*
- * Used by the exception stores to load exceptions hen
- * initialising.
- */
-int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
-
-/*
- * Constructor and destructor for the default persistent
- * store.
- */
-int dm_create_persistent(struct exception_store *store, uint32_t chunk_size);
-
-int dm_create_transient(struct exception_store *store,
- struct dm_snapshot *s, int blocksize);
-
-/*
- * Return the number of sectors in the device.
- */
-static inline sector_t get_dev_size(struct block_device *bdev)
-{
- return bdev->bd_inode->i_size >> SECTOR_SHIFT;
-}
-
-static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
-{
- return (sector & ~s->chunk_mask) >> s->chunk_shift;
-}
-
-static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
-{
- return chunk << s->chunk_shift;
-}
-
-static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
-{
- /*
- * There is only ever one instance of a particular block
- * device so we can compare pointers safely.
- */
- return lhs == rhs;
-}
-
-#endif
diff -Naur linux-2.6.0-test9a/drivers/md/dm-table.c linux-2.6.0-test9b/drivers/md/dm-table.c
--- linux-2.6.0-test9a/drivers/md/dm-table.c 2003-10-27 11:20:08.000000000 -0600
+++ linux-2.6.0-test9b/drivers/md/dm-table.c 2003-10-27 11:25:22.000000000 -0600
@@ -824,6 +824,7 @@
}
+EXPORT_SYMBOL(dm_vcalloc);
EXPORT_SYMBOL(dm_get_device);
EXPORT_SYMBOL(dm_put_device);
EXPORT_SYMBOL(dm_table_event);
More information about the dm-devel mailing list