[dm-devel] Re: [PATCH] dm: raid1 block-on-error patch

Jonathan Brassow jbrassow at redhat.com
Fri Apr 25 15:58:13 UTC 2008


Looks good, but be sure to comment on the new 'U' character in the  
comments for device_status_char()

  brassow

On Apr 20, 2008, at 4:34 PM, malahal at us.ibm.com wrote:

> Removed uevent related changes. "block_on_error" feature implicitly  
> sets
> "handle_errors" feature.
>
> --Malahal.
>
> This patch generates an event on a device failure and does NOT process
> further writes until it receives 'unblock' message. LVM or other tools
> are expected to get the mirorset status upon receiving the above event
> and record the failed device in their metadata, and then send the
> 'unblock' message to the dm-raid1 target.
>
> This would help LVM select the right master device at mirror logical
> volume activation/load time.
>
> Signed-off-by: Malahal Naineni <malahal at us.ibm.com>
>
> diff -r bfb50ef53671 drivers/md/dm-raid1.c
> --- a/drivers/md/dm-raid1.c	Mon Mar 31 10:13:13 2008 -0700
> +++ b/drivers/md/dm-raid1.c	Sun Apr 20 13:34:00 2008 -0700
> @@ -26,8 +26,10 @@
> #define DM_MSG_PREFIX "raid1"
> #define DM_IO_PAGES 64
>
> -#define DM_RAID1_HANDLE_ERRORS 0x01
> +#define DM_RAID1_HANDLE_ERRORS  0x01
> +#define DM_RAID1_BLOCK_ON_ERROR 0x02
> #define errors_handled(p)	((p)->features & DM_RAID1_HANDLE_ERRORS)
> +#define block_on_error(p)	((p)->features & DM_RAID1_BLOCK_ON_ERROR)
>
> static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
>
> @@ -148,6 +150,7 @@ struct mirror_set {
> 	region_t nr_regions;
> 	int in_sync;
> 	int log_failure;
> +	int write_blocked;
> 	atomic_t suspend;
>
> 	atomic_t default_mirror;	/* Default mirror */
> @@ -706,6 +709,7 @@ static void fail_mirror(struct mirror *m
> {
> 	struct mirror_set *ms = m->ms;
> 	struct mirror *new;
> +	unsigned long flags;
>
> 	if (!errors_handled(ms))
> 		return;
> @@ -719,6 +723,18 @@ static void fail_mirror(struct mirror *m
>
> 	if (test_and_set_bit(error_type, &m->error_type))
> 		return;
> +
> +	/*
> +	 * Make sure that device failure is recorded in the metadata
> +	 * before allowing any new writes. Agent acting on the following
> +	 * event should query the status of the mirrorset, update
> +	 * metadata accordingly and then send the unblock message.
> +	 */
> +	if (block_on_error(ms)) {
> +		spin_lock_irqsave(&ms->lock, flags);
> +		ms->write_blocked = 1;
> +		spin_unlock_irqrestore(&ms->lock, flags);
> +	}
>
> 	if (m != get_default_mirror(ms))
> 		goto out;
> @@ -835,6 +851,7 @@ static void do_recovery(struct mirror_se
> 	int r;
> 	struct region *reg;
> 	struct dm_dirty_log *log = ms->rh.log;
> +	struct mirror *m;
>
> 	/*
> 	 * Start quiescing some regions.
> @@ -855,6 +872,10 @@ static void do_recovery(struct mirror_se
> 	 */
> 	if (!ms->in_sync &&
> 	    (log->type->get_sync_count(log) == ms->nr_regions)) {
> +		for (m = ms->mirror; m < ms->mirror + ms->nr_mirrors; m++) {
> +			atomic_set(&m->error_count, 0);
> +			m->error_type = 0;
> +		}
> 		/* the sync is complete */
> 		dm_table_event(ms->ti->table);
> 		ms->in_sync = 1;
> @@ -1140,6 +1161,13 @@ static void do_writes(struct mirror_set
> 	if (!writes->head)
> 		return;
>
> +	if (ms->write_blocked) {
> +		spin_lock_irq(&ms->lock);
> +		bio_list_merge(&ms->writes, writes);
> +		spin_unlock_irq(&ms->lock);
> +		return;
> +	}
> +
> 	/*
> 	 * Classify each write.
> 	 */
> @@ -1202,6 +1230,13 @@ static void do_failures(struct mirror_se
>
> 	if (!failures->head)
> 		return;
> +
> +	if (ms->write_blocked) {
> +		spin_lock_irq(&ms->lock);
> +		bio_list_merge(&ms->failures, failures);
> +		spin_unlock_irq(&ms->lock);
> +		return;
> +	}
>
> 	if (!ms->log_failure) {
> 		while ((bio = bio_list_pop(failures)))
> @@ -1327,6 +1362,7 @@ static struct mirror_set *alloc_context(
> 	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
> 	ms->in_sync = 0;
> 	ms->log_failure = 0;
> +	ms->write_blocked = 0;
> 	atomic_set(&ms->suspend, 0);
> 	atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
>
> @@ -1448,6 +1484,7 @@ static int parse_features(struct mirror_
> {
> 	unsigned num_features;
> 	struct dm_target *ti = ms->ti;
> +	int i;
>
> 	*args_used = 0;
>
> @@ -1458,24 +1495,26 @@ static int parse_features(struct mirror_
> 		ti->error = "Invalid number of features";
> 		return -EINVAL;
> 	}
> +	argv++, argc--;
>
> -	argc--;
> -	argv++;
> -	(*args_used)++;
> -
> -	if (num_features > argc) {
> +	if (argc < num_features) {
> 		ti->error = "Not enough arguments to support feature count";
> 		return -EINVAL;
> 	}
>
> -	if (!strcmp("handle_errors", argv[0]))
> -		ms->features |= DM_RAID1_HANDLE_ERRORS;
> -	else {
> -		ti->error = "Unrecognised feature requested";
> -		return -EINVAL;
> +	for (i = 0; i < num_features; i++) {
> +		if (!strcmp("handle_errors", argv[i]))
> +			ms->features |= DM_RAID1_HANDLE_ERRORS;
> +		else if (!strcmp("block_on_error", argv[i])) {
> +			ms->features |= DM_RAID1_BLOCK_ON_ERROR;
> +			ms->features |= DM_RAID1_HANDLE_ERRORS;
> +		} else {
> +			ti->error = "Unrecognised feature requested";
> +			return -EINVAL;
> +		}
> 	}
>
> -	(*args_used)++;
> +	*args_used = 1 + num_features;
>
> 	return 0;
> }
> @@ -1798,8 +1837,14 @@ static void mirror_resume(struct dm_targ
>  */
> static char device_status_char(struct mirror *m)
> {
> -	if (!atomic_read(&(m->error_count)))
> -		return 'A';
> +	struct mirror_set *ms = m->ms;
> +
> +	if (atomic_read(&m->error_count) == 0) {
> +		if (ms->in_sync || get_default_mirror(ms) == m)
> +			return 'A';
> +		else
> +			return 'U';
> +	}
>
> 	return (test_bit(DM_RAID1_WRITE_ERROR, &(m->error_type))) ? 'D' :
> 		(test_bit(DM_RAID1_SYNC_ERROR, &(m->error_type))) ? 'S' :
> @@ -1840,10 +1885,74 @@ static int mirror_status(struct dm_targe
> 			DMEMIT(" %s %llu", ms->mirror[m].dev->name,
> 			       (unsigned long long)ms->mirror[m].offset);
>
> -		if (ms->features & DM_RAID1_HANDLE_ERRORS)
> +		/*
> +		 * 'handle_errors' is implicit with 'block_on_error', so
> +		 * check block_on_error first.
> +		 */
> +		if (block_on_error(ms))
> +			DMEMIT(" 1 block_on_error");
> +		else if (errors_handled(ms))
> 			DMEMIT(" 1 handle_errors");
> 	}
>
> +	return 0;
> +}
> +
> +/* unblock message handler
> + *
> + * This message has the mirror device recorded states. If they don't
> + * agree to the actual state in the target, we regenerate uvent. If  
> the
> + * recorded state and the actual state of each device is same, we
> + * unblock the mirrorset to allow writes.
> + */
> +static int mirror_message(struct dm_target *ti, unsigned argc, char  
> **argv)
> +{
> +	struct mirror_set *ms = (struct mirror_set *) ti->private;
> +	char device_status;
> +	char *name;	/* major:minor format */
> +	int i;
> +
> +	if (!block_on_error(ms))
> +		return -EINVAL;
> +	if (argc < 1 || strnicmp(argv[0], "unblock", sizeof("unblock")))
> +		return -EINVAL;
> +	argv++;
> +	argc--;
> +
> +	spin_lock_irq(&ms->lock);
> +	if (!ms->write_blocked)
> +		DMWARN("Received unblock message when not blocked!");
> +	if (argc != 2 * ms->nr_mirrors)
> +		goto error;
> +
> +	for (i = 0; i < ms->nr_mirrors; i++) {
> +		name = argv[2 * i];
> +		if (strncmp(name, ms->mirror[i].dev->name,
> +			   sizeof(ms->mirror[i].dev->name))) {
> +			DMWARN("name %s doesn't match name %s\n", name,
> +			       (ms->mirror[i].dev->name));
> +			goto error;
> +		}
> +		if (sscanf(argv[2 * i + 1], "%c", &device_status) != 1) {
> +			DMWARN("incorrect recorded state value");
> +			goto error;
> +		}
> +
> +		/* Re-generate event if the actual device state has
> +		 * changed since we last reported.
> +		 */
> +		if (device_status != device_status_char(&ms->mirror[i]))
> +			goto error;
> +	}
> +	ms->write_blocked = 0;
> +	spin_unlock_irq(&ms->lock);
> +	wake(ms);
> +	return 0;
> +
> +error:
> +	/* Regenerate the event */
> +	spin_unlock_irq(&ms->lock);
> +	schedule_work(&ms->trigger_event);
> 	return 0;
> }
>
> @@ -1859,6 +1968,7 @@ static struct target_type mirror_target
> 	.postsuspend = mirror_postsuspend,
> 	.resume	 = mirror_resume,
> 	.status	 = mirror_status,
> +	.message = mirror_message,
> };
>
> static int __init dm_mirror_init(void)




More information about the dm-devel mailing list