[lvm-devel] [PATCH v2] Add --align parameter to pvcreate

Dave Wysochanski dwysocha at redhat.com
Mon Feb 2 16:28:48 UTC 2009


On Fri, 2009-01-30 at 12:56 +0100, Milan Broz wrote:
> (added lvm.conf value and fixed precedence when restoring from backup)
> 
> --
> Add --align parameter to pvcreate.
> 
> Some RAID controllers suggest align volumes
> to stripe or chunk size.
> 
> Currently lvm2 align LVs to underlying MD device chunk automatically,
> but there is no proper way to set this parameter directly
> for other RAID drivers.
> 
> Note that it is not as the same as metadata area size.
> 
> I think we should provide possibility for system administrator
> to force PV data payload alignment during PV creation.
> (Also it is adminitrator or distro installator responsibility
> to set proper physical extent size later during vgcreate.)
> 
> Patch adds --align to pvcreate (default is size in KB).
> Also it add pv_alignment to device section of lvm.conf,
> where the default PV alignment can be specified.
> 
> pe_align() function now takes force_align parameter, which
> can overwite data aligmnent if set to non-zero.
> 
> Patch also fixes that after vgremove the orphan PV data align
> value is not rewritten by default.
> 
> Signed-off-by: Milan Broz <mbroz at redhat.com>
> ---
>  WHATS_NEW                        |    1 +
>  doc/example.conf                 |   16 +++++++++---
>  lib/format_text/format-text.c    |   13 +++++++---
>  lib/metadata/metadata-exported.h |    1 +
>  lib/metadata/metadata.c          |   50 ++++++++++++++++++++++++++++++-------
>  lib/metadata/metadata.h          |    2 +-
>  man/pvcreate.8.in                |    7 +++++
>  tools/args.h                     |    1 +
>  tools/commands.h                 |    5 ++-
>  tools/pvcreate.c                 |   18 +++++++++++++-
>  tools/vgconvert.c                |    2 +-
>  11 files changed, 93 insertions(+), 23 deletions(-)
> 
> diff --git a/WHATS_NEW b/WHATS_NEW
> index 29c3cd2..c9b3d8e 100644
> --- a/WHATS_NEW
> +++ b/WHATS_NEW
> @@ -1,5 +1,6 @@
>  Version 2.02.45 - 
>  ===================================
> +  Add --align parameter to pvcreate to allow specify payload align directly.
>    Replace internal vg_check_status() implementation.
>    Rename vg_read() to vg_read_internal().
>  
> diff --git a/doc/example.conf b/doc/example.conf
> index 16cefb2..3bcfff0 100644
> --- a/doc/example.conf
> +++ b/doc/example.conf
> @@ -86,7 +86,7 @@ devices {
>      # If sysfs is mounted (2.6 kernels) restrict device scanning to 
>      # the block devices it believes are valid.
>      # 1 enables; 0 disables.
> -    sysfs_scan = 1	
> +    sysfs_scan = 1
>  
>      # By default, LVM2 will ignore devices used as components of
>      # software RAID (md) devices by looking for md superblocks.
> @@ -98,6 +98,14 @@ devices {
>      # 1 enables; 0 disables.
>      md_chunk_alignment = 1
>  
> +    # Default alignment (in KB) for PV, data payload will be aligned
> +    # to multiple of this boundary.
> +    # If not defined (or set to zero) alignment will be 64k or page size,
> +    # if page size is bigger.
> +    # Also note that specialized alignment (md_chunk_alignment) takes
> +    # precedence before this setting.
> +    #pv_alignment = 4096
> +
>      # If, while scanning the system for PVs, LVM2 encounters a device-mapper
>      # device that has its I/O suspended, it waits for it to become accessible.
>      # Set this to 1 to skip such devices.  This should only be needed
> @@ -129,7 +137,7 @@ log {
>      # There are 6 syslog-like log levels currently in use - 2 to 7 inclusive.
>      # 7 is the most verbose (LOG_DEBUG).
>      level = 0
> -    
> +
>      # Format of output messages
>      # Whether or not (1 or 0) to indent messages according to their severity
>      indent = 1
> @@ -175,7 +183,7 @@ backup {
>      # Where should archived files go ?
>      # Remember to back up this directory regularly!
>      archive_dir = "/etc/lvm/archive"
> -    
> +
>      # What is the minimum number of archive files you wish to keep ?
>      retain_min = 10
>  
> @@ -193,7 +201,7 @@ shell {
>  
>  # Miscellaneous global LVM2 settings
>  global {
> -    
> +
>      # The file creation mask for any files and directories created.
>      # Interpreted as octal if the first digit is zero.
>      umask = 077

Uintended whitespace changes?

> diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c
> index 428b5e2..74c8759 100644
> --- a/lib/format_text/format-text.c
> +++ b/lib/format_text/format-text.c
> @@ -1182,7 +1182,7 @@ static int _mda_setup(const struct format_type *fmt,
>  	if (!pvmetadatacopies)
>  		return 1;
>  
> -	alignment = pe_align(pv) << SECTOR_SHIFT;
> +	alignment = pe_align(pv, 0) << SECTOR_SHIFT;
>  	disk_size = pv->size << SECTOR_SHIFT;
>  	pe_start <<= SECTOR_SHIFT;
>  	pe_end <<= SECTOR_SHIFT;
> @@ -1347,9 +1347,14 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume
>  	else
>  		dm_list_init(&info->das);
>  
> +	/*
> +	 * Keep PV data alignment if already set
> +	 */
> +	if (pv->pe_start < pe_align(pv, 0))
> +		pv->pe_start = pe_align(pv, 0);
> +
>  	/* Set pe_start to first aligned sector after any metadata
>  	 * areas that begin before pe_start */
> -	pv->pe_start = pe_align(pv);
>  	dm_list_iterate_items(mda, &info->mdas) {
>  		mdac = (struct mda_context *) mda->metadata_locn;
>  		if (pv->dev == mdac->area.dev &&
> @@ -1358,9 +1363,9 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume
>  		     (pv->pe_start << SECTOR_SHIFT))) {
>  			pv->pe_start = (mdac->area.start + mdac->area.size)
>  			    >> SECTOR_SHIFT;
> -			adjustment = pv->pe_start % pe_align(pv);
> +			adjustment = pv->pe_start % pe_align(pv, 0);
>  			if (adjustment)
> -				pv->pe_start += (pe_align(pv) - adjustment);
> +				pv->pe_start += (pe_align(pv, 0) - adjustment);
>  		}
>  	}
>  	if (!add_da
> diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h
> index e785383..b25e30d 100644
> --- a/lib/metadata/metadata-exported.h
> +++ b/lib/metadata/metadata-exported.h
> @@ -407,6 +407,7 @@ pv_t *pv_create(const struct cmd_context *cmd,
>  		      struct device *dev,
>  		      struct id *id,
>  		      uint64_t size,
> +		      unsigned long req_pe_align,
>  		      uint64_t pe_start,
>  		      uint32_t existing_extent_count,
>  		      uint32_t existing_extent_size,
> diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c
> index 0130bd3..cf7cb70 100644
> --- a/lib/metadata/metadata.c
> +++ b/lib/metadata/metadata.c
> @@ -46,6 +46,7 @@ static struct physical_volume *_pv_read(struct cmd_context *cmd,
>  static struct physical_volume *_pv_create(const struct format_type *fmt,
>  				  struct device *dev,
>  				  struct id *id, uint64_t size,
> +				  unsigned long align,
>  				  uint64_t pe_start,
>  				  uint32_t existing_extent_count,
>  				  uint32_t existing_extent_size,
> @@ -65,19 +66,33 @@ static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
>  static struct physical_volume *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
>  						      const struct id *id);
>  
> -unsigned long pe_align(struct physical_volume *pv)
> +unsigned long pe_align(struct physical_volume *pv, unsigned long force_pe_align)


Inconsistent naming?  You have 'req_pe_align', 'align', and
'force_pe_align'.



>  {
> -	if (pv->pe_align)
> +	if (pv->pe_align && !force_pe_align)
>  		goto out;
>  
> -	pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
> +	if (force_pe_align)
> +		pv->pe_align = force_pe_align;
> +	else 
> +		/*
> +		 * Config value is in KB
> +		 */
> +		pv->pe_align = (find_config_tree_int(pv->fmt->cmd,
> +				"devices/pv_alignment", 0) * 1024) >> SECTOR_SHIFT;
>  
>  	/*
> -	 * Align to chunk size of underlying md device if present
> +	 * Use old default if not specified
>  	 */
> +	if (!pv->pe_align)
> +		pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
> +
> +
>  	if (!pv->dev)
>  		goto out;
>  
> +	/*
> +	 * Align to chunk size of underlying md device if present
> +	 */
>  	if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
>  				  DEFAULT_MD_CHUNK_ALIGNMENT))
>  		pv->pe_align = MAX(pv->pe_align,
> @@ -148,8 +163,8 @@ int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
>  
>  	/* FIXME Do proper rounding-up alignment? */
>  	/* Reserved space for label; this holds 0 for PVs created by LVM1 */
> -	if (pv->pe_start < pe_align(pv))
> -		pv->pe_start = pe_align(pv);
> +	if (pv->pe_start < pe_align(pv, 0))
> +		pv->pe_start = pe_align(pv, 0);
>  
>  	/*
>  	 * pe_count must always be calculated by pv_setup
> @@ -759,6 +774,7 @@ int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
>   * @dev: PV device to initialize
>   * @id: PV UUID to use for initialization
>   * @size: size of the PV in sectors
> + * @req_pe_align: requested alignment of payload
>   * @pe_start: physical extent start
>   * @existing_extent_count
>   * @existing_extent_size
> @@ -776,13 +792,14 @@ int vg_split_mdas(struct cmd_context *cmd __attribute((unused)),
>  pv_t *pv_create(const struct cmd_context *cmd,
>  		struct device *dev,
>  		struct id *id, uint64_t size,
> +		unsigned long req_pe_align,
>  		uint64_t pe_start,
>  		uint32_t existing_extent_count,
>  		uint32_t existing_extent_size,
>  		int pvmetadatacopies,
>  		uint64_t pvmetadatasize, struct dm_list *mdas)
>  {
> -	return _pv_create(cmd->fmt, dev, id, size, pe_start,
> +	return _pv_create(cmd->fmt, dev, id, size, req_pe_align, pe_start,
>  			  existing_extent_count,
>  			  existing_extent_size,
>  			  pvmetadatacopies,
> @@ -826,6 +843,7 @@ static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev
>  static struct physical_volume *_pv_create(const struct format_type *fmt,
>  				  struct device *dev,
>  				  struct id *id, uint64_t size,
> +				  unsigned long req_pe_align,
>  				  uint64_t pe_start,
>  				  uint32_t existing_extent_count,
>  				  uint32_t existing_extent_size,
> @@ -860,15 +878,25 @@ static struct physical_volume *_pv_create(const struct format_type *fmt,
>  		pv->size = size;
>  	}
>  
> -	if (pv->size < PV_MIN_SIZE) {
> -		log_error("%s: Size must exceed minimum of %ld sectors.",
> -			  pv_dev_name(pv), PV_MIN_SIZE);
> +	if (pv->size < (req_pe_align + PV_MIN_SIZE)) {
> +		log_error("%s: Size %smust exceed minimum of %ld sectors.",
> +			  pv_dev_name(pv),
> +			  req_pe_align ? "after data alignment " : "",
> +			  PV_MIN_SIZE);
>  		goto bad;
>  	}
>  
>  	pv->fmt = fmt;
>  	pv->vg_name = fmt->orphan_vg_name;
>  
> +	/*
> +	 * Force data alignment if specified
> +	 */
> +	if (req_pe_align && pe_align(pv, req_pe_align) != req_pe_align)
> +		log_warn("%s: Overriding data aligmnent to %lu sectors"
> +			 " (requested %lu sectors)", pv_dev_name(pv),
> +			 pe_align(pv, 0), req_pe_align);
> +
>  	if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
>  				existing_extent_size,
>  				pvmetadatacopies, pvmetadatasize, mdas,
> @@ -877,6 +905,8 @@ static struct physical_volume *_pv_create(const struct format_type *fmt,
>  			  "failed.", pv_dev_name(pv));
>  		goto bad;
>  	}
> +
> +
>  	return pv;
>  
>        bad:
> diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h
> index c425fbf..c982b78 100644
> --- a/lib/metadata/metadata.h
> +++ b/lib/metadata/metadata.h
> @@ -263,7 +263,7 @@ struct format_handler {
>  /*
>   * Utility functions
>   */
> -unsigned long pe_align(struct physical_volume *pv);
> +unsigned long pe_align(struct physical_volume *pv, unsigned long force_pe_align);
>  int vg_validate(struct volume_group *vg);
>  
>  int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv);
> diff --git a/man/pvcreate.8.in b/man/pvcreate.8.in
> index 7ecda56..bfecc5b 100644
> --- a/man/pvcreate.8.in
> +++ b/man/pvcreate.8.in
> @@ -13,6 +13,7 @@ pvcreate \- initialize a disk or partition for use by LVM
>  .RB [ \-M | \-\-metadatatype type ]
>  .RB [ \-\-metadatacopies #copies ]
>  .RB [ \-\-metadatasize size ]
> +.RB [ \-\-align size ]
>  .RB [ \-\-restorefile file ]
>  .RB [ \-\-setphysicalvolumesize size ]
>  .RB [ \-u | \-\-uuid uuid ]
> @@ -89,6 +90,12 @@ to see where the metadata areas are placed.
>  The approximate amount of space to be set aside for each metadata area.
>  (The size you specify may get rounded.)
>  .TP
> +.BR \-\-align " size"
> +Force align start of the payload to specified boundary (to align
> +with underlying RAID device stripe or chunk).
> +Note that you should use properly sized physical extent later
> +in vgcreate to correctly align all Logical Volumes start too.
> +.TP

Might be worth adding a sentence or two of how to query / derive this
value.  I would guess someone will use this value, then want to verify
it is set correctly.  I know we don't store it so we can't add it to the
reporting code as a field but we can point out the use of pe_start and
pe_size.


>  .BR \-\-metadatacopies " copies"
>  The number of metadata areas to set aside on each PV.  Currently
>  this can be 0, 1 or 2.  
> diff --git a/tools/args.h b/tools/args.h
> index 8f026fc..776fc0c 100644
> --- a/tools/args.h
> +++ b/tools/args.h
> @@ -56,6 +56,7 @@ arg(ignoremonitoring_ARG, '\0', "ignoremonitoring", NULL, 0)
>  arg(nameprefixes_ARG, '\0', "nameprefixes", NULL, 0)
>  arg(unquoted_ARG, '\0', "unquoted", NULL, 0)
>  arg(rows_ARG, '\0', "rows", NULL, 0)
> +arg(physicalvolumealign_ARG, '\0', "align", size_kb_arg, 0)
>  
>  /* Allow some variations */
>  arg(resizable_ARG, '\0', "resizable", yes_no_arg, 0)
> diff --git a/tools/commands.h b/tools/commands.h
> index 58c6156..4007639 100644
> --- a/tools/commands.h
> +++ b/tools/commands.h
> @@ -462,6 +462,7 @@ xx(pvcreate,
>     "\t[-M|--metadatatype 1|2]" "\n"
>     "\t[--metadatacopies #copies]" "\n"
>     "\t[--metadatasize MetadataSize[kKmMgGtTpPeE]]" "\n"
> +   "\t[--align Size[kKmM]]" "\n"
>     "\t[--setphysicalvolumesize PhysicalVolumeSize[kKmMgGtTpPeE]" "\n"
>     "\t[-t|--test] " "\n"
>     "\t[-u|--uuid uuid] " "\n"
> @@ -472,8 +473,8 @@ xx(pvcreate,
>     "\tPhysicalVolume [PhysicalVolume...]\n",
>  
>     force_ARG, test_ARG, labelsector_ARG, metadatatype_ARG, metadatacopies_ARG,
> -   metadatasize_ARG, physicalvolumesize_ARG, restorefile_ARG, uuidstr_ARG,
> -   yes_ARG, zero_ARG)
> +   metadatasize_ARG, physicalvolumealign_ARG, physicalvolumesize_ARG,
> +   restorefile_ARG, uuidstr_ARG, yes_ARG, zero_ARG)
>  
>  xx(pvdata,
>     "Display the on-disk metadata for physical volume(s)",
> diff --git a/tools/pvcreate.c b/tools/pvcreate.c
> index db1a0e2..9a12c33 100644
> --- a/tools/pvcreate.c
> +++ b/tools/pvcreate.c
> @@ -19,6 +19,7 @@
>  struct pvcreate_params {
>  	int zero;
>  	uint64_t size;
> +	uint64_t align;
>  	int pvmetadatacopies;
>  	uint64_t pvmetadatasize;
>  	int64_t labelsector;
> @@ -177,7 +178,7 @@ static int pvcreate_single(struct cmd_context *cmd, const char *pv_name,
>  	}
>  
>  	dm_list_init(&mdas);
> -	if (!(pv = pv_create(cmd, dev, pp->idp, pp->size, pp->pe_start,
> +	if (!(pv = pv_create(cmd, dev, pp->idp, pp->size, pp->align, pp->pe_start,
>  			     pp->extent_count, pp->extent_size,
>  			     pp->pvmetadatacopies,
>  			     pp->pvmetadatasize,&mdas))) {
> @@ -329,6 +330,21 @@ static int pvcreate_validate_params(struct cmd_context *cmd,
>  	}
>  	pp->size = arg_uint64_value(cmd, physicalvolumesize_ARG, UINT64_C(0));
>  
> +	if (arg_sign_value(cmd, physicalvolumealign_ARG, 0) == SIGN_MINUS) {
> +		log_error("Physical volume alignment may not be negative");
> +		return 0;
> +	}
> +	pp->align = arg_uint64_value(cmd, physicalvolumealign_ARG, UINT64_C(0));
> +
> +	if (pp->align > ULONG_MAX) {
> +		log_error("Physical volume alignment is too big.");
> +		return 0;
> +	}
> +	if (pp->align && pp->pe_start && (pp->pe_start % pp->align)) {
> +		log_error("Incompatible alignment value is ignored.");
> +		pp->align = 0;
> +	}
> +
>  	if (arg_sign_value(cmd, metadatasize_ARG, 0) == SIGN_MINUS) {
>  		log_error("Metadata size may not be negative");
>  		return 0;
> diff --git a/tools/vgconvert.c b/tools/vgconvert.c
> index dc9d023..209d597 100644
> --- a/tools/vgconvert.c
> +++ b/tools/vgconvert.c
> @@ -133,7 +133,7 @@ static int vgconvert_single(struct cmd_context *cmd, const char *vg_name,
>  
>  		dm_list_init(&mdas);
>  		if (!(pv = pv_create(cmd, pv_dev(existing_pv),
> -				     &existing_pv->id, size,
> +				     &existing_pv->id, size, 0,
>  				     pe_start, pv_pe_count(existing_pv),
>  				     pv_pe_size(existing_pv), pvmetadatacopies,
>  				     pvmetadatasize, &mdas))) {
> 
> 

Ack other than the comments above.

I reviewed the whole patch and did limited testing.




More information about the lvm-devel mailing list