[Libguestfs] [hivex PATCH] Re-allocating unused blocks before assigning new blocks

Richard W.M. Jones rjones at redhat.com
Thu Jan 3 11:06:06 UTC 2019


On Mon, Jul 23, 2018 at 02:38:18PM -0400, Shreyas Khare wrote:
> >From 4a9f9b534ebd18254a07ce1de22c5de570cfa2f8 Mon Sep 17 00:00:00 2001
> From: Shreyas Khare <shreyas_khare at rapid7.com>
> Date: Mon, 23 Jul 2018 14:28:29 -0400
> Subject: [PATCH] Re-allocating unused blocks before assigning new blocks

It would be good if you could post this next time using "git
send-email" so that I can apply the patch with the commit message,
date etc using git am.  Otherwise I have to use patch and fake the
commit message.

> ---
>  lib/hivex-internal.h |   8 +++
>  lib/write.c          | 146 +++++++++++++++++++++++++++++++++++++------
>  2 files changed, 134 insertions(+), 20 deletions(-)
> 
> diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
> index a22e732..e7f5e92 100644
> --- a/lib/hivex-internal.h
> +++ b/lib/hivex-internal.h
> @@ -45,6 +45,13 @@ typedef enum {
>    nr_recode_types,
>  } recode_type;
>  
> +typedef struct unallocated_block {
> +	size_t offset;
> +	size_t block_size;
> +	struct unallocated_block *next;
> +} unallocated_block;

This needs to be indented consistently with the other code.  This also
applies to the rest of the code.

>  struct hive_h {
>    char *filename;
>    int fd;
> @@ -52,6 +59,7 @@ struct hive_h {
>    int msglvl;                   /* 1 = verbose, 2 or 3 = debug */
>    int writable;
>    int unsafe;
> +  unallocated_block *free_blocks;
>  
>    /* Registry file, memory mapped if read-only, or malloc'd if writing. */
>    union {
> diff --git a/lib/write.c b/lib/write.c
> index 70105c9..b72e2c8 100644
> --- a/lib/write.c
> +++ b/lib/write.c
> @@ -126,6 +126,97 @@ allocate_page (hive_h *h, size_t allocation_hint)
>    return offset + sizeof (struct ntreg_hbin_page);
>  }
>  
> +/*
> + * Get the first available unused block from free_blocks, using first-fit algorithm.
> + * NB: Attempted to use the best-fit algorithm. This does not work too well because
> + * allocating a block in the smallest possible available block causes fragmentation.
> + */

This needs to be wrapped so that lines are not longer than about 70-79
characters.  Also make the comments consistent with existing code, so
don't start with an empty line after /* .

> +static size_t
> +get_unused_block(hive_h *h, size_t seglen)
> +{
> +	unallocated_block *before = NULL, *found=NULL;

Space around second "=".

> +	for (unallocated_block *current = h->free_blocks; current != NULL ; current = current->next) {

Extra space before the second ";" and the line is too long.

> +		if (current->block_size >= seglen) {
> +			found = current;
> +			break;
> +		}
> +		before = current;
> +	}
> +	if (found) {
> +    size_t offset = found->offset;
> +    size_t unused = found->block_size - seglen;

It looks like tabs and spaces are used inconsistently here.  However
if you reindent the code that should be fixed automatically.

> +		if (unused > 0) {
> +			// Need to write unused memory to hive (to preserve hiv integrity)

hiv -> hive

> +			struct ntreg_hbin_block *blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + found->offset + seglen);
> +			blockhdr->seg_len = htole32 ((int32_t) unused);
> +			// Lets move and resize the found block
> +		  found->offset = found->offset+seglen;
> +			found->block_size = unused;

I don't think I understand what this block of code is doing.

> +		} else {
> +			//Full buffer was used

Space after //

> +			if (before) {
> +				before->next = found->next;
> +			} else {
> +				h->free_blocks = found->next;
> +			}
> +			free(found);

Space after free.

> +		}
> +		return offset;
> +	} else return -1;

Probably more consistent to add an extra line break after }

> +}
> +
> +/*
> + * Add freed block to free_blocks. Block size is determined by size at the actual offset.
> + * This list is ordered by offset. If the free'd block is preceded or succeded
> + * by another free block, they are merged.
> + */
> +static void
> +set_unused_block(hive_h *h, size_t offset)
> +{
> +  size_t seg_len = block_len (h, offset, NULL);
> +  unallocated_block *current = malloc(sizeof(unallocated_block));
> +  current->offset = offset;
> +  current->block_size = seg_len;
> +  current->next = NULL;
> +
> +	unallocated_block *before = NULL, *after = NULL;
> +	// There are no free blocks
> +	if (h->free_blocks == NULL) {
> +		h->free_blocks = current;
> +		return;
> +	}
> +
> +	// Find the first block _after_ this one
> +	for (after = h->free_blocks; after != NULL; after = after->next) {
> +		if (after->offset > current->offset) {
> +			// Can we merge with the next block?
> +			if (current->offset + current->block_size == after->offset) {
> +				current->block_size += after->block_size;
> +				current->next = after->next;
> +				free(after);
> +			} else {
> +				current->next = after;
> +			}
> +			break;
> +		}
> +		before = after;
> +	}
> +	// This was the block before this one
> +	if (before) {
> +		// Can we merge with the previous block?
> +		if (before->offset + before->block_size  == current->offset) {
> +			before->block_size += current->block_size;
> +			before->next = current->next;
> +			free(current); //discard current block, lets reuse before block
> +		} else {
> +			before->next = current;
> +		}
> +	} else {
> +		h->free_blocks = current;
> +	}
> +}
> +
> +
>  /* Allocate a single block, first allocating an hbin (page) at the end
>   * of the current file if necessary.  NB. To keep the implementation
>   * simple and more likely to be correct, we do not reuse existing free
> @@ -148,6 +239,9 @@ allocate_page (hive_h *h, size_t allocation_hint)
>  static size_t
>  allocate_block (hive_h *h, size_t seg_len, const char id[2])
>  {
> +  bool new_block = true;
> +  size_t offset;
> +
>    CHECK_WRITABLE (0);
>  
>    if (seg_len < 4) {
> @@ -170,17 +264,26 @@ allocate_block (hive_h *h, size_t seg_len, const char id[2])
>     */
>    seg_len = (seg_len + 7) & ~7;
>  
> -  /* Allocate a new page if necessary. */
> -  if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
> -    size_t newendblocks = allocate_page (h, seg_len);
> -    if (newendblocks == 0)
> -      return 0;
> -    h->endblocks = newendblocks;
> +  // Attempt to find a free block before getting a new block
> +  offset = get_unused_block(h, seg_len);
> +  if (offset != -1) {
> +    new_block = false;
> +    DEBUG(2, "Re-use offset found at 0x%zx, size %zu", offset, seg_len);
>    }
>  
> -  size_t offset = h->endblocks;
> +  if (new_block) {
> +  /* Allocate a new page if necessary. */
> +    if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
> +      if(h->endblocks != 0) set_unused_block(h, h->endblocks);
> +      size_t newendblocks = allocate_page (h, seg_len);
> +      if (newendblocks == 0)
> +        return 0;
> +      h->endblocks = newendblocks;
> +    }
>  
> -  DEBUG (2, "new block at 0x%zx, size %zu", offset, seg_len);
> +    offset = h->endblocks;
> +    DEBUG (2, "new block at 0x%zx, size %zu", offset, seg_len);
> +  }
>  
>    struct ntreg_hbin_block *blockhdr =
>      (struct ntreg_hbin_block *) ((char *) h->addr + offset);
> @@ -195,21 +298,23 @@ allocate_block (hive_h *h, size_t seg_len, const char id[2])
>  
>    BITMAP_SET (h->bitmap, offset);
>  
> -  h->endblocks += seg_len;
> +  if (new_block) {
> +    h->endblocks += seg_len;
>  
> -  /* If there is space after the last block in the last page, then we
> -   * have to put a dummy free block header here to mark the rest of
> -   * the page as free.
> -   */
> -  ssize_t rem = h->endpages - h->endblocks;
> -  if (rem > 0) {
> -    DEBUG (2, "marking remainder of page free"
> -           " starting at 0x%zx, size %zd", h->endblocks, rem);
> +    /* If there is space after the last block in the last page, then we
> +     * have to put a dummy free block header here to mark the rest of
> +     * the page as free.
> +     */
> +    ssize_t rem = h->endpages - h->endblocks;
> +    if (rem > 0) {
> +      DEBUG (2, "marking remainder of page free"
> +             " starting at 0x%zx, size %zd", h->endblocks, rem);
>  
> -    assert (rem >= 4);
> +      assert (rem >= 4);
>  
> -    blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + h->endblocks);
> -    blockhdr->seg_len = htole32 ((int32_t) rem);
> +      blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + h->endblocks);
> +      blockhdr->seg_len = htole32 ((int32_t) rem);
> +    }

Seems like the patch contains unnecessary whitespace changes.

Can you post a reformatted patch, then I can do a review of the
code.

Rich.

>    }
>  
>    return offset;
> @@ -236,6 +341,7 @@ mark_block_unused (hive_h *h, size_t offset)
>    size_t seg_len = block_len (h, offset, NULL);
>    blockhdr->seg_len = htole32 (seg_len);
>  
> +  set_unused_block(h, offset);
>    BITMAP_CLR (h->bitmap, offset);
>  }
>  
> -- 
> 2.17.0
> 

> _______________________________________________
> Libguestfs mailing list
> Libguestfs at redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs


-- 
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines.  Supports shell scripting,
bindings from many languages.  http://libguestfs.org




More information about the Libguestfs mailing list