[Libguestfs] [hivex PATCH] Re-allocating unused blocks before assigning new blocks
Richard W.M. Jones
rjones at redhat.com
Thu Jan 3 11:06:06 UTC 2019
On Mon, Jul 23, 2018 at 02:38:18PM -0400, Shreyas Khare wrote:
> >From 4a9f9b534ebd18254a07ce1de22c5de570cfa2f8 Mon Sep 17 00:00:00 2001
> From: Shreyas Khare <shreyas_khare at rapid7.com>
> Date: Mon, 23 Jul 2018 14:28:29 -0400
> Subject: [PATCH] Re-allocating unused blocks before assigning new blocks
It would be good if you could post this next time using "git
send-email" so that I can apply the patch with the commit message,
date etc using git am. Otherwise I have to use patch and fake the
commit message.
> ---
> lib/hivex-internal.h | 8 +++
> lib/write.c | 146 +++++++++++++++++++++++++++++++++++++------
> 2 files changed, 134 insertions(+), 20 deletions(-)
>
> diff --git a/lib/hivex-internal.h b/lib/hivex-internal.h
> index a22e732..e7f5e92 100644
> --- a/lib/hivex-internal.h
> +++ b/lib/hivex-internal.h
> @@ -45,6 +45,13 @@ typedef enum {
> nr_recode_types,
> } recode_type;
>
> +typedef struct unallocated_block {
> + size_t offset;
> + size_t block_size;
> + struct unallocated_block *next;
> +} unallocated_block;
This needs to be indented consistently with the other code. This also
applies to the rest of the code.
> struct hive_h {
> char *filename;
> int fd;
> @@ -52,6 +59,7 @@ struct hive_h {
> int msglvl; /* 1 = verbose, 2 or 3 = debug */
> int writable;
> int unsafe;
> + unallocated_block *free_blocks;
>
> /* Registry file, memory mapped if read-only, or malloc'd if writing. */
> union {
> diff --git a/lib/write.c b/lib/write.c
> index 70105c9..b72e2c8 100644
> --- a/lib/write.c
> +++ b/lib/write.c
> @@ -126,6 +126,97 @@ allocate_page (hive_h *h, size_t allocation_hint)
> return offset + sizeof (struct ntreg_hbin_page);
> }
>
> +/*
> + * Get the first available unused block from free_blocks, using first-fit algorithm.
> + * NB: Attempted to use the best-fit algorithm. This does not work too well because
> + * allocating a block in the smallest possible available block causes fragmentation.
> + */
This needs to be wrapped so that lines are not longer than about 70-79
characters. Also make the comments consistent with existing code, so
don't start with an empty line after /* .
> +static size_t
> +get_unused_block(hive_h *h, size_t seglen)
> +{
> + unallocated_block *before = NULL, *found=NULL;
Space around second "=".
> + for (unallocated_block *current = h->free_blocks; current != NULL ; current = current->next) {
Extra space before the second ";" and the line is too long.
> + if (current->block_size >= seglen) {
> + found = current;
> + break;
> + }
> + before = current;
> + }
> + if (found) {
> + size_t offset = found->offset;
> + size_t unused = found->block_size - seglen;
It looks like tabs and spaces are used inconsistently here. However
if you reindent the code that should be fixed automatically.
> + if (unused > 0) {
> + // Need to write unused memory to hive (to preserve hiv integrity)
hiv -> hive
> + struct ntreg_hbin_block *blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + found->offset + seglen);
> + blockhdr->seg_len = htole32 ((int32_t) unused);
> + // Lets move and resize the found block
> + found->offset = found->offset+seglen;
> + found->block_size = unused;
I don't think I understand what this block of code is doing.
> + } else {
> + //Full buffer was used
Space after //
> + if (before) {
> + before->next = found->next;
> + } else {
> + h->free_blocks = found->next;
> + }
> + free(found);
Space after free.
> + }
> + return offset;
> + } else return -1;
Probably more consistent to add an extra line break after }
> +}
> +
> +/*
> + * Add freed block to free_blocks. Block size is determined by size at the actual offset.
> + * This list is ordered by offset. If the free'd block is preceded or succeded
> + * by another free block, they are merged.
> + */
> +static void
> +set_unused_block(hive_h *h, size_t offset)
> +{
> + size_t seg_len = block_len (h, offset, NULL);
> + unallocated_block *current = malloc(sizeof(unallocated_block));
> + current->offset = offset;
> + current->block_size = seg_len;
> + current->next = NULL;
> +
> + unallocated_block *before = NULL, *after = NULL;
> + // There are no free blocks
> + if (h->free_blocks == NULL) {
> + h->free_blocks = current;
> + return;
> + }
> +
> + // Find the first block _after_ this one
> + for (after = h->free_blocks; after != NULL; after = after->next) {
> + if (after->offset > current->offset) {
> + // Can we merge with the next block?
> + if (current->offset + current->block_size == after->offset) {
> + current->block_size += after->block_size;
> + current->next = after->next;
> + free(after);
> + } else {
> + current->next = after;
> + }
> + break;
> + }
> + before = after;
> + }
> + // This was the block before this one
> + if (before) {
> + // Can we merge with the previous block?
> + if (before->offset + before->block_size == current->offset) {
> + before->block_size += current->block_size;
> + before->next = current->next;
> + free(current); //discard current block, lets reuse before block
> + } else {
> + before->next = current;
> + }
> + } else {
> + h->free_blocks = current;
> + }
> +}
> +
> +
> /* Allocate a single block, first allocating an hbin (page) at the end
> * of the current file if necessary. NB. To keep the implementation
> * simple and more likely to be correct, we do not reuse existing free
> @@ -148,6 +239,9 @@ allocate_page (hive_h *h, size_t allocation_hint)
> static size_t
> allocate_block (hive_h *h, size_t seg_len, const char id[2])
> {
> + bool new_block = true;
> + size_t offset;
> +
> CHECK_WRITABLE (0);
>
> if (seg_len < 4) {
> @@ -170,17 +264,26 @@ allocate_block (hive_h *h, size_t seg_len, const char id[2])
> */
> seg_len = (seg_len + 7) & ~7;
>
> - /* Allocate a new page if necessary. */
> - if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
> - size_t newendblocks = allocate_page (h, seg_len);
> - if (newendblocks == 0)
> - return 0;
> - h->endblocks = newendblocks;
> + // Attempt to find a free block before getting a new block
> + offset = get_unused_block(h, seg_len);
> + if (offset != -1) {
> + new_block = false;
> + DEBUG(2, "Re-use offset found at 0x%zx, size %zu", offset, seg_len);
> }
>
> - size_t offset = h->endblocks;
> + if (new_block) {
> + /* Allocate a new page if necessary. */
> + if (h->endblocks == 0 || h->endblocks + seg_len > h->endpages) {
> + if(h->endblocks != 0) set_unused_block(h, h->endblocks);
> + size_t newendblocks = allocate_page (h, seg_len);
> + if (newendblocks == 0)
> + return 0;
> + h->endblocks = newendblocks;
> + }
>
> - DEBUG (2, "new block at 0x%zx, size %zu", offset, seg_len);
> + offset = h->endblocks;
> + DEBUG (2, "new block at 0x%zx, size %zu", offset, seg_len);
> + }
>
> struct ntreg_hbin_block *blockhdr =
> (struct ntreg_hbin_block *) ((char *) h->addr + offset);
> @@ -195,21 +298,23 @@ allocate_block (hive_h *h, size_t seg_len, const char id[2])
>
> BITMAP_SET (h->bitmap, offset);
>
> - h->endblocks += seg_len;
> + if (new_block) {
> + h->endblocks += seg_len;
>
> - /* If there is space after the last block in the last page, then we
> - * have to put a dummy free block header here to mark the rest of
> - * the page as free.
> - */
> - ssize_t rem = h->endpages - h->endblocks;
> - if (rem > 0) {
> - DEBUG (2, "marking remainder of page free"
> - " starting at 0x%zx, size %zd", h->endblocks, rem);
> + /* If there is space after the last block in the last page, then we
> + * have to put a dummy free block header here to mark the rest of
> + * the page as free.
> + */
> + ssize_t rem = h->endpages - h->endblocks;
> + if (rem > 0) {
> + DEBUG (2, "marking remainder of page free"
> + " starting at 0x%zx, size %zd", h->endblocks, rem);
>
> - assert (rem >= 4);
> + assert (rem >= 4);
>
> - blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + h->endblocks);
> - blockhdr->seg_len = htole32 ((int32_t) rem);
> + blockhdr = (struct ntreg_hbin_block *) ((char *) h->addr + h->endblocks);
> + blockhdr->seg_len = htole32 ((int32_t) rem);
> + }
Seems like the patch contains unnecessary whitespace changes.
Can you post a reformatted patch, then I can do a review of the
code.
Rich.
> }
>
> return offset;
> @@ -236,6 +341,7 @@ mark_block_unused (hive_h *h, size_t offset)
> size_t seg_len = block_len (h, offset, NULL);
> blockhdr->seg_len = htole32 (seg_len);
>
> + set_unused_block(h, offset);
> BITMAP_CLR (h->bitmap, offset);
> }
>
> --
> 2.17.0
>
> _______________________________________________
> Libguestfs mailing list
> Libguestfs at redhat.com
> https://www.redhat.com/mailman/listinfo/libguestfs
--
Richard Jones, Virtualization Group, Red Hat http://people.redhat.com/~rjones
Read my programming and virtualization blog: http://rwmj.wordpress.com
libguestfs lets you edit virtual machines. Supports shell scripting,
bindings from many languages. http://libguestfs.org
More information about the Libguestfs
mailing list