[Libguestfs] [PATCH v2 3/5] daemon: Added internal_filesystem_walk command

Pino Toscano ptoscano at redhat.com
Mon Apr 4 09:48:21 UTC 2016


On Sunday 03 April 2016 16:30:48 Matteo Cafasso wrote:
> The internal_filesystem_walk command walks
> through the FS structure of a disk partition
> and returns all the files or directories
> which could be found.
> 
> The command is able to retrieve information
> regarding deleted or unaccessible files as well
> where other commands such as stat or find
> would fail.
> 
> The gathered list of tsk_dirent structs
> is serialised into XDR format and written
> to a file by the appliance.

Not that it is a big issue, but you can wrap commit messages at the
72 columns.

> Signed-off-by: Matteo Cafasso <noxdafox at gmail.com>
> ---
>  daemon/Makefile.am   |   4 +-
>  daemon/tsk.c         | 225 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  generator/actions.ml |  25 ++++++
>  src/MAX_PROC_NR      |   2 +-
>  4 files changed, 254 insertions(+), 2 deletions(-)
>  create mode 100644 daemon/tsk.c
> 
> diff --git a/daemon/Makefile.am b/daemon/Makefile.am
> index beb7962..03bf71f 100644
> --- a/daemon/Makefile.am
> +++ b/daemon/Makefile.am
> @@ -179,6 +179,7 @@ guestfsd_SOURCES = \
>  	sync.c \
>  	syslinux.c \
>  	tar.c \
> +	tsk.c \
>  	truncate.c \
>  	umask.c \
>  	upload.c \
> @@ -209,7 +210,8 @@ guestfsd_LDADD = \
>  	$(LIB_CLOCK_GETTIME) \
>  	$(LIBINTL) \
>  	$(SERVENT_LIB) \
> -	$(PCRE_LIBS)
> +	$(PCRE_LIBS) \
> +	$(TSK_LIBS)
> 
>  guestfsd_CPPFLAGS = \
>  	-I$(top_srcdir)/gnulib/lib \
> diff --git a/daemon/tsk.c b/daemon/tsk.c
> new file mode 100644
> index 0000000..ac44106
> --- /dev/null
> +++ b/daemon/tsk.c
> @@ -0,0 +1,225 @@
> +/* libguestfs - the guestfsd daemon
> + * Copyright (C) 2016 Red Hat Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <config.h>
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <rpc/xdr.h>
> +#include <rpc/types.h>
> +
> +#include "guestfs_protocol.h"
> +#include "daemon.h"
> +#include "actions.h"
> +#include "optgroups.h"
> +
> +#ifdef HAVE_LIBTSK
> +
> +#include <tsk/libtsk.h>
> +
> +/* File types map similar to dirent. */
> +#define TSK_FILE_TYPE_NUM 10
> +char TSK_FILE_TYPE[TSK_FILE_TYPE_NUM] = {
> +  'u', 'f', 'c', 'd', 'b', 'r', 'l', 's', 'h', 'w'
> +};

I see the libtsk already uses TSK_* and tsk_* prefixes for its own
stuff, so I'd avoid using the same prefixes for local variables.

Also, make the string as static const, since it does not need to be
modified at all (and thus can then be placed into .rodata).

> +
> +static int open_filesystem (const char *device,
> +                            TSK_IMG_INFO **img, TSK_FS_INFO **fs);
> +static TSK_WALK_RET_ENUM fswalk_callback (TSK_FS_FILE *fsfile,
> +                                          const char *path, void *data);

Single line for forward declarations.

> +static char file_type (TSK_FS_FILE *fsfile);
> +static int send_dirent_info (guestfs_int_tsk_dirent *dirent);
> +static void reply_with_tsk_error (const char *funcname);
> +
> +int
> +do_internal_filesystem_walk (const mountable_t *mountable)
> +{
> +  int ret = -1;
> +  TSK_FS_INFO *fs = NULL;
> +  TSK_IMG_INFO *img = NULL;  /* Used internally by tsk_fs_dir_walk */
> +  int flags = TSK_FS_DIR_WALK_FLAG_ALLOC | TSK_FS_DIR_WALK_FLAG_UNALLOC |
> +    TSK_FS_DIR_WALK_FLAG_RECURSE | TSK_FS_DIR_WALK_FLAG_NOORPHAN;
> +
> +  ret = open_filesystem (mountable->device, &img, &fs);
> +  if (ret < 0)
> +    return ret;
> +
> +  reply (NULL, NULL);  /* Reply message. */
> +
> +  ret = tsk_fs_dir_walk (fs, fs->root_inum, flags, fswalk_callback, NULL);
> +  if (ret == 0)
> +    ret = send_file_end (0);  /* File transfer end. */
> +  else
> +    send_file_end (1);  /* Cancel file transfer. */
> +
> +  fs->close (fs);
> +  img->close (img);
> +
> +  return ret;
> +}
> +
> +/* Inspect the device and initialises the img and fs structures.
> + * Return 0 on success, -1 on error.
> + */
> +static int
> +open_filesystem (const char *device, TSK_IMG_INFO **img, TSK_FS_INFO **fs)
> +{
> +  const char *images[] = { device };
> +
> +  *img = tsk_img_open (1, images, TSK_IMG_TYPE_DETECT , 0);
> +  if (*img == NULL) {
> +    reply_with_tsk_error ("tsk_image_open");
> +    return -1;
> +  }
> +
> +  *fs = tsk_fs_open_img (*img, 0, TSK_FS_TYPE_DETECT);
> +  if (*fs == NULL) {
> +    reply_with_tsk_error ("tsk_fs_open_img");
> +    (*img)->close (*img);
> +    return -1;
> +  }
> +
> +  return 0;
> +}
> +
> +/* Filesystem walk callback, it gets called on every FS node.
> + * Parse the node, encode it into an XDR structure and send it to the appliance.
> + * Return TSK_WALK_CONT on success, TSK_WALK_ERROR on error.
> + */
> +static TSK_WALK_RET_ENUM
> +fswalk_callback (TSK_FS_FILE *fsfile, const char *path, void *data)
> +{
> +  int ret = 0;
> +  CLEANUP_FREE char *fname = NULL;
> +  struct guestfs_int_tsk_dirent dirent;
> +
> +  /* Ignore ./ and ../ */
> +  ret = TSK_FS_ISDOT (fsfile->name->name);
> +  if (ret != 0)
> +    return TSK_WALK_CONT;
> +
> +  /* Build the full relative path of the entry */
> +  ret = asprintf_nowarn (&fname, "%Q%Q", path, fsfile->name->name);

Why the quoting?  We don't quote results in similar APIs (e.g. readdir).

> +  if (ret < 0) {
> +    fprintf (stderr, "asprintf: %m");
> +    return TSK_WALK_ERROR;
> +  }
> +
> +  dirent.tsk_inode = fsfile->name->meta_addr;
> +  dirent.tsk_type = file_type (fsfile);
> +  dirent.tsk_size = (fsfile->meta != NULL) ? fsfile->meta->size : 0;

If 'meta' is null, then I guess the size should be -1 to indicate it
was not available; otherwise, there is no difference between an empty
file, and a file whose metadata could not be read.

> +  dirent.tsk_name = fname;
> +  dirent.tsk_allocated = !(fsfile->name->flags & TSK_FS_NAME_FLAG_UNALLOC);
> +
> +  ret = send_dirent_info (&dirent);
> +  ret = (ret == 0) ? TSK_WALK_CONT : TSK_WALK_ERROR;
> +
> +  return ret;
> +}
> +
> +/* Inspect fsfile to identify its type. */
> +static char
> +file_type(TSK_FS_FILE *fsfile)
> +{
> +  if (fsfile->name->type < TSK_FILE_TYPE_NUM)
> +    return TSK_FILE_TYPE[fsfile->name->type];
> +  else if (fsfile->meta != NULL && fsfile->meta->type < TSK_FILE_TYPE_NUM)
> +    return TSK_FILE_TYPE[fsfile->meta->type];
> +  else
> +    return 'u';
> +}

I think it would be better to have a switch case on the file type: the
current solution will silently break if a future (incompatible) version
of libtsk changes the order/values of the elements in
TSK_FS_NAME_TYPE_ENUM.  Also, new values are simply not detected at
all, whereas with a switch case at least they would give us compiler
warnings.

Also, IMHO code like:

  switch (type) {
  case TSK_FS_NAME_TYPE_UNDEF: return 'u';
  ...

would be much more readable.

> +/* Serialise dirent into XDR stream and send it to the appliance.
> + * Return 0 on success, -1 on error.
> + */
> +static int
> +send_dirent_info (guestfs_int_tsk_dirent *dirent)
> +{
> +  XDR xdr;
> +  size_t len = 0;
> +  CLEANUP_FREE char *buf;
> +
> +  buf = malloc (GUESTFS_MAX_CHUNK_SIZE);
> +  if (buf == NULL) {
> +    fprintf (stderr, "malloc: %m");
> +    return -1;
> +  }
> +
> +  /* Serialise tsk_dirent struct. */
> +  xdrmem_create (&xdr, buf, GUESTFS_MAX_CHUNK_SIZE, XDR_ENCODE);
> +
> +  if (!xdr_uint64_t (&xdr, &dirent->tsk_inode))
> +    return -1;
> +  if (!xdr_char (&xdr, &dirent->tsk_type))
> +    return -1;
> +  if (!xdr_int64_t (&xdr, &dirent->tsk_size))
> +    return -1;
> +
> +  /* Serialise filename. */
> +  len = strlen (dirent->tsk_name) + 1;
> +  if (!xdr_u_long (&xdr, &len))
> +    return -1;
> +  if (!xdr_string (&xdr, &dirent->tsk_name, len))
> +    return -1;
> +
> +  if (!xdr_uint32_t (&xdr, &dirent->tsk_allocated))
> +    return -1;
> +
> +  /* Resize buffer to actual length. */
> +  len = xdr_getpos (&xdr);
> +  xdr_destroy (&xdr);
> +  buf = realloc (buf, len);
> +  if (buf == NULL) {
> +    fprintf (stderr, "realloc: %m");
> +    return -1;
> +  }

I think this manual code is not needed, and you can use the XDR
serializing functions generated in daemon/guestfs_protocol.{c,h}.

> +
> +  /* Send serialised tsk_dirent out. */
> +  return send_file_write (buf, len);
> +}
> +
> +/* Parse TSK error and send it to the appliance. */
> +static void
> +reply_with_tsk_error (const char *funcname)
> +{
> +  int ret = 0;
> +  const char *buf = NULL;
> +
> +  ret = tsk_error_get_errno ();
> +  if (ret != 0) {
> +    buf = tsk_error_get ();
> +    reply_with_error ("TSK function %s error: %s", funcname, buf);
> +  }
> +  else
> +    reply_with_error ("TSK function %s: unknown error", funcname);
> +}

s/TSK function//

> +
> +int
> +optgroup_libtsk_available (void)
> +{
> +  return 1;
> +}
> +
> +#else   /* !HAVE_LIBTSK */
> +
> +OPTGROUP_LIBTSK_NOT_AVAILABLE
> +
> +#endif  /* !HAVE_LIBTSK */
> diff --git a/generator/actions.ml b/generator/actions.ml
> index e5cb939..449ffa0 100644
> --- a/generator/actions.ml
> +++ b/generator/actions.ml
> @@ -12958,6 +12958,31 @@ and save it as F<filename> on the local machine.
> 
>  This allows to download deleted or inaccessible files." };
> 
> +  { defaults with
> +    name = "internal_filesystem_walk"; added = (1, 33, 17);
> +    style = RErr, [Mountable "device"; FileOut "filename"], [];
> +    proc_nr = Some 465;
> +    visibility = VInternal;
> +    optional = Some "libtsk";
> +    shortdesc = "walk through the filesystem content";
> +    longdesc = "Walk through the internal structures of a disk partition
> +(eg. F</dev/sda1>) in order to return a list of all the files
> +and directories stored within.
> +
> +It is not necessary to mount the disk partition to run this command.
> +
> +All entries in the filesystem are returned, excluding C<.> and
> +C<..>. This function can list deleted or unaccessible files.
> +The entries are I<not> sorted.
> +
> +If the entry is not allocated (ex: it has been deleted),
> +its inode, type or size might not be recovered correctly.
> +In such case, the inode and the size will be 0 while the type
> +will be unidentified 'u'.
> +
> +This call returns basic file type information about each
> +file." };

No need to copy&paste the same description used for the public
filesystem_walk -- a one-liner telling it is an internal function for
filesystem_walk is enough.
Internal functions will not be documented anyway, so it is not worth
adding "proper" documentation text for them.

Thanks,
-- 
Pino Toscano
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://listman.redhat.com/archives/libguestfs/attachments/20160404/8acae04e/attachment.sig>


More information about the Libguestfs mailing list