[Virtio-fs] [PATCH 4/4] virtiofsd: Drop CAP_FSETID if client asked for it

Dr. David Alan Gilbert dgilbert at redhat.com
Wed Aug 14 09:52:16 UTC 2019


* Vivek Goyal (vgoyal at redhat.com) wrote:
> If client requested killing setuid/setgid bits on file being written, drop
> CAP_FSETID capability so that setuid/setgid bits are cleared upon write
> automatically.
> 
> pjdfstest chown/12.t needs this.
> 
> Signed-off-by: Vivek Goyal <vgoyal at redhat.com>

Some high level questions:
  a) This is going to be *really* expensive isn't it - that's a lot of
syscalls for each write

  b) and isn't this the normal case, i.e. a non-root guest writing to a
normal file?

  c) I guess an unpriv virtiofsd still works OK because the cap is
already gone.

  d) How is this thread safe?  My reading is cap_set_proc is process
wide; you'd have other writesin other threads doing non-kill-priv
writes; also you'd have multiple threads doing   (clear) write (set) -
that feels like they'd interact badly.

  e) I think you could share some stuff between the drop and the
restore;  for example you only ever need to do cap_from_name once
in the entire run.   You can probably also do teh cap_get_proc when
you drop it, keep that 'caps' around and then set it again.


Dave

> ---
>  contrib/virtiofsd/Makefile.objs    |   2 +
>  contrib/virtiofsd/passthrough_ll.c | 127 +++++++++++++++++++++++++++++
>  contrib/virtiofsd/seccomp.c        |   2 +
>  3 files changed, 131 insertions(+)
> 
> diff --git a/contrib/virtiofsd/Makefile.objs b/contrib/virtiofsd/Makefile.objs
> index 941b19f18e..25f1e8dd73 100644
> --- a/contrib/virtiofsd/Makefile.objs
> +++ b/contrib/virtiofsd/Makefile.objs
> @@ -11,3 +11,5 @@ virtiofsd-obj-y = buffer.o \
>  
>  seccomp.o-cflags := $(SECCOMP_CFLAGS)
>  seccomp.o-libs := $(SECCOMP_LIBS)
> +
> +passthrough_ll.o-libs += -lcap
> diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c
> index 321bbb20be..412663653a 100644
> --- a/contrib/virtiofsd/passthrough_ll.c
> +++ b/contrib/virtiofsd/passthrough_ll.c
> @@ -56,6 +56,7 @@
>  #include <sys/mman.h>
>  #include <sys/socket.h>
>  #include <sys/un.h>
> +#include <sys/capability.h>
>  
>  #include "ireg.h"
>  #include <sys/mount.h>
> @@ -230,6 +231,115 @@ static struct lo_data *lo_data(fuse_req_t req)
>  	return (struct lo_data *) fuse_req_userdata(req);
>  }
>  
> +/* Helpers for dropping and regaining effective capabilities. Returns 0
> + * on success, error otherwise  */
> +static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
> +{
> +	cap_t caps;
> +	cap_value_t cap;
> +	cap_flag_value_t cap_value;
> +	int ret = 0;
> +
> +	ret = cap_from_name(cap_name, &cap);
> +	if (ret == -1) {
> +		ret = errno;
> +		fuse_err("cap_from_name(%s) failed:%s\n", cap_name,
> +			 strerror(errno));
> +		goto out;
> +	}
> +
> +	if (!CAP_IS_SUPPORTED(cap)) {
> +		fuse_err("capability(%s) is not supported\n", cap_name);
> +		return EINVAL;
> +	}
> +
> +	caps = cap_get_proc();
> +	if (caps == NULL) {
> +		ret = errno;
> +		fuse_err("cap_get_proc() failed\n");
> +		goto out;
> +	}
> +
> +	if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &cap_value) == -1) {
> +		ret = errno;
> +		fuse_err("cap_get_flag() failed\n");
> +		goto out_cap_free;
> +	}
> +
> +	/* We dont have this capability in effective set already. */
> +	if (cap_value == CAP_CLEAR) {
> +		ret = 0;
> +		goto out_cap_free;
> +	}
> +
> +	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_CLEAR) == -1) {
> +		ret = errno;
> +		fuse_err("cap_set_flag() failed\n");
> +		goto out_cap_free;
> +	}
> +
> +	if (cap_set_proc(caps) == -1) {
> +		ret = errno;
> +		fuse_err("cap_set_procs() failed\n");
> +		goto out_cap_free;
> +	}
> +
> +	ret = 0;
> +	if (cap_dropped)
> +		*cap_dropped = true;
> +
> +out_cap_free:
> +	cap_free(caps);
> +out:
> +	return ret;
> +}
> +
> +static int gain_effective_cap(const char *cap_name)
> +{
> +	cap_t caps;
> +	cap_value_t cap;
> +	int ret = 0;
> +
> +	ret = cap_from_name(cap_name, &cap);
> +	if (ret == -1) {
> +		ret = errno;
> +		fuse_err("cap_from_name(%s) failed:%s\n", cap_name,
> +			 strerror(errno));
> +		goto out;
> +	}
> +
> +	if (!CAP_IS_SUPPORTED(cap)) {
> +		fuse_err("capability(%s) is not supported\n", cap_name);
> +		return EINVAL;
> +	}
> +
> +	caps = cap_get_proc();
> +	if (caps == NULL) {
> +		ret = errno;
> +		fuse_err("cap_get_proc() failed\n");
> +		goto out;
> +	}
> +
> +
> +	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_SET) == -1) {
> +		ret = errno;
> +		fuse_err("cap_set_flag() failed\n");
> +		goto out_cap_free;
> +	}
> +
> +	if (cap_set_proc(caps) == -1) {
> +		ret = errno;
> +		fuse_err("cap_set_procs() failed\n");
> +		goto out_cap_free;
> +	}
> +	ret = 0;
> +
> +out_cap_free:
> +	cap_free(caps);
> +out:
> +	return ret;
> +}
> +
>  static void lo_map_init(struct lo_map *map)
>  {
>  	map->elems = NULL;
> @@ -2014,6 +2124,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
>  	ssize_t res;
>  	struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
>  	struct lo_data *lo = lo_data(req);
> +	bool cap_fsetid_dropped = false;
>  
>  	out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
>  	out_buf.buf[0].fd = lo_fi_fd(req, fi);
> @@ -2023,6 +2134,16 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
>  		fuse_debug("lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n",
>  			   ino, out_buf.buf[0].size, (unsigned long) off);
>  
> +	/*
> +	 * If kill_priv is set, drop CAP_FSETID which should lead to kernel
> +	 * clearing setuid/setgid on file.
> +	 */
> +	if (fi->kill_priv) {
> +		res = drop_effective_cap("cap_fsetid", &cap_fsetid_dropped);
> +		if (res != 0)
> +			fuse_reply_err(req, res);
> +	}
> +
>  	res = fuse_buf_copy(req, &out_buf, in_buf, 0);
>  	if(res < 0) {
>  		fuse_reply_err(req, -res);
> @@ -2037,6 +2158,12 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
>  
>  		fuse_reply_write(req, (size_t) res);
>  	}
> +
> +	if (cap_fsetid_dropped) {
> +		res = gain_effective_cap("cap_fsetid");
> +		if (res)
> +			fuse_err("Failed to gain CAP_FSETID\n");
> +	}
>  }
>  
>  static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
> diff --git a/contrib/virtiofsd/seccomp.c b/contrib/virtiofsd/seccomp.c
> index 5f1c873b82..7384ebee0a 100644
> --- a/contrib/virtiofsd/seccomp.c
> +++ b/contrib/virtiofsd/seccomp.c
> @@ -78,6 +78,8 @@ static const int syscall_whitelist[] = {
>  	SCMP_SYS(unlinkat),
>  	SCMP_SYS(utimensat),
>  	SCMP_SYS(write),
> +	SCMP_SYS(capget),
> +	SCMP_SYS(capset),
>  };
>  
>  /* Syscalls used when --syslog is enabled */
> -- 
> 2.17.2
> 
--
Dr. David Alan Gilbert / dgilbert at redhat.com / Manchester, UK




More information about the Virtio-fs mailing list