[Virtio-fs] [PATCH 4/4] virtiofsd: Drop CAP_FSETID if client asked for it

Vivek Goyal vgoyal at redhat.com
Wed Aug 14 12:43:39 UTC 2019


On Wed, Aug 14, 2019 at 10:52:16AM +0100, Dr. David Alan Gilbert wrote:
> * Vivek Goyal (vgoyal at redhat.com) wrote:
> > If client requested killing setuid/setgid bits on file being written, drop
> > CAP_FSETID capability so that setuid/setgid bits are cleared upon write
> > automatically.
> > 
> > pjdfstest chown/12.t needs this.
> > 
> > Signed-off-by: Vivek Goyal <vgoyal at redhat.com>
> 
> Some high level questions:
>   a) This is going to be *really* expensive isn't it - that's a lot of
> syscalls for each write
> 
>   b) and isn't this the normal case, i.e. a non-root guest writing to a
> normal file?

Hi David,

FUSE_WRITE_KILL_PRIV flag will not be set frequently. It is set only
if a write happens to a file which has setuid/setgid bit set. Very
few files in filesystem typically have this bit set and for those
also it will be set only for first write and not subsequent writes.

So I will not be worried about performance impact of it.

> 
>   c) I guess an unpriv virtiofsd still works OK because the cap is
> already gone.
> 
>   d) How is this thread safe?  My reading is cap_set_proc is process
> wide; you'd have other writesin other threads doing non-kill-priv
> writes; also you'd have multiple threads doing   (clear) write (set) -
> that feels like they'd interact badly.

man page of capabilities says that its a per thread property.

**************************
       Starting with kernel 2.2, Linux divides  the  privileges  traditionally
       associated  with  superuser into distinct units, known as capabilities,
       which can be independently enabled and disabled.   Capabilities  are  a
       per-thread attribute.
**************************

So dropping capability per thread and enabling it back should be fine.

> 
>   e) I think you could share some stuff between the drop and the
> restore;  for example you only ever need to do cap_from_name once
> in the entire run.   You can probably also do teh cap_get_proc when
> you drop it, keep that 'caps' around and then set it again.

Given its not a very frequent operation, I am not too worried about
it. It will just make code structure little more complicated. 

But if you have strong opinion about it, I am open to change it.

Thanks
Vivek

> 
> 
> Dave
> 
> > ---
> >  contrib/virtiofsd/Makefile.objs    |   2 +
> >  contrib/virtiofsd/passthrough_ll.c | 127 +++++++++++++++++++++++++++++
> >  contrib/virtiofsd/seccomp.c        |   2 +
> >  3 files changed, 131 insertions(+)
> > 
> > diff --git a/contrib/virtiofsd/Makefile.objs b/contrib/virtiofsd/Makefile.objs
> > index 941b19f18e..25f1e8dd73 100644
> > --- a/contrib/virtiofsd/Makefile.objs
> > +++ b/contrib/virtiofsd/Makefile.objs
> > @@ -11,3 +11,5 @@ virtiofsd-obj-y = buffer.o \
> >  
> >  seccomp.o-cflags := $(SECCOMP_CFLAGS)
> >  seccomp.o-libs := $(SECCOMP_LIBS)
> > +
> > +passthrough_ll.o-libs += -lcap
> > diff --git a/contrib/virtiofsd/passthrough_ll.c b/contrib/virtiofsd/passthrough_ll.c
> > index 321bbb20be..412663653a 100644
> > --- a/contrib/virtiofsd/passthrough_ll.c
> > +++ b/contrib/virtiofsd/passthrough_ll.c
> > @@ -56,6 +56,7 @@
> >  #include <sys/mman.h>
> >  #include <sys/socket.h>
> >  #include <sys/un.h>
> > +#include <sys/capability.h>
> >  
> >  #include "ireg.h"
> >  #include <sys/mount.h>
> > @@ -230,6 +231,115 @@ static struct lo_data *lo_data(fuse_req_t req)
> >  	return (struct lo_data *) fuse_req_userdata(req);
> >  }
> >  
> > +/* Helpers for dropping and regaining effective capabilities. Returns 0
> > + * on success, error otherwise  */
> > +static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
> > +{
> > +	cap_t caps;
> > +	cap_value_t cap;
> > +	cap_flag_value_t cap_value;
> > +	int ret = 0;
> > +
> > +	ret = cap_from_name(cap_name, &cap);
> > +	if (ret == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_from_name(%s) failed:%s\n", cap_name,
> > +			 strerror(errno));
> > +		goto out;
> > +	}
> > +
> > +	if (!CAP_IS_SUPPORTED(cap)) {
> > +		fuse_err("capability(%s) is not supported\n", cap_name);
> > +		return EINVAL;
> > +	}
> > +
> > +	caps = cap_get_proc();
> > +	if (caps == NULL) {
> > +		ret = errno;
> > +		fuse_err("cap_get_proc() failed\n");
> > +		goto out;
> > +	}
> > +
> > +	if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &cap_value) == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_get_flag() failed\n");
> > +		goto out_cap_free;
> > +	}
> > +
> > +	/* We dont have this capability in effective set already. */
> > +	if (cap_value == CAP_CLEAR) {
> > +		ret = 0;
> > +		goto out_cap_free;
> > +	}
> > +
> > +	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_CLEAR) == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_set_flag() failed\n");
> > +		goto out_cap_free;
> > +	}
> > +
> > +	if (cap_set_proc(caps) == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_set_procs() failed\n");
> > +		goto out_cap_free;
> > +	}
> > +
> > +	ret = 0;
> > +	if (cap_dropped)
> > +		*cap_dropped = true;
> > +
> > +out_cap_free:
> > +	cap_free(caps);
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int gain_effective_cap(const char *cap_name)
> > +{
> > +	cap_t caps;
> > +	cap_value_t cap;
> > +	int ret = 0;
> > +
> > +	ret = cap_from_name(cap_name, &cap);
> > +	if (ret == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_from_name(%s) failed:%s\n", cap_name,
> > +			 strerror(errno));
> > +		goto out;
> > +	}
> > +
> > +	if (!CAP_IS_SUPPORTED(cap)) {
> > +		fuse_err("capability(%s) is not supported\n", cap_name);
> > +		return EINVAL;
> > +	}
> > +
> > +	caps = cap_get_proc();
> > +	if (caps == NULL) {
> > +		ret = errno;
> > +		fuse_err("cap_get_proc() failed\n");
> > +		goto out;
> > +	}
> > +
> > +
> > +	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, CAP_SET) == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_set_flag() failed\n");
> > +		goto out_cap_free;
> > +	}
> > +
> > +	if (cap_set_proc(caps) == -1) {
> > +		ret = errno;
> > +		fuse_err("cap_set_procs() failed\n");
> > +		goto out_cap_free;
> > +	}
> > +	ret = 0;
> > +
> > +out_cap_free:
> > +	cap_free(caps);
> > +out:
> > +	return ret;
> > +}
> > +
> >  static void lo_map_init(struct lo_map *map)
> >  {
> >  	map->elems = NULL;
> > @@ -2014,6 +2124,7 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
> >  	ssize_t res;
> >  	struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
> >  	struct lo_data *lo = lo_data(req);
> > +	bool cap_fsetid_dropped = false;
> >  
> >  	out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
> >  	out_buf.buf[0].fd = lo_fi_fd(req, fi);
> > @@ -2023,6 +2134,16 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
> >  		fuse_debug("lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n",
> >  			   ino, out_buf.buf[0].size, (unsigned long) off);
> >  
> > +	/*
> > +	 * If kill_priv is set, drop CAP_FSETID which should lead to kernel
> > +	 * clearing setuid/setgid on file.
> > +	 */
> > +	if (fi->kill_priv) {
> > +		res = drop_effective_cap("cap_fsetid", &cap_fsetid_dropped);
> > +		if (res != 0)
> > +			fuse_reply_err(req, res);
> > +	}
> > +
> >  	res = fuse_buf_copy(req, &out_buf, in_buf, 0);
> >  	if(res < 0) {
> >  		fuse_reply_err(req, -res);
> > @@ -2037,6 +2158,12 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
> >  
> >  		fuse_reply_write(req, (size_t) res);
> >  	}
> > +
> > +	if (cap_fsetid_dropped) {
> > +		res = gain_effective_cap("cap_fsetid");
> > +		if (res)
> > +			fuse_err("Failed to gain CAP_FSETID\n");
> > +	}
> >  }
> >  
> >  static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
> > diff --git a/contrib/virtiofsd/seccomp.c b/contrib/virtiofsd/seccomp.c
> > index 5f1c873b82..7384ebee0a 100644
> > --- a/contrib/virtiofsd/seccomp.c
> > +++ b/contrib/virtiofsd/seccomp.c
> > @@ -78,6 +78,8 @@ static const int syscall_whitelist[] = {
> >  	SCMP_SYS(unlinkat),
> >  	SCMP_SYS(utimensat),
> >  	SCMP_SYS(write),
> > +	SCMP_SYS(capget),
> > +	SCMP_SYS(capset),
> >  };
> >  
> >  /* Syscalls used when --syslog is enabled */
> > -- 
> > 2.17.2
> > 
> --
> Dr. David Alan Gilbert / dgilbert at redhat.com / Manchester, UK




More information about the Virtio-fs mailing list