[Virtio-fs] [fuse-devel] 'FORGET' ordering semantics (vs unlink & NFS)
Vivek Goyal
vgoyal at redhat.com
Wed Jan 6 16:57:59 UTC 2021
On Wed, Jan 06, 2021 at 02:40:45PM +0100, Miklos Szeredi wrote:
> On Wed, Jan 6, 2021 at 10:16 AM Amir Goldstein <amir73il at gmail.com> wrote:
>
> > Please note that NFS doesn't do "silly rename" for directories,
> > so mitigation is mostly needed for non-dir.
>
> Okay.
>
> > An alternative method if daemon is not capable, is to store parent dirfd
> > in addition to filehandle and implement open_child_by_handle_at(int
> > parent_fd, ...):
> > - readdir(parent_fd)
> > - search a match for d_ino
> > - name_to_handle_at() and verify match to stored filehandle
> >
> > This is essentially what open_by_handle_at(2) does under the covers
> > with a "connectable" non-dir filehandle after having resolved the
> > parent file handle part. And "connectable" file handles are used by nfsd
> > to enforce "subtree_check" to make sure that file wasn't moved outside
> > obtainable path after initial lookup.
>
> Yes, sort of makes sense, but will have corner cases for hard links in
> different directories, open files after unlink, etc..
>
> Also back to the original problem: what we really want is to close the
> O_PATH descriptor on unlink(). This should be possible, regardless of
> any FORGET, assuming
>
> 1) no open files exist that reference the inode
> 2) no aliases have been looked up (i.e. just one cached dentry)
>
> The attached untested patch tries to do this.
>
> Thanks,
> Miklos
> diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
> index ec1008bceba8..d9c03e87e57d 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -107,6 +107,11 @@ struct lo_inode {
> */
> gint refcount;
>
> + /*
> + * Number of open instances
> + */
> + gint opencount;
> +
> struct lo_key key;
>
> /*
> @@ -901,6 +906,18 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
>
> inode = lo_find(lo, &e->attr, mnt_id);
> if (inode) {
> + char buf1[PATH_MAX + 1], buf2[PATH_MAX + 1], procname[64];
> + ssize_t siz1, siz2;
> +
> + sprintf(procname, "%i", inode->fd);
> + siz1 = readlinkat(lo->proc_self_fd, procname, buf1, sizeof(buf1));
> + sprintf(procname, "%i", newfd);
> + siz2 = readlinkat(lo->proc_self_fd, procname, buf2, sizeof(buf2));
> +
> + /* disable close on unlink if alias is detected */
> + if (siz1 != siz2 || memcmp(buf1, buf2, siz1))
> + g_atomic_int_inc(&inode->opencount);
> +
Hi Miklos,
So if I have hard links to a file in separate directories (say
dir1/file.txt and dir2/file-link.txt), then this path can be hit. IIUC,
we will disable automatic closing of inode->fd for these hard-linked
files. That means this solution will not solve the problem at hand for
hard-linked files. Am I missing something?
> close(newfd);
> } else {
> inode = calloc(1, sizeof(struct lo_inode));
> @@ -917,6 +934,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
> */
> g_atomic_int_set(&inode->refcount, 2);
>
> + g_atomic_int_set(&inode->opencount, 0);
> inode->nlookup = 1;
> inode->fd = newfd;
> inode->key.ino = e->attr.st_ino;
> @@ -1295,6 +1313,10 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
> res = unlinkat(lo_fd(req, parent), name, 0);
>
> fuse_reply_err(req, res == -1 ? errno : 0);
> + if (!g_atomic_int_get(&inode->opencount)) {
> + close(inode->fd);
> + inode->fd = -1;
> + }
Can this be racy w.r.t. lo_lookup()? IOW, say dir1/file.txt is being
unlinked and we have closed inode->fd. If, before we can execute
unref_inode_lolocked(), a parallel lookup of dir2/file-link.txt
gets the lo->mutex lock first, it can still find this
inode and bump up its reference count. That means lo_unlink() will
not free the inode (but will have closed inode->fd), and we will then
use an inode with a closed O_PATH fd, which will lead to other failures
later.
Thanks
Vivek
> unref_inode_lolocked(lo, inode, 1);
> lo_inode_put(lo, &inode);
> }
> @@ -1904,25 +1926,30 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
> ssize_t fh;
> char buf[64];
> struct lo_data *lo = lo_data(req);
> + struct lo_inode *inode = lo_inode(req, ino);
> + int err = EBADF;
> +
> + if (!inode)
> + goto out_err;
>
> fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
> fi->flags);
>
> update_open_flags(lo->writeback, lo->allow_direct_io, fi);
>
> - sprintf(buf, "%i", lo_fd(req, ino));
> + sprintf(buf, "%i", inode->fd);
> fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
> - if (fd == -1) {
> - return (void)fuse_reply_err(req, errno);
> - }
> + err = errno;
> + if (fd == -1)
> + goto out_err;
>
> pthread_mutex_lock(&lo->mutex);
> fh = lo_add_fd_mapping(req, fd);
> pthread_mutex_unlock(&lo->mutex);
> if (fh == -1) {
> close(fd);
> - fuse_reply_err(req, ENOMEM);
> - return;
> + err = ENOMEM;
> + goto out_err;
> }
>
> fi->fh = fh;
> @@ -1931,18 +1958,26 @@ static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
> } else if (lo->cache == CACHE_ALWAYS) {
> fi->keep_cache = 1;
> }
> + g_atomic_int_inc(&inode->opencount);
> + lo_inode_put(lo, &inode);
> fuse_reply_open(req, fi);
> + return;
> +
> +out_err:
> + lo_inode_put(lo, &inode);
> + fuse_reply_err(req, err);
> }
>
> static void lo_release(fuse_req_t req, fuse_ino_t ino,
> struct fuse_file_info *fi)
> {
> struct lo_data *lo = lo_data(req);
> + struct lo_inode *inode = lo_inode(req, ino);
> struct lo_map_elem *elem;
> int fd = -1;
>
> - (void)ino;
> -
> + if (inode)
> + g_atomic_int_dec_and_test(&inode->opencount);
> pthread_mutex_lock(&lo->mutex);
> elem = lo_map_get(&lo->fd_map, fi->fh);
> if (elem) {
> @@ -1951,6 +1986,7 @@ static void lo_release(fuse_req_t req, fuse_ino_t ino,
> lo_map_remove(&lo->fd_map, fi->fh);
> }
> pthread_mutex_unlock(&lo->mutex);
> + lo_inode_put(lo, &inode);
>
> close(fd);
> fuse_reply_err(req, 0);
More information about the Virtio-fs
mailing list