[libvirt] [PATCH v5 6/6] block: Enable qemu_open/close to work with fd sets

Corey Bryant coreyb at linux.vnet.ibm.com
Mon Jul 23 13:14:42 UTC 2012



On 07/23/2012 09:08 AM, Corey Bryant wrote:
> When qemu_open is passed a filename of the "/dev/fdset/nnn"
> format (where nnn is the fdset ID), an fd with matching access
> mode flags will be searched for within the specified monitor
> fd set.  If the fd is found, a dup of the fd will be returned
> from qemu_open.
>
> Each fd set has a reference count.  The purpose of the reference
> count is to determine if an fd set contains file descriptors that
> have open dup() references that have not yet been closed.  It is
> incremented on qemu_open and decremented on qemu_close.  It is
> not until the refcount is zero that file desriptors in an fd set
> can be closed.  If an fd set has dup() references open, then we
> must keep the other fds in the fd set open in case a reopen
> of the file occurs that requires an fd with a different access
> mode.
>
> Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
>
> v2:
>   -Get rid of file_open and move dup code to qemu_open
>    (kwolf at redhat.com)
>   -Use strtol wrapper instead of atoi (kwolf at redhat.com)
>
> v3:
>   -Add note about fd leakage (eblake at redhat.com)
>
> v4
>   -Moved patch to be later in series (lcapitulino at redhat.com)
>   -Update qemu_open to check access mode flags and set flags that
>    can be set (eblake at redhat.com, kwolf at redhat.com)
>
> v5:
>   -This patch was overhauled quite a bit in this version, with
>    the addition of fd set and refcount support.
>   -Use qemu_set_cloexec() on dup'd fd (eblake at redhat.com)
>   -Modify flags set by fcntl on dup'd fd (eblake at redhat.com)
>   -Reduce syscalls when setting flags for dup'd fd (eblake at redhat.com)
>   -Fix O_RDWR, O_RDONLY, O_WRONLY checks (eblake at redhat.com)
> ---
>   block/raw-posix.c |   24 +++++-----
>   block/raw-win32.c |    2 +-
>   block/vmdk.c      |    4 +-
>   block/vpc.c       |    2 +-
>   block/vvfat.c     |   12 ++---
>   cutils.c          |    5 ++
>   monitor.c         |   85 +++++++++++++++++++++++++++++++++
>   monitor.h         |    4 ++
>   osdep.c           |  138 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>   qemu-common.h     |    3 +-
>   qemu-tool.c       |   12 +++++
>   11 files changed, 267 insertions(+), 24 deletions(-)
>
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index a172de3..5d0a801 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -271,7 +271,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
>   out_free_buf:
>       qemu_vfree(s->aligned_buf);
>   out_close:
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>       return -errno;
>   }
>
> @@ -376,7 +376,7 @@ static void raw_close(BlockDriverState *bs)
>   {
>       BDRVRawState *s = bs->opaque;
>       if (s->fd >= 0) {
> -        qemu_close(s->fd);
> +        qemu_close(s->fd, bs->filename);
>           s->fd = -1;
>           if (s->aligned_buf != NULL)
>               qemu_vfree(s->aligned_buf);
> @@ -580,7 +580,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
>           if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
>               result = -errno;
>           }
> -        if (qemu_close(fd) != 0) {
> +        if (qemu_close(fd, filename) != 0) {
>               result = -errno;
>           }
>       }
> @@ -850,7 +850,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
>               if (fd < 0) {
>                   bsdPath[strlen(bsdPath)-1] = '1';
>               } else {
> -                qemu_close(fd);
> +                qemu_close(fd, bsdPath);
>               }
>               filename = bsdPath;
>           }
> @@ -889,7 +889,7 @@ static int fd_open(BlockDriverState *bs)
>       last_media_present = (s->fd >= 0);
>       if (s->fd >= 0 &&
>           (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
> -        qemu_close(s->fd);
> +        qemu_close(s->fd, bs->filename);
>           s->fd = -1;
>   #ifdef DEBUG_FLOPPY
>           printf("Floppy closed\n");
> @@ -988,7 +988,7 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options)
>       else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
>           ret = -ENOSPC;
>
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>       return ret;
>   }
>
> @@ -1038,7 +1038,7 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
>           return ret;
>
>       /* close fd so that we can reopen it as needed */
> -    qemu_close(s->fd);
> +    qemu_close(s->fd, filename);
>       s->fd = -1;
>       s->fd_media_changed = 1;
>
> @@ -1070,7 +1070,7 @@ static int floppy_probe_device(const char *filename)
>           prio = 100;
>
>   outc:
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>   out:
>       return prio;
>   }
> @@ -1105,14 +1105,14 @@ static void floppy_eject(BlockDriverState *bs, bool eject_flag)
>       int fd;
>
>       if (s->fd >= 0) {
> -        qemu_close(s->fd);
> +        qemu_close(s->fd, bs->filename);
>           s->fd = -1;
>       }
>       fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
>       if (fd >= 0) {
>           if (ioctl(fd, FDEJECT, 0) < 0)
>               perror("FDEJECT");
> -        qemu_close(fd);
> +        qemu_close(fd, bs->filename);
>       }
>   }
>
> @@ -1173,7 +1173,7 @@ static int cdrom_probe_device(const char *filename)
>           prio = 100;
>
>   outc:
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>   out:
>       return prio;
>   }
> @@ -1281,7 +1281,7 @@ static int cdrom_reopen(BlockDriverState *bs)
>        * FreeBSD seems to not notice sometimes...
>        */
>       if (s->fd >= 0)
> -        qemu_close(s->fd);
> +        qemu_close(s->fd, bs->filename);
>       fd = qemu_open(bs->filename, s->open_flags, 0644);
>       if (fd < 0) {
>           s->fd = -1;
> diff --git a/block/raw-win32.c b/block/raw-win32.c
> index c56bf83..b795871 100644
> --- a/block/raw-win32.c
> +++ b/block/raw-win32.c
> @@ -261,7 +261,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
>           return -EIO;
>       set_sparse(fd);
>       ftruncate(fd, total_size * 512);
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>       return 0;
>   }
>
> diff --git a/block/vmdk.c b/block/vmdk.c
> index daee426..7f1206b 100644
> --- a/block/vmdk.c
> +++ b/block/vmdk.c
> @@ -1258,7 +1258,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
>
>       ret = 0;
>    exit:
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>       return ret;
>   }
>
> @@ -1506,7 +1506,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
>       }
>       ret = 0;
>   exit:
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>       return ret;
>   }
>
> diff --git a/block/vpc.c b/block/vpc.c
> index c0b82c4..20f648a 100644
> --- a/block/vpc.c
> +++ b/block/vpc.c
> @@ -744,7 +744,7 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
>       }
>
>    fail:
> -    qemu_close(fd);
> +    qemu_close(fd, filename);
>       return ret;
>   }
>
> diff --git a/block/vvfat.c b/block/vvfat.c
> index 59d3c5b..cbc7543 100644
> --- a/block/vvfat.c
> +++ b/block/vvfat.c
> @@ -1105,7 +1105,7 @@ static inline void vvfat_close_current_file(BDRVVVFATState *s)
>       if(s->current_mapping) {
>   	s->current_mapping = NULL;
>   	if (s->current_fd) {
> -		qemu_close(s->current_fd);
> +		qemu_close(s->current_fd, s->current_mapping->path);
>   		s->current_fd = 0;
>   	}
>       }
> @@ -2230,7 +2230,7 @@ static int commit_one_file(BDRVVVFATState* s,
>       }
>       if (offset > 0) {
>           if (lseek(fd, offset, SEEK_SET) != offset) {
> -            qemu_close(fd);
> +            qemu_close(fd, mapping->path);
>               g_free(cluster);
>               return -3;
>           }
> @@ -2251,13 +2251,13 @@ static int commit_one_file(BDRVVVFATState* s,
>   	    (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
>
>           if (ret < 0) {
> -            qemu_close(fd);
> +            qemu_close(fd, mapping->path);
>               g_free(cluster);
>               return ret;
>           }
>
>           if (write(fd, cluster, rest_size) < 0) {
> -            qemu_close(fd);
> +            qemu_close(fd, mapping->path);
>               g_free(cluster);
>               return -2;
>           }
> @@ -2268,11 +2268,11 @@ static int commit_one_file(BDRVVVFATState* s,
>
>       if (ftruncate(fd, size)) {
>           perror("ftruncate()");
> -        qemu_close(fd);
> +        qemu_close(fd, mapping->path);
>           g_free(cluster);
>           return -4;
>       }
> -    qemu_close(fd);
> +    qemu_close(fd, mapping->path);
>       g_free(cluster);
>
>       return commit_mappings(s, first_cluster, dir_index);
> diff --git a/cutils.c b/cutils.c
> index e2bc1b8..4e6bfdc 100644
> --- a/cutils.c
> +++ b/cutils.c
> @@ -375,3 +375,8 @@ int qemu_parse_fd(const char *param)
>       }
>       return fd;
>   }
> +
> +int qemu_parse_fdset(const char *param)
> +{
> +    return qemu_parse_fd(param);
> +}
> diff --git a/monitor.c b/monitor.c
> index 30b085f..fddd2b5 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -2551,6 +2551,91 @@ static void monitor_fdsets_set_in_use(Monitor *mon, bool in_use)
>       }
>   }
>
> +void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id)
> +{
> +    mon_fdset_t *mon_fdset;
> +
> +    if (!mon) {
> +        return;
> +    }
> +
> +    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
> +        if (mon_fdset->id == fdset_id) {
> +            mon_fdset->refcount++;
> +            break;
> +        }
> +    }
> +}
> +
> +void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id)
> +{
> +    mon_fdset_t *mon_fdset;
> +
> +    if (!mon) {
> +        return;
> +    }
> +
> +    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
> +        if (mon_fdset->id == fdset_id) {
> +            mon_fdset->refcount--;
> +            if (mon_fdset->refcount == 0) {
> +                monitor_fdset_cleanup(mon_fdset);
> +            }
> +            break;
> +        }
> +    }
> +}
> +
> +int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
> +{
> +    mon_fdset_t *mon_fdset;
> +    mon_fdset_fd_t *mon_fdset_fd;
> +    int mon_fd_flags;
> +
> +    if (!mon) {
> +        errno = ENOENT;
> +        return -1;
> +    }
> +
> +    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
> +        if (mon_fdset->id != fdset_id) {
> +            continue;
> +        }
> +        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
> +            if (mon_fdset_fd->removed) {
> +                continue;
> +            }
> +
> +            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
> +            if (mon_fd_flags == -1) {
> +                return -1;
> +            }
> +
> +            switch (flags & O_ACCMODE) {
> +            case O_RDWR:
> +                if ((mon_fd_flags & O_ACCMODE) == O_RDWR) {
> +                    return mon_fdset_fd->fd;
> +                }
> +                break;
> +            case O_RDONLY:
> +                if ((mon_fd_flags & O_ACCMODE) == O_RDONLY) {
> +                    return mon_fdset_fd->fd;
> +                }
> +                break;
> +            case O_WRONLY:
> +                if ((mon_fd_flags & O_ACCMODE) == O_WRONLY) {
> +                    return mon_fdset_fd->fd;
> +                }
> +                break;
> +            }
> +        }
> +        errno = EACCES;
> +        return -1;
> +    }
> +    errno = ENOENT;
> +    return -1;
> +}
> +
>   /* mon_cmds and info_cmds would be sorted at runtime */
>   static mon_cmd_t mon_cmds[] = {
>   #include "hmp-commands.h"
> diff --git a/monitor.h b/monitor.h
> index 5f4de1b..1efed38 100644
> --- a/monitor.h
> +++ b/monitor.h
> @@ -86,4 +86,8 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
>
>   int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
>
> +int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags);
> +void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id);
> +void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id);
> +
>   #endif /* !MONITOR_H */
> diff --git a/osdep.c b/osdep.c
> index 7b09dff..0303cdd 100644
> --- a/osdep.c
> +++ b/osdep.c
> @@ -47,6 +47,7 @@ extern int madvise(caddr_t, size_t, int);
>   #include "qemu-common.h"
>   #include "trace.h"
>   #include "qemu_socket.h"
> +#include "monitor.h"
>
>   static const char *qemu_version = QEMU_VERSION;
>
> @@ -75,6 +76,79 @@ int qemu_madvise(void *addr, size_t len, int advice)
>   #endif
>   }
>
> +/*
> + * Dups an fd and sets the flags
> + */
> +static int qemu_dup(int fd, int flags)
> +{
> +    int i;
> +    int ret;
> +    int serrno;
> +    int dup_flags;
> +    int setfl_flags[] = { O_APPEND, O_ASYNC, O_DIRECT, O_NOATIME,
> +                          O_NONBLOCK, 0 };
> +
> +    if (flags & O_CLOEXEC) {
> +        ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
> +        if (ret == -1 && errno == EINVAL) {
> +            ret = dup(fd);
> +            if (ret != -1 && fcntl_setfl(ret, O_CLOEXEC) == -1) {
> +                goto fail;
> +            }
> +        }
> +    } else {
> +        ret = dup(fd);
> +    }
> +
> +    if (ret == -1) {
> +        goto fail;
> +    }
> +
> +    dup_flags = fcntl(ret, F_GETFL);
> +    if (dup_flags == -1) {
> +        goto fail;
> +    }
> +
> +    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
> +        errno = EINVAL;
> +        goto fail;
> +    }
> +
> +    /* Set/unset flags that we can with fcntl */
> +    i = 0;
> +    while (setfl_flags[i] != 0) {
> +        if (flags & setfl_flags[i]) {
> +            dup_flags = (dup_flags | setfl_flags[i]);
> +        } else {
> +            dup_flags = (dup_flags & ~setfl_flags[i]);
> +        }
> +        i++;
> +    }
> +
> +    if (fcntl(ret, F_SETFL, dup_flags) == -1) {
> +        goto fail;
> +    }
> +
> +    /* Truncate the file in the cases that open() would truncate it */
> +    if (flags & O_TRUNC ||
> +            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
> +        if (ftruncate(ret, 0) == -1) {
> +            goto fail;
> +        }
> +    }
> +
> +    qemu_set_cloexec(ret);
> +
> +    return ret;
> +
> +fail:
> +    serrno = errno;
> +    if (ret != -1) {
> +        close(ret);
> +    }
> +    errno = serrno;
> +    return -1;
> +}
>
>   /*
>    * Opens a file with FD_CLOEXEC set
> @@ -84,6 +158,36 @@ int qemu_open(const char *name, int flags, ...)
>       int ret;
>       int mode = 0;
>
> +#ifndef _WIN32
> +    const char *fdset_id_str;
> +
> +    /* Attempt dup of fd from fd set */
> +    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
> +        int64_t fdset_id;
> +        int fd, dupfd;
> +
> +        fdset_id = qemu_parse_fdset(fdset_id_str);
> +        if (fdset_id == -1) {
> +            errno = EINVAL;
> +            return -1;
> +        }
> +
> +        fd = monitor_fdset_get_fd(default_mon, fdset_id, flags);

I know that use of default_mon in this patch is not correct, but I 
wanted to get these patches out for review. I used default_mon for 
testing because cur_mon wasn't pointing to the monitor I'd added fd sets 
to.  I need to figure out why.

> +        if (fd == -1) {
> +            return -1;
> +        }
> +
> +        dupfd = qemu_dup(fd, flags);
> +        if (fd == -1) {
> +            return -1;
> +        }
> +
> +        monitor_fdset_increment_refcount(default_mon, fdset_id);
> +
> +        return dupfd;
> +    }
> +#endif
> +
>       if (flags & O_CREAT) {
>           va_list ap;
>
> @@ -104,8 +208,40 @@ int qemu_open(const char *name, int flags, ...)
>       return ret;
>   }
>
> -int qemu_close(int fd)
> +int qemu_close(int fd, const char *name)
>   {
> +    const char *fdset_id_str;
> +
> +    /* Close fd that was dup'd from an fdset */
> +    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
> +        int ret;
> +        int64_t fdset_id;
> +        int fdset_fd;
> +        int flags;
> +
> +        fdset_id = qemu_parse_fdset(fdset_id_str);
> +        if (fdset_id == -1) {
> +            return -1;
> +        }
> +
> +        flags = fcntl(fd, F_GETFL);
> +        if (flags == -1) {
> +            return -1;
> +        }
> +
> +        fdset_fd = monitor_fdset_get_fd(default_mon, fdset_id, flags);
> +        if (fdset_fd == -1) {
> +            return -1;
> +        }
> +
> +        ret = close(fd);
> +        if (ret == 0) {
> +            monitor_fdset_decrement_refcount(default_mon, fdset_id);
> +        }
> +
> +        return ret;
> +    }
> +
>       return close(fd);
>   }
>
> diff --git a/qemu-common.h b/qemu-common.h
> index c8ddf2f..6b4123c 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -147,6 +147,7 @@ int qemu_fls(int i);
>   int qemu_fdatasync(int fd);
>   int fcntl_setfl(int fd, int flag);
>   int qemu_parse_fd(const char *param);
> +int qemu_parse_fdset(const char *param);
>
>   /*
>    * strtosz() suffixes used to specify the default treatment of an
> @@ -188,7 +189,7 @@ const char *path(const char *pathname);
>   void *qemu_oom_check(void *ptr);
>
>   int qemu_open(const char *name, int flags, ...);
> -int qemu_close(int fd);
> +int qemu_close(int fd, const char *name);
>   ssize_t qemu_write_full(int fd, const void *buf, size_t count)
>       QEMU_WARN_UNUSED_RESULT;
>   ssize_t qemu_send_full(int fd, const void *buf, size_t count, int flags)
> diff --git a/qemu-tool.c b/qemu-tool.c
> index 318c5fc..f4db9db 100644
> --- a/qemu-tool.c
> +++ b/qemu-tool.c
> @@ -31,6 +31,7 @@ struct QEMUBH
>   };
>
>   Monitor *cur_mon;
> +Monitor *default_mon;
>
>   int monitor_cur_is_qmp(void)
>   {
> @@ -57,6 +58,17 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
>   {
>   }
>
> +int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
> +{
> +    return -1;
> +}
> +void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id)
> +{
> +}
> +void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id)
> +{
> +}
> +
>   int64_t cpu_get_clock(void)
>   {
>       return qemu_get_clock_ns(rt_clock);
>

-- 
Regards,
Corey





More information about the libvir-list mailing list