[libvirt] [PATCH v5 6/6] block: Enable qemu_open/close to work with fd sets

Corey Bryant coreyb at linux.vnet.ibm.com
Thu Aug 2 22:21:28 UTC 2012



On 07/23/2012 09:14 AM, Corey Bryant wrote:
>
>
> On 07/23/2012 09:08 AM, Corey Bryant wrote:
>> When qemu_open is passed a filename of the "/dev/fdset/nnn"
>> format (where nnn is the fdset ID), an fd with matching access
>> mode flags will be searched for within the specified monitor
>> fd set.  If the fd is found, a dup of the fd will be returned
>> from qemu_open.
>>
>> Each fd set has a reference count.  The purpose of the reference
>> count is to determine if an fd set contains file descriptors that
>> have open dup() references that have not yet been closed.  It is
>> incremented on qemu_open and decremented on qemu_close.  It is
>> not until the refcount is zero that file desriptors in an fd set
>> can be closed.  If an fd set has dup() references open, then we
>> must keep the other fds in the fd set open in case a reopen
>> of the file occurs that requires an fd with a different access
>> mode.
>>
>> Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
>>
>> v2:
>>   -Get rid of file_open and move dup code to qemu_open
>>    (kwolf at redhat.com)
>>   -Use strtol wrapper instead of atoi (kwolf at redhat.com)
>>
>> v3:
>>   -Add note about fd leakage (eblake at redhat.com)
>>
>> v4
>>   -Moved patch to be later in series (lcapitulino at redhat.com)
>>   -Update qemu_open to check access mode flags and set flags that
>>    can be set (eblake at redhat.com, kwolf at redhat.com)
>>
>> v5:
>>   -This patch was overhauled quite a bit in this version, with
>>    the addition of fd set and refcount support.
>>   -Use qemu_set_cloexec() on dup'd fd (eblake at redhat.com)
>>   -Modify flags set by fcntl on dup'd fd (eblake at redhat.com)
>>   -Reduce syscalls when setting flags for dup'd fd (eblake at redhat.com)
>>   -Fix O_RDWR, O_RDONLY, O_WRONLY checks (eblake at redhat.com)
>> ---
>>   block/raw-posix.c |   24 +++++-----
>>   block/raw-win32.c |    2 +-
>>   block/vmdk.c      |    4 +-
>>   block/vpc.c       |    2 +-
>>   block/vvfat.c     |   12 ++---
>>   cutils.c          |    5 ++
>>   monitor.c         |   85 +++++++++++++++++++++++++++++++++
>>   monitor.h         |    4 ++
>>   osdep.c           |  138
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>   qemu-common.h     |    3 +-
>>   qemu-tool.c       |   12 +++++
>>   11 files changed, 267 insertions(+), 24 deletions(-)
>>
>> diff --git a/block/raw-posix.c b/block/raw-posix.c
>> index a172de3..5d0a801 100644
>> --- a/block/raw-posix.c
>> +++ b/block/raw-posix.c
>> @@ -271,7 +271,7 @@ static int raw_open_common(BlockDriverState *bs,
>> const char *filename,
>>   out_free_buf:
>>       qemu_vfree(s->aligned_buf);
>>   out_close:
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>       return -errno;
>>   }
>>
>> @@ -376,7 +376,7 @@ static void raw_close(BlockDriverState *bs)
>>   {
>>       BDRVRawState *s = bs->opaque;
>>       if (s->fd >= 0) {
>> -        qemu_close(s->fd);
>> +        qemu_close(s->fd, bs->filename);
>>           s->fd = -1;
>>           if (s->aligned_buf != NULL)
>>               qemu_vfree(s->aligned_buf);
>> @@ -580,7 +580,7 @@ static int raw_create(const char *filename,
>> QEMUOptionParameter *options)
>>           if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
>>               result = -errno;
>>           }
>> -        if (qemu_close(fd) != 0) {
>> +        if (qemu_close(fd, filename) != 0) {
>>               result = -errno;
>>           }
>>       }
>> @@ -850,7 +850,7 @@ static int hdev_open(BlockDriverState *bs, const
>> char *filename, int flags)
>>               if (fd < 0) {
>>                   bsdPath[strlen(bsdPath)-1] = '1';
>>               } else {
>> -                qemu_close(fd);
>> +                qemu_close(fd, bsdPath);
>>               }
>>               filename = bsdPath;
>>           }
>> @@ -889,7 +889,7 @@ static int fd_open(BlockDriverState *bs)
>>       last_media_present = (s->fd >= 0);
>>       if (s->fd >= 0 &&
>>           (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
>> -        qemu_close(s->fd);
>> +        qemu_close(s->fd, bs->filename);
>>           s->fd = -1;
>>   #ifdef DEBUG_FLOPPY
>>           printf("Floppy closed\n");
>> @@ -988,7 +988,7 @@ static int hdev_create(const char *filename,
>> QEMUOptionParameter *options)
>>       else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
>>           ret = -ENOSPC;
>>
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>       return ret;
>>   }
>>
>> @@ -1038,7 +1038,7 @@ static int floppy_open(BlockDriverState *bs,
>> const char *filename, int flags)
>>           return ret;
>>
>>       /* close fd so that we can reopen it as needed */
>> -    qemu_close(s->fd);
>> +    qemu_close(s->fd, filename);
>>       s->fd = -1;
>>       s->fd_media_changed = 1;
>>
>> @@ -1070,7 +1070,7 @@ static int floppy_probe_device(const char
>> *filename)
>>           prio = 100;
>>
>>   outc:
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>   out:
>>       return prio;
>>   }
>> @@ -1105,14 +1105,14 @@ static void floppy_eject(BlockDriverState *bs,
>> bool eject_flag)
>>       int fd;
>>
>>       if (s->fd >= 0) {
>> -        qemu_close(s->fd);
>> +        qemu_close(s->fd, bs->filename);
>>           s->fd = -1;
>>       }
>>       fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
>>       if (fd >= 0) {
>>           if (ioctl(fd, FDEJECT, 0) < 0)
>>               perror("FDEJECT");
>> -        qemu_close(fd);
>> +        qemu_close(fd, bs->filename);
>>       }
>>   }
>>
>> @@ -1173,7 +1173,7 @@ static int cdrom_probe_device(const char *filename)
>>           prio = 100;
>>
>>   outc:
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>   out:
>>       return prio;
>>   }
>> @@ -1281,7 +1281,7 @@ static int cdrom_reopen(BlockDriverState *bs)
>>        * FreeBSD seems to not notice sometimes...
>>        */
>>       if (s->fd >= 0)
>> -        qemu_close(s->fd);
>> +        qemu_close(s->fd, bs->filename);
>>       fd = qemu_open(bs->filename, s->open_flags, 0644);
>>       if (fd < 0) {
>>           s->fd = -1;
>> diff --git a/block/raw-win32.c b/block/raw-win32.c
>> index c56bf83..b795871 100644
>> --- a/block/raw-win32.c
>> +++ b/block/raw-win32.c
>> @@ -261,7 +261,7 @@ static int raw_create(const char *filename,
>> QEMUOptionParameter *options)
>>           return -EIO;
>>       set_sparse(fd);
>>       ftruncate(fd, total_size * 512);
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>       return 0;
>>   }
>>
>> diff --git a/block/vmdk.c b/block/vmdk.c
>> index daee426..7f1206b 100644
>> --- a/block/vmdk.c
>> +++ b/block/vmdk.c
>> @@ -1258,7 +1258,7 @@ static int vmdk_create_extent(const char
>> *filename, int64_t filesize,
>>
>>       ret = 0;
>>    exit:
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>       return ret;
>>   }
>>
>> @@ -1506,7 +1506,7 @@ static int vmdk_create(const char *filename,
>> QEMUOptionParameter *options)
>>       }
>>       ret = 0;
>>   exit:
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>       return ret;
>>   }
>>
>> diff --git a/block/vpc.c b/block/vpc.c
>> index c0b82c4..20f648a 100644
>> --- a/block/vpc.c
>> +++ b/block/vpc.c
>> @@ -744,7 +744,7 @@ static int vpc_create(const char *filename,
>> QEMUOptionParameter *options)
>>       }
>>
>>    fail:
>> -    qemu_close(fd);
>> +    qemu_close(fd, filename);
>>       return ret;
>>   }
>>
>> diff --git a/block/vvfat.c b/block/vvfat.c
>> index 59d3c5b..cbc7543 100644
>> --- a/block/vvfat.c
>> +++ b/block/vvfat.c
>> @@ -1105,7 +1105,7 @@ static inline void
>> vvfat_close_current_file(BDRVVVFATState *s)
>>       if(s->current_mapping) {
>>       s->current_mapping = NULL;
>>       if (s->current_fd) {
>> -        qemu_close(s->current_fd);
>> +        qemu_close(s->current_fd, s->current_mapping->path);
>>           s->current_fd = 0;
>>       }
>>       }
>> @@ -2230,7 +2230,7 @@ static int commit_one_file(BDRVVVFATState* s,
>>       }
>>       if (offset > 0) {
>>           if (lseek(fd, offset, SEEK_SET) != offset) {
>> -            qemu_close(fd);
>> +            qemu_close(fd, mapping->path);
>>               g_free(cluster);
>>               return -3;
>>           }
>> @@ -2251,13 +2251,13 @@ static int commit_one_file(BDRVVVFATState* s,
>>           (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
>>
>>           if (ret < 0) {
>> -            qemu_close(fd);
>> +            qemu_close(fd, mapping->path);
>>               g_free(cluster);
>>               return ret;
>>           }
>>
>>           if (write(fd, cluster, rest_size) < 0) {
>> -            qemu_close(fd);
>> +            qemu_close(fd, mapping->path);
>>               g_free(cluster);
>>               return -2;
>>           }
>> @@ -2268,11 +2268,11 @@ static int commit_one_file(BDRVVVFATState* s,
>>
>>       if (ftruncate(fd, size)) {
>>           perror("ftruncate()");
>> -        qemu_close(fd);
>> +        qemu_close(fd, mapping->path);
>>           g_free(cluster);
>>           return -4;
>>       }
>> -    qemu_close(fd);
>> +    qemu_close(fd, mapping->path);
>>       g_free(cluster);
>>
>>       return commit_mappings(s, first_cluster, dir_index);
>> diff --git a/cutils.c b/cutils.c
>> index e2bc1b8..4e6bfdc 100644
>> --- a/cutils.c
>> +++ b/cutils.c
>> @@ -375,3 +375,8 @@ int qemu_parse_fd(const char *param)
>>       }
>>       return fd;
>>   }
>> +
>> +int qemu_parse_fdset(const char *param)
>> +{
>> +    return qemu_parse_fd(param);
>> +}
>> diff --git a/monitor.c b/monitor.c
>> index 30b085f..fddd2b5 100644
>> --- a/monitor.c
>> +++ b/monitor.c
>> @@ -2551,6 +2551,91 @@ static void monitor_fdsets_set_in_use(Monitor
>> *mon, bool in_use)
>>       }
>>   }
>>
>> +void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id)
>> +{
>> +    mon_fdset_t *mon_fdset;
>> +
>> +    if (!mon) {
>> +        return;
>> +    }
>> +
>> +    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
>> +        if (mon_fdset->id == fdset_id) {
>> +            mon_fdset->refcount++;
>> +            break;
>> +        }
>> +    }
>> +}
>> +
>> +void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id)
>> +{
>> +    mon_fdset_t *mon_fdset;
>> +
>> +    if (!mon) {
>> +        return;
>> +    }
>> +
>> +    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
>> +        if (mon_fdset->id == fdset_id) {
>> +            mon_fdset->refcount--;
>> +            if (mon_fdset->refcount == 0) {
>> +                monitor_fdset_cleanup(mon_fdset);
>> +            }
>> +            break;
>> +        }
>> +    }
>> +}
>> +
>> +int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
>> +{
>> +    mon_fdset_t *mon_fdset;
>> +    mon_fdset_fd_t *mon_fdset_fd;
>> +    int mon_fd_flags;
>> +
>> +    if (!mon) {
>> +        errno = ENOENT;
>> +        return -1;
>> +    }
>> +
>> +    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
>> +        if (mon_fdset->id != fdset_id) {
>> +            continue;
>> +        }
>> +        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
>> +            if (mon_fdset_fd->removed) {
>> +                continue;
>> +            }
>> +
>> +            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
>> +            if (mon_fd_flags == -1) {
>> +                return -1;
>> +            }
>> +
>> +            switch (flags & O_ACCMODE) {
>> +            case O_RDWR:
>> +                if ((mon_fd_flags & O_ACCMODE) == O_RDWR) {
>> +                    return mon_fdset_fd->fd;
>> +                }
>> +                break;
>> +            case O_RDONLY:
>> +                if ((mon_fd_flags & O_ACCMODE) == O_RDONLY) {
>> +                    return mon_fdset_fd->fd;
>> +                }
>> +                break;
>> +            case O_WRONLY:
>> +                if ((mon_fd_flags & O_ACCMODE) == O_WRONLY) {
>> +                    return mon_fdset_fd->fd;
>> +                }
>> +                break;
>> +            }
>> +        }
>> +        errno = EACCES;
>> +        return -1;
>> +    }
>> +    errno = ENOENT;
>> +    return -1;
>> +}
>> +
>>   /* mon_cmds and info_cmds would be sorted at runtime */
>>   static mon_cmd_t mon_cmds[] = {
>>   #include "hmp-commands.h"
>> diff --git a/monitor.h b/monitor.h
>> index 5f4de1b..1efed38 100644
>> --- a/monitor.h
>> +++ b/monitor.h
>> @@ -86,4 +86,8 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict,
>> QObject **ret);
>>
>>   int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
>>
>> +int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags);
>> +void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id);
>> +void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id);
>> +
>>   #endif /* !MONITOR_H */
>> diff --git a/osdep.c b/osdep.c
>> index 7b09dff..0303cdd 100644
>> --- a/osdep.c
>> +++ b/osdep.c
>> @@ -47,6 +47,7 @@ extern int madvise(caddr_t, size_t, int);
>>   #include "qemu-common.h"
>>   #include "trace.h"
>>   #include "qemu_socket.h"
>> +#include "monitor.h"
>>
>>   static const char *qemu_version = QEMU_VERSION;
>>
>> @@ -75,6 +76,79 @@ int qemu_madvise(void *addr, size_t len, int advice)
>>   #endif
>>   }
>>
>> +/*
>> + * Dups an fd and sets the flags
>> + */
>> +static int qemu_dup(int fd, int flags)
>> +{
>> +    int i;
>> +    int ret;
>> +    int serrno;
>> +    int dup_flags;
>> +    int setfl_flags[] = { O_APPEND, O_ASYNC, O_DIRECT, O_NOATIME,
>> +                          O_NONBLOCK, 0 };
>> +
>> +    if (flags & O_CLOEXEC) {
>> +        ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
>> +        if (ret == -1 && errno == EINVAL) {
>> +            ret = dup(fd);
>> +            if (ret != -1 && fcntl_setfl(ret, O_CLOEXEC) == -1) {
>> +                goto fail;
>> +            }
>> +        }
>> +    } else {
>> +        ret = dup(fd);
>> +    }
>> +
>> +    if (ret == -1) {
>> +        goto fail;
>> +    }
>> +
>> +    dup_flags = fcntl(ret, F_GETFL);
>> +    if (dup_flags == -1) {
>> +        goto fail;
>> +    }
>> +
>> +    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
>> +        errno = EINVAL;
>> +        goto fail;
>> +    }
>> +
>> +    /* Set/unset flags that we can with fcntl */
>> +    i = 0;
>> +    while (setfl_flags[i] != 0) {
>> +        if (flags & setfl_flags[i]) {
>> +            dup_flags = (dup_flags | setfl_flags[i]);
>> +        } else {
>> +            dup_flags = (dup_flags & ~setfl_flags[i]);
>> +        }
>> +        i++;
>> +    }
>> +
>> +    if (fcntl(ret, F_SETFL, dup_flags) == -1) {
>> +        goto fail;
>> +    }
>> +
>> +    /* Truncate the file in the cases that open() would truncate it */
>> +    if (flags & O_TRUNC ||
>> +            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
>> +        if (ftruncate(ret, 0) == -1) {
>> +            goto fail;
>> +        }
>> +    }
>> +
>> +    qemu_set_cloexec(ret);
>> +
>> +    return ret;
>> +
>> +fail:
>> +    serrno = errno;
>> +    if (ret != -1) {
>> +        close(ret);
>> +    }
>> +    errno = serrno;
>> +    return -1;
>> +}
>>
>>   /*
>>    * Opens a file with FD_CLOEXEC set
>> @@ -84,6 +158,36 @@ int qemu_open(const char *name, int flags, ...)
>>       int ret;
>>       int mode = 0;
>>
>> +#ifndef _WIN32
>> +    const char *fdset_id_str;
>> +
>> +    /* Attempt dup of fd from fd set */
>> +    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
>> +        int64_t fdset_id;
>> +        int fd, dupfd;
>> +
>> +        fdset_id = qemu_parse_fdset(fdset_id_str);
>> +        if (fdset_id == -1) {
>> +            errno = EINVAL;
>> +            return -1;
>> +        }
>> +
>> +        fd = monitor_fdset_get_fd(default_mon, fdset_id, flags);
>
> I know that use of default_mon in this patch is not correct, but I
> wanted to get these patches out for review. I used default_mon for
> testing because cur_mon wasn't pointing to the monitor I'd added fd sets
> to.  I need to figure out why.
>

Does it make sense to use default_mon here?  After digging into this 
some more, I'm thinking it makes sense, and I'll explain why.

It looks like cur_mon can't be used.  cur_mon will point to the monitor 
object for the duration of a command, and be reset to old_mon (NULL in 
my case) after the command completes.

qemu_open() and qemu_close() are frequently called long after a monitor 
command has come and gone, so cur_mon won't work.  For example, 
drive_add will cause qemu_open() to be called, but after the command has 
completed, the file will keep getting opened/closed during normal QEMU 
operation.  I'm not sure why, I've just noticed this behavior.

Does anyone have any thoughts on this?  It would require fd sets to be 
added to the default monitor only.

-- 
Regards,
Corey





More information about the libvir-list mailing list