[libvirt] [PATCH] virNetSocketNewConnectUNIX: Use flocks when spawning a daemon
John Ferlan
jferlan at redhat.com
Wed Apr 15 18:07:53 UTC 2015
On 04/02/2015 12:21 PM, Michal Privoznik wrote:
> https://bugzilla.redhat.com/show_bug.cgi?id=1200149
>
> Even though we have a mutex mechanism so that two clients don't spawn
> two daemons, it's not strong enough. It can happen that while one
> client is spawning the daemon, the other one fails to connect.
> Basically two possible errors can happen:
>
> error: Failed to connect socket to '/home/mprivozn/.cache/libvirt/libvirt-sock': Connection refused
>
> or:
>
> error: Failed to connect socket to '/home/mprivozn/.cache/libvirt/libvirt-sock': No such file or directory
>
> The problem in both cases is, the daemon is only starting up, while we
> are trying to connect (and fail). We should postpone the connecting
> phase until the daemon is started (by the other thread that is
> spawning it). In order to do that, create a file lock 'libvirt-lock'
> in the directory where session daemon would create its socket. So even
> when called from multiple processes, spawning a daemon will serialize
> on the file lock. So only the first to come will spawn the daemon.
>
> Signed-off-by: Michal Privoznik <mprivozn at redhat.com>
> ---
>
> diff to v1:
> -hopefully this one is race free
>
> src/rpc/virnetsocket.c | 148 ++++++++++++-------------------------------------
> 1 file changed, 36 insertions(+), 112 deletions(-)
>
> diff --git a/src/rpc/virnetsocket.c b/src/rpc/virnetsocket.c
> index 6b019cc..2b891ae 100644
> --- a/src/rpc/virnetsocket.c
> +++ b/src/rpc/virnetsocket.c
> @@ -123,7 +123,7 @@ VIR_ONCE_GLOBAL_INIT(virNetSocket)
>
>
> #ifndef WIN32
> -static int virNetSocketForkDaemon(const char *binary, int passfd)
> +static int virNetSocketForkDaemon(const char *binary)
> {
> int ret;
> virCommandPtr cmd = virCommandNewArgList(binary,
> @@ -136,10 +136,6 @@ static int virNetSocketForkDaemon(const char *binary, int passfd)
> virCommandAddEnvPassBlockSUID(cmd, "XDG_RUNTIME_DIR", NULL);
> virCommandClearCaps(cmd);
> virCommandDaemonize(cmd);
> - if (passfd) {
> - virCommandPassFD(cmd, passfd, VIR_COMMAND_PASS_FD_CLOSE_PARENT);
> - virCommandPassListenFDs(cmd);
> - }
> ret = virCommandRun(cmd, NULL);
> virCommandFree(cmd);
> return ret;
> @@ -543,10 +539,10 @@ int virNetSocketNewConnectUNIX(const char *path,
> const char *binary,
> virNetSocketPtr *retsock)
> {
> - char *binname = NULL;
> - char *pidpath = NULL;
> - int fd, passfd = -1;
> - int pidfd = -1;
> + char *lockpath = NULL;
Because we assign this to NULL...
> + int lockfd = -1;
> + int fd = -1;
> + int retries = 100;
> virSocketAddr localAddr;
> virSocketAddr remoteAddr;
>
> @@ -561,6 +557,22 @@ int virNetSocketNewConnectUNIX(const char *path,
> return -1;
> }
>
> + if (spawnDaemon) {
> + if (virPidFileConstructPath(false, NULL, "libvirt-lock", &lockpath) < 0)
> + goto error;
> +
> + if ((lockfd = open(lockpath, O_RDWR | O_CREAT, 0600)) < 0 ||
> + virSetCloseExec(lockfd) < 0) {
> + virReportSystemError(errno, _("Unable to create lock '%s'"), lockpath);
And can jump to error right here
> + goto error;
> + }
> +
> + if (virFileLock(lockfd, false, 0, 1, true) < 0) {
> + virReportSystemError(errno, _("Unable to lock '%s'"), lockpath);
> + goto error;
> + }
> + }
> +
> if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
> virReportSystemError(errno, "%s", _("Failed to create socket"));
> goto error;
> @@ -574,108 +586,25 @@ int virNetSocketNewConnectUNIX(const char *path,
> if (remoteAddr.data.un.sun_path[0] == '@')
> remoteAddr.data.un.sun_path[0] = '\0';
>
> - retry:
> - if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
> - int status = 0;
> - pid_t pid = 0;
> -
> - if (!spawnDaemon) {
> + while (retries &&
> + connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
> + if (!(spawnDaemon && errno == ENOENT)) {
> virReportSystemError(errno, _("Failed to connect socket to '%s'"),
> path);
> goto error;
> }
>
> - if (!(binname = last_component(binary)) || binname[0] == '\0') {
> - virReportError(VIR_ERR_INTERNAL_ERROR,
> - _("Cannot determine basename for binary '%s'"),
> - binary);
> - goto error;
> - }
> -
> - if (virPidFileConstructPath(false, NULL, binname, &pidpath) < 0)
> - goto error;
> -
> - pidfd = virPidFileAcquirePath(pidpath, false, getpid());
> - if (pidfd < 0) {
> - /*
> - * This can happen in a very rare case of two clients spawning two
> - * daemons at the same time, and the error in the logs that gets
> - * reset here can be a clue to some future debugging.
> - */
> - virResetLastError();
> - spawnDaemon = false;
> - goto retry;
> - }
> -
> - if ((passfd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
> - virReportSystemError(errno, "%s", _("Failed to create socket"));
> + if (virNetSocketForkDaemon(binary) < 0)
> goto error;
> - }
>
> - /*
> - * We already even acquired the pidfile, so no one else should be using
> - * @path right now. So we're OK to unlink it and paying attention to
> - * the return value makes no real sense here. Only if it's not an
> - * abstract socket, of course.
> - */
> - if (path[0] != '@')
> - unlink(path);
> -
> - /*
> - * We have to fork() here, because umask() is set per-process, chmod()
> - * is racy and fchmod() has undefined behaviour on sockets according to
> - * POSIX, so it doesn't work outside Linux.
> - */
> - if ((pid = virFork()) < 0)
> - goto error;
> -
> - if (pid == 0) {
> - umask(0077);
> - if (bind(passfd, &remoteAddr.data.sa, remoteAddr.len) < 0)
> - _exit(EXIT_FAILURE);
> -
> - _exit(EXIT_SUCCESS);
> - }
> + retries--;
> + usleep(5000);
> + }
>
> - if (virProcessWait(pid, &status, false) < 0)
> - goto error;
> -
> - if (status != EXIT_SUCCESS) {
> - /*
> - * OK, so the child failed to bind() the socket. This may mean that
> - * another daemon was starting at the same time and succeeded with
> - * its bind() (even though it should not happen because we using a
> - * pidfile for the race). So we'll try connecting again, but this
> - * time without spawning the daemon.
> - */
> - spawnDaemon = false;
> - virPidFileDeletePath(pidpath);
> - VIR_FORCE_CLOSE(pidfd);
> - VIR_FORCE_CLOSE(passfd);
> - goto retry;
> - }
> -
> - if (listen(passfd, 0) < 0) {
> - virReportSystemError(errno, "%s",
> - _("Failed to listen on socket that's about "
> - "to be passed to the daemon"));
> - goto error;
> - }
> -
> - if (connect(fd, &remoteAddr.data.sa, remoteAddr.len) < 0) {
> - virReportSystemError(errno, _("Failed to connect socket to '%s'"),
> - path);
> - goto error;
> - }
> -
> - /*
> - * Do we need to eliminate the super-rare race here any more? It would
> - * need incorporating the following VIR_FORCE_CLOSE() into a
> - * virCommandHook inside a virNetSocketForkDaemon().
> - */
> - VIR_FORCE_CLOSE(pidfd);
> - if (virNetSocketForkDaemon(binary, passfd) < 0)
> - goto error;
> + if (lockfd) {
> + unlink(lockpath);
> + VIR_FORCE_CLOSE(lockfd);
> + VIR_FREE(lockpath);
> }
>
> localAddr.len = sizeof(localAddr.data);
> @@ -687,19 +616,14 @@ int virNetSocketNewConnectUNIX(const char *path,
> if (!(*retsock = virNetSocketNew(&localAddr, &remoteAddr, true, fd, -1, 0)))
> goto error;
>
> - VIR_FREE(pidpath);
> -
> return 0;
>
> error:
> - if (pidfd >= 0)
> - virPidFileDeletePath(pidpath);
> - VIR_FREE(pidpath);
> + if (lockfd)
> + unlink(lockpath);
Coverity complains about calling unlink with NULL:
(8) Event var_deref_model: Passing null pointer "lockpath" to "unlink",
which dereferences it.
Also see events:
> + VIR_FREE(lockpath);
> VIR_FORCE_CLOSE(fd);
> - VIR_FORCE_CLOSE(passfd);
> - VIR_FORCE_CLOSE(pidfd);
> - if (spawnDaemon)
> - unlink(path);
> + VIR_FORCE_CLOSE(lockfd);
> return -1;
> }
> #else
>
More information about the libvir-list
mailing list