[libvirt] [PATCH] Fix domain restore for files on root-squash NFS.

Wed Feb 24 05:33:17 UTC 2010

Thanks for the review!

On 02/23/2010 04:57 PM, Jim Meyering wrote:
> Laine Stump wrote:
>    
>> If qemudDomainRestore fails to open the domain save file, create a
>> pipe, then fork a process that does setuid(qemu_user) and opens the
>> file, then reads this file and stuffs it into the pipe. the parnet
>> libvirtd process will use the other end of the pipe as its fd, then
>> reap the child process after its done reading.
>>      
> Hi Laine,
>
> This looks fine, modulo a few minor stylistic
> and error-handling nits.  Comments below.
>
>    
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>>      
> ...
>    
>> +static int qemudOpenAsUID(const char *path, uid_t uid, pid_t *childpid) {
>>      
> ...
>    
>> +        /* caller gets the read side of the pipe */
>> +        fd = pipefd[0];
>> +        pipefd[0] = -1;
>> +parentcleanup:
>>      
> This_is_more_readable than notusingwordseparators ;-)
> s/cleanup/_cleanup/
>    

Yeah, okay. I get conflicted working in libvirt, because I find bits 
with both studlyCaps and underscore_separators, and can't decide which 
to use, so my brain locks up and I use none.

>> +        if (pipefd[0] != -1)
>> +            close(pipefd[0]);
>> +        if (pipefd[1] != -1)
>> +            close(pipefd[1]);
>> +        if ((fd<  0)&&  (*childpid>  0)) {
>> +            /* a child process was started and subsequently an error
>> +               occurred in the parent, so we need to wait for it to
>> +               exit, but its status is inconsequential. */
>> +            while ((waitpid(*childpid, NULL, 0) == -1)
>> +&&  (errno == EINTR));
>> +            *childpid = -1;
>> +        }
>> +        return fd;
>> +    }
>> +
>> +    /* child */
>> +
>> +    /* setuid to the qemu user, then open the file, read it,
>> +       and stuff it into the pipe for the parent process to
>> +       read */
>> +    int exit_code;
>> +    char *buf = NULL;
>> +    int bufsize = 1024 * 1024;
>>      
> It's slightly better to use size_t in place of int,
> to match the prototype of VIR_ALLOC_N.
>    

Right. Done.

>> +    int bytesread;
>> +
>> +    /* child doesn't need the read side of the pipe */
>> +    close(pipefd[0]);
>> +
>> +    if (forkRet<  0) {
>> +        exit_code = errno;
>> +        virReportSystemError(errno,
>> +                             _("failed in child after forking to read '%s'"),
>> +                             path);
>> +        goto childcleanup;
>> +    }
>> +
>> +    if (setuid(uid) != 0) {
>> +        exit_code = errno;
>> +        virReportSystemError(errno,
>> +                             _("cannot setuid(%d) to read '%s'"),
>> +                             getuid(), path);
>> +        goto childcleanup;
>> +    }
>> +    if ((fd = open(path, O_RDONLY))<  0) {
>> +        exit_code = errno;
>> +        virReportSystemError(errno,
>> +                             _("cannot open '%s' as uid %d"),
>> +                             path, getuid());
>> +        goto childcleanup;
>> +    }
>> +    if (VIR_ALLOC_N(buf, bufsize)<  0) {
>> +        exit_code = ENOMEM;
>> +        virReportOOMError();
>> +        goto childcleanup;
>> +    }
>> +
>> +    /* read from fd and write to pipefd[1] until EOF */
>> +    do {
>> +        if ((bytesread = saferead(fd, buf, bufsize))<  0) {
>> +            exit_code = errno;
>> +            virReportSystemError(errno,
>> +                                 _("child failed reading from '%s'"),
>> +                                 path);
>> +            goto childcleanup;
>> +        }
>> +        if (safewrite(pipefd[1], buf, bytesread) != bytesread) {
>> +            exit_code = errno;
>> +            virReportSystemError(errno, "%s",
>> +                                 _("child failed writing to pipe"));
>> +            goto childcleanup;
>> +        }
>> +    } while (bytesread>  0);
>> +    exit_code = 0;
>> +
>> +childcleanup:
>>      
> s/cleanup/_cleanup/
>    

Yup.

>> +    VIR_FREE(buf);
>> +    if (fd != -1)
>> +        close(fd);
>> +    if (pipefd[1] != -1)
>> +        close(pipefd[1]);
>> +    _exit(exit_code);
>> +}
>> +
>>   /* TODO: check seclabel restore */
>>   static int qemudDomainRestore(virConnectPtr conn,
>>                                 const char *path) {
>> @@ -4666,6 +4795,7 @@ static int qemudDomainRestore(virConnectPtr conn,
>>       virDomainDefPtr def = NULL;
>>       virDomainObjPtr vm = NULL;
>>       int fd = -1;
>> +    pid_t read_pid = -1;
>>       int ret = -1;
>>       char *xml = NULL;
>>       struct qemud_save_header header;
>> @@ -4677,9 +4807,20 @@ static int qemudDomainRestore(virConnectPtr conn,
>>       qemuDriverLock(driver);
>>       /* Verify the header and read the XML */
>>       if ((fd = open(path, O_RDONLY))<  0) {
>> -        qemuReportError(VIR_ERR_OPERATION_FAILED,
>> -                        "%s", _("cannot read domain image"));
>> -        goto cleanup;
>> +        if ((driver->user == 0) || (getuid() != 0)) {
>> +            qemuReportError(VIR_ERR_OPERATION_FAILED,
>> +                            "%s", _("cannot read domain image"));
>> +            goto cleanup;
>> +        }
>> +
>> +        /* Opening as root failed, but qemu runs as a different user
>> +           that might have better luck. Create a pipe, then fork a
>> +           child process to run as the qemu user, which will hopefully
>> +           have the necessary authority to read the file. */
>> +        if ((fd = qemudOpenAsUID(path, driver->user,&read_pid))<  0) {
>> +            /* error already reported */
>> +            goto cleanup;
>> +        }
>>       }
>>
>>       if (saferead(fd,&header, sizeof(header)) != sizeof(header)) {
>> @@ -4769,6 +4910,12 @@ static int qemudDomainRestore(virConnectPtr conn,
>>           close(intermediatefd);
>>       close(fd);
>>       fd = -1;
>> +    if (read_pid != -1) {
>> +        /* reap the process that read the file */
>> +        while ((waitpid(read_pid, NULL, 0) == -1)
>> +&&  (errno == EINTR));
>>      
> Two things.  In general, terminating an empty loop with a semicolon
> is risky -- too easy to miss that syntax, and snuggling it up on the
> end of the "while (cond)" line makes it even easier to miss.  If you
> do this, please at least add a comment on a separate line:
>
>      while (cond)
>          /* empty */ ;
>
> using braces might even be better:
>
>      while (cond) {
>          /* empty */
>      }
>    

There I go, letting cut-paste get me into trouble again.

I also prefer using braces, and will change it to that.

> You might want to diagnose a failing waitpid, when errno != EINTR.
> ECHILD (no such PID) appears to be the only one we'd care about.
> Obviously, we'd ignore waitpid failure during clean-up,
> but in the parent, post-read, it might indicate a problem.
>
> Also, don't you want to detect child failure in the latter?
> If so, you'd pass say,&child_status as second argument to waitpid.
>    

Yeah, I wondered about that, but wasn't sure what I could do after qemu 
had already been started up and read data from the file (which 
presumably would itself have failed if there was some problem in the 
child that cause bad/incomplete data to show up). So I punted.

I'll spend some time in the morning trying to do something suitable if 
the child sends back a failure status.

>> +        read_pid = -1;
>> +    }
>>       if (ret<  0) {
>>           if (!vm->persistent) {
>>               if (qemuDomainObjEndJob(vm)>  0)
>> @@ -4810,6 +4957,11 @@ cleanup:
>>       VIR_FREE(xml);
>>       if (fd != -1)
>>           close(fd);
>> +    if (read_pid != 0) {
>> +        /* reap the process that read the file */
>> +        while ((waitpid(read_pid, NULL, 0) == -1)
>> +&&  (errno == EINTR));
>> +    }
>>       if (vm)
>>           virDomainObjUnlock(vm);
>>       if (event)
>>      
>