[libvirt] [PATCH] Improve LXC startup error messages
Daniel Veillard
veillard at redhat.com
Thu Sep 29 01:00:02 UTC 2011
On Wed, Sep 28, 2011 at 03:11:10PM +0100, Daniel P. Berrange wrote:
> From: "Daniel P. Berrange" <berrange at redhat.com>
>
> The LXC controller and container attempt to run in lockstep
> using a series of handshakes. There was a flaw in the container
> side though, causing it todo partial setup work, before waiting
> for the initial controller handshake.
>
> This in turn meant some of the controller setup could fail,
> and obscure the real error from the container setup. Moving
> the lxcContainerWaitForContinue call to be the first thing
> the container does solves that.
>
> The controller still, however, pollutes the log with a message
> about the container handshake failing, when the container
> shuts down during startup.
>
> To deal with this requires special handling of the EOF condition
> on the controllers lxcContainerWaitForContinue() call.
>
> * src/lxc/lxc_container.c: Wait for continue message from
> controller before doing anything at all
> * src/lxc/lxc_controller.c: Don't pollute log with error
> message if EOF was returned from lxcContainerWaitForContinue.
> * src/lxc/lxc_driver.c: Handle EOF from controller handshake
> ---
> src/lxc/lxc_container.c | 38 ++++++++++++++++++++++++++------------
> src/lxc/lxc_controller.c | 31 ++++++++++++++++++++++++++++---
> src/lxc/lxc_driver.c | 2 +-
> 3 files changed, 55 insertions(+), 16 deletions(-)
>
> diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
> index 787df9a..ee5ca9f 100644
> --- a/src/lxc/lxc_container.c
> +++ b/src/lxc/lxc_container.c
> @@ -214,7 +214,7 @@ error_out:
> * parent process. It will send this message on the socket pair stored in
> * the vm structure once it has completed the post clone container setup.
> *
> - * Returns 0 on success or -1 in case of error
> + * Returns 1 on success, 0 on EOF, or -1 in case of error
> */
> int lxcContainerWaitForContinue(int control)
> {
> @@ -222,12 +222,18 @@ int lxcContainerWaitForContinue(int control)
> int readLen;
>
> readLen = saferead(control, &msg, sizeof(msg));
> - if (readLen != sizeof(msg) ||
> - msg != LXC_CONTINUE_MSG) {
> + if (readLen < 0)
> + return -1;
> +
> + if (readLen != sizeof(msg))
> + return 0;
> +
> + if (msg != LXC_CONTINUE_MSG) {
> + errno = EINVAL;
> return -1;
> }
>
> - return 0;
> + return 1;
> }
>
>
> @@ -1036,6 +1042,20 @@ static int lxcContainerChild( void *data )
> char *ttyPath = NULL;
> virDomainFSDefPtr root;
> virCommandPtr cmd = NULL;
> + int rc;
> +
> + /* Wait for interface devices to show up */
> + if ((rc = lxcContainerWaitForContinue(argv->monitor)) <= 0) {
> + if (rc < 0)
> + virReportSystemError(errno, "%s",
> + _("Failed to read the container continue message"));
> + else
> + lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("Controller unexpectedly quit"));
> + goto cleanup;
> + }
> +
> + VIR_DEBUG("Received container continue message");
>
> if (NULL == vmDef) {
> lxcError(VIR_ERR_INTERNAL_ERROR,
> @@ -1079,14 +1099,6 @@ static int lxcContainerChild( void *data )
> goto cleanup;
> }
>
> - /* Wait for interface devices to show up */
> - if (lxcContainerWaitForContinue(argv->monitor) < 0) {
> - virReportSystemError(errno, "%s",
> - _("Failed to read the container continue message"));
> - goto cleanup;
> - }
> - VIR_DEBUG("Received container continue message");
> -
> /* rename and enable interfaces */
> if (lxcContainerRenameAndEnableInterfaces(argv->nveths,
> argv->veths) < 0) {
> @@ -1120,6 +1132,8 @@ cleanup:
> }
>
> virCommandFree(cmd);
> + if (ret < 0)
> + virDispatchError(NULL);
> return ret;
> }
>
> diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
> index 52d382e..8c3a54f 100644
> --- a/src/lxc/lxc_controller.c
> +++ b/src/lxc/lxc_controller.c
> @@ -780,6 +780,9 @@ static int lxcSetPersonality(virDomainDefPtr def)
> # define MS_SLAVE (1<<19)
> #endif
>
> +/*
> + * Returns -1 on error, 0 if container quit unexpected, 1 on success
> + */
> static int
> lxcControllerRun(virDomainDefPtr def,
> unsigned int nveths,
> @@ -801,6 +804,7 @@ lxcControllerRun(virDomainDefPtr def,
> size_t nloopDevs = 0;
> int *loopDevs = NULL;
> size_t i;
> + int ret;
>
> if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
> virReportSystemError(errno, "%s",
> @@ -935,12 +939,25 @@ lxcControllerRun(virDomainDefPtr def,
> goto cleanup;
> }
>
> - if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) {
> + if ((ret = lxcContainerWaitForContinue(containerhandshake[0])) < 0) {
> virReportSystemError(errno, "%s",
> _("error receiving signal from container"));
> goto cleanup;
> }
>
> + if (ret == 0) {
> + rc = 0;
> + /* We're not raising an error, since the container will have
> + * done that already and we don't want to confuse the driver
> + * when it fetches the error message from logs
> + */
> +#if 0
> + lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
> + _("container quit during startup handshake"));
> +#endif
> + goto cleanup;
> + }
> +
> /* Now the container is fully setup... */
>
> /* ...we can close the loop devices... */
> @@ -959,7 +976,10 @@ lxcControllerRun(virDomainDefPtr def,
> }
> VIR_FORCE_CLOSE(handshakefd);
>
> - rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
> + if (lxcControllerMain(monitor, client, appPty, containerPty, container) != 0)
> + goto cleanup;
> +
> + rc = 1;
>
> cleanup:
> VIR_FREE(devptmx);
> @@ -1186,5 +1206,10 @@ cleanup:
> unlink(sockpath);
> VIR_FREE(sockpath);
>
> - return rc ? EXIT_FAILURE : EXIT_SUCCESS;
> + /* If rc == 0, we have an error, but the lxc container
> + * startup will have printed it already.*/
> + if (rc < 0)
> + virDispatchError(NULL);
> +
> + return rc <=0 ? EXIT_FAILURE : EXIT_SUCCESS;
> }
> diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
> index 4b62600..759e3a9 100644
> --- a/src/lxc/lxc_driver.c
> +++ b/src/lxc/lxc_driver.c
> @@ -1622,7 +1622,7 @@ static int lxcVmStart(virConnectPtr conn,
> vm->def->id = vm->pid;
> virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
>
> - if (lxcContainerWaitForContinue(handshakefds[0]) < 0) {
> + if (lxcContainerWaitForContinue(handshakefds[0]) <= 0) {
> char out[1024];
>
> if (!(lxcReadLogOutput(vm, logfile, pos, out, 1024) < 0)) {
ACK,
Daniel
--
Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/
daniel at veillard.com | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library http://libvirt.org/
More information about the libvir-list
mailing list