[Libguestfs] [PATCH nbdkit 2/2] curl: Fallback to GET if HEAD not supported

Laszlo Ersek lersek at redhat.com
Tue Jun 6 16:09:09 UTC 2023


only superficial comments:

On 6/6/23 13:22, Richard W.M. Jones wrote:
> Some servers do not support HEAD for requesting the headers.  If the
> HEAD request fails, fallback to using the GET method, abandoning the
> transfer as soon as possible after the headers have been received.
> 
> Fixes: https://github.com/kubevirt/containerized-data-importer/issues/2737
> ---
>  tests/Makefile.am                 |  23 +++++
>  plugins/curl/pool.c               |  49 ++++++++++-
>  tests/test-curl-cookie-script.c   |   2 +-
>  tests/test-curl-head-forbidden.c  | 134 ++++++++++++++++++++++++++++++
>  tests/test-curl-header-script.c   |   2 +-
>  tests/test-curl.c                 |   2 +-
>  tests/test-gzip-curl.c            |   2 +-
>  tests/test-retry-request-mirror.c |   2 +-
>  tests/test-tar-gzip-curl.c        |   2 +-
>  tests/test-tar-xz-curl.c          |   2 +-
>  tests/test-xz-curl.c              |   2 +-
>  tests/web-server.c                |  51 +++++++++++-
>  tests/web-server.h                |   9 +-
>  .gitignore                        |   1 +
>  14 files changed, 271 insertions(+), 12 deletions(-)
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 165d61aff..6694e409e 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -655,6 +655,7 @@ EXTRA_DIST += \
>  	$(NULL)
>  LIBGUESTFS_TESTS += test-curl
>  LIBNBD_TESTS += \
> +	test-curl-head-forbidden \
>  	test-curl-header-script \
>  	test-curl-cookie-script \
>  	$(NULL)
> @@ -683,6 +684,28 @@ test_curl_LDADD = \
>  	$(LIBGUESTFS_LIBS) \
>  	$(NULL)
>  
> +test_curl_head_forbidden_SOURCES = \
> +	test-curl-head-forbidden.c \
> +	web-server.c \
> +	web-server.h \
> +	$(NULL)
> +test_curl_head_forbidden_CPPFLAGS = \
> +	-I$(top_srcdir)/common/include \
> +	-I$(top_srcdir)/common/utils \
> +	$(NULL)
> +test_curl_head_forbidden_CFLAGS = \
> +	$(WARNINGS_CFLAGS) \
> +	$(LIBNBD_CFLAGS) \
> +	$(PTHREAD_CFLAGS) \
> +	$(NULL)
> +test_curl_head_forbidden_LDFLAGS = \
> +	$(top_builddir)/common/utils/libutils.la \
> +	$(PTHREAD_LIBS) \
> +	$(NULL)
> +test_curl_head_forbidden_LDADD = \
> +	$(LIBNBD_LIBS) \
> +	$(NULL)
> +
>  test_curl_header_script_SOURCES = \
>  	test-curl-header-script.c \
>  	web-server.c \
> diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c
> index 2af5461a2..c73d52293 100644
> --- a/plugins/curl/pool.c
> +++ b/plugins/curl/pool.c
> @@ -76,7 +76,9 @@ static int debug_cb (CURL *handle, curl_infotype type,
>  static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque);
>  static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
>  static int get_content_length_accept_range (struct curl_handle *ch);
> +static bool try_fallback_GET_method (struct curl_handle *ch);
>  static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
> +static size_t error_cb (char *ptr, size_t size, size_t nmemb, void *opaque);
>  
>  /* This lock protects access to the curl_handles vector below. */
>  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> @@ -443,6 +445,7 @@ static int
>  get_content_length_accept_range (struct curl_handle *ch)
>  {
>    CURLcode r;
> +  long code;
>  #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T
>    curl_off_t o;
>  #else
> @@ -469,7 +472,20 @@ get_content_length_accept_range (struct curl_handle *ch)
>      display_curl_error (ch, r,
>                          "problem doing HEAD request to fetch size of URL [%s]",
>                          url);
> -    return -1;
> +
> +    /* Get the HTTP status code, if available. */
> +    r = curl_easy_getinfo (ch->c, CURLINFO_RESPONSE_CODE, &code);
> +    if (r == CURLE_OK)
> +      nbdkit_debug ("HTTP status code: %ld", code);
> +    else
> +      code = -1;
> +
> +    /* S3 servers can return 403 Forbidden for HEAD but still respond
> +     * to GET, so we give it a second chance in that case.
> +     * https://github.com/kubevirt/containerized-data-importer/issues/2737#issuecomment-1577786849
> +     */
> +    if (code != 403 || !try_fallback_GET_method (ch))
> +      return -1;
>    }
>  
>    /* Get the content length. */
> @@ -520,6 +536,27 @@ get_content_length_accept_range (struct curl_handle *ch)
>    return 0;
>  }
>  
> +static bool
> +try_fallback_GET_method (struct curl_handle *ch)
> +{
> +  CURLcode r;
> +
> +  nbdkit_debug ("attempting to fetch headers using GET method");
> +
> +  curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L);
> +  curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb);
> +  curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch);
> +  curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, error_cb);
> +  curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch);
> +  r = curl_easy_perform (ch->c);
> +
> +  /* We expect CURLE_WRITE_ERROR here, but CURLE_OK is possible too
> +   * (eg if the remote has zero length).  Other errors might happen
> +   * but we ignore them since it is a fallback path.
> +   */
> +  return r == CURLE_OK || r == CURLE_WRITE_ERROR;
> +}
> +
>  static size_t
>  header_cb (void *ptr, size_t size, size_t nmemb, void *opaque)
>  {
> @@ -552,3 +589,13 @@ header_cb (void *ptr, size_t size, size_t nmemb, void *opaque)
>  
>    return realsize;
>  }
> +
> +static size_t
> +error_cb (char *ptr, size_t size, size_t nmemb, void *opaque)
> +{
> +#ifdef CURL_WRITEFUNC_ERROR
> +  return CURL_WRITEFUNC_ERROR;
> +#else
> +  return 0; /* in older curl, any size < requested will also be an error */
> +#endif
> +}
> diff --git a/tests/test-curl-cookie-script.c b/tests/test-curl-cookie-script.c
> index 005e73ed0..95b602849 100644
> --- a/tests/test-curl-cookie-script.c
> +++ b/tests/test-curl-cookie-script.c
> @@ -82,7 +82,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server ("disk", check_request);
> +  sockpath = web_server ("disk", check_request, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "%s: could not start web server thread\n", program_name);
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-curl-head-forbidden.c b/tests/test-curl-head-forbidden.c
> new file mode 100644
> index 000000000..16b1f0533
> --- /dev/null
> +++ b/tests/test-curl-head-forbidden.c
> @@ -0,0 +1,134 @@
> +/* nbdkit
> + * Copyright Red Hat
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + *
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * * Neither the name of Red Hat nor the names of its contributors may be
> + * used to endorse or promote products derived from this software without
> + * specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
> + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
> + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +/* Test curl plugin against a simulated webserver which responds with
> + * 403 Forbidden to HEAD requests, but allows the GET method.
> + */
> +
> +#include <config.h>
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +
> +#include <libnbd.h>
> +
> +#include "cleanup.h"
> +#include "web-server.h"
> +
> +#include "test.h"
> +
> +static char buf[1024];
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  struct stat statbuf;
> +  const char *sockpath;
> +  struct nbd_handle *nbd;
> +  CLEANUP_FREE char *usp_param = NULL;
> +  int64_t size;
> +
> +#ifndef HAVE_CURLOPT_UNIX_SOCKET_PATH
> +  fprintf (stderr, "%s: curl does not support CURLOPT_UNIX_SOCKET_PATH\n",
> +           program_name);
> +  exit (77);
> +#endif
> +
> +  if (stat ("disk", &statbuf) == -1) {
> +    if (errno == ENOENT) {
> +      fprintf (stderr, "%s: test skipped because \"disk\" is missing\n",
> +               program_name);
> +      exit (77);
> +    }
> +    perror ("disk");
> +    exit (EXIT_FAILURE);
> +  }
> +
> +  sockpath = web_server ("disk", NULL, true);
> +  if (sockpath == NULL) {
> +    fprintf (stderr, "%s: could not start web server thread\n", program_name);
> +    exit (EXIT_FAILURE);
> +  }
> +
> +  nbd = nbd_create ();
> +  if (nbd == NULL) {
> +    fprintf (stderr, "%s\n", nbd_get_error ());
> +    exit (EXIT_FAILURE);
> +  }
> +
> +  /* Start nbdkit. */
> +  if (asprintf (&usp_param, "unix-socket-path=%s", sockpath) == -1) {
> +    perror ("asprintf");
> +    exit (EXIT_FAILURE);
> +  }
> +  if (nbd_connect_command (nbd,
> +                           (char *[]) {
> +                             "nbdkit", "-s", "--exit-with-parent", "-v",
> +                             "curl",
> +                             "-D", "curl.verbose=1",
> +                             "http://localhost/disk",
> +                             usp_param, /* unix-socket-path=... */
> +                             NULL }) == -1) {
> +    fprintf (stderr, "%s\n", nbd_get_error ());
> +    exit (EXIT_FAILURE);
> +  }
> +
> +  /* Check the size is expected. */
> +  size = nbd_get_size (nbd);
> +  if (size == -1) {
> +    fprintf (stderr, "%s\n", nbd_get_error ());
> +    exit (EXIT_FAILURE);
> +  }
> +  if (size != statbuf.st_size) {
> +    fprintf (stderr, "%s: incorrect export size, "
> +             "expected: %" PRIu64 " actual: %" PRIi64 "\n",
> +             program_name,
> +             (uint64_t) statbuf.st_size, size);
> +    exit (EXIT_FAILURE);
> +  }
> +
> +  /* Make a request. */
> +  if (nbd_pread (nbd, buf, sizeof buf, 0, 0) == -1) {
> +    fprintf (stderr, "%s\n", nbd_get_error ());
> +    exit (EXIT_FAILURE);
> +  }
> +
> +  nbd_close (nbd);
> +  exit (EXIT_SUCCESS);
> +}

I take it this is a small customization (more precisely, lightly
modified copy) of another (existent) test. The --find-copies-harder git
formatting option is good for presenting new code in such terms, i.e. as
a copy of another file + diffs on top. I figure the only relevant
difference would be the last, "true", argument, for web_server().

> diff --git a/tests/test-curl-header-script.c b/tests/test-curl-header-script.c
> index 335c96494..e1eb4dacb 100644
> --- a/tests/test-curl-header-script.c
> +++ b/tests/test-curl-header-script.c
> @@ -104,7 +104,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server ("disk", check_request);
> +  sockpath = web_server ("disk", check_request, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "%s: could not start web server thread\n", program_name);
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-curl.c b/tests/test-curl.c
> index ac267a175..e6134d78d 100644
> --- a/tests/test-curl.c
> +++ b/tests/test-curl.c
> @@ -82,7 +82,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server ("disk", check_request);
> +  sockpath = web_server ("disk", check_request, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "%s: could not start web server thread\n", program_name);
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-gzip-curl.c b/tests/test-gzip-curl.c
> index a4133aee5..a3fcb88bf 100644
> --- a/tests/test-gzip-curl.c
> +++ b/tests/test-gzip-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server (disk, NULL);
> +  sockpath = web_server (disk, NULL, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "test-gzip-curl: could not start web server thread\n");
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-retry-request-mirror.c b/tests/test-retry-request-mirror.c
> index 276460b1f..cf42c5964 100644
> --- a/tests/test-retry-request-mirror.c
> +++ b/tests/test-retry-request-mirror.c
> @@ -72,7 +72,7 @@ main (int argc, char *argv[])
>      exit (77);
>    }
>  
> -  sockpath = web_server ("disk" /* not used but must be set */, NULL);
> +  sockpath = web_server ("disk" /* not used but must be set */, NULL, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "%s: could not start web server thread\n", argv[0]);
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-tar-gzip-curl.c b/tests/test-tar-gzip-curl.c
> index ec0035b80..bbfa02f93 100644
> --- a/tests/test-tar-gzip-curl.c
> +++ b/tests/test-tar-gzip-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server (disk, NULL);
> +  sockpath = web_server (disk, NULL, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "test-tar-gzip-curl: could not start web server thread\n");
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-tar-xz-curl.c b/tests/test-tar-xz-curl.c
> index ff92dda28..01d548bb5 100644
> --- a/tests/test-tar-xz-curl.c
> +++ b/tests/test-tar-xz-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server (disk, NULL);
> +  sockpath = web_server (disk, NULL, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "test-tar-xz-curl: could not start web server thread\n");
>      exit (EXIT_FAILURE);
> diff --git a/tests/test-xz-curl.c b/tests/test-xz-curl.c
> index 52b37a0c7..af81cbac7 100644
> --- a/tests/test-xz-curl.c
> +++ b/tests/test-xz-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
>    exit (77);
>  #endif
>  
> -  sockpath = web_server (disk, NULL);
> +  sockpath = web_server (disk, NULL, false);
>    if (sockpath == NULL) {
>      fprintf (stderr, "test-xz-curl: could not start web server thread\n");
>      exit (EXIT_FAILURE);
> diff --git a/tests/web-server.c b/tests/web-server.c
> index dbc8bc845..ba85e3205 100644
> --- a/tests/web-server.c
> +++ b/tests/web-server.c
> @@ -67,16 +67,19 @@ static int fd = -1;
>  static struct stat statbuf;
>  static char request[16384];
>  static check_request_t check_request;
> +static bool head_fails_with_403 = false;
>  
>  static void *start_web_server (void *arg);
>  static void handle_requests (int s);
>  static void handle_file_request (int s, enum method method);
>  static void handle_mirror_redirect_request (int s);
>  static void handle_mirror_data_request (int s, enum method method, char byte);
> +static void send_403_forbidden (int s);
>  static void send_404_not_found (int s);
>  static void send_405_method_not_allowed (int s);
>  static void send_500_internal_server_error (int s);
>  static void xwrite (int s, const char *buf, size_t len);
> +static void xwrite_allow_epipe (int s, const char *buf, size_t len);
>  static void xpread (char *buf, size_t count, off_t offset);
>  
>  static void
> @@ -99,7 +102,8 @@ ignore_sigpipe (void)
>  }
>  
>  const char *
> -web_server (const char *filename, check_request_t _check_request)
> +web_server (const char *filename, check_request_t _check_request,
> +            bool _head_fails_with_403)

We shouldn't start identifiers with an underscore.

>  {
>    struct sockaddr_un addr;
>    pthread_t thread;
> @@ -108,6 +112,7 @@ web_server (const char *filename, check_request_t _check_request)
>    ignore_sigpipe ();
>  
>    check_request = _check_request;
> +  head_fails_with_403 = _head_fails_with_403;
>  
>    /* Open the file. */
>    fd = open (filename, O_RDONLY|O_CLOEXEC);
> @@ -250,6 +255,11 @@ handle_requests (int s)
>        }
>        memcpy (path, &request[5], n);
>        path[n] = '\0';
> +      if (head_fails_with_403) {
> +        send_403_forbidden (s);
> +        eof = true;
> +        break;
> +      }

I'm not a fan of the pre-existent pattern where we both set "eof = true"
and break out of the loop. It would have to be cleaned up first, in a
different patch, but I'm not sure if we care enough.

>      }
>      else if (strncmp (request, "GET ", 4) == 0) {
>        method = GET;
> @@ -361,7 +371,14 @@ handle_file_request (int s, enum method method)
>    }
>  
>    xpread (data, length, offset);
> -  xwrite (s, data, length);
> +  if (!head_fails_with_403 || offset != 0)
> +    xwrite (s, data, length);
> +  else
> +    /* In the special case where we are testing the fallback from HEAD
> +     * request case, the curl plugin will issue a GET for the whole
> +     * data, but not read it all.  Ignore EPIPE errors here.
> +     */
> +    xwrite_allow_epipe (s, data, length);
>  
>    free (data);
>  }
> @@ -449,6 +466,16 @@ handle_mirror_data_request (int s, enum method method, char byte)
>    free (data);
>  }
>  
> +static void
> +send_403_forbidden (int s)
> +{
> +  const char response[] =
> +    "HTTP/1.1 403 Forbidden\r\n"
> +    "Connection: close\r\n"
> +    "\r\n";
> +  xwrite (s, response, strlen (response));
> +}
> +
>  static void
>  send_404_not_found (int s)
>  {
> @@ -498,6 +525,26 @@ xwrite (int s, const char *buf, size_t len)
>    }
>  }
>  
> +static void
> +xwrite_allow_epipe (int s, const char *buf, size_t len)
> +{
> +  ssize_t r;
> +
> +  while (len > 0) {
> +    r = write (s, buf, len);
> +    if (r == -1) {
> +      if (errno != EPIPE) {
> +        perror ("write");
> +        exit (EXIT_FAILURE);
> +      }
> +      else
> +        return;
> +    }

exit() doesn't return, so I suggest removing the "else" (i.e., unnesting
the "return").

> +    buf += r;
> +    len -= r;
> +  }
> +}
> +
>  static void
>  xpread (char *buf, size_t count, off_t offset)
>  {
> diff --git a/tests/web-server.h b/tests/web-server.h
> index 36f0ea1e1..0961f4c5d 100644
> --- a/tests/web-server.h
> +++ b/tests/web-server.h
> @@ -43,6 +43,8 @@
>  #ifndef NBDKIT_WEB_SERVER_H
>  #define NBDKIT_WEB_SERVER_H
>  
> +#include <stdbool.h>
> +
>  /* Starts a web server in a background thread.  The web server will
>   * serve 'filename' (only) - the URL in requests is ignored.
>   *
> @@ -57,10 +59,15 @@
>   * The optional check_request function is called when the request is
>   * received (note: not in the main thread) and can be used to perform
>   * checks for example that particular headers were sent.
> + *
> + * If head_fails_with_403 == true then we simulate a server that
> + * responds to GET but fails HEAD with 403 errors, see:
> + * https://github.com/kubevirt/containerized-data-importer/issues/2737#issuecomment-1577786849
>   */
>  typedef void (*check_request_t) (const char *request);
>  extern const char *web_server (const char *filename,
> -                               check_request_t check_request)
> +                               check_request_t check_request,
> +                               bool head_fails_with_403)
>    __attribute__ ((__nonnull__ (1)));
>  
>  #endif /* NBDKIT_WEB_SERVER_H */

I'd prefer the introduction of this new parameter to be a separate
patch. That would isolate much the boilerplate from the relevant feature
addition.

> diff --git a/.gitignore b/.gitignore
> index e62c23d8b..49af3998f 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -122,6 +122,7 @@ plugins/*/*.3
>  /tests/test-curl
>  /tests/test-curl-cookie-script
>  /tests/test-curl-header-script
> +/tests/test-curl-head-forbidden
>  /tests/test-data
>  /tests/test-delay
>  /tests/test-exit-with-parent

Reviewed-by: Laszlo Ersek <lersek at redhat.com>



More information about the Libguestfs mailing list