[Libguestfs] [PATCH nbdkit 2/2] curl: Fallback to GET if HEAD not supported
Laszlo Ersek
lersek at redhat.com
Tue Jun 6 16:09:09 UTC 2023
only superficial comments:
On 6/6/23 13:22, Richard W.M. Jones wrote:
> Some servers do not support HEAD for requesting the headers. If the
> HEAD request fails, fallback to using the GET method, abandoning the
> transfer as soon as possible after the headers have been received.
>
> Fixes: https://github.com/kubevirt/containerized-data-importer/issues/2737
> ---
> tests/Makefile.am | 23 +++++
> plugins/curl/pool.c | 49 ++++++++++-
> tests/test-curl-cookie-script.c | 2 +-
> tests/test-curl-head-forbidden.c | 134 ++++++++++++++++++++++++++++++
> tests/test-curl-header-script.c | 2 +-
> tests/test-curl.c | 2 +-
> tests/test-gzip-curl.c | 2 +-
> tests/test-retry-request-mirror.c | 2 +-
> tests/test-tar-gzip-curl.c | 2 +-
> tests/test-tar-xz-curl.c | 2 +-
> tests/test-xz-curl.c | 2 +-
> tests/web-server.c | 51 +++++++++++-
> tests/web-server.h | 9 +-
> .gitignore | 1 +
> 14 files changed, 271 insertions(+), 12 deletions(-)
>
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 165d61aff..6694e409e 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -655,6 +655,7 @@ EXTRA_DIST += \
> $(NULL)
> LIBGUESTFS_TESTS += test-curl
> LIBNBD_TESTS += \
> + test-curl-head-forbidden \
> test-curl-header-script \
> test-curl-cookie-script \
> $(NULL)
> @@ -683,6 +684,28 @@ test_curl_LDADD = \
> $(LIBGUESTFS_LIBS) \
> $(NULL)
>
> +test_curl_head_forbidden_SOURCES = \
> + test-curl-head-forbidden.c \
> + web-server.c \
> + web-server.h \
> + $(NULL)
> +test_curl_head_forbidden_CPPFLAGS = \
> + -I$(top_srcdir)/common/include \
> + -I$(top_srcdir)/common/utils \
> + $(NULL)
> +test_curl_head_forbidden_CFLAGS = \
> + $(WARNINGS_CFLAGS) \
> + $(LIBNBD_CFLAGS) \
> + $(PTHREAD_CFLAGS) \
> + $(NULL)
> +test_curl_head_forbidden_LDFLAGS = \
> + $(top_builddir)/common/utils/libutils.la \
> + $(PTHREAD_LIBS) \
> + $(NULL)
> +test_curl_head_forbidden_LDADD = \
> + $(LIBNBD_LIBS) \
> + $(NULL)
> +
> test_curl_header_script_SOURCES = \
> test-curl-header-script.c \
> web-server.c \
> diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c
> index 2af5461a2..c73d52293 100644
> --- a/plugins/curl/pool.c
> +++ b/plugins/curl/pool.c
> @@ -76,7 +76,9 @@ static int debug_cb (CURL *handle, curl_infotype type,
> static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque);
> static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
> static int get_content_length_accept_range (struct curl_handle *ch);
> +static bool try_fallback_GET_method (struct curl_handle *ch);
> static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
> +static size_t error_cb (char *ptr, size_t size, size_t nmemb, void *opaque);
>
> /* This lock protects access to the curl_handles vector below. */
> static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> @@ -443,6 +445,7 @@ static int
> get_content_length_accept_range (struct curl_handle *ch)
> {
> CURLcode r;
> + long code;
> #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T
> curl_off_t o;
> #else
> @@ -469,7 +472,20 @@ get_content_length_accept_range (struct curl_handle *ch)
> display_curl_error (ch, r,
> "problem doing HEAD request to fetch size of URL [%s]",
> url);
> - return -1;
> +
> + /* Get the HTTP status code, if available. */
> + r = curl_easy_getinfo (ch->c, CURLINFO_RESPONSE_CODE, &code);
> + if (r == CURLE_OK)
> + nbdkit_debug ("HTTP status code: %ld", code);
> + else
> + code = -1;
> +
> + /* S3 servers can return 403 Forbidden for HEAD but still respond
> + * to GET, so we give it a second chance in that case.
> + * https://github.com/kubevirt/containerized-data-importer/issues/2737#issuecomment-1577786849
> + */
> + if (code != 403 || !try_fallback_GET_method (ch))
> + return -1;
> }
>
> /* Get the content length. */
> @@ -520,6 +536,27 @@ get_content_length_accept_range (struct curl_handle *ch)
> return 0;
> }
>
> +static bool
> +try_fallback_GET_method (struct curl_handle *ch)
> +{
> + CURLcode r;
> +
> + nbdkit_debug ("attempting to fetch headers using GET method");
> +
> + curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L);
> + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb);
> + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch);
> + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, error_cb);
> + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch);
> + r = curl_easy_perform (ch->c);
> +
> + /* We expect CURLE_WRITE_ERROR here, but CURLE_OK is possible too
> + * (eg if the remote has zero length). Other errors might happen
> + * but we ignore them since it is a fallback path.
> + */
> + return r == CURLE_OK || r == CURLE_WRITE_ERROR;
> +}
> +
> static size_t
> header_cb (void *ptr, size_t size, size_t nmemb, void *opaque)
> {
> @@ -552,3 +589,13 @@ header_cb (void *ptr, size_t size, size_t nmemb, void *opaque)
>
> return realsize;
> }
> +
> +static size_t
> +error_cb (char *ptr, size_t size, size_t nmemb, void *opaque)
> +{
> +#ifdef CURL_WRITEFUNC_ERROR
> + return CURL_WRITEFUNC_ERROR;
> +#else
> + return 0; /* in older curl, any size < requested will also be an error */
> +#endif
> +}
> diff --git a/tests/test-curl-cookie-script.c b/tests/test-curl-cookie-script.c
> index 005e73ed0..95b602849 100644
> --- a/tests/test-curl-cookie-script.c
> +++ b/tests/test-curl-cookie-script.c
> @@ -82,7 +82,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server ("disk", check_request);
> + sockpath = web_server ("disk", check_request, false);
> if (sockpath == NULL) {
> fprintf (stderr, "%s: could not start web server thread\n", program_name);
> exit (EXIT_FAILURE);
> diff --git a/tests/test-curl-head-forbidden.c b/tests/test-curl-head-forbidden.c
> new file mode 100644
> index 000000000..16b1f0533
> --- /dev/null
> +++ b/tests/test-curl-head-forbidden.c
> @@ -0,0 +1,134 @@
> +/* nbdkit
> + * Copyright Red Hat
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *
> + * * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + *
> + * * Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * * Neither the name of Red Hat nor the names of its contributors may be
> + * used to endorse or promote products derived from this software without
> + * specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
> + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
> + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR
> + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
> + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
> + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +/* Test curl plugin against a simulated webserver which responds with
> + * 403 Forbidden to HEAD requests, but allows the GET method.
> + */
> +
> +#include <config.h>
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +
> +#include <libnbd.h>
> +
> +#include "cleanup.h"
> +#include "web-server.h"
> +
> +#include "test.h"
> +
> +static char buf[1024];
> +
> +int
> +main (int argc, char *argv[])
> +{
> + struct stat statbuf;
> + const char *sockpath;
> + struct nbd_handle *nbd;
> + CLEANUP_FREE char *usp_param = NULL;
> + int64_t size;
> +
> +#ifndef HAVE_CURLOPT_UNIX_SOCKET_PATH
> + fprintf (stderr, "%s: curl does not support CURLOPT_UNIX_SOCKET_PATH\n",
> + program_name);
> + exit (77);
> +#endif
> +
> + if (stat ("disk", &statbuf) == -1) {
> + if (errno == ENOENT) {
> + fprintf (stderr, "%s: test skipped because \"disk\" is missing\n",
> + program_name);
> + exit (77);
> + }
> + perror ("disk");
> + exit (EXIT_FAILURE);
> + }
> +
> + sockpath = web_server ("disk", NULL, true);
> + if (sockpath == NULL) {
> + fprintf (stderr, "%s: could not start web server thread\n", program_name);
> + exit (EXIT_FAILURE);
> + }
> +
> + nbd = nbd_create ();
> + if (nbd == NULL) {
> + fprintf (stderr, "%s\n", nbd_get_error ());
> + exit (EXIT_FAILURE);
> + }
> +
> + /* Start nbdkit. */
> + if (asprintf (&usp_param, "unix-socket-path=%s", sockpath) == -1) {
> + perror ("asprintf");
> + exit (EXIT_FAILURE);
> + }
> + if (nbd_connect_command (nbd,
> + (char *[]) {
> + "nbdkit", "-s", "--exit-with-parent", "-v",
> + "curl",
> + "-D", "curl.verbose=1",
> + "http://localhost/disk",
> + usp_param, /* unix-socket-path=... */
> + NULL }) == -1) {
> + fprintf (stderr, "%s\n", nbd_get_error ());
> + exit (EXIT_FAILURE);
> + }
> +
> + /* Check the size is expected. */
> + size = nbd_get_size (nbd);
> + if (size == -1) {
> + fprintf (stderr, "%s\n", nbd_get_error ());
> + exit (EXIT_FAILURE);
> + }
> + if (size != statbuf.st_size) {
> + fprintf (stderr, "%s: incorrect export size, "
> + "expected: %" PRIu64 " actual: %" PRIi64 "\n",
> + program_name,
> + (uint64_t) statbuf.st_size, size);
> + exit (EXIT_FAILURE);
> + }
> +
> + /* Make a request. */
> + if (nbd_pread (nbd, buf, sizeof buf, 0, 0) == -1) {
> + fprintf (stderr, "%s\n", nbd_get_error ());
> + exit (EXIT_FAILURE);
> + }
> +
> + nbd_close (nbd);
> + exit (EXIT_SUCCESS);
> +}
I take it this is a small customization (more precisely, lightly
modified copy) of another (existent) test. The --find-copies-harder git
formatting option is good for presenting new code in such terms, i.e. as
a copy of another file + diffs on top. I figure the only relevant
difference would be the last, "true", argument, for web_server().
> diff --git a/tests/test-curl-header-script.c b/tests/test-curl-header-script.c
> index 335c96494..e1eb4dacb 100644
> --- a/tests/test-curl-header-script.c
> +++ b/tests/test-curl-header-script.c
> @@ -104,7 +104,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server ("disk", check_request);
> + sockpath = web_server ("disk", check_request, false);
> if (sockpath == NULL) {
> fprintf (stderr, "%s: could not start web server thread\n", program_name);
> exit (EXIT_FAILURE);
> diff --git a/tests/test-curl.c b/tests/test-curl.c
> index ac267a175..e6134d78d 100644
> --- a/tests/test-curl.c
> +++ b/tests/test-curl.c
> @@ -82,7 +82,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server ("disk", check_request);
> + sockpath = web_server ("disk", check_request, false);
> if (sockpath == NULL) {
> fprintf (stderr, "%s: could not start web server thread\n", program_name);
> exit (EXIT_FAILURE);
> diff --git a/tests/test-gzip-curl.c b/tests/test-gzip-curl.c
> index a4133aee5..a3fcb88bf 100644
> --- a/tests/test-gzip-curl.c
> +++ b/tests/test-gzip-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server (disk, NULL);
> + sockpath = web_server (disk, NULL, false);
> if (sockpath == NULL) {
> fprintf (stderr, "test-gzip-curl: could not start web server thread\n");
> exit (EXIT_FAILURE);
> diff --git a/tests/test-retry-request-mirror.c b/tests/test-retry-request-mirror.c
> index 276460b1f..cf42c5964 100644
> --- a/tests/test-retry-request-mirror.c
> +++ b/tests/test-retry-request-mirror.c
> @@ -72,7 +72,7 @@ main (int argc, char *argv[])
> exit (77);
> }
>
> - sockpath = web_server ("disk" /* not used but must be set */, NULL);
> + sockpath = web_server ("disk" /* not used but must be set */, NULL, false);
> if (sockpath == NULL) {
> fprintf (stderr, "%s: could not start web server thread\n", argv[0]);
> exit (EXIT_FAILURE);
> diff --git a/tests/test-tar-gzip-curl.c b/tests/test-tar-gzip-curl.c
> index ec0035b80..bbfa02f93 100644
> --- a/tests/test-tar-gzip-curl.c
> +++ b/tests/test-tar-gzip-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server (disk, NULL);
> + sockpath = web_server (disk, NULL, false);
> if (sockpath == NULL) {
> fprintf (stderr, "test-tar-gzip-curl: could not start web server thread\n");
> exit (EXIT_FAILURE);
> diff --git a/tests/test-tar-xz-curl.c b/tests/test-tar-xz-curl.c
> index ff92dda28..01d548bb5 100644
> --- a/tests/test-tar-xz-curl.c
> +++ b/tests/test-tar-xz-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server (disk, NULL);
> + sockpath = web_server (disk, NULL, false);
> if (sockpath == NULL) {
> fprintf (stderr, "test-tar-xz-curl: could not start web server thread\n");
> exit (EXIT_FAILURE);
> diff --git a/tests/test-xz-curl.c b/tests/test-xz-curl.c
> index 52b37a0c7..af81cbac7 100644
> --- a/tests/test-xz-curl.c
> +++ b/tests/test-xz-curl.c
> @@ -68,7 +68,7 @@ main (int argc, char *argv[])
> exit (77);
> #endif
>
> - sockpath = web_server (disk, NULL);
> + sockpath = web_server (disk, NULL, false);
> if (sockpath == NULL) {
> fprintf (stderr, "test-xz-curl: could not start web server thread\n");
> exit (EXIT_FAILURE);
> diff --git a/tests/web-server.c b/tests/web-server.c
> index dbc8bc845..ba85e3205 100644
> --- a/tests/web-server.c
> +++ b/tests/web-server.c
> @@ -67,16 +67,19 @@ static int fd = -1;
> static struct stat statbuf;
> static char request[16384];
> static check_request_t check_request;
> +static bool head_fails_with_403 = false;
>
> static void *start_web_server (void *arg);
> static void handle_requests (int s);
> static void handle_file_request (int s, enum method method);
> static void handle_mirror_redirect_request (int s);
> static void handle_mirror_data_request (int s, enum method method, char byte);
> +static void send_403_forbidden (int s);
> static void send_404_not_found (int s);
> static void send_405_method_not_allowed (int s);
> static void send_500_internal_server_error (int s);
> static void xwrite (int s, const char *buf, size_t len);
> +static void xwrite_allow_epipe (int s, const char *buf, size_t len);
> static void xpread (char *buf, size_t count, off_t offset);
>
> static void
> @@ -99,7 +102,8 @@ ignore_sigpipe (void)
> }
>
> const char *
> -web_server (const char *filename, check_request_t _check_request)
> +web_server (const char *filename, check_request_t _check_request,
> + bool _head_fails_with_403)
We shouldn't start identifiers with an underscore.
> {
> struct sockaddr_un addr;
> pthread_t thread;
> @@ -108,6 +112,7 @@ web_server (const char *filename, check_request_t _check_request)
> ignore_sigpipe ();
>
> check_request = _check_request;
> + head_fails_with_403 = _head_fails_with_403;
>
> /* Open the file. */
> fd = open (filename, O_RDONLY|O_CLOEXEC);
> @@ -250,6 +255,11 @@ handle_requests (int s)
> }
> memcpy (path, &request[5], n);
> path[n] = '\0';
> + if (head_fails_with_403) {
> + send_403_forbidden (s);
> + eof = true;
> + break;
> + }
I'm not a fan of the pre-existent pattern where we both set "eof = true"
and break out of the loop. It would have to be cleaned up first, in a
different patch, but I'm not sure if we care enough.
> }
> else if (strncmp (request, "GET ", 4) == 0) {
> method = GET;
> @@ -361,7 +371,14 @@ handle_file_request (int s, enum method method)
> }
>
> xpread (data, length, offset);
> - xwrite (s, data, length);
> + if (!head_fails_with_403 || offset != 0)
> + xwrite (s, data, length);
> + else
> + /* In the special case where we are testing the fallback from HEAD
> + * request case, the curl plugin will issue a GET for the whole
> + * data, but not read it all. Ignore EPIPE errors here.
> + */
> + xwrite_allow_epipe (s, data, length);
>
> free (data);
> }
> @@ -449,6 +466,16 @@ handle_mirror_data_request (int s, enum method method, char byte)
> free (data);
> }
>
> +static void
> +send_403_forbidden (int s)
> +{
> + const char response[] =
> + "HTTP/1.1 403 Forbidden\r\n"
> + "Connection: close\r\n"
> + "\r\n";
> + xwrite (s, response, strlen (response));
> +}
> +
> static void
> send_404_not_found (int s)
> {
> @@ -498,6 +525,26 @@ xwrite (int s, const char *buf, size_t len)
> }
> }
>
> +static void
> +xwrite_allow_epipe (int s, const char *buf, size_t len)
> +{
> + ssize_t r;
> +
> + while (len > 0) {
> + r = write (s, buf, len);
> + if (r == -1) {
> + if (errno != EPIPE) {
> + perror ("write");
> + exit (EXIT_FAILURE);
> + }
> + else
> + return;
> + }
exit() doesn't return, so I suggest removing the "else" (i.e., unnesting
the "return").
> + buf += r;
> + len -= r;
> + }
> +}
> +
> static void
> xpread (char *buf, size_t count, off_t offset)
> {
> diff --git a/tests/web-server.h b/tests/web-server.h
> index 36f0ea1e1..0961f4c5d 100644
> --- a/tests/web-server.h
> +++ b/tests/web-server.h
> @@ -43,6 +43,8 @@
> #ifndef NBDKIT_WEB_SERVER_H
> #define NBDKIT_WEB_SERVER_H
>
> +#include <stdbool.h>
> +
> /* Starts a web server in a background thread. The web server will
> * serve 'filename' (only) - the URL in requests is ignored.
> *
> @@ -57,10 +59,15 @@
> * The optional check_request function is called when the request is
> * received (note: not in the main thread) and can be used to perform
> * checks for example that particular headers were sent.
> + *
> + * If head_fails_with_403 == true then we simulate a server that
> + * responds to GET but fails HEAD with 403 errors, see:
> + * https://github.com/kubevirt/containerized-data-importer/issues/2737#issuecomment-1577786849
> */
> typedef void (*check_request_t) (const char *request);
> extern const char *web_server (const char *filename,
> - check_request_t check_request)
> + check_request_t check_request,
> + bool head_fails_with_403)
> __attribute__ ((__nonnull__ (1)));
>
> #endif /* NBDKIT_WEB_SERVER_H */
I'd prefer the introduction of this new parameter to be a separate
patch. That would isolate much the boilerplate from the relevant feature
addition.
> diff --git a/.gitignore b/.gitignore
> index e62c23d8b..49af3998f 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -122,6 +122,7 @@ plugins/*/*.3
> /tests/test-curl
> /tests/test-curl-cookie-script
> /tests/test-curl-header-script
> +/tests/test-curl-head-forbidden
> /tests/test-data
> /tests/test-delay
> /tests/test-exit-with-parent
Reviewed-by: Laszlo Ersek <lersek at redhat.com>
More information about the Libguestfs
mailing list