[Libguestfs] [PATCH nbdkit] curl: Try to share as much as possible between handles in the pool
Laszlo Ersek
lersek at redhat.com
Wed Feb 22 15:52:35 UTC 2023
On 2/22/23 16:01, Richard W.M. Jones wrote:
> Using the libcurl share interface we can share data between the
> separate curl easy handles in the pool. For more about this see:
>
> https://curl.se/libcurl/c/CURLSHOPT_SHARE.html
> https://gist.github.com/bagder/7eccf74f8b6d70b5abefeb7f288dba9b
> https://everything.curl.dev/libcurl/sharing
> ---
> plugins/curl/curldefs.h | 3 +-
> plugins/curl/curl.c | 4 ++-
> plugins/curl/pool.c | 75 +++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 78 insertions(+), 4 deletions(-)
>
> diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h
> index c2a3432fc..d614379d0 100644
> --- a/plugins/curl/curldefs.h
> +++ b/plugins/curl/curldefs.h
> @@ -117,9 +117,10 @@ struct curl_handle {
> };
>
> /* pool.c */
> +extern void load_pool (void);
> +extern void unload_pool (void);
> extern struct curl_handle *get_handle (void);
> extern void put_handle (struct curl_handle *ch);
> -extern void free_all_handles (void);
>
> /* scripts.c */
> extern int do_scripts (struct curl_handle *ch);
> diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c
> index b5927b5b4..b8624a6f8 100644
> --- a/plugins/curl/curl.c
> +++ b/plugins/curl/curl.c
> @@ -101,6 +101,8 @@ curl_load (void)
> nbdkit_error ("libcurl initialization failed: %d", (int) r);
> exit (EXIT_FAILURE);
> }
> +
> + load_pool ();
> }
>
> static void
> @@ -112,7 +114,7 @@ curl_unload (void)
> free (password);
> free (proxy_password);
> scripts_unload ();
> - free_all_handles ();
> + unload_pool ();
> curl_global_cleanup ();
> }
>
> diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c
> index b6f4f8e5f..536123388 100644
> --- a/plugins/curl/pool.c
> +++ b/plugins/curl/pool.c
> @@ -52,6 +52,7 @@
>
> #include <nbdkit-plugin.h>
>
> +#include "array-size.h"
> #include "ascii-ctype.h"
> #include "ascii-string.h"
> #include "cleanup.h"
> @@ -73,6 +74,18 @@ static int debug_cb (CURL *handle, curl_infotype type,
> static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
> static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque);
> static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
> +static void lock_cb (CURL *handle, curl_lock_data data,
> + curl_lock_access access, void *userptr);
> +static void unlock_cb (CURL *handle, curl_lock_data data,
> + void *userptr);
> +
> +/* These locks protect access to the curl share data. See:
> + * https://gist.github.com/bagder/7eccf74f8b6d70b5abefeb7f288dba9b
> + */
> +static pthread_rwlock_t lockarray[CURL_LOCK_DATA_LAST];
> +
> +/* Curl share data. */
> +static CURLSH *share;
>
> /* This lock protects access to the curl_handles vector below. */
> static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> @@ -90,18 +103,46 @@ static curl_handle_list curl_handles = empty_vector;
> static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
> static size_t in_use = 0, waiting = 0;
>
> -/* Close and free all handles in the pool. */
> +/* Initialize pool structures. */
> void
> -free_all_handles (void)
> +load_pool (void)
> {
> size_t i;
>
> + for (i = 0; i < ARRAY_SIZE (lockarray); ++i)
> + pthread_rwlock_init (&lockarray[i], NULL);
error checking missing
> +
> + share = curl_share_init ();
> + if (share == NULL) {
> + nbdkit_error ("curl_share_init: %m");
> + exit (EXIT_FAILURE);
> + }
> + curl_share_setopt (share, CURLSHOPT_LOCKFUNC, lock_cb);
> + curl_share_setopt (share, CURLSHOPT_UNLOCKFUNC, unlock_cb);
> + curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT);
> + curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE);
> + curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
> + curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_SSL_SESSION);
error checking missing
> +}
> +
> +void
> +unload_pool (void)
> +{
> + size_t i;
> +
> + /* Close and free all handles in the pool. */
> nbdkit_debug ("free_all_handles: number of curl handles allocated: %zu",
> curl_handles.len);
>
> for (i = 0; i < curl_handles.len; ++i)
> free_handle (curl_handles.ptr[i]);
> curl_handle_list_reset (&curl_handles);
> +
> + /* Free share data. */
> + curl_share_cleanup (share);
> +
> + for (i = 0; i < ARRAY_SIZE (lockarray); ++i)
> + pthread_rwlock_destroy (&lockarray[i]);
> }
>
> /* Get a handle from the pool.
> @@ -221,6 +262,9 @@ allocate_handle (void)
> goto err;
> }
>
> + /* Share settings with other handles. */
> + curl_easy_setopt (ch->c, CURLOPT_SHARE, share);
> +
> /* Various options we always set.
> *
> * NB: Both here and below constants must be explicitly long because
> @@ -519,3 +563,30 @@ free_handle (struct curl_handle *ch)
> curl_slist_free_all (ch->headers_copy);
> free (ch);
> }
> +
> +static void
> +lock_cb (CURL *handle, curl_lock_data data, curl_lock_access access,
> + void *userptr)
> +{
> + assert (data < ARRAY_SIZE (lockarray));
> +
> + switch (access) {
> + case CURL_LOCK_ACCESS_SHARED:
> + pthread_rwlock_rdlock (&lockarray[data]);
> + break;
> + case CURL_LOCK_ACCESS_SINGLE:
> + pthread_rwlock_wrlock (&lockarray[data]);
> + break;
we could at least assert the return values
> + default:
> + /* CURL_LOCK_ACCESS_NONE is never used in the current libcurl code. */
> + abort ();
> + }
> +}
> +
> +static void
> +unlock_cb (CURL *handle, curl_lock_data data, void *userptr)
> +{
> + assert (data < ARRAY_SIZE (lockarray));
> +
> + pthread_rwlock_unlock (&lockarray[data]);
same here
> +}
Seems sane to me in general, except for the fact that the documentation
at <https://curl.se/libcurl/c/CURLSHOPT_SHARE.html> writes:
"""
CURL_LOCK_DATA_CONNECT
Put the connection cache in the share object and make all easy handles
using this share object share the connection cache.
Note that due to a known bug, it is not safe to share connections this
way between multiple concurrent threads. [...]
"""
Ugh, what? If it's not safe to share the connection cache between
threads, then CURL_LOCK_DATA_CONNECT is effectively unusable, and no
connection pooling can be done. How does that *not* make this whole curl
facility useless?
The facility in general looks super weird; what sense does it make *not*
to share some particular CURL_LOCK_DATA_xxx?
Laszlo
More information about the Libguestfs
mailing list