[Libguestfs] [PATCH nbdkit] curl: Try to share as much as possible between handles in the pool

Laszlo Ersek lersek at redhat.com
Wed Feb 22 15:52:35 UTC 2023


On 2/22/23 16:01, Richard W.M. Jones wrote:
> Using the libcurl share interface we can share data between the
> separate curl easy handles in the pool.  For more about this see:
> 
> https://curl.se/libcurl/c/CURLSHOPT_SHARE.html
> https://gist.github.com/bagder/7eccf74f8b6d70b5abefeb7f288dba9b
> https://everything.curl.dev/libcurl/sharing
> ---
>  plugins/curl/curldefs.h |  3 +-
>  plugins/curl/curl.c     |  4 ++-
>  plugins/curl/pool.c     | 75 +++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 78 insertions(+), 4 deletions(-)
> 
> diff --git a/plugins/curl/curldefs.h b/plugins/curl/curldefs.h
> index c2a3432fc..d614379d0 100644
> --- a/plugins/curl/curldefs.h
> +++ b/plugins/curl/curldefs.h
> @@ -117,9 +117,10 @@ struct curl_handle {
>  };
>  
>  /* pool.c */
> +extern void load_pool (void);
> +extern void unload_pool (void);
>  extern struct curl_handle *get_handle (void);
>  extern void put_handle (struct curl_handle *ch);
> -extern void free_all_handles (void);
>  
>  /* scripts.c */
>  extern int do_scripts (struct curl_handle *ch);
> diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c
> index b5927b5b4..b8624a6f8 100644
> --- a/plugins/curl/curl.c
> +++ b/plugins/curl/curl.c
> @@ -101,6 +101,8 @@ curl_load (void)
>      nbdkit_error ("libcurl initialization failed: %d", (int) r);
>      exit (EXIT_FAILURE);
>    }
> +
> +  load_pool ();
>  }
>  
>  static void
> @@ -112,7 +114,7 @@ curl_unload (void)
>    free (password);
>    free (proxy_password);
>    scripts_unload ();
> -  free_all_handles ();
> +  unload_pool ();
>    curl_global_cleanup ();
>  }
>  
> diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c
> index b6f4f8e5f..536123388 100644
> --- a/plugins/curl/pool.c
> +++ b/plugins/curl/pool.c
> @@ -52,6 +52,7 @@
>  
>  #include <nbdkit-plugin.h>
>  
> +#include "array-size.h"
>  #include "ascii-ctype.h"
>  #include "ascii-string.h"
>  #include "cleanup.h"
> @@ -73,6 +74,18 @@ static int debug_cb (CURL *handle, curl_infotype type,
>  static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
>  static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque);
>  static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque);
> +static void lock_cb (CURL *handle, curl_lock_data data,
> +                     curl_lock_access access, void *userptr);
> +static void unlock_cb (CURL *handle, curl_lock_data data,
> +                       void *userptr);
> +
> +/* These locks protect access to the curl share data.  See:
> + * https://gist.github.com/bagder/7eccf74f8b6d70b5abefeb7f288dba9b
> + */
> +static pthread_rwlock_t lockarray[CURL_LOCK_DATA_LAST];
> +
> +/* Curl share data. */
> +static CURLSH *share;
>  
>  /* This lock protects access to the curl_handles vector below. */
>  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
> @@ -90,18 +103,46 @@ static curl_handle_list curl_handles = empty_vector;
>  static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
>  static size_t in_use = 0, waiting = 0;
>  
> -/* Close and free all handles in the pool. */
> +/* Initialize pool structures. */
>  void
> -free_all_handles (void)
> +load_pool (void)
>  {
>    size_t i;
>  
> +  for (i = 0; i < ARRAY_SIZE (lockarray); ++i)
> +    pthread_rwlock_init (&lockarray[i], NULL);

error checking missing

> +
> +  share = curl_share_init ();
> +  if (share == NULL) {
> +    nbdkit_error ("curl_share_init: %m");
> +    exit (EXIT_FAILURE);
> +  }
> +  curl_share_setopt (share, CURLSHOPT_LOCKFUNC, lock_cb);
> +  curl_share_setopt (share, CURLSHOPT_UNLOCKFUNC, unlock_cb);
> +  curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT);
> +  curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE);
> +  curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
> +  curl_share_setopt (share, CURLSHOPT_SHARE, CURL_LOCK_DATA_SSL_SESSION);

error checking missing

> +}
> +
> +void
> +unload_pool (void)
> +{
> +  size_t i;
> +
> +  /* Close and free all handles in the pool. */
>    nbdkit_debug ("free_all_handles: number of curl handles allocated: %zu",
>                  curl_handles.len);
>  
>    for (i = 0; i < curl_handles.len; ++i)
>      free_handle (curl_handles.ptr[i]);
>    curl_handle_list_reset (&curl_handles);
> +
> +  /* Free share data. */
> +  curl_share_cleanup (share);
> +
> +  for (i = 0; i < ARRAY_SIZE (lockarray); ++i)
> +    pthread_rwlock_destroy (&lockarray[i]);
>  }
>  
>  /* Get a handle from the pool.
> @@ -221,6 +262,9 @@ allocate_handle (void)
>      goto err;
>    }
>  
> +  /* Share settings with other handles. */
> +  curl_easy_setopt (ch->c, CURLOPT_SHARE, share);
> +
>    /* Various options we always set.
>     *
>     * NB: Both here and below constants must be explicitly long because
> @@ -519,3 +563,30 @@ free_handle (struct curl_handle *ch)
>      curl_slist_free_all (ch->headers_copy);
>    free (ch);
>  }
> +
> +static void
> +lock_cb (CURL *handle, curl_lock_data data, curl_lock_access access,
> +         void *userptr)
> +{
> +  assert (data < ARRAY_SIZE (lockarray));
> +
> +  switch (access) {
> +  case CURL_LOCK_ACCESS_SHARED:
> +    pthread_rwlock_rdlock (&lockarray[data]);
> +    break;
> +  case CURL_LOCK_ACCESS_SINGLE:
> +    pthread_rwlock_wrlock (&lockarray[data]);
> +    break;

we could at least assert the return values

> +  default:
> +    /* CURL_LOCK_ACCESS_NONE is never used in the current libcurl code. */
> +    abort ();
> +  }
> +}
> +
> +static void
> +unlock_cb (CURL *handle, curl_lock_data data, void *userptr)
> +{
> +  assert (data < ARRAY_SIZE (lockarray));
> +
> +  pthread_rwlock_unlock (&lockarray[data]);

same here

> +}

Seems sane to me in general, except for the fact that the documentation
at <https://curl.se/libcurl/c/CURLSHOPT_SHARE.html> writes:

"""
CURL_LOCK_DATA_CONNECT

Put the connection cache in the share object and make all easy handles
using this share object share the connection cache.

Note that due to a known bug, it is not safe to share connections this
way between multiple concurrent threads. [...]
"""

Ugh, what? If it's not safe to share the connection cache between
threads, then CURL_LOCK_DATA_CONNECT is effectively unusable, and no
connection pooling can be done. How does that *not* make this whole curl
facility useless?

The facility in general looks super weird; what sense does it make *not*
to share some particular CURL_LOCK_DATA_xxx?

Laszlo


More information about the Libguestfs mailing list