[Freeipa-devel] [PATCH 92 WIP] Flush whole zone from cache during zone renaming/removal.

Adam Tkac atkac at redhat.com
Tue Dec 4 13:36:26 UTC 2012


On Thu, Nov 15, 2012 at 07:06:37PM +0100, Petr Spacek wrote:
> Hello,
> 
> attached patch is preliminary implementation of selective zone flush.
> 
> 
> Implementation is not so straight-forward as I want to see. Before
> discussing the patch itself - can we consider per-zone caches? In
> that case, we can simply deallocate whole per-zone RBT and we are
> done.
> 
> Pros:
> * Potentially better concurrency, simpler code, much less corner cases.
> 
> Cons:
> * We have to look into Zone register before searching the cache.
> * It can limit concurrency ... with many extra small zones? I'm not sure.

Hi Peter,

In my opinion per-zone caches are better. Look into zone register isn't
costly operation.

Regards, Adam

> ------------
> 
> Now we can dive into all the gory details of single-tree-cache implementation:
> 
> Function discard_zone_from_cache() contains a long comment about
> potential problems, please send me your opinions.
> 
> Functions dbg_* can be simply deleted after end of testing.
> 
> 
> I encountered some questions about locking:
> How I should lock these two locks properly?
> 	RWLOCK(&zr->rwlock, isc_rwlocktype_read);
> 	LOCK(&cache->mutex);
> 
> AFAIK without some more intelligent algorithm or locking protocol it
> can simply deadlock if two threads attempt to get both locks in
> different order.
> 
> For now I chosen isc_task_beginexclusive() way. Hopefully, zone
> flush should be rare operation so it can be enough.
> 
> It raises another question:
> Is it possible for a thread to hold some lock during isc_task_beginexclusive()?
> 
> I mean this situation:
> thread 1: lock(&cache->mutex)
> thread 1: store a pointer to the middle of the cache
> thread 2: isc_task_beginexclusive()
> thread 2: do something with cache
> thread 2: isc_task_endexclusive()
> thread 1: dereference stored pointer -> CRASH - thread 2 changed the
> data and pointer is invalid ... but thread 1 held the lock!
> 
> I'm not really sure about this part of BIND. My guess:
> During "exclusive mode" all threads except single one (= thread
> which called isc_task_beginexclusive()) are blocked somewhere near
> dispatch() ... so they do nothing and thus they should not hold any
> lock.
> 
> Is my guess correct? I looked into task.c and related code but I
> can't say "I understood!" :-(
> 
> 
> Now the funny part - RBT tree before and after per-zone flush.
> 
> Expected behaviour when removing zone 'test.'
> =============================================
> // cache tree before "test." zone removal
> . (empty node)
>                     4.34.10.in-addr.arpa
>                         89.4.34.10.in-addr.arpa
>             brq.redhat.com
>                 pspacek.brq.redhat.com
>                 test.brq.redhat.com
>     test
>         e.test
>             _kerberos.e.test
>             _tcp.e.test (empty node)
>                 _kerberos._tcp.e.test
>                 _kerberos-master._tcp.e.test
>                 _kpasswd._tcp.e.test
>                 _ldap._tcp.e.test
>             _udp.e.test (empty node)
>                 _kerberos._udp.e.test
>                 _kerberos-master._udp.e.test
>                 _kpasswd._udp.e.test
>                 _ntp._udp.e.test
>             c182.e.test
>             pspacek.e.test
>             test.e.test
>         rec.test
>         sec.test
>         sub.test
>             ns.sub.test
>             rec.sub.test
> 
> 
> // cache tree after 'test.' zone removal
> // zones 'e.test.', 'sub.test.' and 'sec.test.' are still present
> // record 'rec.test.' disappeared
> . (empty node)
>                     4.34.10.in-addr.arpa
>                         89.4.34.10.in-addr.arpa
>             brq.redhat.com
>                 pspacek.brq.redhat.com
>                 test.brq.redhat.com
>     test (empty node)
>         e.test
>             _kerberos.e.test
>             _tcp.e.test (empty node)
>                 _kerberos._tcp.e.test
>                 _kerberos-master._tcp.e.test
>                 _kpasswd._tcp.e.test
>                 _ldap._tcp.e.test
>             _udp.e.test (empty node)
>                 _kerberos._udp.e.test
>                 _kerberos-master._udp.e.test
>                 _kpasswd._udp.e.test
>                 _ntp._udp.e.test
>             c182.e.test
>             pspacek.e.test
>             test.e.test
>         sec.test
>         sub.test
>             ns.sub.test
>             rec.sub.test
> 
> -- 
> Petr^2 Spacek

> From 91df0bb70398f985d6b13c282672c9a87cf98f41 Mon Sep 17 00:00:00 2001
> From: Petr Spacek <pspacek at redhat.com>
> Date: Thu, 15 Nov 2012 18:32:00 +0100
> Subject: [PATCH] [WIP] Flush whole zone from cache during zone
>  renaming/removal.
> 
> Signed-off-by: Petr Spacek <pspacek at redhat.com>
> ---
>  src/cache.c       | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/cache.h       |   5 ++
>  src/ldap_helper.c |   4 +-
>  3 files changed, 249 insertions(+), 1 deletion(-)
> 
> diff --git a/src/cache.c b/src/cache.c
> index 898d48b291a83da7f77dbcf79e2bd3e7ff8281aa..5f969f33d911bd4f111059beeb3e0dd920fba226 100644
> --- a/src/cache.c
> +++ b/src/cache.c
> @@ -23,19 +23,23 @@
>  #include <isc/result.h>
>  #include <isc/time.h>
>  #include <isc/util.h>
> +#include <isc/rwlock.h>
> +#include <isc/task.h>
>  
>  #include <dns/rbt.h>
>  #include <dns/result.h>
>  #include <dns/log.h>
> +#include <dns/fixedname.h>
>  
>  #include <string.h>
>  
>  #include "cache.h"
>  #include "ldap_helper.h"
>  #include "log.h"
>  #include "rdlist.h"
>  #include "settings.h"
>  #include "util.h"
> +#include "zone_register.h"
>  
>  struct ldap_cache {
>  	isc_mutex_t	mutex; /* TODO: RWLOCK? */
> @@ -51,6 +55,70 @@ typedef struct {
>  	isc_time_t		valid_until;
>  } cache_node_t;
>  
> +/************** Use following functions only for debug purposes **************/
> +void
> +dbg_print_name_indent(dns_name_t *name) {
> +	int label_count;
> +
> +	label_count = dns_name_countlabels(name);
> +	label_count -= 1; /* root is not indented */
> +	label_count *= 4; /* indentation for single domain level */
> +
> +	printf("%2$*1$s", label_count, "");
> +}
> +
> +void
> +dbg_print_name(dns_name_t *name) {
> +	char printbuff[DNS_NAME_FORMATSIZE];
> +
> +	dns_name_format(name, printbuff, DNS_NAME_FORMATSIZE);
> +	printf("%s", printbuff);
> +}
> +
> +void
> +dbg_print_rbt_names(isc_mem_t *mctx, dns_rbt_t *rbt) {
> +	isc_result_t result = ISC_R_SUCCESS;
> +	dns_rbtnodechain_t chain;
> +
> +	dns_rbtnodechain_init(&chain, mctx);
> +	result = dns_rbtnodechain_first(&chain, rbt, NULL, NULL);
> +	while (result != ISC_R_NOMORE && result != ISC_R_NOTFOUND) {
> +		dns_fixedname_t name;
> +		dns_fixedname_t origin;
> +		dns_fixedname_t concat;
> +		dns_fixedname_init(&name);
> +		dns_fixedname_init(&origin);
> +		dns_fixedname_init(&concat);
> +		dns_rbtnode_t *node = NULL;
> +		char *node_desc = "";
> +
> +		result = dns_rbtnodechain_current(&chain, dns_fixedname_name(&name),
> +						  dns_fixedname_name(&origin), &node);
> +		if (!(result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN))
> +			break;
> +
> +		CHECK(dns_name_concatenate(dns_fixedname_name(&name),
> +					   dns_fixedname_name(&origin),
> +					   dns_fixedname_name(&concat),
> +					   NULL));
> +		dbg_print_name_indent(dns_fixedname_name(&concat));
> +		dbg_print_name(dns_fixedname_name(&concat));
> +		if (node->data == NULL) {
> +			node_desc = " (empty node)";
> +		}
> +		printf("%s\n", node_desc);
> +
> +		result = dns_rbtnodechain_next(&chain, NULL, NULL);
> +		if (!(result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN))
> +			break;
> +	}
> +	dns_rbtnodechain_invalidate(&chain);
> +
> +cleanup:
> +	return;
> +}
> +/************************ End of debug only functions ************************/
> +
>  static void
>  cache_node_deleter(void *data, void *deleter_arg)
>  {
> @@ -179,6 +247,7 @@ ldap_cache_getrdatalist(isc_mem_t *mctx, ldap_cache_t *cache,
>  		return ISC_R_NOTFOUND;
>  
>  	LOCK(&cache->mutex);
> +	//dbg_print_rbt_names(cache->mctx, cache->rbt);
>  	result = dns_rbt_findname(cache->rbt, name, 0, NULL, (void *)&node);
>  	switch (result) {
>  	case ISC_R_SUCCESS:
> @@ -304,6 +373,178 @@ discard_from_cache(ldap_cache_t *cache, dns_name_t *name)
>  	return result;
>  }
>  
> +/**
> + * Delete all records associated with single DNS zone from the cache.
> + *
> + * Each individual record in the cache is tested and deleted only if:
> + * 1) Cache record belongs to deleted zone (specified by del_zone parameter).
> + *    and at same time
> + * 2) Cache record doesn't belong to some *sub*-zone of deleted zone
> + *    (according to zone register).
> + *
> + * Task will be switched to and from exclusive mode automatically.
> + *
> + * @param[in] del_zone Absolute DNS name of deleted zone
> + * @param[in] task     Task which will be locked prior the cache RBT iteration
> + *
> + * @retval ISC_R_SUCCESS All records beloging to del_zone were deleted.
> + * @retval other         Any error including ISC_R_NOMEMORY.
> + *
> + * @attention
> + * There is a case when some records wouldn't be deleted.
> + * Let's have two master zones 'test' and 'sub.test' with following records:
> + *
> + * @attention
> + * zone 'test.':
> + * - @      SOA
> + * - sub    NS  ns.sub
> + * - ns.sub A   1.2.3.4
> + * - eg.sub TXT "blah blah"
> + *
> + * @attention
> + * zone 'sub.test.':
> + * - @      SOA
> + * - @      NS  ns
> + * - ns     A   1.2.3.4
> + *
> + * @attention
> + * In that case deletion of zone 'test.' woudn't delete records 'sub.test.',
> + * 'ns.sub.test.' and 'eg.sub.test.' from cache because zone 'sub.test.'
> + * is present in ZR.
> + *
> + * @attention
> + * Records in zone 'test.' with names ending with 'sub.test.' are not
> + * authoritative and have to be exactly same as in zone 'sub.test'
> + * (see http://tools.ietf.org/html/rfc1034#section-4.2.1)
> + *
> + * @warning
> + * For reasons stated above record 'eg.sub.test.' in zone 'test.'
> + * should not exist at all and should never appear in cache.
> + */
> +isc_result_t
> +discard_zone_from_cache(ldap_cache_t *cache, zone_register_t *zr,
> +			dns_name_t *del_zone, isc_task_t *task)
> +{
> +	isc_result_t result;
> +	dns_rbtnodechain_t chain;
> +	dns_fixedname_t name;
> +	dns_fixedname_t origin;
> +	dns_fixedname_t concat;
> +	void *nodedata = NULL;
> +	dns_rbt_t *zr_rbt = NULL;
> +	dns_fixedname_t zr_foundname;
> +	dns_namelist_t del_names_list;
> +	dns_name_t *del_name = NULL;
> +	isc_result_t lock_status = ISC_R_IGNORE;
> +
> +	REQUIRE(cache != NULL);
> +	REQUIRE(dns_name_isabsolute(del_zone));
> +
> +	dns_fixedname_init(&name);
> +	dns_fixedname_init(&origin);
> +	dns_fixedname_init(&concat);
> +	dns_rbtnodechain_init(&chain, cache->mctx);
> +	ISC_LIST_INIT(del_names_list);
> +
> +	if (cache->rbt == NULL)
> +		CLEANUP_WITH(ISC_R_NOMORE);
> +
> +	lock_status = isc_task_beginexclusive(task);
> +	RUNTIME_CHECK(lock_status == ISC_R_SUCCESS ||
> +		      lock_status == ISC_R_LOCKBUSY);
> +
> +	/* ???? can somebody hold some locks during isc_task_beginexclusive()?
> +	RWLOCK(&zr->rwlock, isc_rwlocktype_read);
> +	LOCK(&cache->mutex);
> +	*/
> +
> +	/* Iterate over cache RBT and remember names for deletion. */
> +	zr_rbt = zr_get_rbt(zr);
> +	result = dns_rbtnodechain_first(&chain,
> +					cache->rbt,
> +					dns_fixedname_name(&name),
> +					dns_fixedname_name(&origin));
> +	while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
> +		nodedata = NULL;
> +
> +		CHECK(dns_name_concatenate(dns_fixedname_name(&name),
> +					   dns_fixedname_name(&origin),
> +					   dns_fixedname_name(&concat),
> +					   NULL));
> +
> +		if (dns_name_issubdomain(dns_fixedname_name(&concat), del_zone)) {
> +			dns_fixedname_init(&zr_foundname);
> +			result = dns_rbt_findname(zr_rbt,
> +						  dns_fixedname_name(&concat),
> +						  DNS_RBTFIND_NOOPTIONS,
> +						  dns_fixedname_name(&zr_foundname),
> +						  &nodedata);
> +
> +			/* Delete record if no sub-domain of del_zone was found
> +			 * in zone register. */
> +			if (result == ISC_R_NOTFOUND ||
> +			   ((result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) &&
> +			    (dns_name_equal(dns_fixedname_name(&zr_foundname), del_zone) ||
> +			     !dns_name_issubdomain(dns_fixedname_name(&zr_foundname), del_zone)))			    ) {
> +				del_name = isc_mem_get(cache->mctx, sizeof(dns_name_t));
> +				if (del_name == NULL)
> +					CLEANUP_WITH(ISC_R_NOMEMORY);
> +				dns_name_init(del_name, NULL);
> +				ISC_LIST_APPEND(del_names_list, del_name, link);
> +				CHECK(dns_name_dup(dns_fixedname_name(&concat),
> +						   cache->mctx, del_name));
> +			} else if (result != ISC_R_SUCCESS &&
> +				   result != DNS_R_PARTIALMATCH)
> +					goto cleanup;
> +		}
> +
> +		result = dns_rbtnodechain_next(&chain, dns_fixedname_name(&name),
> +				  dns_fixedname_name(&origin));
> +	}
> +
> +cleanup:
> +	if (result != ISC_R_NOMORE && result != ISC_R_NOTFOUND) {
> +		log_error_r("cache flush failed during 'sieve' phase");
> +		goto cleanup;
> +	} else {
> +		result = ISC_R_SUCCESS;
> +	}
> +
> +	dbg_print_rbt_names(cache->mctx, cache->rbt);
> +
> +	/* free all memory - even in case of error */
> +	while (!ISC_LIST_EMPTY(del_names_list)) {
> +		del_name = ISC_LIST_HEAD(del_names_list);
> +		ISC_LIST_UNLINK(del_names_list, del_name, link);
> +		if (result == ISC_R_SUCCESS) {
> +			result = dns_rbt_deletename(cache->rbt, del_name,
> +						    ISC_FALSE);
> +			if (result == ISC_R_NOTFOUND)
> +				result = ISC_R_SUCCESS;
> +			else if (result != ISC_R_SUCCESS)
> +				log_error_r("unable to delete name from cache");
> +		}
> +		if (dns_name_dynamic(del_name))
> +			dns_name_free(del_name, cache->mctx);
> +		isc_mem_put(cache->mctx, del_name, sizeof(dns_name_t));
> +	}
> +
> +	dbg_print_rbt_names(cache->mctx, cache->rbt);
> +
> +	if (lock_status == ISC_R_SUCCESS)
> +		isc_task_endexclusive(task);
> +
> +
> +	/* ???? can somebody hold some locks during isc_task_beginexclusive()?
> +	if (cache->rbt != NULL) {
> +		UNLOCK(&cache->mutex);
> +		RWUNLOCK(&zr->rwlock, isc_rwlocktype_read);
> +	}
> +	*/
> +
> +	return result;
> +}
> +
>  isc_result_t
>  flush_ldap_cache(ldap_cache_t *cache)
>  {
> diff --git a/src/cache.h b/src/cache.h
> index a7aa5b7e889d9e195484a11dcf4f9a10d811f623..63f0d5ea727121af898d7f8651067d2433385ffc 100644
> --- a/src/cache.h
> +++ b/src/cache.h
> @@ -23,6 +23,7 @@
>  #define _LD_CACHE_H_
>  
>  #include "types.h"
> +#include "zone_register.h"
>  
>  typedef struct ldap_cache ldap_cache_t;
>  
> @@ -77,6 +78,10 @@ ldap_cache_enabled(ldap_cache_t *cache);
>  isc_result_t
>  discard_from_cache(ldap_cache_t *cache, dns_name_t *name);
>  
> +isc_result_t
> +discard_zone_from_cache(ldap_cache_t *cache, zone_register_t *zr,
> +			dns_name_t *del_zone, isc_task_t *task);
> +
>  /**
>   * Discard all names from the cache and re-initialize internal RB-tree.
>   * @return ISC_R_SUCCESS even if cache is disabled.
> diff --git a/src/ldap_helper.c b/src/ldap_helper.c
> index 8a6f603d1393d322561a8cbb8fe4abf188c71dd0..abe19f1534c0794b69400589175fbd41937e0a59 100644
> --- a/src/ldap_helper.c
> +++ b/src/ldap_helper.c
> @@ -870,7 +870,9 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t *name, isc_boolean_t lock,
>  	}
>  
>  	/* TODO: flush cache records belonging to deleted zone */
> -	CHECK(discard_from_cache(inst->cache, name));
> +	//CHECK(discard_from_cache(inst->cache, name));
> +	CHECK(discard_zone_from_cache(inst->cache, inst->zone_register, name,
> +				      inst->task));
>  
>  	result = zr_get_zone_ptr(inst->zone_register, name, &zone);
>  	if (result == ISC_R_NOTFOUND || result == DNS_R_PARTIALMATCH) {
> -- 
> 1.7.11.7
> 


-- 
Adam Tkac, Red Hat, Inc.




More information about the Freeipa-devel mailing list