[Freeipa-devel] [PATCH 0018] Deadlock detection logic

Petr Spacek pspacek at redhat.com
Mon May 7 10:35:56 UTC 2012


On 05/03/2012 02:18 PM, Adam Tkac wrote:
> On Tue, Apr 24, 2012 at 03:52:00PM +0200, Petr Spacek wrote:
>> On 04/24/2012 03:21 PM, Petr Spacek wrote:
>>> Hello,
>>>
>>> this patch adds deadlock detection (based on simple timeout) to current code.
>>> If (probable) deadlock is detected, current action is stopped with proper error.
>>>
>>> It properly detects "Simo's deadlock" with 'connections' parameter == 1.
>>> (Described in https://fedorahosted.org/bind-dyndb-ldap/ticket/66)
>>>
>>> Deadlock itself will be fixed by separate patch.
>>>
>>> Petr^2 Spacek
>>
>> Self-NACK :-)
>>
>> Second version of the patch is attached. ldap_pool_getconnection()
>> and ldap_pool_putconnection() now has same interface and more
>> consistent behaviour.
>>
>> Overall functionality is same.
>
> Hi,
>
> although I'm not fully happy with current design of the detection logic, we can
> include it before we create something better, for example based on thread IDs
> (one thread can acquire semaphore only one time).
I agree, it's far from ideal. Connection and result handling needs redesign at 
first. After that I can modify detection logic to be more accurate.

>
> Please check my comments inside the patch.
All done.

Petr^2 Spacek

>
> Regards, Adam
>
>>  From ed7596a9410269c0c29418247971f262b7fa77f3 Mon Sep 17 00:00:00 2001
>> From: Petr Spacek<pspacek at redhat.com>
>> Date: Tue, 24 Apr 2012 15:09:32 +0200
>> Subject: [PATCH] Add simple semaphore deadlock detection logic.
>>   Signed-off-by: Petr Spacek<pspacek at redhat.com>
>>
>> ---
>>   src/ldap_helper.c |   78 ++++++++++++++++++++++++++++++++---------------------
>>   src/semaphore.c   |   26 +++++++++++++++---
>>   src/semaphore.h   |    6 +++-
>>   3 files changed, 74 insertions(+), 36 deletions(-)
>>
>> diff --git a/src/ldap_helper.c b/src/ldap_helper.c
>> index 47c0559..f4c4d02 100644
>> --- a/src/ldap_helper.c
>> +++ b/src/ldap_helper.c
>> @@ -296,9 +296,10 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t *name, isc_boolean_t lock);
>>   static isc_result_t ldap_pool_create(isc_mem_t *mctx, unsigned int connections,
>>   		ldap_pool_t **poolp);
>>   static void ldap_pool_destroy(ldap_pool_t **poolp);
>> -static ldap_connection_t * ldap_pool_getconnection(ldap_pool_t *pool);
>> +static isc_result_t ldap_pool_getconnection(ldap_pool_t *pool,
>> +		ldap_connection_t ** conn);
>>   static void ldap_pool_putconnection(ldap_pool_t *pool,
>> -		ldap_connection_t *ldap_conn);
>> +		ldap_connection_t ** conn);
>>   static isc_result_t ldap_pool_connect(ldap_pool_t *pool,
>>   		ldap_instance_t *ldap_inst);
>>
>> @@ -402,6 +403,10 @@ new_ldap_instance(isc_mem_t *mctx, const char *db_name,
>>   	ldap_settings[i++].target =&ldap_inst->dyn_update;
>>   	CHECK(set_settings(ldap_settings, argv));
>>
>> +	/* Set timer for deadlock detection inside semaphore_wait_timed . */
>> +	if (semaphore_wait_timeout.seconds<  ldap_inst->timeout+SEM_WAIT_TIMEOUT_ADD)
>> +		semaphore_wait_timeout.seconds = ldap_inst->timeout+SEM_WAIT_TIMEOUT_ADD;
>
> I'm affraid such low timeout (12 sec by default) will cause too many false
> positives. I recommend to set semaphore timeout to at least 60 seconds.
>
>>   	/* Validate and check settings. */
>>   	str_toupper(ldap_inst->sasl_mech);
>>   	if (ldap_inst->connections<  1) {
>> @@ -1089,7 +1094,7 @@ isc_result_t
>>   refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
>>   {
>>   	isc_result_t result = ISC_R_SUCCESS;
>> -	ldap_connection_t *ldap_conn;
>> +	ldap_connection_t *ldap_conn = NULL;
>>   	int zone_count = 0;
>>   	ldap_entry_t *entry;
>>   	dns_rbt_t *rbt = NULL;
>> @@ -1114,7 +1119,7 @@ refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
>>
>>   	log_debug(2, "refreshing list of zones for %s", ldap_inst->db_name);
>>
>> -	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
>> +	CHECK(ldap_pool_getconnection(ldap_inst->pool,&ldap_conn));
>>
>>   	CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(ldap_inst->base),
>>   			 LDAP_SCOPE_SUBTREE, config_attrs, 0,
>> @@ -1227,7 +1232,7 @@ cleanup:
>>   	if (invalidate_nodechain)
>>   		dns_rbtnodechain_invalidate(&chain);
>>
>> -	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
>> +	ldap_pool_putconnection(ldap_inst->pool,&ldap_conn);
>>
>>   	log_debug(2, "finished refreshing list of zones");
>>
>> @@ -1381,7 +1386,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
>>   		     dns_name_t *origin, ldapdb_nodelist_t *nodelist)
>>   {
>>   	isc_result_t result;
>> -	ldap_connection_t *ldap_conn;
>> +	ldap_connection_t *ldap_conn = NULL;
>>   	ldap_entry_t *entry;
>>   	ld_string_t *string = NULL;
>>   	ldapdb_node_t *node;
>> @@ -1392,7 +1397,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
>>   	REQUIRE(nodelist != NULL);
>>
>>   	/* RRs aren't in the cache, perform ordinary LDAP query */
>> -	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
>> +	CHECK(ldap_pool_getconnection(ldap_inst->pool,&ldap_conn));
>>
>>   	INIT_LIST(*nodelist);
>>   	CHECK(str_new(mctx,&string));
>> @@ -1439,7 +1444,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
>>   	result = ISC_R_SUCCESS;
>>
>>   cleanup:
>> -	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
>> +	ldap_pool_putconnection(ldap_inst->pool,&ldap_conn);
>>   	str_destroy(&string);
>>
>>   	return result;
>> @@ -1450,7 +1455,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
>>   		     dns_name_t *origin, ldapdb_rdatalist_t *rdatalist)
>>   {
>>   	isc_result_t result;
>> -	ldap_connection_t *ldap_conn;
>> +	ldap_connection_t *ldap_conn = NULL;
>>   	ldap_entry_t *entry;
>>   	ld_string_t *string = NULL;
>>   	ldap_cache_t *cache;
>> @@ -1468,12 +1473,11 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
>>   		return result;
>>
>>   	/* RRs aren't in the cache, perform ordinary LDAP query */
>> -	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
>> -
>>   	INIT_LIST(*rdatalist);
>>   	CHECK(str_new(mctx,&string));
>>   	CHECK(dnsname_to_dn(ldap_inst->zone_register, name, string));
>>
>> +	CHECK(ldap_pool_getconnection(ldap_inst->pool,&ldap_conn));
>>   	CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(string),
>>   			 LDAP_SCOPE_BASE, NULL, 0, "(objectClass=idnsRecord)"));
>>
>> @@ -1500,7 +1504,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
>>   		result = ISC_R_NOTFOUND;
>>
>>   cleanup:
>> -	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
>> +	ldap_pool_putconnection(ldap_inst->pool,&ldap_conn);
>>   	str_destroy(&string);
>>
>>   	if (result != ISC_R_SUCCESS)
>> @@ -2250,7 +2254,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t *ldap_inst,
>>   		zone_dn += 1; /* skip whitespace */
>>   	}
>>
>> -	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
>> +	CHECK(ldap_pool_getconnection(ldap_inst->pool,&ldap_conn));
>>   	CHECK(ldap_query(ldap_inst, ldap_conn, zone_dn,
>>   					 LDAP_SCOPE_BASE, attrs, 0,
>>   					 "(&(objectClass=idnsZone)(idnsZoneActive=TRUE))"));
>> @@ -2481,9 +2485,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t *ldap_inst,
>>   	}
>>   	
>>   cleanup:
>> -	if (ldap_conn != NULL)
>> -		ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
>> -
>> +	ldap_pool_putconnection(ldap_inst->pool,&ldap_conn);
>>   	str_destroy(&owner_dn_ptr);
>>   	str_destroy(&owner_dn);
>>   	free_ldapmod(mctx,&change[0]);
>> @@ -2565,15 +2567,18 @@ ldap_pool_destroy(ldap_pool_t **poolp)
>>   	MEM_PUT_AND_DETACH(pool);
>>   }
>>
>> -static ldap_connection_t *
>> -ldap_pool_getconnection(ldap_pool_t *pool)
>> +static isc_result_t
>> +ldap_pool_getconnection(ldap_pool_t *pool, ldap_connection_t ** conn)
>>   {
>>   	ldap_connection_t *ldap_conn = NULL;
>>   	unsigned int i;
>> +	isc_result_t result;
>>
>>   	REQUIRE(pool != NULL);
>> +	REQUIRE(conn != NULL&&  *conn == NULL);
>> +	ldap_conn = *conn;
>>
>> -	semaphore_wait(&pool->conn_semaphore);
>> +	CHECK(semaphore_wait_timed(&pool->conn_semaphore));
>>   	for (i = 0; i<  pool->connections; i++) {
>>   		ldap_conn = pool->conns[i];
>>   		if (isc_mutex_trylock(&ldap_conn->lock) == ISC_R_SUCCESS)
>> @@ -2583,16 +2588,30 @@ ldap_pool_getconnection(ldap_pool_t *pool)
>>   	RUNTIME_CHECK(ldap_conn != NULL);
>>
>>   	INIT_LIST(ldap_conn->ldap_entries);
>> +	*conn = ldap_conn;
>>
>> -	return ldap_conn;
>> +cleanup:
>> +	if (result != ISC_R_SUCCESS) {
>> +		log_error("timeout in ldap_pool_getconnection(): try to raise "
>> +				"'connections' parameter; potential deadlock?");
>> +	}
>> +	return result;
>>   }
>>
>>   static void
>> -ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t *ldap_conn)
>> +ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t **conn)
>>   {
>> +	REQUIRE(conn);
>
> Although this is correct, please use REQUIRE(conn != NULL);
>
>> +	ldap_connection_t *ldap_conn = *conn;
>> +
>> +	if (ldap_conn == NULL)
>> +		return;
>> +
>>   	ldap_query_free(ldap_conn);
>>   	UNLOCK(&ldap_conn->lock);
>>   	semaphore_signal(&pool->conn_semaphore);
>> +
>> +	*conn = NULL;
>>   }
>>
>>   static isc_result_t
>> @@ -2719,7 +2738,7 @@ update_action(isc_task_t *task, isc_event_t *event)
>>   	ldap_psearchevent_t *pevent = (ldap_psearchevent_t *)event;
>>   	isc_result_t result ;
>>   	ldap_instance_t *inst = NULL;
>> -	ldap_connection_t *conn;
>> +	ldap_connection_t *conn = NULL;
>>   	ldap_entry_t *entry;
>>   	isc_boolean_t delete = ISC_TRUE;
>>   	isc_mem_t *mctx;
>> @@ -2736,7 +2755,7 @@ update_action(isc_task_t *task, isc_event_t *event)
>>   	if (result != ISC_R_SUCCESS)
>>   		goto cleanup;
>>
>> -	conn = ldap_pool_getconnection(inst->pool);
>> +	CHECK(ldap_pool_getconnection(inst->pool,&conn));
>>
>>   	CHECK(ldap_query(inst, conn, pevent->dn,
>>   			 LDAP_SCOPE_BASE, attrs, 0,
>> @@ -2754,14 +2773,13 @@ update_action(isc_task_t *task, isc_event_t *event)
>>   	if (delete)
>>   		CHECK(ldap_delete_zone(inst, pevent->dn, ISC_TRUE));
>>
>> -        ldap_pool_putconnection(inst->pool, conn);
>> -
>>   cleanup:
>>   	if (result != ISC_R_SUCCESS)
>>   		log_error("update_action (psearch) failed for %s. "
>>   			  "Zones can be outdated, run `rndc reload`",
>>   			  pevent->dn);
>>
>> +	ldap_pool_putconnection(inst->pool,&conn);
>>   	isc_mem_free(mctx, pevent->dbname);
>>   	isc_mem_free(mctx, pevent->dn);
>>   	isc_mem_detach(&mctx);
>> @@ -2790,7 +2808,7 @@ update_config(isc_task_t *task, isc_event_t *event)
>>   	if (result != ISC_R_SUCCESS)
>>   		goto cleanup;
>>
>> -	conn = ldap_pool_getconnection(inst->pool);
>> +	CHECK(ldap_pool_getconnection(inst->pool,&conn));
>>
>>   	CHECK(ldap_query(inst, conn, pevent->dn,
>>   			 LDAP_SCOPE_BASE, attrs, 0,
>> @@ -2809,14 +2827,12 @@ update_config(isc_task_t *task, isc_event_t *event)
>>
>>
>>   cleanup:
>> -	if (conn != NULL)
>> -		ldap_pool_putconnection(inst->pool, conn);
>> -
>>   	if (result != ISC_R_SUCCESS)
>>   		log_error("update_config (psearch) failed for %s. "
>>   			  "Configuration can be outdated, run `rndc reload`",
>>   			  pevent->dn);
>>
>> +	ldap_pool_putconnection(inst->pool,&conn);
>>   	isc_mem_free(mctx, pevent->dbname);
>>   	isc_mem_free(mctx, pevent->dn);
>>   	isc_mem_detach(&mctx);
>> @@ -3079,7 +3095,7 @@ ldap_psearch_watcher(isc_threadarg_t arg)
>>   	tv.tv_usec = 0;
>>
>>   	/* Pick connection, one is reserved purely for this thread */
>> -	conn = ldap_pool_getconnection(inst->pool);
>> +	CHECK(ldap_pool_getconnection(inst->pool,&conn));
>>
>>   	/* Try to connect. */
>>   	while (conn->handle == NULL) {
>> @@ -3189,7 +3205,7 @@ soft_err:
>>   	log_debug(1, "Ending ldap_psearch_watcher");
>>
>>   cleanup:
>> -	ldap_pool_putconnection(inst->pool, conn);
>> +	ldap_pool_putconnection(inst->pool,&conn);
>>
>>   	return (isc_threadresult_t)0;
>>   }
>> diff --git a/src/semaphore.c b/src/semaphore.c
>> index 41d6a30..76e6aa2 100644
>> --- a/src/semaphore.c
>> +++ b/src/semaphore.c
>> @@ -27,8 +27,19 @@
>>   #include<isc/condition.h>
>>   #include<isc/result.h>
>>   #include<isc/util.h>
>> +#include<isc/time.h>
>>
>>   #include "semaphore.h"
>> +#include "util.h"
>> +
>> +/*
>> + * Timer setting for deadlock detection. Format: seconds, nanoseconds.
>> + * These values will be overwriten during initialization
>> + * from set_settings() with max(setting+SEM_WAIT_TIMEOUT_ADD, curr_value).
>> + *
>> + * Initial value can be useful in early phases of initialization.
>> + */
>> +isc_interval_t semaphore_wait_timeout = { 3, 0 };
>>
>>   /*
>>    * Initialize a semaphore.
>> @@ -74,20 +85,27 @@ semaphore_destroy(semaphore_t *sem)
>>   /*
>>    * Wait on semaphore. This operation will try to acquire a lock on the
>>    * semaphore. If the semaphore is already acquired as many times at it allows,
>> - * the function will block until someone releases the lock.
>> + * the function will block until someone releases the lock OR timeout expire.
>> + *
>> + * @return ISC_R_SUCCESS or ISC_R_TIMEDOUT or other errors from ISC libs
>>    */
>> -void
>> -semaphore_wait(semaphore_t *sem)
>> +isc_result_t
>> +semaphore_wait_timed(semaphore_t *sem)
>>   {
>> +	isc_result_t result;
>> +	isc_time_t abs_timeout;
>>   	REQUIRE(sem != NULL);
>>
>>   	LOCK(&sem->mutex);
>> +	CHECK(isc_time_nowplusinterval(&abs_timeout,&semaphore_wait_timeout));
>
> The isc_time_nowplusinterval doesn't modify shared data, please call it before
> LOCK().
>
>>   	while (sem->value<= 0)
>> -		WAIT(&sem->cond,&sem->mutex);
>> +		CHECK(WAITUNTIL(&sem->cond,&sem->mutex,&abs_timeout));
>>   	sem->value--;
>>
>> +cleanup:
>>   	UNLOCK(&sem->mutex);
>> +	return result;
>>   }
>>
>>   /*
>> diff --git a/src/semaphore.h b/src/semaphore.h
>> index 4ca4f65..5aef5d2 100644
>> --- a/src/semaphore.h
>> +++ b/src/semaphore.h
>> @@ -24,6 +24,10 @@
>>   #include<isc/condition.h>
>>   #include<isc/mutex.h>
>>
>> +/* How many seconds will be added to user-defined connection parameter 'timeout'. */
>> +#define SEM_WAIT_TIMEOUT_ADD 2 /* seconds */
>> +extern isc_interval_t semaphore_wait_timeout;
>> +
>>   /*
>>    * Semaphore can be "acquired" multiple times. However, it has a maximum
>>    * number of times someone can acquire him. If a semaphore is already acquired
>> @@ -40,7 +44,7 @@ typedef struct semaphore	semaphore_t;
>>   /* Public functions. */
>>   isc_result_t	semaphore_init(semaphore_t *sem, int value);
>>   void		semaphore_destroy(semaphore_t *sem);
>> -void		semaphore_wait(semaphore_t *sem);
>> +isc_result_t	semaphore_wait_timed(semaphore_t *sem);
>>   void		semaphore_signal(semaphore_t *sem);
>>
>>   #endif /* !_LD_SEMAPHORE_H_ */
>> --
>> 1.7.7.6
>>
>
>

-------------- next part --------------
From ff06086e55d64cd6893a6d064d8e101f09a71956 Mon Sep 17 00:00:00 2001
From: Petr Spacek <pspacek at redhat.com>
Date: Tue, 24 Apr 2012 15:09:32 +0200
Subject: [PATCH] Add simple semaphore deadlock detection logic.
 Signed-off-by: Petr Spacek <pspacek at redhat.com>

---
 src/ldap_helper.c |   78 ++++++++++++++++++++++++++++++++---------------------
 src/semaphore.c   |   26 +++++++++++++++---
 src/semaphore.h   |    6 +++-
 3 files changed, 74 insertions(+), 36 deletions(-)

diff --git a/src/ldap_helper.c b/src/ldap_helper.c
index 47c05599cb48d22e172244b2c3229b9320f37d59..304c37296f8f3a428c4c72b45fe4154b2c9e954c 100644
--- a/src/ldap_helper.c
+++ b/src/ldap_helper.c
@@ -296,9 +296,10 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t *name, isc_boolean_t lock);
 static isc_result_t ldap_pool_create(isc_mem_t *mctx, unsigned int connections,
 		ldap_pool_t **poolp);
 static void ldap_pool_destroy(ldap_pool_t **poolp);
-static ldap_connection_t * ldap_pool_getconnection(ldap_pool_t *pool);
+static isc_result_t ldap_pool_getconnection(ldap_pool_t *pool,
+		ldap_connection_t ** conn);
 static void ldap_pool_putconnection(ldap_pool_t *pool,
-		ldap_connection_t *ldap_conn);
+		ldap_connection_t ** conn);
 static isc_result_t ldap_pool_connect(ldap_pool_t *pool,
 		ldap_instance_t *ldap_inst);
 
@@ -402,6 +403,10 @@ new_ldap_instance(isc_mem_t *mctx, const char *db_name,
 	ldap_settings[i++].target = &ldap_inst->dyn_update;
 	CHECK(set_settings(ldap_settings, argv));
 
+	/* Set timer for deadlock detection inside semaphore_wait_timed . */
+	if (semaphore_wait_timeout.seconds < ldap_inst->timeout*SEM_WAIT_TIMEOUT_MUL)
+		semaphore_wait_timeout.seconds = ldap_inst->timeout*SEM_WAIT_TIMEOUT_MUL;
+
 	/* Validate and check settings. */
 	str_toupper(ldap_inst->sasl_mech);
 	if (ldap_inst->connections < 1) {
@@ -1089,7 +1094,7 @@ isc_result_t
 refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
 {
 	isc_result_t result = ISC_R_SUCCESS;
-	ldap_connection_t *ldap_conn;
+	ldap_connection_t *ldap_conn = NULL;
 	int zone_count = 0;
 	ldap_entry_t *entry;
 	dns_rbt_t *rbt = NULL;
@@ -1114,7 +1119,7 @@ refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
 
 	log_debug(2, "refreshing list of zones for %s", ldap_inst->db_name);
 
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 
 	CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(ldap_inst->base),
 			 LDAP_SCOPE_SUBTREE, config_attrs, 0,
@@ -1227,7 +1232,7 @@ cleanup:
 	if (invalidate_nodechain)
 		dns_rbtnodechain_invalidate(&chain);
 
-	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
+	ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
 
 	log_debug(2, "finished refreshing list of zones");
 
@@ -1381,7 +1386,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
 		     dns_name_t *origin, ldapdb_nodelist_t *nodelist)
 {
 	isc_result_t result;
-	ldap_connection_t *ldap_conn;
+	ldap_connection_t *ldap_conn = NULL;
 	ldap_entry_t *entry;
 	ld_string_t *string = NULL;
 	ldapdb_node_t *node;
@@ -1392,7 +1397,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
 	REQUIRE(nodelist != NULL);
 
 	/* RRs aren't in the cache, perform ordinary LDAP query */
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 
 	INIT_LIST(*nodelist);
 	CHECK(str_new(mctx, &string));
@@ -1439,7 +1444,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *nam
 	result = ISC_R_SUCCESS;
 
 cleanup:
-	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
+	ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
 	str_destroy(&string);
 
 	return result;
@@ -1450,7 +1455,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
 		     dns_name_t *origin, ldapdb_rdatalist_t *rdatalist)
 {
 	isc_result_t result;
-	ldap_connection_t *ldap_conn;
+	ldap_connection_t *ldap_conn = NULL;
 	ldap_entry_t *entry;
 	ld_string_t *string = NULL;
 	ldap_cache_t *cache;
@@ -1468,12 +1473,11 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
 		return result;
 
 	/* RRs aren't in the cache, perform ordinary LDAP query */
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
-
 	INIT_LIST(*rdatalist);
 	CHECK(str_new(mctx, &string));
 	CHECK(dnsname_to_dn(ldap_inst->zone_register, name, string));
 
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 	CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(string),
 			 LDAP_SCOPE_BASE, NULL, 0, "(objectClass=idnsRecord)"));
 
@@ -1500,7 +1504,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t *ldap_inst, dns_name_t *na
 		result = ISC_R_NOTFOUND;
 
 cleanup:
-	ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
+	ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
 	str_destroy(&string);
 
 	if (result != ISC_R_SUCCESS)
@@ -2250,7 +2254,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t *ldap_inst,
 		zone_dn += 1; /* skip whitespace */
 	}
 
-	ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
+	CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
 	CHECK(ldap_query(ldap_inst, ldap_conn, zone_dn,
 					 LDAP_SCOPE_BASE, attrs, 0,
 					 "(&(objectClass=idnsZone)(idnsZoneActive=TRUE))"));
@@ -2481,9 +2485,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t *ldap_inst,
 	}
 	
 cleanup:
-	if (ldap_conn != NULL)
-		ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
-
+	ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
 	str_destroy(&owner_dn_ptr);
 	str_destroy(&owner_dn);
 	free_ldapmod(mctx, &change[0]);
@@ -2565,34 +2567,51 @@ ldap_pool_destroy(ldap_pool_t **poolp)
 	MEM_PUT_AND_DETACH(pool);
 }
 
-static ldap_connection_t *
-ldap_pool_getconnection(ldap_pool_t *pool)
+static isc_result_t
+ldap_pool_getconnection(ldap_pool_t *pool, ldap_connection_t ** conn)
 {
 	ldap_connection_t *ldap_conn = NULL;
 	unsigned int i;
+	isc_result_t result;
 
 	REQUIRE(pool != NULL);
+	REQUIRE(conn != NULL && *conn == NULL);
+	ldap_conn = *conn;
 
-	semaphore_wait(&pool->conn_semaphore);
+	CHECK(semaphore_wait_timed(&pool->conn_semaphore));
 	for (i = 0; i < pool->connections; i++) {
 		ldap_conn = pool->conns[i];
 		if (isc_mutex_trylock(&ldap_conn->lock) == ISC_R_SUCCESS)
 			break;
 	}
 
 	RUNTIME_CHECK(ldap_conn != NULL);
 
 	INIT_LIST(ldap_conn->ldap_entries);
+	*conn = ldap_conn;
 
-	return ldap_conn;
+cleanup:
+	if (result != ISC_R_SUCCESS) {
+		log_error("timeout in ldap_pool_getconnection(): try to raise "
+				"'connections' parameter; potential deadlock?");
+	}
+	return result;
 }
 
 static void
-ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t *ldap_conn)
+ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t **conn)
 {
+	REQUIRE(conn != NULL);
+	ldap_connection_t *ldap_conn = *conn;
+
+	if (ldap_conn == NULL)
+		return;
+
 	ldap_query_free(ldap_conn);
 	UNLOCK(&ldap_conn->lock);
 	semaphore_signal(&pool->conn_semaphore);
+
+	*conn = NULL;
 }
 
 static isc_result_t
@@ -2719,7 +2738,7 @@ update_action(isc_task_t *task, isc_event_t *event)
 	ldap_psearchevent_t *pevent = (ldap_psearchevent_t *)event;
 	isc_result_t result ;
 	ldap_instance_t *inst = NULL;
-	ldap_connection_t *conn;
+	ldap_connection_t *conn = NULL;
 	ldap_entry_t *entry;
 	isc_boolean_t delete = ISC_TRUE;
 	isc_mem_t *mctx;
@@ -2736,7 +2755,7 @@ update_action(isc_task_t *task, isc_event_t *event)
 	if (result != ISC_R_SUCCESS)
 		goto cleanup;
 
-	conn = ldap_pool_getconnection(inst->pool);
+	CHECK(ldap_pool_getconnection(inst->pool, &conn));
 
 	CHECK(ldap_query(inst, conn, pevent->dn,
 			 LDAP_SCOPE_BASE, attrs, 0,
@@ -2754,14 +2773,13 @@ update_action(isc_task_t *task, isc_event_t *event)
 	if (delete)
 		CHECK(ldap_delete_zone(inst, pevent->dn, ISC_TRUE));
 
-        ldap_pool_putconnection(inst->pool, conn);
-
 cleanup:
 	if (result != ISC_R_SUCCESS)
 		log_error("update_action (psearch) failed for %s. "
 			  "Zones can be outdated, run `rndc reload`",
 			  pevent->dn);
 
+	ldap_pool_putconnection(inst->pool, &conn);
 	isc_mem_free(mctx, pevent->dbname);
 	isc_mem_free(mctx, pevent->dn);
 	isc_mem_detach(&mctx);
@@ -2790,7 +2808,7 @@ update_config(isc_task_t *task, isc_event_t *event)
 	if (result != ISC_R_SUCCESS)
 		goto cleanup;
 
-	conn = ldap_pool_getconnection(inst->pool);
+	CHECK(ldap_pool_getconnection(inst->pool, &conn));
 
 	CHECK(ldap_query(inst, conn, pevent->dn,
 			 LDAP_SCOPE_BASE, attrs, 0,
@@ -2809,14 +2827,12 @@ update_config(isc_task_t *task, isc_event_t *event)
 
 
 cleanup:
-	if (conn != NULL)
-		ldap_pool_putconnection(inst->pool, conn);
-
 	if (result != ISC_R_SUCCESS)
 		log_error("update_config (psearch) failed for %s. "
 			  "Configuration can be outdated, run `rndc reload`",
 			  pevent->dn);
 
+	ldap_pool_putconnection(inst->pool, &conn);
 	isc_mem_free(mctx, pevent->dbname);
 	isc_mem_free(mctx, pevent->dn);
 	isc_mem_detach(&mctx);
@@ -3079,7 +3095,7 @@ ldap_psearch_watcher(isc_threadarg_t arg)
 	tv.tv_usec = 0;
 
 	/* Pick connection, one is reserved purely for this thread */
-	conn = ldap_pool_getconnection(inst->pool);
+	CHECK(ldap_pool_getconnection(inst->pool, &conn));
 
 	/* Try to connect. */
 	while (conn->handle == NULL) {
@@ -3189,7 +3205,7 @@ soft_err:
 	log_debug(1, "Ending ldap_psearch_watcher");
 
 cleanup:
-	ldap_pool_putconnection(inst->pool, conn);
+	ldap_pool_putconnection(inst->pool, &conn);
 
 	return (isc_threadresult_t)0;
 }
diff --git a/src/semaphore.c b/src/semaphore.c
index 41d6a306b76022a35967cd157ff767cdf2f2588d..352219f113a233218b5522beea5520dddbd748e6 100644
--- a/src/semaphore.c
+++ b/src/semaphore.c
@@ -27,8 +27,19 @@
 #include <isc/condition.h>
 #include <isc/result.h>
 #include <isc/util.h>
+#include <isc/time.h>
 
 #include "semaphore.h"
+#include "util.h"
+
+/*
+ * Timer setting for deadlock detection. Format: seconds, nanoseconds.
+ * These values will be overwriten during initialization
+ * from set_settings() with max(setting+SEM_WAIT_TIMEOUT_ADD, curr_value).
+ *
+ * Initial value can be useful in early phases of initialization.
+ */
+isc_interval_t semaphore_wait_timeout = { 3, 0 };
 
 /*
  * Initialize a semaphore.
@@ -74,20 +85,27 @@ semaphore_destroy(semaphore_t *sem)
 /*
  * Wait on semaphore. This operation will try to acquire a lock on the
  * semaphore. If the semaphore is already acquired as many times at it allows,
- * the function will block until someone releases the lock.
+ * the function will block until someone releases the lock OR timeout expire.
+ *
+ * @return ISC_R_SUCCESS or ISC_R_TIMEDOUT or other errors from ISC libs
  */
-void
-semaphore_wait(semaphore_t *sem)
+isc_result_t
+semaphore_wait_timed(semaphore_t *sem)
 {
+	isc_result_t result;
+	isc_time_t abs_timeout;
 	REQUIRE(sem != NULL);
 
+	CHECK(isc_time_nowplusinterval(&abs_timeout, &semaphore_wait_timeout));
 	LOCK(&sem->mutex);
 
 	while (sem->value <= 0)
-		WAIT(&sem->cond, &sem->mutex);
+		CHECK(WAITUNTIL(&sem->cond, &sem->mutex, &abs_timeout));
 	sem->value--;
 
+cleanup:
 	UNLOCK(&sem->mutex);
+	return result;
 }
 
 /*
diff --git a/src/semaphore.h b/src/semaphore.h
index 4ca4f652f599b520d759f656f42aa782c4ba9b38..1367747c03b9c022dce82b1b0191a496c5d359af 100644
--- a/src/semaphore.h
+++ b/src/semaphore.h
@@ -24,6 +24,10 @@
 #include <isc/condition.h>
 #include <isc/mutex.h>
 
+/* Multiplier for to user-defined connection parameter 'timeout'. */
+#define SEM_WAIT_TIMEOUT_MUL 6 /* times */
+extern isc_interval_t semaphore_wait_timeout;
+
 /*
  * Semaphore can be "acquired" multiple times. However, it has a maximum
  * number of times someone can acquire him. If a semaphore is already acquired
@@ -40,7 +44,7 @@ typedef struct semaphore	semaphore_t;
 /* Public functions. */
 isc_result_t	semaphore_init(semaphore_t *sem, int value);
 void		semaphore_destroy(semaphore_t *sem);
-void		semaphore_wait(semaphore_t *sem);
+isc_result_t	semaphore_wait_timed(semaphore_t *sem);
 void		semaphore_signal(semaphore_t *sem);
 
 #endif /* !_LD_SEMAPHORE_H_ */
-- 
1.7.7.6



More information about the Freeipa-devel mailing list