[Pki-devel] [PATCH] 0117..0119 Retry key retrieval on failure
Fraser Tweedale
ftweedal at redhat.com
Wed Jun 1 04:31:41 UTC 2016
Hi team,
The attached patches implement key retrieval retry with backoff
(ticket: https://fedorahosted.org/pki/ticket/2293).
Thanks,
Fraser
-------------- next part --------------
From 93db05ea3cb892be0d19c65fdf7dc9be6759a651 Mon Sep 17 00:00:00 2001
From: Fraser Tweedale <ftweedal at redhat.com>
Date: Tue, 31 May 2016 21:38:37 +1000
Subject: [PATCH 117/119] Limit key retrieval to a single thread per CA
Before implementing lightweight CA key retrieval retry with
exponential backoff, ensure that only one key retriever thread can
execute at a time, for each CA.
Also make SigningUnit initialisation (initSigUnit) synchronised.
Part of: https://fedorahosted.org/pki/ticket/2293
---
.../src/com/netscape/ca/CertificateAuthority.java | 28 +++++++++++++++++-----
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java
index 5b2f382c29a716f3e72695b7da5406bb85b34845..c2a7d0c907b4dd5774b22cfbb404194da162a535 100644
--- a/base/ca/src/com/netscape/ca/CertificateAuthority.java
+++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java
@@ -204,6 +204,8 @@ public class CertificateAuthority
private static final Map<AuthorityID, ICertificateAuthority> caMap =
Collections.synchronizedSortedMap(new TreeMap<AuthorityID, ICertificateAuthority>());
+ private static final Map<AuthorityID, Thread> keyRetrieverThreads =
+ Collections.synchronizedSortedMap(new TreeMap<AuthorityID, Thread>());
protected CertificateAuthority hostCA = null;
protected AuthorityID authorityID = null;
protected AuthorityID authorityParentID = null;
@@ -1460,7 +1462,7 @@ public class CertificateAuthority
/**
* init CA signing unit & cert chain.
*/
- private boolean initSigUnit(boolean retrieveKeys)
+ private synchronized boolean initSigUnit(boolean retrieveKeys)
throws EBaseException {
try {
// init signing unit
@@ -1491,11 +1493,16 @@ public class CertificateAuthority
CMS.debug("CA signing key and cert not (yet) present in NSSDB");
signingUnitException = e;
if (retrieveKeys == true) {
- CMS.debug("Starting KeyRetrieverRunner thread");
- new Thread(
- new KeyRetrieverRunner(this),
- "KeyRetrieverRunner-" + authorityID
- ).start();
+ if (!keyRetrieverThreads.containsKey(authorityID)) {
+ CMS.debug("Starting KeyRetrieverRunner thread");
+ Thread t = new Thread(
+ new KeyRetrieverRunner(this),
+ "KeyRetrieverRunner-" + authorityID);
+ t.start();
+ keyRetrieverThreads.put(authorityID, t);
+ } else {
+ CMS.debug("KeyRetriever thread already running for authority " + authorityID);
+ }
}
return false;
}
@@ -3180,6 +3187,15 @@ public class CertificateAuthority
}
public void run() {
+ try {
+ _run();
+ } finally {
+ // remove self from tracker
+ keyRetrieverThreads.remove(ca.authorityID);
+ }
+ }
+
+ private void _run() {
String KR_CLASS_KEY = "features.authority.keyRetrieverClass";
String className = null;
try {
--
2.5.5
-------------- next part --------------
From f6eed44501b6b65f1da1e32c9c6755db180b8776 Mon Sep 17 00:00:00 2001
From: Fraser Tweedale <ftweedal at redhat.com>
Date: Tue, 31 May 2016 22:20:06 +1000
Subject: [PATCH 118/119] Don't update obsolete CertificateAuthority after key
retrieval
If additional LDAP events are processed for a lightweight CA while
key retrieval proceeds in another thread, when retrieval is
complete, the KeyRetrieverRunner reinitialises the signing unit of a
stale object.
Instead of holding onto a CertificateAuthority, hold onto the
AuthorityID and look it up afresh when ready to reinitialise its
SigningUnit.
Part of: https://fedorahosted.org/pki/ticket/2293
---
.../src/com/netscape/ca/CertificateAuthority.java | 31 +++++++++++++++++-----
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java
index c2a7d0c907b4dd5774b22cfbb404194da162a535..289ab7ac703fcd7e35b11b589f0dfb2b57488006 100644
--- a/base/ca/src/com/netscape/ca/CertificateAuthority.java
+++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java
@@ -1496,7 +1496,7 @@ public class CertificateAuthority
if (!keyRetrieverThreads.containsKey(authorityID)) {
CMS.debug("Starting KeyRetrieverRunner thread");
Thread t = new Thread(
- new KeyRetrieverRunner(this),
+ new KeyRetrieverRunner(authorityID, mNickname, authorityKeyHosts),
"KeyRetrieverRunner-" + authorityID);
t.start();
keyRetrieverThreads.put(authorityID, t);
@@ -3180,10 +3180,15 @@ public class CertificateAuthority
}
private class KeyRetrieverRunner implements Runnable {
- private CertificateAuthority ca;
+ private AuthorityID aid;
+ private String nickname;
+ private Collection<String> hosts;
- public KeyRetrieverRunner(CertificateAuthority ca) {
- this.ca = ca;
+ public KeyRetrieverRunner(
+ AuthorityID aid, String nickname, Collection<String> hosts) {
+ this.aid = aid;
+ this.nickname = nickname;
+ this.hosts = hosts;
}
public void run() {
@@ -3191,7 +3196,7 @@ public class CertificateAuthority
_run();
} finally {
// remove self from tracker
- keyRetrieverThreads.remove(ca.authorityID);
+ keyRetrieverThreads.remove(aid);
}
}
@@ -3226,7 +3231,7 @@ public class CertificateAuthority
KeyRetriever.Result krr = null;
try {
- krr = kr.retrieveKey(ca.mNickname, ca.authorityKeyHosts);
+ krr = kr.retrieveKey(nickname, hosts);
} catch (Throwable e) {
CMS.debug("Caught exception during execution of KeyRetriever.retrieveKey");
CMS.debug(e);
@@ -3254,16 +3259,28 @@ public class CertificateAuthority
CryptoUtil.importPKIArchiveOptions(
token, unwrappingKey, pubkey, paoData);
- cert = manager.importUserCACertPackage(certBytes, ca.mNickname);
+ cert = manager.importUserCACertPackage(certBytes, nickname);
} catch (Throwable e) {
CMS.debug("Caught exception during cert/key import");
CMS.debug(e);
return;
}
+ CertificateAuthority ca;
boolean initSigUnitSucceeded = false;
try {
CMS.debug("Reinitialising SigningUnit");
+
+ /* While we were retrieving the key and cert, the
+ * CertificateAuthority instance in the caMap might
+ * have been replaced, so look it up afresh.
+ */
+ ca = (CertificateAuthority) getCA(aid);
+ if (ca == null) {
+ CMS.debug("Authority is no longer in caMap; returning.");
+ return;
+ }
+
// re-init signing unit, but avoid triggering
// key replication if initialisation fails again
// for some reason
--
2.5.5
-------------- next part --------------
From de46a100698c77168467d72b16823de2630dda55 Mon Sep 17 00:00:00 2001
From: Fraser Tweedale <ftweedal at redhat.com>
Date: Wed, 1 Jun 2016 09:46:56 +1000
Subject: [PATCH 119/119] Retry failed key retrieval with backoff
If lightweight CA key retrieval fails, retry the retieval after a
delay of 10 seconds initially, increasing thereafter.
Fixes: https://fedorahosted.org/pki/ticket/2293
---
.../src/com/netscape/ca/CertificateAuthority.java | 58 ++++++++++++++++------
1 file changed, 44 insertions(+), 14 deletions(-)
diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java
index 289ab7ac703fcd7e35b11b589f0dfb2b57488006..1505b9951cffae2cae8dcc715bb60fa56b78cae8 100644
--- a/base/ca/src/com/netscape/ca/CertificateAuthority.java
+++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java
@@ -3193,21 +3193,39 @@ public class CertificateAuthority
public void run() {
try {
- _run();
+ long d = 10000; // initial delay of 10 seconds
+ while (!_run()) {
+ CMS.debug("Retrying in " + d / 1000 + " seconds");
+ try {
+ Thread.sleep(d);
+ } catch (InterruptedException e) {
+ break;
+ }
+ d += d / 2; // back off
+ }
} finally {
// remove self from tracker
keyRetrieverThreads.remove(aid);
}
}
- private void _run() {
+ /**
+ * Main routine of key retrieval and key import.
+ *
+ * @return false if retrieval should be retried, or true if
+ * the process is "done". Note that a result of true
+ * does not necessarily imply that the process fully
+ * completed. See comments at sites of 'return true;'
+ * below.
+ */
+ private boolean _run() {
String KR_CLASS_KEY = "features.authority.keyRetrieverClass";
String className = null;
try {
className = CMS.getConfigStore().getString(KR_CLASS_KEY);
} catch (EBaseException e) {
CMS.debug("Unable to read key retriever class from CS.cfg: " + e);
- return;
+ return false;
}
KeyRetriever kr = null;
@@ -3218,15 +3236,15 @@ public class CertificateAuthority
} catch (ClassNotFoundException e) {
CMS.debug("Could not find class: " + className);
CMS.debug(e);
- return;
+ return false;
} catch (ClassCastException e) {
CMS.debug("Class is not an instance of KeyRetriever: " + className);
CMS.debug(e);
- return;
+ return false;
} catch (InstantiationException | IllegalAccessException e) {
CMS.debug("Could not instantiate class: " + className);
CMS.debug(e);
- return;
+ return false;
}
KeyRetriever.Result krr = null;
@@ -3235,12 +3253,12 @@ public class CertificateAuthority
} catch (Throwable e) {
CMS.debug("Caught exception during execution of KeyRetriever.retrieveKey");
CMS.debug(e);
- return;
+ return false;
}
if (krr == null) {
CMS.debug("KeyRetriever did not return a result.");
- return;
+ return false;
}
CMS.debug("Importing key and cert");
@@ -3263,7 +3281,7 @@ public class CertificateAuthority
} catch (Throwable e) {
CMS.debug("Caught exception during cert/key import");
CMS.debug(e);
- return;
+ return false;
}
CertificateAuthority ca;
@@ -3277,8 +3295,11 @@ public class CertificateAuthority
*/
ca = (CertificateAuthority) getCA(aid);
if (ca == null) {
- CMS.debug("Authority is no longer in caMap; returning.");
- return;
+ /* We got the key, but the authority has been
+ * deleted. Do not retry.
+ */
+ CMS.debug("Authority was deleted; returning.");
+ return true;
}
// re-init signing unit, but avoid triggering
@@ -3289,22 +3310,31 @@ public class CertificateAuthority
} catch (Throwable e) {
CMS.debug("Caught exception during SigningUnit re-init");
CMS.debug(e);
- return;
+ return false;
}
if (!initSigUnitSucceeded) {
CMS.debug("Failed to re-init SigningUnit");
- return;
+ return false;
}
CMS.debug("Adding self to authorityKeyHosts attribute");
try {
ca.addInstanceToAuthorityKeyHosts();
} catch (Throwable e) {
+ /* We retrieved key, imported it, and successfully
+ * re-inited the signing unit. The only thing that
+ * failed was adding this host to the list of hosts
+ * that possess the key. This is unlikely, and the
+ * key is available elsewhere, so no need to retry.
+ */
CMS.debug("Failed to add self to authorityKeyHosts");
CMS.debug(e);
- return;
+ return true;
}
+
+ /* All good! */
+ return true;
}
}
--
2.5.5
More information about the Pki-devel
mailing list