[Pki-devel] [PATCH] 0117..0119 Retry key retrieval on failure

Fraser Tweedale ftweedal at redhat.com
Wed Jun 1 04:31:41 UTC 2016


Hi team,

The attached patches implement key retrieval retry with backoff
(ticket: https://fedorahosted.org/pki/ticket/2293).

Thanks,
Fraser
-------------- next part --------------
From 93db05ea3cb892be0d19c65fdf7dc9be6759a651 Mon Sep 17 00:00:00 2001
From: Fraser Tweedale <ftweedal at redhat.com>
Date: Tue, 31 May 2016 21:38:37 +1000
Subject: [PATCH 117/119] Limit key retrieval to a single thread per CA

Before implementing lightweight CA key retrieval retry with
exponential backoff, ensure that only one key retriever thread can
execute at a time, for each CA.

Also make SigningUnit initialisation (initSigUnit) synchronised.

Part of: https://fedorahosted.org/pki/ticket/2293
---
 .../src/com/netscape/ca/CertificateAuthority.java  | 28 +++++++++++++++++-----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java
index 5b2f382c29a716f3e72695b7da5406bb85b34845..c2a7d0c907b4dd5774b22cfbb404194da162a535 100644
--- a/base/ca/src/com/netscape/ca/CertificateAuthority.java
+++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java
@@ -204,6 +204,8 @@ public class CertificateAuthority
 
     private static final Map<AuthorityID, ICertificateAuthority> caMap =
         Collections.synchronizedSortedMap(new TreeMap<AuthorityID, ICertificateAuthority>());
+    private static final Map<AuthorityID, Thread> keyRetrieverThreads =
+        Collections.synchronizedSortedMap(new TreeMap<AuthorityID, Thread>());
     protected CertificateAuthority hostCA = null;
     protected AuthorityID authorityID = null;
     protected AuthorityID authorityParentID = null;
@@ -1460,7 +1462,7 @@ public class CertificateAuthority
     /**
      * init CA signing unit & cert chain.
      */
-    private boolean initSigUnit(boolean retrieveKeys)
+    private synchronized boolean initSigUnit(boolean retrieveKeys)
             throws EBaseException {
         try {
             // init signing unit
@@ -1491,11 +1493,16 @@ public class CertificateAuthority
                 CMS.debug("CA signing key and cert not (yet) present in NSSDB");
                 signingUnitException = e;
                 if (retrieveKeys == true) {
-                    CMS.debug("Starting KeyRetrieverRunner thread");
-                    new Thread(
-                        new KeyRetrieverRunner(this),
-                        "KeyRetrieverRunner-" + authorityID
-                    ).start();
+                    if (!keyRetrieverThreads.containsKey(authorityID)) {
+                        CMS.debug("Starting KeyRetrieverRunner thread");
+                        Thread t = new Thread(
+                            new KeyRetrieverRunner(this),
+                            "KeyRetrieverRunner-" + authorityID);
+                        t.start();
+                        keyRetrieverThreads.put(authorityID, t);
+                    } else {
+                        CMS.debug("KeyRetriever thread already running for authority " + authorityID);
+                    }
                 }
                 return false;
             }
@@ -3180,6 +3187,15 @@ public class CertificateAuthority
         }
 
         public void run() {
+            try {
+                _run();
+            } finally {
+                // remove self from tracker
+                keyRetrieverThreads.remove(ca.authorityID);
+            }
+        }
+
+        private void _run() {
             String KR_CLASS_KEY = "features.authority.keyRetrieverClass";
             String className = null;
             try {
-- 
2.5.5

-------------- next part --------------
From f6eed44501b6b65f1da1e32c9c6755db180b8776 Mon Sep 17 00:00:00 2001
From: Fraser Tweedale <ftweedal at redhat.com>
Date: Tue, 31 May 2016 22:20:06 +1000
Subject: [PATCH 118/119] Don't update obsolete CertificateAuthority after key
 retrieval

If additional LDAP events are processed for a lightweight CA while
key retrieval proceeds in another thread, when retrieval is
complete, the KeyRetrieverRunner reinitialises the signing unit of a
stale object.

Instead of holding onto a CertificateAuthority, hold onto the
AuthorityID and look it up afresh when ready to reinitialise its
SigningUnit.

Part of: https://fedorahosted.org/pki/ticket/2293
---
 .../src/com/netscape/ca/CertificateAuthority.java  | 31 +++++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java
index c2a7d0c907b4dd5774b22cfbb404194da162a535..289ab7ac703fcd7e35b11b589f0dfb2b57488006 100644
--- a/base/ca/src/com/netscape/ca/CertificateAuthority.java
+++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java
@@ -1496,7 +1496,7 @@ public class CertificateAuthority
                     if (!keyRetrieverThreads.containsKey(authorityID)) {
                         CMS.debug("Starting KeyRetrieverRunner thread");
                         Thread t = new Thread(
-                            new KeyRetrieverRunner(this),
+                            new KeyRetrieverRunner(authorityID, mNickname, authorityKeyHosts),
                             "KeyRetrieverRunner-" + authorityID);
                         t.start();
                         keyRetrieverThreads.put(authorityID, t);
@@ -3180,10 +3180,15 @@ public class CertificateAuthority
     }
 
     private class KeyRetrieverRunner implements Runnable {
-        private CertificateAuthority ca;
+        private AuthorityID aid;
+        private String nickname;
+        private Collection<String> hosts;
 
-        public KeyRetrieverRunner(CertificateAuthority ca) {
-            this.ca = ca;
+        public KeyRetrieverRunner(
+                AuthorityID aid, String nickname, Collection<String> hosts) {
+            this.aid = aid;
+            this.nickname = nickname;
+            this.hosts = hosts;
         }
 
         public void run() {
@@ -3191,7 +3196,7 @@ public class CertificateAuthority
                 _run();
             } finally {
                 // remove self from tracker
-                keyRetrieverThreads.remove(ca.authorityID);
+                keyRetrieverThreads.remove(aid);
             }
         }
 
@@ -3226,7 +3231,7 @@ public class CertificateAuthority
 
             KeyRetriever.Result krr = null;
             try {
-                krr = kr.retrieveKey(ca.mNickname, ca.authorityKeyHosts);
+                krr = kr.retrieveKey(nickname, hosts);
             } catch (Throwable e) {
                 CMS.debug("Caught exception during execution of KeyRetriever.retrieveKey");
                 CMS.debug(e);
@@ -3254,16 +3259,28 @@ public class CertificateAuthority
                 CryptoUtil.importPKIArchiveOptions(
                     token, unwrappingKey, pubkey, paoData);
 
-                cert = manager.importUserCACertPackage(certBytes, ca.mNickname);
+                cert = manager.importUserCACertPackage(certBytes, nickname);
             } catch (Throwable e) {
                 CMS.debug("Caught exception during cert/key import");
                 CMS.debug(e);
                 return;
             }
 
+            CertificateAuthority ca;
             boolean initSigUnitSucceeded = false;
             try {
                 CMS.debug("Reinitialising SigningUnit");
+
+                /* While we were retrieving the key and cert, the
+                 * CertificateAuthority instance in the caMap might
+                 * have been replaced, so look it up afresh.
+                 */
+                ca = (CertificateAuthority) getCA(aid);
+                if (ca == null) {
+                    CMS.debug("Authority is no longer in caMap; returning.");
+                    return;
+                }
+
                 // re-init signing unit, but avoid triggering
                 // key replication if initialisation fails again
                 // for some reason
-- 
2.5.5

-------------- next part --------------
From de46a100698c77168467d72b16823de2630dda55 Mon Sep 17 00:00:00 2001
From: Fraser Tweedale <ftweedal at redhat.com>
Date: Wed, 1 Jun 2016 09:46:56 +1000
Subject: [PATCH 119/119] Retry failed key retrieval with backoff

If lightweight CA key retrieval fails, retry the retieval after a
delay of 10 seconds initially, increasing thereafter.

Fixes: https://fedorahosted.org/pki/ticket/2293
---
 .../src/com/netscape/ca/CertificateAuthority.java  | 58 ++++++++++++++++------
 1 file changed, 44 insertions(+), 14 deletions(-)

diff --git a/base/ca/src/com/netscape/ca/CertificateAuthority.java b/base/ca/src/com/netscape/ca/CertificateAuthority.java
index 289ab7ac703fcd7e35b11b589f0dfb2b57488006..1505b9951cffae2cae8dcc715bb60fa56b78cae8 100644
--- a/base/ca/src/com/netscape/ca/CertificateAuthority.java
+++ b/base/ca/src/com/netscape/ca/CertificateAuthority.java
@@ -3193,21 +3193,39 @@ public class CertificateAuthority
 
         public void run() {
             try {
-                _run();
+                long d = 10000;  // initial delay of 10 seconds
+                while (!_run()) {
+                    CMS.debug("Retrying in " + d / 1000 + " seconds");
+                    try {
+                        Thread.sleep(d);
+                    } catch (InterruptedException e) {
+                        break;
+                    }
+                    d += d / 2;  // back off
+                }
             } finally {
                 // remove self from tracker
                 keyRetrieverThreads.remove(aid);
             }
         }
 
-        private void _run() {
+        /**
+         * Main routine of key retrieval and key import.
+         *
+         * @return false if retrieval should be retried, or true if
+         *         the process is "done".  Note that a result of true
+         *         does not necessarily imply that the process fully
+         *         completed.  See comments at sites of 'return true;'
+         *         below.
+         */
+        private boolean _run() {
             String KR_CLASS_KEY = "features.authority.keyRetrieverClass";
             String className = null;
             try {
                 className = CMS.getConfigStore().getString(KR_CLASS_KEY);
             } catch (EBaseException e) {
                 CMS.debug("Unable to read key retriever class from CS.cfg: " + e);
-                return;
+                return false;
             }
 
             KeyRetriever kr = null;
@@ -3218,15 +3236,15 @@ public class CertificateAuthority
             } catch (ClassNotFoundException e) {
                 CMS.debug("Could not find class: " + className);
                 CMS.debug(e);
-                return;
+                return false;
             } catch (ClassCastException e) {
                 CMS.debug("Class is not an instance of KeyRetriever: " + className);
                 CMS.debug(e);
-                return;
+                return false;
             } catch (InstantiationException | IllegalAccessException e) {
                 CMS.debug("Could not instantiate class: " + className);
                 CMS.debug(e);
-                return;
+                return false;
             }
 
             KeyRetriever.Result krr = null;
@@ -3235,12 +3253,12 @@ public class CertificateAuthority
             } catch (Throwable e) {
                 CMS.debug("Caught exception during execution of KeyRetriever.retrieveKey");
                 CMS.debug(e);
-                return;
+                return false;
             }
 
             if (krr == null) {
                 CMS.debug("KeyRetriever did not return a result.");
-                return;
+                return false;
             }
 
             CMS.debug("Importing key and cert");
@@ -3263,7 +3281,7 @@ public class CertificateAuthority
             } catch (Throwable e) {
                 CMS.debug("Caught exception during cert/key import");
                 CMS.debug(e);
-                return;
+                return false;
             }
 
             CertificateAuthority ca;
@@ -3277,8 +3295,11 @@ public class CertificateAuthority
                  */
                 ca = (CertificateAuthority) getCA(aid);
                 if (ca == null) {
-                    CMS.debug("Authority is no longer in caMap; returning.");
-                    return;
+                    /* We got the key, but the authority has been
+                     * deleted.  Do not retry.
+                     */
+                    CMS.debug("Authority was deleted; returning.");
+                    return true;
                 }
 
                 // re-init signing unit, but avoid triggering
@@ -3289,22 +3310,31 @@ public class CertificateAuthority
             } catch (Throwable e) {
                 CMS.debug("Caught exception during SigningUnit re-init");
                 CMS.debug(e);
-                return;
+                return false;
             }
 
             if (!initSigUnitSucceeded) {
                 CMS.debug("Failed to re-init SigningUnit");
-                return;
+                return false;
             }
 
             CMS.debug("Adding self to authorityKeyHosts attribute");
             try {
                 ca.addInstanceToAuthorityKeyHosts();
             } catch (Throwable e) {
+                /* We retrieved key, imported it, and successfully
+                 * re-inited the signing unit.  The only thing that
+                 * failed was adding this host to the list of hosts
+                 * that possess the key.  This is unlikely, and the
+                 * key is available elsewhere, so no need to retry.
+                 */
                 CMS.debug("Failed to add self to authorityKeyHosts");
                 CMS.debug(e);
-                return;
+                return true;
             }
+
+            /* All good! */
+            return true;
         }
     }
 
-- 
2.5.5



More information about the Pki-devel mailing list