[Cluster-devel] conga/luci/site/luci/Extensions cluster_adapters.py
rmccabe at sourceware.org
rmccabe at sourceware.org
Tue Oct 31 00:16:19 UTC 2006
CVSROOT: /cvs/cluster
Module name: conga
Changes by: rmccabe at sourceware.org 2006-10-31 00:16:15
Modified files:
luci/site/luci/Extensions: cluster_adapters.py
Log message:
more logging and exception robustness
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&r1=1.133&r2=1.134
--- conga/luci/site/luci/Extensions/cluster_adapters.py 2006/10/30 22:52:00 1.133
+++ conga/luci/site/luci/Extensions/cluster_adapters.py 2006/10/31 00:16:14 1.134
@@ -1637,6 +1637,7 @@
try:
svcname = req.form['servicename']
except:
+ luci_log.debug_verbose('serviceStart error: no service name')
return None
try:
@@ -1645,22 +1646,28 @@
try:
nodename = req.form['nodename']
except:
- return None
+ nodename = None
+ cluname = None
try:
cluname = req['clustername']
except KeyError, e:
try:
- cluname = req.form['clusterName']
+ cluname = req.form['clustername']
except:
- return None
+ pass
+
+ if cluname is None:
+ luci_log.debug_verbose('serviceStart error: %s no cluster name' \
+ % svcname)
+ return None
ricci_agent = rc.hostname()
batch_number, result = startService(rc, svcname, nodename)
- if not batch_number or not result:
- luci_log.debug_verbose('startService %s @ %s call failed' \
- % (svcname, nodename))
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('startService %s call failed' \
+ % svcname)
return None
#Now we need to create a DB flag for this system.
@@ -1704,17 +1711,14 @@
try:
cluname = req.form['clustername']
except:
- try:
- cluname = rc.cluster_info()[0]
- except:
- pass
+ pass
if cluname is None:
luci_log.debug_verbose('unable to determine cluster name for serviceRestart %s' % svcname)
return None
batch_number, result = restartService(rc, svcname)
- if not batch_number or not result:
+ if batch_number is None or result is None:
luci_log.debug_verbose('restartService for %s failed' % svcname)
return None
@@ -1762,17 +1766,14 @@
try:
cluname = req.form['clustername']
except:
- try:
- cluname = rc.cluster_info()[0]
- except:
- pass
+ pass
if cluname is None:
luci_log.debug_verbose('unable to determine cluster name for serviceStop %s' % svcname)
return None
batch_number, result = stopService(rc, svcname)
- if not batch_number or not result:
+ if batch_number is None or result is None:
luci_log.debug_verbose('stopService for %s failed' % svcname)
return None
@@ -2097,7 +2098,7 @@
clustername = request['clustername']
except KeyError, e:
try:
- clustername = request.form['clusterName']
+ clustername = request.form['clustername']
except:
luci_log.debug('missing cluster name for NTP')
return None
@@ -2194,16 +2195,20 @@
return None
batch_number, result = nodeLeaveCluster(rc)
- batch_id = str(batch_number)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('nodeLeaveCluster error: batch_number and/or result is None')
+ return None
+ batch_id = str(batch_number)
objpath = str(path + "/" + objname)
+
try:
nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
#Now we need to annotate the new DB object
flag = self.restrictedTraverse(objpath)
flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE,NODE_LEAVE_CLUSTER, "string")
- flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' leaving cluster", "string")
+ flag.manage_addProperty(TASKTYPE, NODE_LEAVE_CLUSTER, "string")
+ flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' leaving cluster", "string")
except:
luci_log.debug('An error occurred while setting flag %s' % objpath)
@@ -2212,34 +2217,52 @@
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_JOIN_CLUSTER:
batch_number, result = nodeJoinCluster(rc)
- path = CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved
- nodefolder = self.restrictedTraverse(path)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('nodeJoin error: batch_number and/or result is None')
+ return None
+
+ path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
batch_id = str(batch_number)
- objname = nodename_resolved + "____flag"
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- objpath = path + "/" + objname
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID,batch_id, "string")
- flag.manage_addProperty(TASKTYPE,NODE_JOIN_CLUSTER, "string")
- flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' joining cluster", "string")
+ objname = str(nodename_resolved + "____flag")
+ objpath = str(path + "/" + objname)
+
+ try:
+ nodefolder = self.restrictedTraverse(path)
+ nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+ #Now we need to annotate the new DB object
+ flag = self.restrictedTraverse(objpath)
+ flag.manage_addProperty(BATCH_ID, batch_id, "string")
+ flag.manage_addProperty(TASKTYPE, NODE_JOIN_CLUSTER, "string")
+ flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' joining cluster", "string")
+ except Exception, e:
+ luci_log.debug_verbose('nodeJoin error: creating flags at %s: %s' \
+ % (path, str(e)))
response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
elif task == NODE_REBOOT:
batch_number, result = nodeReboot(rc)
- path = CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved
- nodefolder = self.restrictedTraverse(path)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('nodeReboot: batch_number and/or result is None')
+ return None
+
+ path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
batch_id = str(batch_number)
- objname = nodename_resolved + "____flag"
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- objpath = path + "/" + objname
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID, batch_id, "string")
- flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string")
- flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string")
+ objname = str(nodename_resolved + "____flag")
+ objpath = str(path + "/" + objname)
+
+ try:
+ nodefolder = self.restrictedTraverse(path)
+ nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+ #Now we need to annotate the new DB object
+ flag = self.restrictedTraverse(objpath)
+ flag.manage_addProperty(BATCH_ID, batch_id, "string")
+ flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string")
+ flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string")
+ except Exception, e:
+ luci_log.debug_verbose('nodeReboot err: creating flags at %s: %s' \
+ % (path, str(e)))
response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
@@ -2250,16 +2273,19 @@
try:
clusterfolder = self.restrictedTraverse(path)
if not clusterfolder:
- raise
- except:
- luci_log.debug('The cluster folder for %s could not be found.' \
- % clustername)
+ raise Exception, 'no cluster folder at %s' % path
+ except Exception, e:
+ luci_log.debug('The cluster folder for %s could not be found: %s' \
+ % (clustername, str(e)))
return None
try:
nodes = clusterfolder.objectItems('Folder')
- except:
- luci_log.debug('No cluster nodes for %s were found' % clustername)
+ if not nodes or len(nodes) < 1:
+ raise Exception, 'no cluster nodes'
+ except Exception, e:
+ luci_log.debug('No cluster nodes for %s were found: %s' \
+ % (clustername, str(e)))
return None
found_one = False
@@ -2299,17 +2325,26 @@
return None
batch_number, result = nodeFence(rc, nodename)
- path = path + "/" + nodename_resolved
- nodefolder = self.restrictedTraverse(path)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('nodeFence: batch_number and/or result is None')
+ return None
+
+ path = str(path + "/" + nodename_resolved)
batch_id = str(batch_number)
- objname = nodename_resolved + "____flag"
- nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
- objpath = path + "/" + objname
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID,batch_id, "string")
- flag.manage_addProperty(TASKTYPE,NODE_FENCE, "string")
- flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' is being fenced", "string")
+ objname = str(nodename_resolved + "____flag")
+ objpath = str(path + "/" + objname)
+
+ try:
+ nodefolder = self.restrictedTraverse(path)
+ nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+ #Now we need to annotate the new DB object
+ flag = self.restrictedTraverse(objpath)
+ flag.manage_addProperty(BATCH_ID, batch_id, "string")
+ flag.manage_addProperty(TASKTYPE, NODE_FENCE, "string")
+ flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being fenced", "string")
+ except Exception, e:
+ luci_log.debug_verbose('nodeFence err: creating flags at %s: %s' \
+ % (path, str(e)))
response = request.RESPONSE
#Once again, is this correct? Should we re-direct to the cluster page?
@@ -2320,17 +2355,25 @@
#and propagate it. We will need two ricci agents for this task.
# Make sure we can find a second node before we hose anything.
- path = CLUSTER_FOLDER_PATH + clustername
+ path = str(CLUSTER_FOLDER_PATH + clustername)
try:
clusterfolder = self.restrictedTraverse(path)
if not clusterfolder:
- raise
- except:
+ raise Exception, 'no cluster folder at %s' % path
+ except Exception, e:
+ luci_log.debug_verbose('node delete error for cluster %s: %s' \
+ % (clustername, str(e)))
return None
- nodes = clusterfolder.objectItems('Folder')
- found_one = False
+ try:
+ nodes = clusterfolder.objectItems('Folder')
+ if not nodes or len(nodes) < 1:
+ raise Exception, 'no cluster nodes in DB'
+ except Exception, e:
+ luci_log.debug_verbose('node delete error for cluster %s: %s' \
+ % (clustername, str(e)))
+ found_one = False
for node in nodes:
if node[1].getId().find(nodename) != (-1):
continue
@@ -2339,38 +2382,59 @@
# in the cluster we believe it is.
try:
rc2 = RicciCommunicator(node[1].getId())
- if not rc2.authed():
- # set the flag
- rc2 = None
- if not rc2:
- raise
- found_one = True
- break
+ except Exception, e:
+ luci_log.info('ricci %s error: %s' % (node[0], str(e)))
+ continue
except:
continue
+ if not rc2.authed():
+ try:
+ setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ try:
+ snode = getStorageNode(self, node[0])
+ setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+ except:
+ pass
+
+ luci_log.debug_verbose('%s is not authed' % node[0])
+ rc2 = None
+ continue
+ else:
+ found_one = True
+ break
+
if not found_one:
+ luci_log.debug_verbose('unable to find ricci node to delete %s from %s' % (nodename, clustername))
return None
#First, delete cluster.conf from node to be deleted.
#next, have node leave cluster.
batch_number, result = nodeLeaveCluster(rc, purge=True)
+ if batch_number is None or result is None:
+ luci_log.debug_verbose('nodeDelete: batch_number and/or result is None')
+ return None
#It is not worth flagging this node in DB, as we are going
#to delete it anyway. Now, we need to delete node from model
#and send out new cluster.conf
delete_target = None
- try:
- nodelist = model.getNodes()
- find_node = lower(nodename)
- for n in nodelist:
+ nodelist = model.getNodes()
+ find_node = lower(nodename)
+ for n in nodelist:
+ try:
if lower(n.getName()) == find_node:
delete_target = n
break
- except:
- pass
+ except:
+ continue
if delete_target is None:
+ luci_log.debug_verbose('unable to find delete target for %s in %s' \
+ % (nodename, clustername))
return None
model.deleteNode(delete_target)
@@ -2386,6 +2450,7 @@
# propagate the new cluster.conf via the second node
batch_number, result = setClusterConf(rc2, str(str_buf))
if batch_number is None:
+ luci_log.debug_verbose('batch number is None after del node in NTP')
return None
#Now we need to delete the node from the DB
@@ -2396,19 +2461,24 @@
delnode = self.restrictedTraverse(del_path)
clusterfolder = self.restrictedTraverse(path)
clusterfolder.manage_delObjects(delnode[0])
- except:
- # XXX - we need to handle this
- pass
+ except Exception, e:
+ luci_log.debug_verbose('error deleting %s: %s' % (del_path, str(e)))
batch_id = str(batch_number)
objname = str(nodename_resolved + "____flag")
- clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
- #Now we need to annotate the new DB object
objpath = str(path + "/" + objname)
- flag = self.restrictedTraverse(objpath)
- flag.manage_addProperty(BATCH_ID,batch_id, "string")
- flag.manage_addProperty(TASKTYPE,NODE_DELETE, "string")
- flag.manage_addProperty(FLAG_DESC,"Deleting node \'" + nodename + "\'", "string")
+
+ try:
+ clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+ #Now we need to annotate the new DB object
+ flag = self.restrictedTraverse(objpath)
+ flag.manage_addProperty(BATCH_ID, batch_id, "string")
+ flag.manage_addProperty(TASKTYPE, NODE_DELETE, "string")
+ flag.manage_addProperty(FLAG_DESC, "Deleting node \'" + nodename + "\'", "string")
+ except Exception, e:
+ luci_log.debug_verbose('nodeDelete %s err setting flag at %s: %s' \
+ % (nodename, objpath, str(e)))
+
response = request.RESPONSE
response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
More information about the Cluster-devel
mailing list