[Cluster-devel] conga/luci/site/luci/Extensions LuciSyslog.py ...

rmccabe at sourceware.org
Tue Oct 24 16:36:24 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	conga
Branch: 	RHEL5
Changes by:	rmccabe at sourceware.org	2006-10-24 16:36:23

Modified files:
	luci/site/luci/Extensions: LuciSyslog.py cluster_adapters.py 
	                           conga_constants.py 
	                           homebase_adapters.py ricci_bridge.py 
	                           ricci_communicator.py 

Log message:
	see bz# 211375
	
	These patches add verbose logging that will allow QE to debug
	defects more easily (in many places there was previously no useful
	output for them to check).
	
	Two new global variables are introduced:
	
	LUCI_DEBUG_MODE = 1
	LUCI_DEBUG_VERBOSITY = 2
	
	Both of these need to be set to 0 for GA. Setting LUCI_DEBUG_VERBOSITY > 1
	causes ricci XML input and output to be logged using syslog
	LOG_DAEMON/LOG_DEBUG. syslogd is not configured to log this severity by
	default, so an entry along the lines of "*.debug /var/log/debug" should
	be added to /etc/syslog.conf if this facility is used.
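	
	As a rough usage sketch (not part of this commit; ricci_xml_str is a
	hypothetical variable standing in for the XML being sent or received),
	a caller that wants the verbose ricci XML logging would do:
	
		from LuciSyslog import LuciSyslog, LuciSyslogError
		
		try:
			luci_log = LuciSyslog()
		except LuciSyslogError:
			luci_log = None
		
		# emitted only when LUCI_DEBUG_MODE is set and
		# LUCI_DEBUG_VERBOSITY > 1
		if luci_log:
			luci_log.debug_verbose('ricci request: %s' % ricci_xml_str)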

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/LuciSyslog.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.2&r2=1.2.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.120.2.4&r2=1.120.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/conga_constants.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.19&r2=1.19.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/homebase_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.34&r2=1.34.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_bridge.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.30.2.1&r2=1.30.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_communicator.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.9&r2=1.9.2.1

--- conga/luci/site/luci/Extensions/LuciSyslog.py	2006/10/16 04:59:52	1.2
+++ conga/luci/site/luci/Extensions/LuciSyslog.py	2006/10/24 16:36:23	1.2.2.1
@@ -1,3 +1,4 @@
+from conga_constants import LUCI_DEBUG_MODE, LUCI_DEBUG_VERBOSITY
 from syslog import openlog, closelog, syslog, \
 		LOG_DAEMON, LOG_PID, LOG_NDELAY, LOG_INFO, \
 		LOG_WARNING, LOG_AUTH, LOG_DEBUG
@@ -12,34 +13,55 @@
 """
 class LuciSyslog:
 	def __init__(self):
+		self.__init = 0
 		try:
-			openlog('luci', LOG_DAEMON, LOG_PID | LOG_NDELAY)
+			openlog('luci', LOG_PID | LOG_NDELAY, LOG_DAEMON)
+			self.__init = 1
 		except:
 			raise LuciSyslogError, 'unable to set up syslog functionality.'
 
 	def info(self, msg):
+		if not self.__init:
+			return
 		try:
 			syslog(LOG_INFO, msg)
 		except:
 			raise LuciSyslogError, 'syslog info call failed'
 
 	def warn(self, msg):
+		if not self.__init:
+			return
 		try:
 			syslog(LOG_WARNING, msg)
 		except:
 			raise LuciSyslogError, 'syslog warn call failed'
 
 	def private(self, msg):
+		if not self.__init:
+			return
 		try:
 			syslog(LOG_AUTH, msg)
 		except:
 			raise LuciSyslogError, 'syslog private call failed'
 
+	def debug_verbose(self, msg):
+		if not LUCI_DEBUG_MODE or LUCI_DEBUG_VERBOSITY < 2 or not self.__init:
+			return
+		try:
+			syslog(LOG_DEBUG, msg)
+		except:
+			raise LuciSyslogError, 'syslog debug_verbose call failed'
+
 	def debug(self, msg):
+		if not LUCI_DEBUG_MODE or not self.__init:
+			return
 		try:
 			syslog(LOG_DEBUG, msg)
 		except:
 			raise LuciSyslogError, 'syslog debug call failed'
 
 	def close(self):
-		closelog()
+		try:
+			closelog()
+		except:
+			pass
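
Note on the LuciSyslog change above: Python's syslog module expects
openlog(ident[, logoption[, facility]]), so the old call passed LOG_DAEMON
where the option flags belong. A minimal sketch of the corrected call:

    from syslog import openlog, LOG_PID, LOG_NDELAY, LOG_DAEMON
    # argument order is ident, logoption, facility
    openlog('luci', LOG_PID | LOG_NDELAY, LOG_DAEMON)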
--- conga/luci/site/luci/Extensions/cluster_adapters.py	2006/10/24 01:42:52	1.120.2.4
+++ conga/luci/site/luci/Extensions/cluster_adapters.py	2006/10/24 16:36:23	1.120.2.5
@@ -22,7 +22,8 @@
 from clusterOS import resolveOSType
 from GeneralError import GeneralError
 from UnknownClusterError import UnknownClusterError
-from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated
+from homebase_adapters import nodeUnauth, nodeAuth, manageCluster, createClusterSystems, havePermCreateCluster, setNodeFlag, delNodeFlag, userAuthenticated, getStorageNode, getClusterNode
+from LuciSyslog import LuciSyslogError, LuciSyslog
 
 #Policy for showing the cluster chooser menu:
 #1) If there are no clusters in the ManagedClusterSystems
@@ -34,6 +35,11 @@
 
 CLUSTER_FOLDER_PATH = '/luci/systems/cluster/'
 
+try:
+	luci_log = LuciSyslog()
+except LuciSyslogError, e:
+	pass
+
 def validateClusterNodes(request, sessionData, clusterName, numStorage):
 	nodeList = list()
 	nodeHash = {}
@@ -205,11 +211,24 @@
 		batch_id_map = {}
 		rc = None
 		for i in nodeList:
+			success = True
 			try:
 				rc = RicciCommunicator(i['ricci_host'])
-				resultNode = rc.process_batch(batchNode, async=True)
-				batch_id_map[i['ricci_host']] = resultNode.getAttribute('batch_id')
+			except RicciError, e:
+				luci_log.debug('Unable to connect to the ricci agent on %s: %s'\
+					% (i['ricci_host'], str(e)))
+				success = False
 			except:
+				success = False
+
+			if success == True:
+				try:
+					resultNode = rc.process_batch(batchNode, async=True)
+					batch_id_map[i['ricci_host']] = resultNode.getAttribute('batch_id')
+				except:
+					success = False
+
+			if not success:
 				nodeUnauth(nodeList)
 				cluster_properties['isComplete'] = False
 				errors.append('An error occurred while attempting to add cluster node \"' + i['ricci_host'] + '\"')
@@ -294,6 +313,7 @@
 		clusterObj = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName)
 		cluster_os = clusterObj.manage_getProperty('cluster_os')
 		if not cluster_os:
+			luci_log.debug('The cluster OS property is missing for cluster ' + clusterName)
 			raise Exception, 'no cluster OS was found.'
 		try:
 			if len(filter(lambda x: x['os'] != cluster_os, nodeList)) > 0:
@@ -342,17 +362,28 @@
 	batch_id_map = {}
 	for i in nodeList:
 		clunode = nodeList[i]
+		success = True
 		try:
 			rc = RicciCommunicator(clunode['ricci_host'])
-			resultNode = rc.process_batch(batchNode, async=True)
-			batch_id_map[clunode['ricci_host']] = resultNode.getAttribute('batch_id')
-			messages.append('Cluster join initiated for host \"' + clunode['ricci_host'] + '\"')
 		except:
+			luci_log.info('Unable to connect to the ricci daemon on host ' + clunode['ricci_host'])
+			success = False
+
+		if success:
+			try:
+				resultNode = rc.process_batch(batchNode, async=True)
+				batch_id_map[clunode['ricci_host']] = resultNode.getAttribute('batch_id')
+			except:
+				success = False
+
+		if not success:
 			nodeUnauth(nodeList)
 			cluster_properties['isComplete'] = False
 			errors.append('An error occurred while attempting to add cluster node \"' + clunode['ricci_host'] + '\"')
 			return (False, {'errors': errors, 'requestResults': cluster_properties})
 
+		messages.append('Cluster join initiated for host \"' + clunode['ricci_host'] + '\"')
+
 	buildClusterCreateFlags(self, batch_id_map, clusterName)
 	return (True, {'errors': errors, 'messages': messages})
 
@@ -412,6 +443,7 @@
 		try:
 			resObj = resourceAddHandler[res_type](self, dummy_form)
 		except:
+			luci_log.debug('res type %s is invalid' % res_type)
 			resObj = None
 
 		if resObj is None:
@@ -1304,9 +1336,12 @@
 	try:
 		clusterfolder = self.restrictedTraverse(path)
 		if not clusterfolder:
+			luci_log.debug('cluster folder %s for %s is missing.' \
+				% (path, clustername))
 			raise
 		nodes = clusterfolder.objectItems('Folder')
 		if len(nodes) < 1:
+			luci_log.debug('no cluster nodes for %s found.' % clustername)
 			return None
 	except:
 		return None
@@ -1324,15 +1359,15 @@
 
 		try:
 			rc = RicciCommunicator(hostname)
-			if not rc:
-				raise
-		except:
-			#raise Exception, ('unable to communicate with the ricci agent on %s', hostname)
+		except RicciError, e:
+			luci_log.debug('ricci error: %s' % str(e))
 			continue
 
 		try:
 			clu_info = rc.cluster_info()
 			if cluname != lower(clu_info[0]) and cluname != lower(clu_info[1]):
+				luci_log.debug('%s reports it\'s in cluster %s:%s; we expect %s' \
+					 % (hostname, clu_info[0], clu_info[1], cluname))
 				# node reports it's in a different cluster
 				raise
 		except:
@@ -1340,7 +1375,9 @@
 
 		if rc.authed():
 			return rc
-		setNodeFlag(self, node[1], CLUSTER_NODE_NEED_AUTH)
+		setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+
+	luci_log.debug('no ricci agent could be found for cluster %s' % cluname)
 	return None
 
 def getRicciAgentForCluster(self, req):
@@ -1352,11 +1389,13 @@
 			if not clustername:
 				raise
 		except:
+			luci_log.debug('no cluster name was specified in getRicciAgentForCluster')
 			return None
 	return getRicciAgent(self, clustername)
 
 def getClusterStatus(self, rc):
 	clustatus_batch ='<?xml version="1.0" ?><batch><module name="cluster"><request API_version="1.0"><function_call name="status"/></request></module></batch>'
+
 	try:
 		clustatuscmd_xml = minidom.parseString(clustatus_batch).firstChild
 	except:
@@ -1364,6 +1403,9 @@
 
 	try:
 		ricci_xml = rc.process_batch(clustatuscmd_xml, async=False)
+	except RicciError, e:
+		luci_log.debug('ricci error: %s' % str(e))
+		return {}
 	except:
 		return {}
 
@@ -1968,6 +2009,7 @@
 		try:
 			clustername = request.form['clusterName']
 		except:
+			luci_log.debug('missing cluster name for NTP')
 			return None
 
 	try:
@@ -1976,20 +2018,21 @@
 		try:
 			nodename = request.form['nodename']
 		except:
+			luci_log.debug('missing node name for NTP')
 			return None
 
 	try:
 		task = request['task']
-		if not task:
-			raise
 	except KeyError, e:
 		try:
 			task = request.form['task']
 		except:
+			luci_log.debug('missing task for NTP')
 			return None
 
 	nodename_resolved = resolve_nodename(self, clustername, nodename)
 	if not nodename_resolved or not nodename or not task or not clustername:
+		luci_log.debug('resolve_nodename failed for NTP')
 		return None
 
 	if task != NODE_FENCE:
@@ -1998,33 +2041,81 @@
 		# to be performed.
 		try:
 			rc = RicciCommunicator(nodename_resolved)
-			# XXX - check the cluster
-			if not rc.authed():
-				# set the flag
-				rc = None
-
-			if not rc:
-				raise
+		except RicciError, e:
+			luci_log.debug('ricci error from %s: %s' \
+				% (nodename_resolved, str(e)))
+			return None
 		except:
 			return None
 
+		cluinfo = rc.cluster_info()
+		if not cluinfo[0] and not cluinfo[1]:
+			luci_log.debug('host %s not in a cluster (expected %s)' \
+				% (nodename_resolved, clustername))
+			return None
+
+		cname = lower(clustername)
+		if cname != lower(cluinfo[0]) and cname != lower(cluinfo[1]):
+			luci_log.debug('host %s in unknown cluster %s:%s (expected %s)' \
+				% (nodename_resolved, cluinfo[0], cluinfo[1], clustername))
+			return None
+
+		if not rc.authed():
+			rc = None
+			try:
+				snode = getStorageNode(self, nodename)
+				setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+			except:
+				# we'll hit it again, and try again then
+				pass
+
+			try:
+				cnode = getClusterNode(self, nodename, clustername)
+				setNodeFlag(cnode, CLUSTER_NODE_NEED_AUTH)
+			except:
+				# we'll hit it again, and try again then
+				pass
+
+		if rc is None:
+			return None
+
 	if task == NODE_LEAVE_CLUSTER:
-		batch_number, result = nodeLeaveCluster(rc)
+		path = str(CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved)
 
-		path = CLUSTER_FOLDER_PATH + clustername + "/" + nodename_resolved
-		nodefolder = self.restrictedTraverse(path)
+		try:
+			nodefolder = self.restrictedTraverse(path)
+			if not nodefolder:
+				raise Exception, 'cannot find directory at %s' % path
+		except Exception, e:
+			luci_log.debug('node_leave_cluster err: %s' % str(e))
+			return None
+
+		objname = str(nodename_resolved + "____flag")
+
+		fnpresent = noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved)
+		if fnpresent is None:
+			luci_log.debug('An error occurred while checking flags for %s' \
+				% nodename_resolved)
+			return None
+
+		if fnpresent == False:
+			luci_log.debug('flags are still present for %s -- bailing out' \
+				% nodename_resolved)
+			return None
+
+		batch_number, result = nodeLeaveCluster(rc)
 		batch_id = str(batch_number)
-		objname = nodename_resolved + "____flag"
-		if noNodeFlagsPresent(self, nodefolder, objname, nodename_resolved) == False:
-			raise UnknownClusterError("Fatal", "An unfinished task flag exists for node %s" % nodename)
 
-		nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
-		#Now we need to annotate the new DB object
-		objpath = path + "/" + objname
-		flag = self.restrictedTraverse(objpath)
-		flag.manage_addProperty(BATCH_ID,batch_id, "string")
-		flag.manage_addProperty(TASKTYPE,NODE_LEAVE_CLUSTER, "string")
-		flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' leaving cluster", "string")
+		objpath = str(path + "/" + objname)
+		try:
+			nodefolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+			#Now we need to annotate the new DB object
+			flag = self.restrictedTraverse(objpath)
+			flag.manage_addProperty(BATCH_ID, batch_id, "string")
+			flag.manage_addProperty(TASKTYPE,NODE_LEAVE_CLUSTER, "string")
+			flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' leaving cluster", "string")
+		except:
+			luci_log.debug('An error occurred while setting flag %s' % objpath)
 
 		response = request.RESPONSE
 		#Is this correct? Should we re-direct to the cluster page?
@@ -2056,40 +2147,64 @@
 		#Now we need to annotate the new DB object
 		objpath = path + "/" + objname
 		flag = self.restrictedTraverse(objpath)
-		flag.manage_addProperty(BATCH_ID,batch_id, "string")
-		flag.manage_addProperty(TASKTYPE,NODE_REBOOT, "string")
-		flag.manage_addProperty(FLAG_DESC,"Node \'" + nodename + "\' is being rebooted", "string")
+		flag.manage_addProperty(BATCH_ID, batch_id, "string")
+		flag.manage_addProperty(TASKTYPE, NODE_REBOOT, "string")
+		flag.manage_addProperty(FLAG_DESC, "Node \'" + nodename + "\' is being rebooted", "string")
 
 		response = request.RESPONSE
 		#Once again, is this correct? Should we re-direct to the cluster page?
 		response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clustername)
 	elif task == NODE_FENCE:
 		#here, we DON'T want to open connection to node to be fenced.
-		path = CLUSTER_FOLDER_PATH + clustername
+		path = str(CLUSTER_FOLDER_PATH + clustername)
 		try:
 			clusterfolder = self.restrictedTraverse(path)
 			if not clusterfolder:
 				raise
 		except:
+			luci_log.debug('The cluster folder for %s could not be found.' \
+				 % clustername)
+			return None
+
+		try:
+			nodes = clusterfolder.objectItems('Folder')
+		except:
+			luci_log.debug('No cluster nodes for %s were found' % clustername)
 			return None
 
-		nodes = clusterfolder.objectItems('Folder')
 		found_one = False
 		for node in nodes:
-			if node[1].getID().find(nodename) != (-1):
+			if node[1].getId().find(nodename) != (-1):
 				continue
 
 			try:
 				rc = RicciCommunicator(node[1].getId())
-				if not rc.authed():
-					# set the node flag
-					rc = None
 				if not rc:
-					raise
-				found_one = True
-				break
+					continue
+			except RicciError, e:
+				luci_log.debug('ricci error for host %s: %s' \
+					% (node[0], str(e)))
+				continue
 			except:
 				continue
+
+			if not rc.authed():
+				rc = None
+				try:
+					snode = getStorageNode(self, node[1].getId())
+					setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+				except:
+					pass
+
+				try:
+					setNodeFlag(node[1], CLUSTER_NODE_NEED_AUTH)
+				except:
+					pass
+
+				continue
+			found_one = True
+			break
+
 		if not found_one:
 			return None
 
@@ -2149,7 +2264,7 @@
 
 		#First, delete cluster.conf from node to be deleted.
 		#next, have node leave cluster.
-		batch_number, result = nodeLeaveCluster(rc)
+		batch_number, result = nodeLeaveCluster(rc, purge=True)
 
 		#It is not worth flagging this node in DB, as we are going
 		#to delete it anyway. Now, we need to delete node from model
@@ -2501,13 +2616,21 @@
 	except:
 		return "Unable to resolve node name %s to retrieve logging information" % nodename_resolved
 
+	if not rc.authed():
+		try:
+			snode = getStorageNode(self, nodename)
+			setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+		except:
+			pass
+		return "Luci is not authenticated to node %s. Please reauthenticate first." % nodename
+
 	return getNodeLogs(rc)
 
 def processXenVM(self, req):
   model = req.SESSION.get('model')
   isNew = False
   try:
-    xenvmname = req	['servicename']
+    xenvmname = req['servicename']
   except KeyError, e:
     isNew = True
   
@@ -2530,14 +2653,27 @@
     
 
 def getXenVMInfo(self, model, request):
-  try:
-    xenvmname = request['servicename']
-  except KeyError, e:
-    return {}
-  
-  xenvm = model.retrieveXenVMsByName(xenvmname)
-  map = xenvm.getAttributes()
-  return map
+	try:
+		xenvmname = request['servicename']
+	except KeyError, e:
+		try:
+			xenvmname = request.form['servicename']
+		except:
+			luci_log.debug_verbose('servicename is missing from request')
+			return {}
+	except:
+		luci_log.debug_verbose('servicename is missing from request')
+		return {}
+
+	try:  
+		xenvm = model.retrieveXenVMsByName(xenvmname)
+	except:
+		luci_log.debug('An error occurred while attempting to get VM %s' \
+			% xenvmname)
+		return {}
+
+	map = xenvm.getAttributes()
+	return map
 
 def isClusterBusy(self, req):
   items = None
@@ -2556,21 +2692,30 @@
       try:
         cluname = req.form['clusterName']
       except:
+        luci_log.debug_verbose('No cluster name -- returning empty map')
         return map
 
   path = CLUSTER_FOLDER_PATH + cluname
   try:
     clusterfolder = self.restrictedTraverse(str(path))
     if not clusterfolder:
-      raise
-  except:
+      raise Exception, 'clusterfolder is None'
+  except Exception, e:
+    luci_log.debug_verbose('cluster %s [%s] folder missing: %s -- returning empty map' % (cluname, path, str(e)))
     return map
+  except:
+    luci_log.debug_verbose('cluster %s [%s] folder missing: returning empty map' % (cluname, path))
+    return map
 
   try:
     items = clusterfolder.objectItems('ManagedSystem')
-    if len(items) == 0:
+    if not items or len(items) < 1:
       return map  #This returns an empty map, and should indicate not busy
+  except Exception, e:
+    luci_log.debug('An error occurred while looking for cluster %s flags at path %s: %s' % (cluname, path, str(e)))
+    return map
   except:
+    luci_log.debug('An error occurred while looking for cluster %s flags at path %s' % (cluname, path))
     return map
     
   map['busy'] = "true"
@@ -2601,14 +2745,30 @@
       node_report['desc'] = item[1].getProperty(FLAG_DESC) 
       batch_xml = None
       ricci = item[0].split("____") #This removes the 'flag' suffix
+
       try:
         rc = RicciCommunicator(ricci[0])
-        batch_xml = rc.batch_report(item[1].getProperty(BATCH_ID))
-        if batch_xml != None:
-          (creation_status, total) = batch_status(batch_xml)
+      except RicciError, e:
+        rc = None
+        luci_log.debug_verbose('ricci returned error in iCB for %s: %s' \
+          % (cluname, str(e)))
       except:
-        creation_status = RICCI_CONNECT_FAILURE  #No contact with ricci (-1000)
-        batch_xml = "bloody_failure" #set to avoid next if statement
+        rc = None
+        luci_log.info('ricci connection failed for cluster %s' % cluname)
+
+      if rc is not None:
+        try:
+          batch_xml = rc.batch_report(item[1].getProperty(BATCH_ID))
+          if batch_xml != None:
+            (creation_status, total) = batch_status(batch_xml)
+          else:
+            luci_log.debug_verbose('batch report for cluster %s, item %s is None' % (cluname, item[0]))
+        except:
+          creation_status = RICCI_CONNECT_FAILURE  #No contact with ricci (-1000)
+          batch_xml = "bloody_failure" #set to avoid next if statement
+      else:
+        creation_status = RICCI_CONNECT_FAILURE  #No contact with ricci (-1000)
+        batch_xml = "bloody_failure" #set to avoid next if statement
 
       if batch_xml == None:  #The job is done and gone from queue
         if redirect_message == False: #We have not displayed this message yet
@@ -2617,6 +2777,8 @@
           node_report['errormessage'] = ""
           nodereports.append(node_report)
           redirect_message = True
+
+        luci_log.debug_verbose('batch job is done -- deleting %s' % item[0])
         clusterfolder.manage_delObjects(item[0])
         continue
 
@@ -2667,7 +2829,10 @@
           node_report['statusmessage'] = "Node created successfully" + REDIRECT_MSG
           node_report['statusindex'] = creation_status
           nodereports.append(node_report)
-          clusterfolder.manage_delObjects(item[0])
+          try:
+              clusterfolder.manage_delObjects(item[0])
+          except Exception, e:
+              luci_log.info('Unable to delete %s: %s' % (item[0], str(e)))
           continue
         else:
           map['busy'] = "true"
@@ -2690,13 +2855,17 @@
       if finished == True:
         node_report['desc'] = item[1].getProperty(FLAG_DESC) + REDIRECT_MSG
         nodereports.append(node_report)
-        clusterfolder.manage_delObjects(item[0])
+        try:
+            clusterfolder.manage_delObjects(item[0])
+        except Exception, e:
+            luci_log.info('Unable to delete %s: %s' % (item[0], str(e)))
       else:
         node_report = {}
         map['busy'] = "true"
         isBusy = True
         node_report['desc'] = item[1].getProperty(FLAG_DESC)
         nodereports.append(node_report)
+
   if isBusy:
     part1 = req['ACTUAL_URL']
     part2 = req['QUERY_STRING']
@@ -2716,12 +2885,14 @@
 
 def getClusterOS(self, rc):
 	map = {}
+
 	try:
 		os_str = resolveOSType(rc.os())
 		map['os'] = os_str
 		map['isVirtualized'] = rc.dom0()
 	except:
 		# default to rhel5 if something crazy happened.
+		luci_log.debug('An error occurred while attempting to get OS/Virt info for %s -- defaulting to rhel5/False' % rc.hostname())
 		map['os'] = 'rhel5'
 		map['isVirtualized'] = False
 	return map
@@ -2736,8 +2907,10 @@
 		try:
 			cluname = request.form['clustername']
 		except:
+			luci_log.debug_verbose('getResourcesInfo missing cluster name')
 			return resList
 	except:
+		luci_log.debug_verbose('getResourcesInfo missing cluster name')
 		return resList
 
 	for item in modelb.getResources():
@@ -2757,8 +2930,10 @@
 		try:
 			name = request.form['resourcename']
 		except:
+			luci_log.debug_verbose('getResourceInfo missing res name')
 			return {}
 	except:
+		luci_log.debug_verbose('getResourceInfo missing res name')
 		return {}
 
 	try:
@@ -2767,19 +2942,22 @@
 		try:
 			cluname = request.form['clustername']
 		except:
+			luci_log.debug_verbose('getResourceInfo missing cluster name')
 			return {}
 	except:
+		luci_log.debug_verbose('getResourceInfo missing cluster name')
 		return {}
 
 	try:
 		baseurl = request['URL']
 	except:
+		luci_log.debug_verbose('getResourceInfo missing URL')
 		return {}
 
 	for res in modelb.getResources():
 		if res.getName() == name:
-			resMap = {}
 			try:
+				resMap = {}
 				resMap['name'] = res.getName()
 				resMap['type'] = res.resource_type
 				resMap['tag_name'] = res.TAG_NAME
@@ -2787,7 +2965,7 @@
 				resMap['cfgurl'] = baseurl + "?" + "clustername=" + cluname + "&resourcename=" + res.getName() + "&pagetype=" + RESOURCE_CONFIG
 				return resMap
 			except:
-				return {}
+				continue
 
 def delResource(self, rc, request):
 	errstr = 'An error occurred while attempting to set the cluster.conf'
@@ -2795,11 +2973,19 @@
 	try:
 		modelb = request.SESSION.get('model')
 	except:
+		luci_log.debug_verbose('delResource unable to extract model from SESSION')
 		return errstr
 
 	try:
 		name = request['resourcename']
 	except KeyError, e:
+		try:
+			name = request.form['resourcename']
+		except:
+			luci_log.debug_verbose('delResource missing resname %s' % str(e))
+			return errstr + ': ' + str(e)
+	except Exception, e:
+		luci_log.debug_verbose('delResource missing resname')
 		return errstr + ': ' + str(e)
 
 	try:
@@ -2808,6 +2994,7 @@
 		try:
 			clustername = request.form['clustername']
 		except:
+			luci_log.debug_verbose('delResource missing cluster name')
 			return errstr + ': could not determine the cluster name.'
 
 	try:
@@ -2828,6 +3015,7 @@
 			break
 
 	if not found:
+		luci_log.debug_verbose('delResource cannot find resource %s' % name)
 		return errstr + ': the specified resource was not found.'
 
 	try:
@@ -2835,10 +3023,12 @@
 		if not conf:
 			raise
 	except:
+		luci_log.debug_verbose('exportModelAsString failed')
 		return errstr
 
 	batch_number, result = setClusterConf(str(conf))
 	if batch_number is None or result is None:
+		luci_log.debug_verbose('missing batch and/or result from setClusterConf')
 		return errstr
 
 	modelstr = ""
@@ -2846,13 +3036,20 @@
 	clusterfolder = self.restrictedTraverse(path)
 	batch_id = str(batch_number)
 	objname = str(ragent) + '____flag'
-	clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
-	#Now we need to annotate the new DB object
 	objpath = str(path + '/' + objname)
-	flag = self.restrictedTraverse(objpath)
-	flag.manage_addProperty(BATCH_ID, batch_id, "string")
-	flag.manage_addProperty(TASKTYPE, RESOURCE_REMOVE, "string")
-	flag.manage_addProperty(FLAG_DESC, "Removing Resource \'" + request['resourcename'] + "\'", "string")
+
+	try:
+		clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+		#Now we need to annotate the new DB object
+		flag = self.restrictedTraverse(objpath)
+		flag.manage_addProperty(BATCH_ID, batch_id, "string")
+		flag.manage_addProperty(TASKTYPE, RESOURCE_REMOVE, "string")
+		flag.manage_addProperty(FLAG_DESC, "Removing Resource \'" + request['resourcename'] + "\'", "string")
+	except Exception, e:
+		luci_log.debug('An error occurred while setting flag %s: %s' \
+			% (objname, str(e)))
+	except:
+		luci_log.debug('An error occurred while setting flag %s' % objname)
 
 	response = request.RESPONSE
 	response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
@@ -2860,8 +3057,8 @@
 def addIp(request, form=None):
 	if form is None:
 		form = request.form
-	modelb = request.SESSION.get('model')
 
+	modelb = request.SESSION.get('model')
 	if not modelb or not form:
 		return None
 
@@ -2976,6 +3173,7 @@
 def addGfs(request, form=None):
 	if form is None:
 		form = request.form
+
 	modelb = request.SESSION.get('model')
 	if not modelb:
 		return None
@@ -2986,13 +3184,21 @@
 			if not oldname:
 				raise KeyError('oldname is blank.')
 			res = getResourceForEdit(modelb, oldname)
+			if not res:
+				luci_log.debug('resource %s was not found for editing' % oldname)
+				return None
 		except KeyError, e:
+			luci_log.debug('resource %s was not found for editing: %s' \
+				% (oldname, str(e)))
 			return None
 	else:
-		res = apply(Clusterfs)
-
-	if not res:
-		return None
+		try:
+			res = apply(Clusterfs)
+			if not res:
+				raise
+		except:
+			luci_log.debug('Error creating a new Clusterfs resource')
+			return None
 
 	# XXX: sanity check these fields
 	try:
@@ -3001,30 +3207,35 @@
 			raise
 		res.attr_hash['name'] = name
 	except:
+		luci_log.debug_verbose('name is missing in clusterfs res')
 		return None
 
 	try:
 		mountpoint = form['mountpoint'].strip()
 		res.attr_hash['mountpoint'] = mountpoint
 	except:
+		luci_log.debug_verbose('mountpoint is missing in clusterfs res')
 		return None
 
 	try:
 		device = form['device'].strip()
 		res.attr_hash['device'] = device
 	except:
+		luci_log.debug_verbose('device is missing in clusterfs res')
 		return None
 
 	try:
 		options = form['options'].strip()
 		res.attr_hash['options'] = options
 	except:
+		luci_log.debug_verbose('options is missing in clusterfs res')
 		return None
 
 	try:
 		fsid = form['fsid'].strip()
 		res.attr_hash['fsid'] = fsid
 	except:
+		luci_log.debug_verbose('fsid is missing in clusterfs res')
 		return None
 
 	if form.has_key('forceunmount'):
@@ -3280,16 +3491,20 @@
 	try:
 		mb_nodes = modelb.getNodes()
 		if not mb_nodes or not len(mb_nodes):
-			raise
-	except:
-		return 'Unable to find cluster nodes for ' + clusterName
+			raise Exception, 'node list is empty'
+	except Exception, e:
+		luci_log.debug_verbose('no model builder nodes found for %s: %s' \
+				% (clusterName, str(e)))
+		return 'Unable to find cluster nodes for %s' % clusterName
 
 	try:
 		cluster_node = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName)
 		if not cluster_node:
-			raise
-	except:
-		return 'Unable to find an entry for ' + clusterName + ' in the Luci database.'
+			raise Exception, 'cluster node is none'
+	except Exception, e:
+		luci_log.debug('cannot find cluster node for %s: %s' \
+			% (clusterName, str(e)))
+		return 'Unable to find an entry for %s in the Luci database.' % clusterName
 
 	try:
 		db_nodes = map(lambda x: x[0], cluster_node.objectItems('Folder'))
@@ -3376,9 +3591,11 @@
 	try:
 		ragent = rc.hostname()
 		if not ragent:
+			luci_log.debug('missing hostname')
 			raise
 		batch_number, result = setClusterConf(str(conf))
 		if batch_number is None or result is None:
+			luci_log.debug('missing batch_number or result')
 			raise
 	except:
 		return "Some error occured in setClusterConf\n"
@@ -3387,17 +3604,24 @@
 	clusterfolder = self.restrictedTraverse(path)
 	batch_id = str(batch_number)
 	objname = str(ragent + '____flag')
-	clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
-	#Now we need to annotate the new DB object
 	objpath = str(path + '/' + objname)
-	flag = self.restrictedTraverse(objpath)
-	flag.manage_addProperty(BATCH_ID, batch_id, "string")
-	flag.manage_addProperty(TASKTYPE, RESOURCE_ADD, "string")
 
-	if type != 'ip':
-		flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + request.form['resourceName'] + "\'", "string")
-	else:
-		flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + res.attr_hash['address'] + "\'", "string")
+	try:
+		clusterfolder.manage_addProduct['ManagedSystem'].addManagedSystem(objname)
+		#Now we need to annotate the new DB object
+		flag = self.restrictedTraverse(objpath)
+		flag.manage_addProperty(BATCH_ID, batch_id, "string")
+		flag.manage_addProperty(TASKTYPE, RESOURCE_ADD, "string")
+
+		if type != 'ip':
+			flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + request.form['resourceName'] + "\'", "string")
+		else:
+			flag.manage_addProperty(FLAG_DESC, "Creating New Resource \'" + res.attr_hash['address'] + "\'", "string")
+	except Exception, e:
+		try:
+			luci_log.info('Unable to create flag %s: %s' % (objpath, str(e)))
+		except:
+			pass
 
 	response = request.RESPONSE
 	response.redirect(request['HTTP_REFERER'] + "&busyfirst=true")
@@ -3410,48 +3634,92 @@
 		if res.getName() == name:
 			resPtr.removeChild(res)
 			return res
+
+	luci_log.debug_verbose('unable to find resource \"%s\"' % name)
 	raise KeyError, name
 
 def appendModel(request, model):
 	try:
 		request.SESSION.set('model', model)
 	except:
-		pass
-
-	return False
+		luci_log.debug_verbose('Appending model to request failed')
+		return False
 
 def resolve_nodename(self, clustername, nodename):
-	path = CLUSTER_FOLDER_PATH + clustername
-	clusterfolder = self.restrictedTraverse(path)
-	objs = clusterfolder.objectItems('Folder')
+	path = str(CLUSTER_FOLDER_PATH + clustername)
+
+	try:
+		clusterfolder = self.restrictedTraverse(path)
+		objs = clusterfolder.objectItems('Folder')
+	except Exception, e:
+		luci_log.info('resolve_nodename failed for %s/%s: %s' \
+			% (nodename, clustername, str(e)))
+		return None
 	for obj in objs:
 		if obj[0].find(nodename) != (-1):
 			return obj[0]
-	raise
+
+	luci_log.info('resolve_nodename failed for %s/%s' % (nodename, clustername))
+	return None
 
 def noNodeFlagsPresent(self, nodefolder, flagname, hostname):
-	items = nodefolder.objectItems('ManagedSystem')
+	try:
+		items = nodefolder.objectItems('ManagedSystem')
+	except:
+		luci_log.debug('An error occurred while trying to list flags for cluster ' + nodefolder[0])
+		return None
 
 	for item in items:
 		if item[0] != flagname:
 			continue
 
 		#a flag already exists... try to delete it
-		rc = RicciCommunicator(hostname)
+		try:
+			rc = RicciCommunicator(hostname)
+		except RicciError, e:
+			luci_log.info('Unable to connect to the ricci daemon: %s' % str(e))
+			return None
+
+		if not rc.authed():
+			try:
+				snode = getStorageNode(self, hostname)
+				setNodeFlag(snode, CLUSTER_NODE_NEED_AUTH)
+			except:
+				pass
+			luci_log.info('Node %s is not authenticated' % item[0])
+			return None
+
 		finished = checkBatch(rc, item[1].getProperty(BATCH_ID))
 		if finished == True:
 			try:
 				nodefolder.manage_delObjects(item[0])
-			except:
-				return False
+			except Exception, e:
+				luci_log.info('manage_delObjects for %s failed: %s' \
+					% (item[0], str(e)))
+				return None
 			return True
 		else:
 			#Not finished, so cannot remove flag
 			return False
+
 	return True
 
-def getModelBuilder(rc,isVirtualized):
-	cluster_conf_node = getClusterConf(rc)
-	modelb = ModelBuilder(0, None, None, cluster_conf_node)
+def getModelBuilder(rc, isVirtualized):
+	try:
+		cluster_conf_node = getClusterConf(rc)
+		if not cluster_conf_node:
+			raise Exception, 'cluster_conf_node is None'
+	except:
+		luci_log.debug('unable to get cluster_conf_node in getModelBuilder')
+		return None
+
+	try:
+		modelb = ModelBuilder(0, None, None, cluster_conf_node)
+	except Exception, e:
+		try:
+			luci_log.debug('An error occurred while trying to get modelb for conf \"%s\": %s' % (cluster_conf_node.toxml(), str(e)))
+		except:
+			pass
+		return None
 	modelb.setIsVirtualized(isVirtualized)
 	return modelb
--- conga/luci/site/luci/Extensions/conga_constants.py	2006/10/16 20:46:46	1.19
+++ conga/luci/site/luci/Extensions/conga_constants.py	2006/10/24 16:36:23	1.19.2.1
@@ -113,3 +113,6 @@
 CLUSTER_NODE_ADDED = 0x04
 
 PLONE_ROOT='luci'
+
+LUCI_DEBUG_MODE = 1
+LUCI_DEBUG_VERBOSITY = 2
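
Per the log message above, these defaults are for debugging only; a GA
build would ship with both disabled:

    LUCI_DEBUG_MODE = 0
    LUCI_DEBUG_VERBOSITY = 0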
--- conga/luci/site/luci/Extensions/homebase_adapters.py	2006/10/16 20:46:46	1.34
+++ conga/luci/site/luci/Extensions/homebase_adapters.py	2006/10/24 16:36:23	1.34.2.1
@@ -1367,7 +1367,7 @@
 		pass
 	return False
 
-def setNodeFlag(self, node, flag_mask):
+def setNodeFlag(node, flag_mask):
 	try:
 		flags = node.getProperty('flags')
 		node.manage_changeProperties({ 'flags': flags | flag_mask })
@@ -1377,7 +1377,7 @@
 		except:
 			pass
 
-def delNodeFlag(self, node, flag_mask):
+def delNodeFlag(node, flag_mask):
 	try:
 		flags = node.getProperty('flags')
 		if flags & flag_mask != 0:
--- conga/luci/site/luci/Extensions/ricci_bridge.py	2006/10/23 19:31:15	1.30.2.1
+++ conga/luci/site/luci/Extensions/ricci_bridge.py	2006/10/24 16:36:23	1.30.2.2
@@ -298,7 +298,7 @@
 		return (None, None)
 	return batchAttemptResult(doc)
 
-def nodeLeaveCluster(rc, cluster_shutdown=False, purge=True):
+def nodeLeaveCluster(rc, cluster_shutdown=False, purge=False):
 	cshutdown = 'false'
 	if cluster_shutdown == True:
 		cshutdown = 'true'
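
Note the flipped default: purge now defaults to False, so a plain
nodeLeaveCluster(rc) only makes the node leave the cluster; the node
deletion path must opt in explicitly, as cluster_adapters now does:

    # leave the cluster, keeping cluster.conf on the node
    batch_number, result = nodeLeaveCluster(rc)
    # node deletion: also purge the cluster configuration from the node
    batch_number, result = nodeLeaveCluster(rc, purge=True)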
--- conga/luci/site/luci/Extensions/ricci_communicator.py	2006/10/16 07:39:27	1.9
+++ conga/luci/site/luci/Extensions/ricci_communicator.py	2006/10/24 16:36:23	1.9.2.1
@@ -1,17 +1,21 @@
-
-
 from time import *
 from socket import *
 import xml
 import xml.dom
 from xml.dom import minidom
-
-
+from LuciSyslog import LuciSyslog
 from HelperFunctions import access_to_host_allowed
 
-
 CERTS_DIR_PATH = '/var/lib/luci/var/certs/'
 
+try:
+    luci_log = LuciSyslog()
+except:
+    pass
+
+class RicciError(Exception):
+    pass
+
 class RicciCommunicator:
     def __init__(self, hostname, port=11111):
         self.__hostname = hostname
@@ -21,16 +25,32 @@
         self.__cert_file = CERTS_DIR_PATH + 'cacert.pem'
         
         # socket
-        sock = socket(AF_INET, SOCK_STREAM)
-        sock.settimeout(2.0)
-        sock.connect((self.__hostname, self.__port))
-        self.ss = ssl(sock, self.__privkey_file, self.__cert_file)
-        sock.settimeout(600.0) # 10 minutes
-        # TODO: data transfer timeout should be much less, 
-        # leave until all calls are async ricci calls
+        try:
+            sock = socket(AF_INET, SOCK_STREAM)
+            sock.settimeout(2.0)
+            sock.connect((self.__hostname, self.__port))
+        except Exception, e:
+            raise RicciError, 'Error connecting to %s:%d: %s' \
+                    % (self.__hostname, self.__port, str(e))
+        luci_log.debug_verbose('Connected to %s:%d' \
+            % (self.__hostname, self.__port))
+        try:
+            self.ss = ssl(sock, self.__privkey_file, self.__cert_file)
+            # TODO: data transfer timeout should be much less, 
+            # leave until all calls are async ricci calls
+            sock.settimeout(600.0) # 10 minutes
+        except Exception, e:
+            raise RicciError, 'Error setting up SSL for connection to %s: %s' \
+                % (self.__hostname, str(e))
         
         # receive ricci header
         hello = self.__receive()
+        try:
+            luci_log.debug_verbose('Received header from %s: \"%s\"' \
+                % (self.__hostname, hello.toxml()))
+        except:
+            pass
+
         self.__authed = hello.firstChild.getAttribute('authenticated') == 'true'
         self.__cluname = hello.firstChild.getAttribute('clustername')
         self.__clualias = hello.firstChild.getAttribute('clusteralias')
@@ -42,21 +62,35 @@
     
     
     def hostname(self):
+        luci_log.debug_verbose('[auth %d] reported hostname = %s' \
+            % (self.__authed, self.__hostname))
         return self.__hostname
     def authed(self):
+        luci_log.debug_verbose('reported authed = %d for %s' \
+            % (self.__authed, self.__hostname))
         return self.__authed
     def system_name(self):
+        luci_log.debug_verbose('[auth %d] reported system_name = %s for %s' \
+            % (self.__authed, self.__reported_hostname, self.__hostname))
         return self.__reported_hostname
     def cluster_info(self):
+        luci_log.debug_verbose('[auth %d] reported cluster_info = (%s,%s) for %s' \
+            % (self.__authed, self.__cluname, self.__clualias, self.__hostname))
         return (self.__cluname, self.__clualias)
     def os(self):
+        luci_log.debug_verbose('[auth %d] reported os = %s for %s' \
+            % (self.__authed, self.__os, self.__hostname))
         return self.__os
     def dom0(self):
+        luci_log.debug_verbose('[auth %d] reported dom0 = %s for %s' \
+            % (self.__authed, self.__dom0, self.__hostname))
         return self.__dom0
     
     
     def auth(self, password):
         if self.authed():
+            luci_log.debug_verbose('already authenticated to %s' \
+                % self.__hostname)
             return True
         
         # send request
@@ -71,8 +105,9 @@
         # receive response
         resp = self.__receive()
         self.__authed = resp.firstChild.getAttribute('authenticated') == 'true'
-        
-        return self.authed()
+
+        luci_log.debug_verbose('auth call returning %d' % self.__authed)
+        return self.__authed
 
 
     def unauth(self):
@@ -83,15 +118,33 @@
         doc.appendChild(ricci)
         self.__send(doc)
         resp = self.__receive()
-        ret = resp.firstChild.getAttribute('success')
-        if ret != '0':
-            raise Exception(str(ret))
+
+        luci_log.debug_verbose('trying to unauthenticate to %s' \
+            % self.__hostname)
+
+        try:
+            ret = resp.firstChild.getAttribute('success')
+            luci_log.debug_verbose('unauthenticate returned %s for %s' \
+                % (ret, self.__hostname))
+            if ret != '0':
+                raise Exception, 'Invalid response'
+        except:
+            errstr = 'Error unauthenticating to host %s: %s' \
+                        % (self.__hostname, str(ret))
+            luci_log.debug(errstr)
+            raise RicciError, errstr
         return True
 
 
     def process_batch(self, batch_xml, async=False):
+        try:
+            luci_log.debug_verbose('auth=%d to %s for batch %s [async=%d]' \
+                % (self.__authed, self.__hostname, batch_xml.toxml(), async))
+        except:
+            pass
+
         if not self.authed():
-            raise 'not authenticated'
+            raise RicciError, 'not authenticated to host %s' % self.__hostname
         
         # construct request
         doc = minidom.Document()
@@ -108,13 +161,26 @@
         ricci.appendChild(batch_xml.cloneNode(True))
         
         # send request
-        self.__send(doc)
-        
+        try:
+            self.__send(doc)
+        except Exception, e:
+            luci_log.debug('Error sending XML \"%s\" to host %s' \
+                % (doc.toxml(), self.__hostname))
+            raise RicciError, 'Error sending XML to host %s: %s' \
+                    % (self.__hostname, str(e))
         
         # receive response
         doc = self.__receive()
+        try:
+            luci_log.debug_verbose('received from %s XML \"%s\"' \
+                % (self.__hostname, doc.toxml()))
+        except:
+            pass
+ 
         if doc.firstChild.getAttribute('success') != '0':
-            raise 'ricci reported error'
+            luci_log.debug_verbose('batch command failed')
+            raise RicciError, 'The last ricci command to host %s failed' \
+                    % self.__hostname
         
         batch_node = None
         for node in doc.firstChild.childNodes:
@@ -122,26 +188,42 @@
                 if node.nodeName == 'batch':
                     batch_node = node.cloneNode(True)
         if batch_node == None:
-            raise 'missing <batch/> in ricci\'s response'
+            luci_log.debug_verbose('batch node missing <batch/>')
+            raise RicciError, 'missing <batch/> in ricci\'s response from %s' \
+                    % self.__hostname
 
         return batch_node
     
     def batch_run(self, batch_str, async=True):
         try:
             batch_xml_str = '<?xml version="1.0" ?><batch>' + batch_str + '</batch>'
+            luci_log.debug_verbose('attempting batch \"%s\" for host %s' \
+                % (batch_xml_str, self.__hostname))
             batch_xml = minidom.parseString(batch_xml_str).firstChild
-        except:
-            return None
+        except Exception, e:
+            luci_log.debug('received invalid batch XML for %s: \"%s\"' \
+                % (self.__hostname, batch_xml_str))
+            raise RicciError, 'batch XML is malformed'
 
         try:
             ricci_xml = self.process_batch(batch_xml, async)
+            try:
+                luci_log.debug_verbose('received XML \"%s\" from host %s in response to batch command.' \
+                    % (ricci_xml.toxml(), self.__hostname))
+            except:
+                pass
         except:
+            luci_log.debug('An error occurred while trying to process the batch job: %s' % batch_xml_str)
             return None
+
         return ricci_xml
 
     def batch_report(self, batch_id):
+        luci_log.debug_verbose('[auth=%d] asking for batch_id %s for host %s' \
+            % (self.__authed, batch_id, self.__hostname))
+
         if not self.authed():
-            raise 'not authenticated'
+            raise RicciError, 'Not authenticated to host %s' % self.__hostname
         
         # construct request
         doc = minidom.Document()
@@ -153,22 +235,21 @@
         
         # send request
         self.__send(doc)
-        
-        
+
+
         # receive response
         doc = self.__receive()
         if doc.firstChild.getAttribute('success') == '12':
             return None
         if doc.firstChild.getAttribute('success') != '0':
-            raise 'ricci reported error'
-        
+            raise RicciError, 'Error while retrieving batch report for batch #%s from host %s' % (batch_id, self.__hostname)
         batch_node = None
         for node in doc.firstChild.childNodes:
             if node.nodeType == xml.dom.Node.ELEMENT_NODE:
                 if node.nodeName == 'batch':
                     batch_node = node.cloneNode(True)
         if batch_node == None:
-            raise 'missing <batch/> in ricci\'s response'
+            raise RicciError, 'Missing <batch/> in ricci\'s response from host %s' % self.__hostname
         return batch_node
     
     
@@ -177,13 +258,22 @@
     
     def __send(self, xml_doc):
         buff = xml_doc.toxml() + '\n'
-        #print buff
         while len(buff) != 0:
-            pos = self.ss.write(buff)
+            try:
+                pos = self.ss.write(buff)
+            except Exception, e:
+                luci_log.debug('Error sending XML \"%s\" to %s' \
+                    % (buff, self.__hostname))
+                raise RicciError, 'write error while sending XML to host %s' \
+                        % self.__hostname
             buff = buff[pos:]
+        try:
+            luci_log.debug_verbose('Sent XML \"%s\" to host %s' \
+                % (xml_doc.toxml(), self.__hostname))
+        except:
+            pass
         return
     
-    
     def __receive(self):
         doc = None
         xml_in = ''
@@ -197,18 +287,38 @@
                     doc = minidom.parseString(xml_in)
                     break
                 except:
-                    pass
-        except:
-            pass
+                    # we haven't received all of the XML data yet.
+                    continue
+        except Exception, e:
+            luci_log.debug('Error reading data from %s: %s' \
+                % (self.__hostname, str(e)))
+            raise RicciError, 'Error reading data from host %s' \
+                    % self.__hostname
+        luci_log.debug_verbose('Received XML \"%s\" from host %s' \
+            % (xml_in, self.__hostname))
+
         try:
             if doc == None:
                 doc = minidom.parseString(xml_in)
-            if doc.firstChild.nodeName != 'ricci':
-                raise ''
-        except:
-            raise 'invalid ricci response'
+        except Exception, e:
+            luci_log.debug('Error parsing XML \"%s\": %s' \
+                % (xml_in, str(e)))
+            raise RicciError, 'Error parsing XML from host %s: %s' \
+                    % (self.__hostname, str(e))
+
+        if not doc or not doc.firstChild:
+            raise RicciError, \
+                    'An empty response was received from host %s' \
+                    % self.__hostname
         
-        #print doc.toxml()
+        try:        
+            if doc.firstChild.nodeName != 'ricci':
+                luci_log.debug('Expecting \"ricci\" got XML \"%s\" from %s' %
+                    (xml_in, self.__hostname))
+                raise Exception, 'Expecting first XML child node to be \"ricci\"'
+        except Exception, e:
+            raise RicciError, 'Invalid XML ricci response from host %s' \
+                    % self.__hostname
         
         return doc
     
@@ -220,7 +330,9 @@
     
     try:
         return RicciCommunicator(hostname)
-    except:
+    except Exception, e:
+        luci_log.debug('Error creating a ricci connection to %s: %s' \
+            % (hostname, str(e)))
         return None
     pass
 
@@ -268,7 +380,12 @@
 #             module (-num) failed (next module won't be processed)
 def batch_status(batch_xml):
     if batch_xml.nodeName != 'batch':
-        raise 'not a batch'
+        try:
+            luci_log.debug('Expecting an XML batch node. Got \"%s\"' \
+                % batch_xml.toxml())
+        except:
+            pass
+        raise RicciError, 'Not an XML batch node'
     total = 0
     last  = 0
     for node in batch_xml.childNodes:
@@ -283,6 +400,12 @@
                     # failure
                     last = last + 1
                     last = last - 2 * last
+    try:
+        luci_log.debug_verbose('Returning (%s, %s) for batch_status(\"%s\")' \
+            % (last, total, batch_xml.toxml()))
+    except:
+        pass
+
     return (last, total)
 
 
@@ -307,7 +430,9 @@
 # * error_msg:  error message
 def extract_module_status(batch_xml, module_num=1):
     if batch_xml.nodeName != 'batch':
-        raise 'not a batch'
+        luci_log.debug('Expecting \"batch\" got \"%s\"' % batch_xml.toxml())
+        raise RicciError, 'Invalid XML node; expecting a batch node'
+
     c = 0
     for node in batch_xml.childNodes:
         if node.nodeType == xml.dom.Node.ELEMENT_NODE:
@@ -349,5 +474,5 @@
                     elif status == '5':
                         return -103, 'module removed from schedule'
     
-    raise Exception, str('no ' + str(module_num) + 'th module in the batch, or malformed response')
+    raise RicciError, str('no ' + str(module_num) + 'th module in the batch, or malformed response')
 
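The new RicciError class gives callers a single exception type to catch for
connect, SSL, send/receive, and batch failures. A minimal sketch of the
calling pattern used in cluster_adapters (hostname, batchNode, and luci_log
are assumed to come from the surrounding context, and RicciError is assumed
to be importable from ricci_communicator):

    from ricci_communicator import RicciCommunicator, RicciError

    try:
        rc = RicciCommunicator(hostname)
    except RicciError, e:
        luci_log.debug('Unable to connect to the ricci agent on %s: %s' \
            % (hostname, str(e)))
        rc = None

    if rc is not None and rc.authed():
        resultNode = rc.process_batch(batchNode, async=True)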



