extras-buildsys/client CONFIG.py,1.6,1.7 buildclient.py,1.13,1.14

Daniel Williams (dcbw) fedora-extras-commits at redhat.com
Fri Jun 24 12:51:23 UTC 2005


Author: dcbw

Update of /cvs/fedora/extras-buildsys/client
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv3560/client

Modified Files:
	CONFIG.py buildclient.py 
Log Message:
2005-06-24  Dan Williams <dcbw at redhat.com>

    * server/Repo.py
        - Deal with repo locking during createrepo stages

    * Ensure repositories can run createrepo without being accessed
        by clients during the operation.  createrepo runs can take a
        long time, and while one is in progress clients must not try
        to install their buildroots or perform any other operations
        against the repository, otherwise they may fail randomly with
        yum errors.  Access to the repository is therefore guarded by
        a two-level lock: when a build job is done, it asks the repo
        to copy its finished RPMs, and the repo enters lock level 1.
        Level 1 prevents new build jobs from entering their 'prep'
        state.  Once all currently running jobs have finished their
        prep state and the repo is at lock level 1, the repo promotes
        itself to lock level 2, copies any new RPMs into the repo, and
        runs createrepo.  When that is done, all waiting clients are
        released into their 'prep' states (see the sketch below).

        This requires the absolute latest mock from CVS.
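
    A minimal Python sketch of the two-level locking flow described
    above (illustrative only; the names RepoLock, job_wants_prep(),
    job_done_prep(), job_finished() and process() are hypothetical and
    do not necessarily match the actual code in server/Repo.py):

    import threading

    LOCK_NONE    = 0   # repo open; jobs may enter their 'prep' state
    LOCK_LEVEL_1 = 1   # a job finished; no new jobs may start prepping
    LOCK_LEVEL_2 = 2   # nothing is prepping; safe to run createrepo

    class RepoLock:
        def __init__(self):
            self._lock = threading.Lock()
            self._level = LOCK_NONE
            self._prepping_jobs = 0

        def job_wants_prep(self):
            """A build job asks permission to enter 'prep'.  Returns
               True if it may proceed, False if it must wait."""
            self._lock.acquire()
            try:
                if self._level != LOCK_NONE:
                    return False
                self._prepping_jobs = self._prepping_jobs + 1
                return True
            finally:
                self._lock.release()

        def job_done_prep(self):
            """A build job reports that its prep stage has completed."""
            self._lock.acquire()
            try:
                self._prepping_jobs = self._prepping_jobs - 1
            finally:
                self._lock.release()

        def job_finished(self):
            """A finished job asks the repo to copy in its RPMs; the
               repo enters lock level 1."""
            self._lock.acquire()
            try:
                self._level = LOCK_LEVEL_1
            finally:
                self._lock.release()

        def process(self):
            """Periodic poll: promote to level 2 once no jobs are
               prepping, then copy RPMs, run createrepo and unlock."""
            self._lock.acquire()
            try:
                if self._level == LOCK_LEVEL_1 and self._prepping_jobs == 0:
                    self._level = LOCK_LEVEL_2
                if self._level == LOCK_LEVEL_2:
                    # placeholders for the real work:
                    #   copy_new_rpms_into_repo()
                    #   run_createrepo()
                    self._level = LOCK_NONE   # release waiting clients
            finally:
                self._lock.release()

    On the client side of this handshake, the buildclient.py changes
    below add a 'prepping' job state and a repo_unlocked() XML-RPC
    method through which the build server tells the client that it may
    start building against the repository.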




Index: CONFIG.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys/client/CONFIG.py,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- CONFIG.py	17 Jun 2005 16:20:06 -0000	1.6
+++ CONFIG.py	24 Jun 2005 12:51:21 -0000	1.7
@@ -1,6 +1,8 @@
 # Configuration file for archwelder.py
 
 config_opts = {}
+config_opts['debug'] = True
+
 config_opts['builder_cmd'] = "/usr/bin/mock"
 
 # Distro and Repo:


Index: buildclient.py
===================================================================
RCS file: /cvs/fedora/extras-buildsys/client/buildclient.py,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- buildclient.py	17 Jun 2005 03:10:28 -0000	1.13
+++ buildclient.py	24 Jun 2005 12:51:21 -0000	1.14
@@ -29,20 +29,14 @@
 import sys
 import string
 import time
+import fcntl
 import urllib
+import errno
 import CONFIG
 import SimpleHTTPSServer
 import FileDownloader
 import SimpleSSLXMLRPCServer
 
-DEBUG = False
-def debugprint(stuff=''):
-    if DEBUG:
-        print stuff
-
-def log(stuff=''):
-    print stuff
-
 g_our_hostname = None
 certs = {}
 certs['cert'] = CONFIG.get('client_cert')
@@ -66,37 +60,72 @@
 class BuildClientMock:
     """puts things together for an arch - baseclass for handling builds for 
        other arches"""
+
     def __init__(self, uniqid, target, srpm_url):
         self._uniqid = uniqid
         self._status = 'init'
+        self._repo_locked = True
         self._files = []
         self._pobj = None
         self._target = target
         self._srpm_url = srpm_url
+        self._log_fd = None
+
+        self._result_dir = os.path.join(CONFIG.get('client_work_dir'), self._uniqid, "result")
+        if not os.path.exists(self._result_dir):
+            os.makedirs(self._result_dir)
+
+        self._state_dir = os.path.join(CONFIG.get('client_work_dir'), self._uniqid, "mock-state")
+        if not os.path.exists(self._state_dir):
+            os.makedirs(self._state_dir)
+
+        logfile = os.path.join(self._result_dir, "buildclient.log")
+        self._log_fd = open(logfile, "w+")
+
+        self.log("""Starting job:
+   Time: %s
+   Target: %s
+   UID: %s
+   Architecture: %s
+   SRPM: %s\n\n""" % (time.asctime(time.gmtime()), self._target, self._uniqid, self.buildarch, srpm_url))
 
         srpm_filename = FileDownloader.get_base_filename_from_url(srpm_url, ['.src.rpm'])
         if not srpm_filename:
             self._status = 'failed'
             self._srpm_path = None
-            print "%s: failed in __init__, couldn't get SRPM filename." % self._uniqid
+            self.log("Failed in __init__, couldn't extract SRPM filename.\n")
         else:
             self._srpm_path = os.path.join(CONFIG.get('client_work_dir'), self._uniqid, "source", srpm_filename)
 
-        self._result_dir = os.path.join(CONFIG.get('client_work_dir'), self._uniqid, "result")
-        if not os.path.exists(self._result_dir):
-            os.makedirs(self._result_dir)
-
     def die(self, sig=15):
         if self._pobj and self._pobj.pid:  # Can't kill the package download from build server
             try:
+                self.log("Killing build process...\n")
                 os.kill(self._pobj.pid, sig)
             except OSError, e:
-                print "Couldn't kill process %d: %s" % (self._pobj.pid, e)
+                self.log("Couldn't kill process %d: %s\n" % (self._pobj.pid, e))
+            else:
+                self.log("Killed.\n")
         self._status = 'killed'
         return True
 
+    def log(self, string):
+        if string and self._log_fd:
+            self._log_fd.write(string)
+            self._log_fd.flush()
+            os.fsync(self._log_fd.fileno())
+            if CONFIG.get('debug'):
+                s = "%s: " % self._uniqid
+                sys.stdout.write(s + string)
+                sys.stdout.flush()
+
+    def start(self):
+        # check for existence of srpm before going on
+        self._download_srpm()
+
     def _download_srpm(self):
         self._status = 'downloading'
+        self.log("Starting download of %s.\n" % self._srpm_url)
         target_dir = os.path.dirname(self._srpm_path)
         dl_thread = FileDownloader.FileDownloader(self.dl_callback, self._srpm_url, self._srpm_url,
                         target_dir, ['.src.rpm'], certs)
@@ -106,79 +135,171 @@
         url = cb_data
         if status == 'done':
             self._status = 'downloaded'
-            log("%s: Retrieved %s" % (self._uniqid, url))
+            self.log("Retrieved %s.\n" % url)
         elif status == 'failed':
             # Don't overwrite our status with 'failed' if we were cancelled
             # and a download error occurred
             if not self.is_done_status():
                 self._status = 'failed'
-                log("%s: Failed to retrieve %s" % (self._uniqid, url))
+                self.log("Failed to retrieve %s.\n" % url)
 
     def _build(self):
-        print "%s: starting step 'building'" % self._uniqid
+        self.log("Starting step 'building' with command:\n")
         if not os.path.exists(self._result_dir):
             os.makedirs(self._result_dir)
-        cmd = '%s %s -r %s --resultdir=%s %s' % (self.arch_command,
+        if not os.path.exists(self._result_dir):
+            os.makedirs(self._result_dir)
+        cmd = '%s %s -r %s --resultdir=%s --statedir=%s %s' % (self.arch_command,
                             CONFIG.get('builder_cmd'), self.buildroot, 
-                            self._result_dir, self._srpm_path)
-        self._pobj = popen2.Popen4(cmd=cmd)
-        self._status = 'building'
+                            self._result_dir, self._state_dir, self._srpm_path)
+        self.log("   %s\n" % cmd)
+        self._pobj = popen2.Popen4(cmd=cmd, bufsize=1024)
+        fcntl.fcntl(self._pobj.fromchild.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
+        self._status = 'prepping'
+
+        # Poll a bit to wait for mock to write out the status file if
+        # it's not there yet.
+        start_time = time.time()
+        mockstatusfile = os.path.join(self._state_dir, 'status')
+        while not os.path.exists(mockstatusfile):
+            time.sleep(0.5)
+            if time.time() - start_time > 5:
+                self.log("Timed out waiting for the mock status file!  %s\n" % mockstatusfile)
+                try:
+                    self.log("Killing mock...\n")
+                    os.kill(self._pobj.pid, 15)
+                except OSError, e:
+                    self.log("Couldn't kill mock process %d: %s\n" % (self._pobj.pid, e))
+                else:
+                    self.log("Killed.\n")
+                self._status = 'failed'
+                break
 
-    def start(self):
-        # check for existence of srpm before going on
-        self._download_srpm()
+    def _mock_is_prepping(self):
+        mock_status = self._get_mock_status()
+        if mock_status:
+            if mock_status == 'init':
+                return True
+            elif mock_status == 'clea':
+                return True
+            elif mock_status == 'prep':
+                return True
+            elif mock_status == 'setu':
+                return True
+        return False
+
+    def _mock_is_closed(self):
+        mock_status = self._get_mock_status()
+        if mock_status and mock_status == "done":
+            return True
+        return False
+
+    def _get_mock_status(self):
+        mockstatusfile = os.path.join(self._state_dir, 'status')
+        if not os.path.exists(mockstatusfile):
+            self.log("mock status file doesn't exist!  %s\n" % mockstatusfile)
+            return None
+
+        f = open(mockstatusfile, "r")
+        fcntl.fcntl(f.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)
+        
+        while True:
+            try:
+                f.seek(0, 0)
+                string = f.read(4)
+            except OSError, e:
+                if e.errno == errno.EAGAIN:
+                    time.sleep(0.25)
+                    continue
+            else:
+                if len(string) < 4:
+                    continue
+                break
+        f.close()
+        string = string.lower()
+        print "Mock state is '%s'" % string
+        return string
 
     def process(self):
         if not self.is_done_status():
             if self._status == 'downloading':
                 pass
             elif self._status == 'downloaded':
-                self._build()
-            else:
-                # If we're done with a step, advance to next one
+                # We can't start doing anything with yum until the build
+                # server tells us the repo is unlocked.
+                if not self._repo_locked:
+                    self._build()
+            elif self._status == 'prepping':
+                if not self._mock_is_prepping():
+                    self._status = 'building'
+            elif self._status == 'building':
                 exit_status = self._pobj.poll()
                 if exit_status == 0:
-                    if self._status == 'building':
-                        print "%s: Job done." % self._uniqid
-                        self._status = 'done'
-                        self._files = self._find_files()
-                    else:
-                        print "Bad status %s encountered!" % self._status
+                    # mock completed successfully
+                    if self._status != 'building':
+                        self.log("Bad job end status %s encountered!\n" % self._status)
+                    self._status = 'done'
                 elif exit_status > 0:
-                    # Write out the failure log
-                    fail_log = os.path.join(self._result_dir, "mock_failure.log")
-                    f = open(fail_log, "w+")
-                    f.write("%s: job failed! mock exit status was %d\n\n" % (self._uniqid, exit_status))
-                    f.writelines(self._pobj.fromchild)
-                    f.close()
+                    # mock exited with an error
                     self._status = 'failed'
-                    print "%s: failed due to mock errors" % self._uniqid
-                    self._files = self._find_files()
-                else:
-                    # builder process still running
-                    pass
+
+            if self._pobj:
+                # Grab any mock output and write it to a log
+                string = ' '
+                while len(string) > 0:
+                    try:
+                        string = os.read(self._pobj.fromchild.fileno(), 1024)
+                    except OSError, e:
+                        if e.errno == errno.EAGAIN:     # Resource temporarily unavailable
+                            break
+                        else:
+                            self.log("Error reading mock output: %s\n" % e)
+                    else:
+                        print string
+                        self._log_fd.write(string)
+                        self._log_fd.flush()
+                        os.fsync(self._log_fd.fileno())
+
+            if self.is_done_status():
+                self._files = self._find_files()
+                self.log("\n\n-----------------------\n\n")
+                if self._status == 'done':
+                    self.log("Job completed successfully.\n")
+                elif self._status == 'failed':
+                    self.log("Job failed due to mock errors!  mock exit status: %d\n" % exit_status)
+                elif self._status == 'killed':
+                    self.log("Job failed because it was killed.\n")
+
+            print self._status
+        else:
+            if self._log_fd:
+                self._log_fd.close()
+                self._log_fd = None
 
     def _find_files(self):
         # Grab the list of files in our job's result dir and URL encode them
         files_in_dir = os.listdir(self._result_dir)
         file_list = []
+        self.log("\n\nOutput File List:\n-----------------\n")
         for f in files_in_dir:
             file_url = get_url_for_file(os.path.join(self._result_dir, f))
             if file_url:
                 file_list.append(file_url)
+                self.log("  Output File: %s\n" % urllib.unquote(file_url))
             else:
-                print "Couldn't get file URL for %s" % f
+                self.log("  Error: Couldn't get file URL for file %s\n" % f)
         return file_list
 
     def status(self):
         return self._status
 
     def files(self):
-        # ?? What to do with mock output...  another log file?
-        #if self._pobj:
-        #    self._output.extend(self._pobj.fromchild.readlines())
         return self._files
 
+    def repo_unlocked(self):
+        self._repo_locked = False
+        return 0
+
     def is_done_status(self):
         if (self._status is 'done') or (self._status is 'killed') or (self._status is 'failed'):
             return True
@@ -262,6 +383,11 @@
     bcp = builder(uniqid, target, buildarch, srpm_url)
     return bcp
 
+
+def log(string):
+    if CONFIG.get('debug'):
+        print string
+
 class XMLRPCBuildClientServer:
     def __init__(self, localarches):
         self.ids = {} # unique id => awclass instance
@@ -279,7 +405,7 @@
 
     def start(self, target, buildarch, srpm_url):
         if self.cur_job != 0:
-            print "Tried to build '%s' when already buiding something" % srpm_url
+            log("Tried to build '%s' when already building something" % srpm_url)
             return 0
 
         cur_time = time.time()
@@ -324,6 +450,10 @@
         bcp = self.ids[uniqid]
         return bcp.files()
     
+    def repo_unlocked(self, uniqid):
+        bcp = self.ids[uniqid]
+        return bcp.repo_unlocked()
+    
     def listjobs(self):
         return self.ids.keys()
 
@@ -379,8 +509,8 @@
             break
 
         cur_time = time.time()
-        if cur_time >= last_time + 5:
-            # do some work every 5s or so
+        if cur_time >= last_time + 3:
+            # do some work every 3s or so
             bcs._process()
             last_time = time.time()
 



