check-mirrors check-mirrors.py,1.16,1.17
Michael Patrick McGrath (mmcgrath)
fedora-extras-commits at redhat.com
Tue Aug 1 19:08:20 UTC 2006
Author: mmcgrath
Update of /cvs/fedora/check-mirrors
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv22484
Modified Files:
check-mirrors.py
Log Message:
Initial creation of a db backend. Still lots of work to do, but this works.
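
For context, the new backend amounts to a single "mirrors" table plus an update-then-insert round trip in update_db() below. Here is a minimal sketch of that pattern, assuming the same legacy python-sqlite module the script imports; the database path and mirror values are hypothetical, and the schema is copied from check_and_make_db() in the diff:

import sqlite

con = sqlite.connect( '/var/tmp/mirrors.db' )  # hypothetical path; the script reads the real one from the 'db' config option
cursor = con.cursor()

# Schema as created by check_and_make_db() below.
cursor.execute( 'CREATE TABLE mirrors (m_id INTEGER PRIMARY KEY, repo varchar(30), arch varchar(8), country varchar(2), url text, failures integer, lastgood date);' )

# Try updating an existing row first, as update_db() does after a good check ...
cursor.execute( "update mirrors set failures='0', lastgood=DATETIME('now') "
                "where url='http://example.org/pub/' and repo='core' and arch='i386';" )
if not cursor.rowcount:
    # ... and fall back to inserting the mirror if no row matched.
    cursor.execute( "insert into mirrors (repo, arch, country, url, failures, lastgood) "
                    "VALUES ('core', 'i386', 'US', 'http://example.org/pub/', 0, DATETIME('now'));" )
con.commit()
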
Index: check-mirrors.py
===================================================================
RCS file: /cvs/fedora/check-mirrors/check-mirrors.py,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- check-mirrors.py 17 Jul 2006 18:28:44 -0000 1.16
+++ check-mirrors.py 1 Aug 2006 19:08:17 -0000 1.17
@@ -21,12 +21,12 @@
# TODO:
# - better error handling
-# - push into a db?
# - have it accept an option which specifies a section in the config file
# to operate on - rather than doing all of them.
+# - remove deleted mirrors from the database
-debug = False
+debug = True
__revision__ = '$Id$'
CONFIG = '/etc/check-mirrors.conf'
@@ -43,82 +43,154 @@
import socket
import urlparse
import glob
+import sqlite
from urlgrabber.grabber import URLGrabber
from urlgrabber.grabber import URLGrabError
-class YumBaseError(exceptions.Exception):
- def __init__(self, args=None):
- exceptions.Exception.__init__(self)
+class YumBaseError( exceptions.Exception ):
+ def __init__( self, args=None ):
+ exceptions.Exception.__init__( self )
self.args = args
-class RepoMDError(YumBaseError):
- def __init__(self, args=None):
- YumBaseError.__init__(self)
+class RepoMDError( YumBaseError ):
+ def __init__( self, args=None ):
+ YumBaseError.__init__( self )
self.args = args
-def ns_cleanup(qn):
- if qn.find('}') == -1: return qn
- return qn.split('}')[1]
+def ns_cleanup( qn ):
+ if qn.find( '}' ) == -1: return qn
+ return qn.split( '}' )[1]
-def errorprint(stuff):
- print >> sys.stderr, stuff
+def errorprint( error ):
+ print >> sys.stderr, error
-def check_and_make_dir(dir):
+def check_and_make_db( db ):
+    """
+    verify that we can create the sqlite DB file
+    """
+    try:
+        con = sqlite.connect( db )
+        cursor = con.cursor()
+    except sqlite.Error, errmsg:
+        errorprint( 'Failed to connect to database: %s' % db )
+        errorprint( 'Err: ' +str( errmsg ) )
+        return None, None
+
+    try:
+        query = "insert into mirrors (repo, arch, country, url, failures, lastgood) VALUES ('testrepo', 'testarch', 'testcountry', 'http://nowhere/', 0, DATETIME('now'));"
+        if debug:
+            print "Executing %s" % query
+        cursor.execute( query )
+        if debug:
+            print "deleting test %i" % cursor.lastrowid
+        cursor.execute( "delete from mirrors where m_id =" + str( cursor.lastrowid ) + ";" )
+        con.commit()
+    except sqlite.Error, errmsg:
+        if debug:
+            print 'db IO test failed: ' +str( errmsg )
+
+        try:
+            cursor.execute( 'CREATE TABLE mirrors (m_id INTEGER PRIMARY KEY, repo varchar(30), arch varchar(8), country varchar(2), url text, failures integer, lastgood date);' )
+            con.commit()
+        except sqlite.Error, errmsg:
+            errorprint( 'Err: ' +str( errmsg ) )
+            return None, None
+    return con, cursor
+
+
+def check_and_make_dir( dir ):
"""
check out the dir and make it, if possible, return 1 if done, else return 0
"""
- if os.path.exists(dir):
- if not os.path.isdir(dir):
- errorprint('%s is not a dir' % dir)
+ if os.path.exists( dir ):
+ if not os.path.isdir( dir ):
+ errorprint( '%s is not a dir' % dir )
result = False
else:
- if not os.access(dir, os.W_OK):
- errorprint('%s is not writable' % dir)
+ if not os.access( dir, os.W_OK ):
+ errorprint( '%s is not writable' % dir )
result = False
else:
result = True
else:
try:
- os.mkdir(dir)
+ os.mkdir( dir )
except OSError, e:
- errorprint('Error creating dir %s: %s' % (dir, e))
+ errorprint( 'Error creating dir %s: %s' % ( dir, e ) )
result = False
else:
result = True
return result
-
+
+def update_db( repo, arch, country, url, failure, dbconn, dbcursor ):
+    updated = 0
+    if not dbcursor:
+        errorprint( 'sqlite database check failed' )
+
+    if failure:
+        query = "update mirrors set failures=(select failures from mirrors where url='%s')+1 where url='%s' and repo='%s' and arch='%s';" % ( url, url, repo, arch)
+    else:
+        query = "update mirrors set failures='0', lastgood=DATETIME('now') where url='%s' and repo='%s' and arch='%s';" % ( url, repo, arch )
+    try:
+        if debug:
+            print "Executing: %s" % query
+        dbcursor.execute( query )
+        updated = dbcursor.rowcount
+        dbconn.commit()
+    except sqlite.Error, errmsg:
+        errorprint( 'DBerr: ' +str( errmsg ) )
+        errorprint(query)
+    if not updated:
+        try:
+            if failure:
+                lastgoodsql='0'
+            else:
+                lastgoodsql="DATETIME('now')"
+            query = "insert into mirrors (repo, arch, country, url, failures, lastgood) VALUES ('%s', '%s', '%s', '%s', '%s', %s);" % ( repo, arch, country, url, failure, lastgoodsql )
+            if debug:
+                print "Executing: %s" % query
+            dbcursor.execute( query )
+            updated = dbcursor.rowcount
+            dbconn.commit()
+        except sqlite.Error, errmsg:
+            errorprint( 'DBErr: ' +str( errmsg ) )
+            errorprint(query)
+            return None
+    return updated
+
+
class RepoData:
"""represents anything beneath a <data> tag"""
- def __init__(self, elem):
- self.type = elem.attrib.get('type')
- self.location = (None, None)
- self.checksum = (None,None) # type,value
- self.openchecksum = (None,None) # type,value
+ def __init__( self, elem ):
+ self.type = elem.attrib.get( 'type' )
+ self.location = ( None, None )
+ self.checksum = ( None, None ) # type,value
+ self.openchecksum = ( None, None ) # type,value
self.timestamp = None
- self.parse(elem)
+ self.parse( elem )
- def parse(self, elem):
+ def parse( self, elem ):
for child in elem:
- child_name = ns_cleanup(child.tag)
+ child_name = ns_cleanup( child.tag )
if child_name == 'location':
- relative = child.attrib.get('href')
- base = child.attrib.get('base')
- self.location = (base, relative)
-
+ relative = child.attrib.get( 'href' )
+ base = child.attrib.get( 'base' )
+ self.location = ( base, relative )
+
elif child_name == 'checksum':
csum_value = child.text
- csum_type = child.attrib.get('type')
- self.checksum = (csum_type,csum_value)
+ csum_type = child.attrib.get( 'type' )
+ self.checksum = ( csum_type, csum_value )
elif child_name == 'open-checksum':
csum_value = child.text
- csum_type = child.attrib.get('type')
- self.openchecksum = (csum_type, csum_value)
-
+ csum_type = child.attrib.get( 'type' )
+ self.openchecksum = ( csum_type, csum_value )
+
elif child_name == 'timestamp':
self.timestamp = child.text
@@ -126,44 +198,44 @@
class RepoMD:
"""represents the repomd xml file"""
- def __init__(self, repoid, srcfile):
+ def __init__( self, repoid, srcfile ):
"""takes a repoid and a filename for the repomd.xml"""
self.repoid = repoid
self.repoData = {}
- if type(srcfile) == type('str'):
+ if type( srcfile ) == type( 'str' ):
# srcfile is a filename string
- infile = open(srcfile, 'rt')
+ infile = open( srcfile, 'rt' )
else:
# srcfile is a file object
infile = srcfile
- parser = iterparse(infile)
+ parser = iterparse( infile )
try:
for event, elem in parser:
- elem_name = ns_cleanup(elem.tag)
+ elem_name = ns_cleanup( elem.tag )
if elem_name == "data":
- thisdata = RepoData(elem=elem)
+ thisdata = RepoData( elem=elem )
self.repoData[thisdata.type] = thisdata
except SyntaxError, e:
raise RepoMDError, "Damaged repomd.xml file"
-
- def fileTypes(self):
+
+ def fileTypes( self ):
"""return list of metadata file types available"""
return self.repoData.keys()
-
- def getData(self, type):
- if self.repoData.has_key(type):
+
+ def getData( self, type ):
+ if self.repoData.has_key( type ):
return self.repoData[type]
else:
raise RepoMDError, "Error: requested datatype %s not available" % type
-
- def dump(self):
+
+ def dump( self ):
"""dump fun output"""
-
+
for ft in self.fileTypes():
thisdata = self.repoData[ft]
print 'datatype: %s' % thisdata.type
@@ -172,41 +244,41 @@
print 'checksum: %s -%s' % thisdata.checksum
print 'open checksum: %s - %s' % thisdata.openchecksum
-class MirrorContainer(object):
+class MirrorContainer( object ):
"""Holder for info about a specific mirror"""
-
- def __init__(self, url, grabber, archlist, gi):
+
+ def __init__( self, url, grabber, archlist, gi ):
self.url = url
self.grabber = grabber
self.geoip = gi
self.timestamps = {}
self.archlist = archlist
self.country = None
- self.get_timestamp(url)
- self.get_country(url)
-
- def get_timestamp(self, url):
+ self.get_timestamp( url )
+ self.get_country( url )
+
+ def get_timestamp( self, url ):
url = '%s/repodata/repomd.xml' % url
- (suburl, count) = re.subn('\$ARCH', '$BASEARCH', url)
- (suburl, count) = re.subn('\$BASEARCH','$basearch', suburl)
-
+ ( suburl, count ) = re.subn( '\$ARCH', '$BASEARCH', url )
+ ( suburl, count ) = re.subn( '\$BASEARCH', '$basearch', suburl )
+
for arch in self.archlist:
- (finurl, count) = re.subn('\$basearch', arch, suburl)
+ ( finurl, count ) = re.subn( '\$basearch', arch, suburl )
try:
- fo = self.grabber.urlopen(finurl)
+ fo = self.grabber.urlopen( finurl )
except URLGrabError, e:
if debug:
print 'error on %s' % finurl
continue
try:
- p = RepoMD('fooid', fo)
+ p = RepoMD( 'fooid', fo )
except RepoMDError, e:
if debug:
print e
continue
except URLGrabError, e:
- errorprint("Grabber error on %s arch %s was:\n%s" % (url, arch, e))
+ errorprint( "Grabber error on %s arch %s was:\n%s" % ( url, arch, e ) )
continue
else:
thisdata = p.repoData['primary']
@@ -214,202 +286,213 @@
del p
fo.close()
del fo
-
- def get_country(self, url):
- url_parts = urlparse.urlparse(url)
- h = url_parts[1]
- addr = socket.gethostbyname(h)
- self.country = self.geoip.country_code_by_addr(addr)
-
-
+ def get_country( self, url ):
+ url_parts = urlparse.urlparse( url )
+ h = url_parts[1]
+ addr = socket.gethostbyname( h )
+ self.country = self.geoip.country_code_by_addr( addr )
-class MirrorListInfo(object):
+class MirrorListInfo( object ):
"""Holder for config info from the configuration file about the
mirrorlist being checked"""
-
- def __init__(self):
+
+ def __init__( self ):
self.archlist = ['i386', 'x86_64', 'ppc']
self.mirrorid = None
self.inputfile = None
self.outputpath = None
- self.timeout = 10
+ self.timeout = 4
self.canonical = None
+ self.db = None
self.mirrorlist = []
-
- def populate_mirrorlist(self, grabber_inst):
+
+ def populate_mirrorlist( self, grabber_inst ):
try:
- fo = grabber_inst.urlopen(self.inputfile)
+ fo = grabber_inst.urlopen( self.inputfile )
except IOError, e:
return
else:
content = fo.readlines()
for line in content:
- if re.match('^\s*\#.*', line) or re.match('^\s*$', line):
+ if re.match( '^\s*\#.*', line ) or re.match( '^\s*$', line ):
continue
- mirror = re.sub('\n$', '', line) # no more trailing \n's
- self.mirrorlist.append(mirror)
-
+ mirror = re.sub( '\n$', '', line ) # no more trailing \n's
+ self.mirrorlist.append( mirror )
+
fo.close()
-def config(cfg):
+def config( cfg ):
sections = []
conf = ConfigParser.ConfigParser()
- conf.read(cfg)
-
+ conf.read( cfg )
+
for section in conf.sections():
item = MirrorListInfo()
- if conf.has_option(section, 'file_prefix'):
- item.mirrorid = conf.get(section, 'file_prefix')
+ if conf.has_option( section, 'file_prefix' ):
+ item.mirrorid = conf.get( section, 'file_prefix' )
else:
item.mirrorid = '%s' % section
broken = False
-
- if conf.has_option(section, 'inputfile'):
- item.inputfile = conf.get(section, 'inputfile')
+
+ if conf.has_option( section, 'inputfile' ):
+ item.inputfile = conf.get( section, 'inputfile' )
else:
- errorprint('missing inputfile')
+ errorprint( 'missing inputfile' )
broken = True
-
- if conf.has_option(section, 'outputpath'):
- item.outputpath = conf.get(section, 'outputpath')
+
+ if conf.has_option( section, 'outputpath' ):
+ item.outputpath = conf.get( section, 'outputpath' )
else:
- errorprint('missing outputpath')
+ errorprint( 'missing outputpath' )
broken = True
-
- if conf.has_option(section, 'canonical'):
- item.canonical = conf.get(section, 'canonical')
+
+ if conf.has_option( section, 'canonical' ):
+ item.canonical = conf.get( section, 'canonical' )
else:
- errorprint('missing canonical url')
+ errorprint( 'missing canonical url' )
broken = True
-
+
+ if conf.has_option( section, 'db' ):
+ item.db = conf.get( section, 'db' )
+ else:
+ errorprint( 'missing db filename' )
+ broken = True
+
if broken:
- errorprint("Broooooooooooooken config, in section %s, bailing" % section)
- sys.exit(1)
-
- if conf.has_option(section, 'timeout'):
- item.timeout = conf.getint(section, 'timeout')
+ errorprint( "Broooooooooooooken config, in section %s, bailing" % section )
+ sys.exit( 1 )
+ if conf.has_option( section, 'timeout' ):
+ item.timeout = conf.getint( section, 'timeout' )
- if conf.has_option(section, 'archlist'):
- a_string = conf.get(section, 'archlist')
-
- a_holder = a_string.replace('\n', ' ')
- a_holder = a_holder.replace(',', ' ')
+
+ if conf.has_option( section, 'archlist' ):
+ a_string = conf.get( section, 'archlist' )
+ a_holder = a_string.replace( '\n', ' ' )
+ a_holder = a_holder.replace( ',', ' ' )
a_list = a_holder.split()
-
+
item.archlist = a_list
- sections.append(item)
-
+ sections.append( item )
+
return sections
-def main(cfg_file):
- if not os.path.exists(cfg_file):
- errorprint("config file %s does not exist" % cfg_file)
- sys.exit(1)
+def main( cfg_file ):
+ if not os.path.exists( cfg_file ):
+ errorprint( "config file %s does not exist" % cfg_file )
+ sys.exit( 1 )
+
+ sections = config( cfg_file )
+ gi = GeoIP.new( GeoIP.GEOIP_STANDARD )
- sections = config(cfg_file)
- gi = GeoIP.new(GeoIP.GEOIP_STANDARD)
-
# grab the canonical mirrors info
for s in sections:
mirrors = []
-
- ug = URLGrabber(timeout=s.timeout)
- s.populate_mirrorlist(ug)
- if len(s.mirrorlist) < 1:
- errorprint("no mirrors to look at for %s, something is broken, skipping" % s.mirrorid)
+ badmirrors = []
+
+ ug = URLGrabber( timeout=s.timeout )
+ s.populate_mirrorlist( ug )
+ if len( s.mirrorlist ) < 1:
+ errorprint( "no mirrors to look at for %s, something is broken, skipping" % s.mirrorid )
continue
- if not check_and_make_dir(s.outputpath):
- errorprint('Error creating output path %s for %s' % (s.outputpath, s.mirrorid))
+ dbconn, dbcursor = check_and_make_db( s.db )
+
+ if not check_and_make_dir( s.outputpath ):
+ errorprint( 'Error creating output path %s for %s' % ( s.outputpath, s.mirrorid ) )
continue
# get the list of the old files
new_file_list = []
old_file_list = []
- filematch = '%s/%s*' % (s.outputpath, s.mirrorid)
+ filematch = '%s/%s*' % ( s.outputpath, s.mirrorid )
if debug: print filematch
- old_file_list.extend(glob.glob(filematch))
+ old_file_list.extend( glob.glob( filematch ) )
if debug: print old_file_list
-
- canon = MirrorContainer(s.canonical, ug, s.archlist, gi)
- if len(canon.timestamps.keys()) < len(s.archlist):
+
+ canon = MirrorContainer( s.canonical, ug, s.archlist, gi )
+ if len( canon.timestamps.keys() ) < len( s.archlist ):
# if we can't get info for all arches for the canonical mirror, exit
- errorprint("Cannot contact canonical host for all archs for mirrorlists of %s skipping" % s.mirrorid)
+ errorprint( "Cannot contact canonical host for all archs for mirrorlists of %s skipping" % s.mirrorid )
continue
-
+
if debug:
# debug only - just printing out info
for arch in s.archlist:
- if canon.timestamps.has_key(arch):
- print '%s - %s: %s' % (s.mirrorid, arch, canon.timestamps[arch])
+ if canon.timestamps.has_key( arch ):
+ print '%s - %s: %s' % ( s.mirrorid, arch, canon.timestamps[arch] )
# get the info for all the mirrors
-
+
for url in s.mirrorlist:
try:
- m = MirrorContainer(url, ug, s.archlist, gi)
+ m = MirrorContainer( url, ug, s.archlist, gi )
except socket.gaierror, e:
- errorprint("Cannot get address for mirror %s" % url)
+ errorprint( "Cannot get address for mirror %s" % url )
continue
else:
if m:
- mirrors.append(m)
-
+ mirrors.append( m )
+
# print them out per-arch and per-country
for arch in s.archlist:
glob_urls = []
country_specific = {}
for m in mirrors:
- if m.timestamps.has_key(arch):
+ goodmirror = 0
+ if m.timestamps.has_key( arch ):
if m.timestamps[arch] == canon.timestamps[arch]:
if debug: print 'adding %s' % m.url
- glob_urls.append(m.url)
+ glob_urls.append( m.url )
+ goodmirror=1
if m.country:
- if not country_specific.has_key(m.country):
+ if not country_specific.has_key( m.country ):
country_specific[m.country] = []
- if debug: print 'adding to %s: %s' % (m.country, m.url)
- country_specific[m.country].append(m.url)
-
- global_file = '%s/%s-global-%s.txt' % (s.outputpath, s.mirrorid, arch)
- glob_fo = open(global_file, 'w')
+ goodmirror=1
+ if debug: print 'adding to %s: %s' % ( m.country, m.url )
+ country_specific[m.country].append( m.url )
+ if not goodmirror:
+ print "Bad: %s, %s, %s, %s" % (s.mirrorid, arch, m.country, m.url)
+ if not update_db(s.mirrorid, arch, m.country, m.url, '1', dbconn, dbcursor):
+ errorprint( "Error updating: %s" % url)
+ global_file = '%s/%s-global-%s.txt' % ( s.outputpath, s.mirrorid, arch )
+ glob_fo = open( global_file, 'w' )
for url in glob_urls:
- glob_fo.write('%s\n' % url)
+ glob_fo.write( '%s\n' % url )
glob_fo.close()
- new_file_list.append(os.path.normpath(global_file))
-
+ new_file_list.append( os.path.normpath( global_file ) )
+
for code in country_specific.keys():
- country_file = '%s/%s-%s-%s.txt' % (s.outputpath, s.mirrorid, code, arch)
- country_fo = open(country_file, 'w')
+ country_file = '%s/%s-%s-%s.txt' % ( s.outputpath, s.mirrorid, code, arch )
+ country_fo = open( country_file, 'w' )
for url in country_specific[code]:
- country_fo.write('%s\n' % url)
- country_fo.close()
- new_file_list.append(os.path.normpath(country_file))
-
+ country_fo.write( '%s\n' % url )
+ if not update_db( s.mirrorid, arch, code, url, 0, dbconn, dbcursor ):
+ errorprint( "Error updating: %s" % url )
+ new_file_list.append( os.path.normpath( country_file ) )
+
# clean up
for fn in old_file_list:
- fn = os.path.normpath(fn)
+ fn = os.path.normpath( fn )
if fn not in new_file_list:
if debug: print "removing old file %s" % fn
- os.unlink(fn)
-
-
+ os.unlink( fn )
+# dbconn.close()
+# dbcursor.close()
if __name__ == '__main__':
- if len(sys.argv) < 2:
+ if len( sys.argv ) < 2:
conf_fn = CONFIG
else:
conf_fn = sys.argv[1]
- main(conf_fn)
-
-
+ main( conf_fn )
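
Once a run completes, the accumulated state can be inspected straight from the database. A minimal sketch, again assuming the legacy python-sqlite module and a hypothetical database path:

import sqlite

con = sqlite.connect( '/var/tmp/mirrors.db' )  # hypothetical path
cursor = con.cursor()
# Mirrors that have failed at least one check, most recently good first.
cursor.execute( "select repo, arch, url, failures, lastgood from mirrors "
                "where failures > 0 order by lastgood desc;" )
for row in cursor.fetchall():
    print "%s %s %s failures=%s lastgood=%s" % ( row[0], row[1], row[2], row[3], row[4] )
con.close()
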