extras-buildsys/utils/pushscript/repomd __init__.py, NONE, 1.1 mdErrors.py, NONE, 1.1 mdUtils.py, NONE, 1.1 packageObject.py, NONE, 1.1 packageSack.py, NONE, 1.1 repoMDObject.py, NONE, 1.1 test.py, NONE, 1.1

Michael Schwendt (mschwendt) fedora-extras-commits at redhat.com
Sun Oct 15 12:30:46 UTC 2006


Author: mschwendt

Update of /cvs/fedora/extras-buildsys/utils/pushscript/repomd
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv30805/repomd

Added Files:
	__init__.py mdErrors.py mdUtils.py packageObject.py 
	packageSack.py repoMDObject.py test.py 
Log Message:
Add the patched yum 2.6.1 modules here, so that we remove the dependency on
the system-installed yum.
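
With the modules bundled alongside the push scripts, they can be imported from
this directory instead of from whichever yum is installed system-wide. A
minimal sketch of the intended usage (the repository path is a placeholder and
the flat imports mirror test.py below; the exact import path used by the push
scripts is an assumption):

    # sketch only: use the bundled repomd parser instead of system yum
    import packageSack, packageObject, repoMDObject

    md = repoMDObject.RepoMD('extras', '/path/to/repo/repodata/repomd.xml')
    sack = packageSack.XMLPackageSack(packageObject.RpmXMLPackageObject)
    sack.addFile('extras', '/path/to/repo/' + md.primaryLocation()[1])
    print len(sack), 'packages loaded'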




--- NEW FILE __init__.py ---


--- NEW FILE mdErrors.py ---
#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2003 Duke University


import exceptions

class RepoMDError(exceptions.Exception):
    def __init__(self, args=None):
        exceptions.Exception.__init__(self)    
        self.args = args
        

class PackageSackError(exceptions.Exception):
    def __init__(self, args=None):
        exceptions.Exception.__init__(self)
        self.args = args


--- NEW FILE mdUtils.py ---
#!/usr/bin/python -tt

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2003 Duke University

import rpm
import types

def rpmOutToStr(arg):
    if type(arg) != types.StringType:
        # and arg is not None:
        arg = str(arg)
        
    return arg
    

def compareEVR((e1, v1, r1), (e2, v2, r2)):
    # return 1: a is newer than b
    # 0: a and b are the same version
    # -1: b is newer than a
    e1 = rpmOutToStr(e1)
    v1 = rpmOutToStr(v1)
    r1 = rpmOutToStr(r1)
    e2 = rpmOutToStr(e2)
    v2 = rpmOutToStr(v2)
    r2 = rpmOutToStr(r2)
    #print '%s, %s, %s vs %s, %s, %s' % (e1, v1, r1, e2, v2, r2)
    rc = rpm.labelCompare((e1, v1, r1), (e2, v2, r2))
    #print '%s, %s, %s vs %s, %s, %s = %s' % (e1, v1, r1, e2, v2, r2, rc)
    return rc

def newestInList(pkgs):
    # return the newest in the list of packages
    ret = [ pkgs.pop() ]
    newest = ret[0].returnEVR()
    for pkg in pkgs:
        rc = compareEVR(pkg.returnEVR(), newest)
        if rc > 0:
            ret = [ pkg ]
            newest = pkg.returnEVR()
        elif rc == 0:
            ret.append(pkg)
    return ret

###########
# Title: Remove duplicates from a sequence
# Submitter: Tim Peters 
# From: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52560                      
    
def unique(s):
    """Return a list of the elements in s, but without duplicates.

    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
    unique("abcabc") some permutation of ["a", "b", "c"], and
    unique(([1, 2], [2, 3], [1, 2])) some permutation of
    [[2, 3], [1, 2]].

    For best speed, all sequence elements should be hashable.  Then
    unique() will usually work in linear time.

    If not possible, the sequence elements should enjoy a total
    ordering, and if list(s).sort() doesn't raise TypeError it's
    assumed that they do enjoy a total ordering.  Then unique() will
    usually work in O(N*log2(N)) time.

    If that's not possible either, the sequence elements must support
    equality-testing.  Then unique() will usually work in quadratic
    time.
    """

    n = len(s)
    if n == 0:
        return []

    # Try using a dict first, as that's the fastest and will usually
    # work.  If it doesn't work, it will usually fail quickly, so it
    # usually doesn't cost much to *try* it.  It requires that all the
    # sequence elements be hashable, and support equality comparison.
    u = {}
    try:
        for x in s:
            u[x] = 1
    except TypeError:
        del u  # move on to the next method
    else:
        return u.keys()

    # We can't hash all the elements.  Second fastest is to sort,
    # which brings the equal elements together; then duplicates are
    # easy to weed out in a single pass.
    # NOTE:  Python's list.sort() was designed to be efficient in the
    # presence of many duplicate elements.  This isn't true of all
    # sort functions in all languages or libraries, so this approach
    # is more effective in Python than it may be elsewhere.
    try:
        t = list(s)
        t.sort()
    except TypeError:
        del t  # move on to the next method
    else:
        assert n > 0
        last = t[0]
        lasti = i = 1
        while i < n:
            if t[i] != last:
                t[lasti] = last = t[i]
                lasti += 1
            i += 1
        return t[:lasti]

    # Brute force is all that's left.
    u = []
    for x in s:
        if x not in u:
            u.append(x)
    return u
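

# Illustrative usage (not part of the original yum module): a quick check of
# the EVR comparison helpers above.  Runnable only where the rpm Python
# bindings are available.
if __name__ == '__main__':
    # 1:1.0-2 is newer than 1:1.0-1, so compareEVR() returns 1
    print compareEVR(('1', '1.0', '2'), ('1', '1.0', '1'))
    # unique() drops duplicates but makes no promise about ordering
    print unique(['a', 'b', 'a', 'c'])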


--- NEW FILE packageObject.py ---
#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2003 Duke University

import mdUtils

# consider making an XMLPackageObject
# BasePackageObject - with just methods and the init'd storage dicts
# XMLPackageobject will be used to build the rpmXMLPAckageObject - which is 
# formatnode stuff for rpm.


class PackageObject:
    """Base Package Object - sets up the default storage dicts and the
       most common returns"""
       
    def __init__(self):
        self.simple = {} # simple things, name, arch, e,v,r, size, etc
        self.checksums = [] # (type, checksum, id (0 or 1))

    def __str__(self):
        return self.returnNevraPrintable()

    def returnSimple(self, varname):
        return self.simple[varname]

    def simpleItems(self):
        return self.simple.keys()            

    def returnID(self):
        return self.returnSimple('id')

    def returnPackageTuple(self):
        return (self.returnSimple('name'), self.returnSimple('arch'), 
                self.returnSimple('epoch'),self.returnSimple('version'), 
                self.returnSimple('release'))
        
    def returnNevraTuple(self):
        return (self.returnSimple('name'), self.returnSimple('epoch'), 
                self.returnSimple('version'),self.returnSimple('release'), 
                self.returnSimple('arch'))
    
    def returnNevraPrintable(self):
        """return printable string for the pkgname/object
           name - epoch:ver-rel.arch"""
        if self.returnSimple('epoch') == '0':
            string = '%s - %s-%s.%s' % (self.returnSimple('name'), 
                                        self.returnSimple('version'),
                                        self.returnSimple('release'), 
                                        self.returnSimple('arch'))
        else:
            string = '%s - %s:%s-%s.%s' % (self.returnSimple('name'), 
                                           self.returnSimple('epoch'), 
                                           self.returnSimple('version'), 
                                           self.returnSimple('release'), 
                                           self.returnSimple('arch'))
        return string                                           

    def returnEVR(self):
        """returns a tuple of epoch, ver, rel"""
        return (self.returnSimple('epoch'), self.returnSimple('version'), self.returnSimple('release'))

    def returnChangelog(self):
        """return changelog entries"""
        return self.changelog

class XMLPackageObject(PackageObject):
    """inherits from PackageObject, does the functions to parse an xml package format
       file to pull packages in"""

    def __init__(self):
        PackageObject.__init__(self)
        
    def parseVersion(self, node):
        """takes a version element, returns a tuple of (epoch, ver, rel)"""
        epoch = node.GetAttribute('epoch')
        ver = node.GetAttribute('ver')
        rel = node.GetAttribute('rel')
        return (epoch, ver, rel)
        
    def parseChecksum(self, node):
        """takes a checksum element, returns a tuple of (type, checksum, 
           if it is the checksum to be used for the the package id)"""
           
        csumtype = node.GetAttribute('type')
        csumid = node.GetAttribute('pkgid')
        if csumid is None or csumid.upper() == 'NO':
            csumid = 0
        elif csumid.upper() == 'YES':
            csumid = 1
        else:
            #FIXME - raise an exception
            print 'broken csumid - invalid document'
            csumid = 0
        node.Read()
        csum = node.Value()
        return (csumtype, csum, csumid)
        
    def parseSize(self, node):
        """takes a size element, returns  package, 
           installed and archive size"""
           
        pkg = node.GetAttribute('package')
        installed = node.GetAttribute('installed')
        archive = node.GetAttribute('archive')
        return pkg, installed, archive

    def parseTime(self, node):
        """takes a time element, returns buildtime, filetime(mtime)"""
         
        build = node.GetAttribute('build')
        mtime = node.GetAttribute('file')
        return build, mtime

    def parseLocation(self, node):
        """takes a location element, returnsbase url path, relative path to package"""
        
        base = node.GetAttribute('base')
        relative = node.GetAttribute('href')
        return base, relative
        
    def parseSimple(self, node):
        """takes a simple unattributed CDATA element and returns its value"""
        if node.IsEmptyElement():
            return ''
        node.Read() # get the next node
        return node.Value()
        
    def readPkgNode(self, reader):
        """primary package node reading and dumping"""
        
        mydepth = reader.Depth()
        ret = reader.Read()        
        while ret:
            if reader.NodeType() == 14:
                ret = reader.Read()
                continue

            if reader.NodeType() == 15 and reader.Depth() == mydepth:
                return
                
            if reader.NodeType() == 1:
                if reader.Depth() == mydepth:
                    #print 'oh crap - we are outside - how did that happen??'
                    return

                nodeName = reader.LocalName()

                if nodeName in ['name', 'arch', 'summary', 'description', 
                                'url', 'packager', 'buildtime', 'filetime']:
                                     
                    self.simple[nodeName] = self.parseSimple(reader)

                elif nodeName == 'version': 
                    (self.simple['epoch'], self.simple['version'], 
                     self.simple['release']) = self.parseVersion(reader)
            
                elif nodeName == 'size':
                    self.simple['packagesize'], self.simple['installedsize'], \
                     self.simple['archivesize'] = self.parseSize(reader)
            
                elif nodeName == 'time':
                    self.simple['buildtime'], self.simple['filetime'] = \
                     self.parseTime(reader)
                     
                
                elif nodeName == 'location':
                    self.simple['basepath'], self.simple['relativepath'] = \
                     self.parseLocation(reader)
    
                elif nodeName == 'checksum':
                    (sumtype, sumdata, sumid) = self.parseChecksum(reader)
                    self.checksums.append((sumtype, sumdata, sumid))
                    if sumid:
                        self.simple['id'] = sumdata
                    
                elif nodeName == 'format':
                    try:
                        self.readFormatNode(reader)
                    except AttributeError:
                        # FIXME - should raise an exception
                        print 'No method to handle format element'
                else:
                    pass
                    # FIXME - should raise an exception
                    print 'unknown element in package: %s' % nodeName
    
            ret = reader.Read()
            continue
    

class RpmBase:
    """return functions and storage for rpm-specific data"""

    def __init__(self):
        self.prco = {}
        self.prco['obsoletes'] = [] # (name, flag, (e,v,r))
        self.prco['conflicts'] = [] # (name, flag, (e,v,r))
        self.prco['requires'] = [] # (name, flag, (e,v,r))
        self.prco['provides'] = [] # (name, flag, (e,v,r))
        self.files = {}
        self.files['file'] = []
        self.files['dir'] = []
        self.files['ghost'] = []
        self.changelog = [] # (ctime, cname, ctext)
        self.licenses = []
    
    def returnPrco(self, prcotype):
        """return list of provides, requires, conflicts or obsoletes"""
        if self.prco.has_key(prcotype):
            return self.prco[prcotype]
        else:
            return []

    def checkPrco(self, prcotype, prcotuple):
        """returns 1 or 0 if the pkg contains the requested tuple/tuple range"""
        # get rid of simple cases - nothing
        if not self.prco.has_key(prcotype):
            return 0
        # exact match    
        if prcotuple in self.prco[prcotype]:
            return 1
        else:
            # make us look it up and compare
            (reqn, reqf, (reqe, reqv ,reqr)) = prcotuple
            if reqf is not None:
                if self.inPrcoRange(prcotype, prcotuple):
                    return 1
                else:
                    return 0
            else:
                for (n, f, (e, v, r)) in self.returnPrco(prcotype):
                    if reqn == n:
                        return 1

        return 0
                
    def inPrcoRange(self, prcotype, reqtuple):
        """returns true if the package has a the prco that satisfies 
           the reqtuple range, assume false.
           Takes: prcotype, requested prco tuple"""
        # we only ever get here if we have a versioned prco
        # nameonly shouldn't ever raise it
        (reqn, reqf, (reqe, reqv, reqr)) = reqtuple
        # find the named entry in pkgobj, do the comparison
        for (n, f, (e, v, r)) in self.returnPrco(prcotype):
            if reqn == n:
                # found it
                if f != 'EQ':
                    # isn't this odd, it's not 'EQ' - it really should be
                    # use the pkgobj's evr for the comparison
                    (e, v, r) = self.returnEVR()
                # and you thought we were done having fun
                # if the requested release is left out then we have
                # to remove release from the package prco to make sure the match
                # is a success - ie: if the request is EQ foo 1:3.0.0 and we have 
                # foo 1:3.0.0-15 then we have to drop the 15 so we can match
                if reqr is None:
                    r = None
                if reqe is None:
                    e = None
                if reqv is None: # just for the record if ver is None then we're going to segfault
                    v = None
                rc = mdUtils.compareEVR((e, v, r), (reqe, reqv, reqr))
                
                if rc >= 1:
                    if reqf in ['GT', 'GE', 4, 12]:
                        return 1
                if rc == 0:
                    if reqf in ['GE', 'LE', 'EQ', 8, 10, 12]:
                        return 1
                if rc <= -1:
                    if reqf in ['LT', 'LE', 2, 10]:
                        return 1
        return 0
        
    def returnChangelog(self):
        """return changelog entries"""
        return self.changelog
        
    def returnFileEntries(self, ftype='file'):
        """return list of files based on type"""
        if self.files.has_key(ftype):
            return self.files[ftype]
        else:
            return []
            
    def returnFileTypes(self):
        """return list of types of files in the package"""
        return self.files.keys()
    
    
    
class RpmXMLPackageObject(XMLPackageObject, RpmBase):
    """used class - inherits from XMLPackageObject, which inherits from 
       Package Object also inherits from RpmBase for return functions"""
       
    def __init__(self, node, repoid):
        XMLPackageObject.__init__(self)
        RpmBase.__init__(self)

        self.simple['repoid'] = repoid

        self.readPkgNode(node)
        self.repoid = repoid

    def dumpPkg(self):
        fconv = { 'EQ':'=', 'LT':'<', 'LE':'<=',
                  'GT':'>', 'GE':'>='} 
        for item in self.simpleItems():
            print '%s = %s' % (item, self.returnSimple(item))
        for csum in self.checksums:
            print csum
        for thing in ['requires', 'provides', 'obsoletes', 'conflicts']:
            if len(self.prco[thing]) > 0:
                print '%s:' % thing
                for (n,f,(e,v,r)) in self.prco[thing]:
                    if f is None:
                        print '\t%s ' % n
                    else:
                        print '\t',
                        print n,
                        print fconv[f],
                        print '%s:%s-%s' %(e,v,r)
                print ''
                    
                    
    
    def readFormatNode(self, reader):
        """reads the <format> element and hands off the elements to be 
           parsed elsewhere"""
           
        mydepth = reader.Depth()
        ret = reader.Read()        
        while ret:
            if reader.NodeType() == 14:
                ret = reader.Read()
                continue

            if reader.NodeType() == 15 and reader.Depth() == mydepth:
                return
                
            if reader.NodeType() == 1:
                if reader.Depth() == mydepth:
                    #print 'oh crap - we are outside - how did that happen??'
                    return

                nodeName = reader.LocalName()

                if nodeName in ['vendor', 'group', 'buildhost', 'sourcerpm']:
                    self.simple[nodeName] = self.parseSimple(reader)
                    
                elif nodeName == 'license':
                    self.licenses.append(self.parseSimple(reader))
                
                elif nodeName == 'header-range':
                    self.simple['hdrstart'], self.simple['hdrend'] = \
                     self.parseHdrRange(reader)
                
                elif nodeName in ['obsoletes', 'provides', 'requires', 'conflicts']:
                    objlist = self.parsePrco(reader)
                    self.prco[nodeName].extend(objlist)
                    
                elif nodeName == 'file':
                    self.loadFileEntry(reader)
                    
                    
                else:
                    # FIXME - should raise an exception
                    print 'unknown element in format: %s' % nodeName
                    #pass

            ret = reader.Read()
            continue

    
    def parseHdrRange(self, node):
        """parse header-range, returns (start, end) tuple"""
        
        start = node.GetAttribute('start')
        end = node.GetAttribute('end')
        return start, end
        
    def parsePrco(self, reader):
        """parse a provides,requires,obsoletes,conflicts element"""
        objlist = []
        mydepth = reader.Depth()
        ret = reader.Read()        
        while ret:
            if reader.NodeType() == 14:
                ret = reader.Read()
                continue

            if reader.NodeType() == 15 and reader.Depth() == mydepth:
                return objlist
                
            if reader.NodeType() == 1:
                if reader.Depth() == mydepth:
                    #print 'oh crap - we are outside - how did that happen??'
                    return objlist

                prcoName = reader.LocalName()
                
                if prcoName == 'entry':
                    name = reader.GetAttribute('name')
                    flag = reader.GetAttribute('flags')
                    e = reader.GetAttribute('epoch')
                    v = reader.GetAttribute('ver')
                    r = reader.GetAttribute('rel')
                    # 'pre' (prereq flag) is read but not stored; everything
                    # downstream unpacks prco entries as 3-tuples
                    pre = reader.GetAttribute('pre')
                    objlist.append((name, flag, (e, v, r)))

            ret = reader.Read()
            continue
            
        return objlist

    def loadFileEntry(self, node):
        """load a file/dir entry"""
        ftype = node.GetAttribute('type')
        node.Read() # content is file
        file = node.Value()
        if not ftype:
            ftype = 'file'
        if not self.files.has_key(ftype):
            self.files[ftype] = []
        #if file not in self.files[ftype]:
        self.files[ftype].append(file)

        return (ftype, file)
            
    def loadChangeLogEntry(self, node):
        """load changelog data"""
        time = node.GetAttribute('date')
        author = node.GetAttribute('author')
        node.Read()
        content = node.Value()
        self.changelog.append((time, author, content))
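

# Illustrative sketch (not part of the original yum module): shows how the
# dependency range check in RpmBase.checkPrco()/inPrcoRange() is expected to
# behave, using a hand-built package object.  Attribute names follow the
# classes above; needs the rpm bindings used by mdUtils.compareEVR().
if __name__ == '__main__':
    class _DemoPackage(PackageObject, RpmBase):
        def __init__(self):
            PackageObject.__init__(self)
            RpmBase.__init__(self)

    demo = _DemoPackage()
    demo.simple.update({'name': 'foo', 'epoch': '1', 'version': '3.0.0',
                        'release': '15', 'arch': 'noarch'})
    demo.prco['provides'].append(('foo', 'EQ', ('1', '3.0.0', '15')))
    # a request for "foo >= 1:2.0" is satisfied by foo 1:3.0.0-15 -> prints 1
    print demo.checkPrco('provides', ('foo', 'GE', ('1', '2.0', None)))
    # a request for "foo >= 2:1.0" is not satisfied -> prints 0
    print demo.checkPrco('provides', ('foo', 'GE', ('2', '1.0', None)))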
        


--- NEW FILE packageSack.py ---
#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2003 Duke University

import libxml2
from mdErrors import PackageSackError
import mdUtils

class PackageSack:
    """represents sets (sacks) of Package Objects"""
    def __init__(self):
        self.nevra = {} #nevra[(Name, Epoch, Version, Release, Arch)] = []
        self.obsoletes = {} #obs[obsoletename] = [pkg1, pkg2, pkg3] 
                 #the package lists are packages that obsolete the key name
        self.requires = {} #req[reqname] = [pkg1, pkg2, pkg3]
                 #the package lists are packages that require the key name
        self.provides = {} #ditto of above but for provides
        self.conflicts = {} #ditto of above but for conflicts
        self.filenames = {} # duh
        self.pkgsByRepo = {} #pkgsByRepo['repoid']= [pkg1, pkg2, pkg3]
        self.pkgsByID = {} #pkgsById[pkgid] = [pkg1, pkg2] (should really only ever be one value but
                           #you might have repos with the same package in them
        self.compatarchs = None # dict of compatible archs for addPackage
        self.indexesBuilt = 0
        
        
    def __len__(self):
        return len(self.simplePkgList())
    
    def __iter__(self):
        if hasattr(self.returnPackages(), '__iter__'):
            return self.returnPackages().__iter__()
        else:
            return iter(self.returnPackages())

    def _checkIndexes(self, failure='error'):
        """check to see if the indexes are built, if not do what failure demands
           either error out or build the indexes, default is to error out"""
           
        if not self.indexesBuilt:
            if failure == 'error':
                raise PackageSackError, 'Indexes not yet built, cannot search'
            elif failure == 'build':
                self.buildIndexes()

    def packagesByTuple(self, pkgtup):
        """return a list of package objects by (n,a,e,v,r) tuple"""
        (n,a,e,v,r) = pkgtup
        return self.searchNevra(name=n, arch=a, epoch=e, ver=v, rel=r)
        
    def searchNevra(self, name=None, epoch=None, ver=None, rel=None, arch=None):
        """return list of pkgobjects matching the nevra requested"""
        self._checkIndexes(failure='build')
        if self.nevra.has_key((name, epoch, ver, rel, arch)):
            return self.nevra[(name, epoch, ver, rel, arch)]
        else:
            return []
           
        
    def searchID(self, pkgid):
        """return list of packages based on pkgid"""
        self._checkIndexes(failure='build')        
        if self.pkgsByID.has_key(pkgid):
            return self.pkgsByID[pkgid]
        else:
            return []
            
    def searchRequires(self, name):
        """return list of package requiring the name (any evr and flag)"""
        self._checkIndexes(failure='build')        
        if self.requires.has_key(name):
            return self.requires[name]
        else:
            return []

    def searchProvides(self, name):
        """return list of package providing the name (any evr and flag)"""
        # FIXME - should this do a pkgobj.checkPrco((name, flag, (e,v,r,))??
        # has to do a searchFiles and a searchProvides for things starting with /
        self._checkIndexes(failure='build')        
        returnList = []
        if name[0] == '/':
             returnList.extend(self.searchFiles(name))
        if self.provides.has_key(name):
            returnList.extend(self.provides[name])
        return returnList

    def searchConflicts(self, name):
        """return list of package conflicting with the name (any evr and flag)"""
        self._checkIndexes(failure='build')        
        if self.conflicts.has_key(name):
            return self.conflicts[name]
        else:
            return []

    def searchObsoletes(self, name):
        """return list of package obsoleting the name (any evr and flag)"""
        self._checkIndexes(failure='build')        
        if self.obsoletes.has_key(name):
            return self.obsoletes[name]
        else:
            return []

    def returnObsoletes(self):
        """returns a dict of obsoletes dict[obsoleting pkgtuple] = [list of obs]"""
        obs = {}
        for po in self.returnPackages():
            pkgtuple = po.returnPackageTuple()
            if len(po.returnPrco('obsoletes')) == 0:
                continue

            if not obs.has_key(pkgtuple):
                obs[pkgtuple] = po.returnPrco('obsoletes')
            else:
                obs[pkgtuple].extend(po.returnPrco('obsoletes'))
        
        return obs
        
    def searchFiles(self, file):
        """return list of packages by filename
           FIXME - need to add regex match against keys in file list
        """
        self._checkIndexes(failure='build')
        if self.filenames.has_key(file):
            return self.filenames[file]
        else:
            return []

    def _addToDictAsList(self, dict, key, data):
        if not dict.has_key(key):
            dict[key] = []
        #if data not in dict[key]: - if I enable this the whole world grinds to a halt
        # need a faster way of looking for the object in any particular list
        dict[key].append(data)

    def _delFromListOfDict(self, dict, key, data):
        if not dict.has_key(key):
            dict[key] = []
        try:
            dict[key].remove(data)
        except ValueError:
            pass
            
        if len(dict[key]) == 0: # if it's an empty list of the dict, then kill it
            del dict[key]
            
            
    def addPackage(self, obj):
        """add a pkgobject to the packageSack"""

        repoid = obj.returnSimple('repoid')
        (name, epoch, ver, rel, arch) = obj.returnNevraTuple()
        
        if self.compatarchs:
            if self.compatarchs.has_key(arch):
                self._addToDictAsList(self.pkgsByRepo, repoid, obj)
        else:
            self._addToDictAsList(self.pkgsByRepo, repoid, obj)


    def buildIndexes(self):
        """builds the useful indexes for searching/querying the packageSack
           This should be called after all the necessary packages have been 
           added/deleted"""
        
        # blank out the indexes
        self.obsoletes = {}
        self.requires = {}
        self.provides = {}
        self.conflicts = {}
        self.filenames = {}
        self.nevra = {}
        self.pkgsByID = {}
        
        for repoid in self.pkgsByRepo.keys():
            for obj in self.pkgsByRepo[repoid]:
            # store the things provided just on name, not the whole require+version
            # this lets us reduce the set of pkgs to search when we're trying to depSolve
                for (n, fl, (e,v,r)) in obj.returnPrco('obsoletes'):
                    self._addToDictAsList(self.obsoletes, n, obj)
                for (n, fl, (e,v,r)) in obj.returnPrco('requires'):
                    self._addToDictAsList(self.requires, n, obj)
                for (n, fl, (e,v,r)) in obj.returnPrco('provides'):
                    self._addToDictAsList(self.provides, n, obj)
                for (n, fl, (e,v,r)) in obj.returnPrco('conflicts'):
                    self._addToDictAsList(self.conflicts, n, obj)
                for ftype in obj.returnFileTypes():
                    for file in obj.returnFileEntries(ftype):
                        self._addToDictAsList(self.filenames, file, obj)
                self._addToDictAsList(self.pkgsByID, obj.returnSimple('id'), obj)
                (name, epoch, ver, rel, arch) = obj.returnNevraTuple()
                self._addToDictAsList(self.nevra, (name, epoch, ver, rel, arch), obj)
                self._addToDictAsList(self.nevra, (name, None, None, None, None), obj)
        
        self.indexesBuilt = 1
        

        
    def delPackage(self, obj):
        """delete a pkgobject"""
        self._delFromListOfDict(self.pkgsByRepo, obj.returnSimple('repoid'), obj)
        if self.indexesBuilt: # if we've built indexes, delete it b/c we've just deleted something
            self.indexesBuilt = 0
        
    def returnPackages(self, repoid=None):
        """return list of all packages, takes optional repoid"""
        returnList = []
        if repoid is None:
            for repo in self.pkgsByRepo.keys():
                returnList.extend(self.pkgsByRepo[repo])
        else:
            try:
                returnList = self.pkgsByRepo[repoid]
            except KeyError:
                # nothing to return
                pass
        
        return returnList

    def returnNewestByNameArch(self, naTup=None):
        """return list of newest packages based on name, arch matching
           this means(in name.arch form): foo.i386 and foo.noarch are not 
           compared to each other for highest version only foo.i386 and 
           foo.i386 will be compared"""
        highdict = {}
        # If naTup is set, only iterate through packages that match that
        # name
        if (naTup):
            where = self.nevra.get((naTup[0],None,None,None,None))
            if (not where):
                raise PackageSackError, 'No Package Matching %s.%s' % naTup
        else:
            where = self.returnPackages()

        for pkg in where:
            (n, e, v ,r, a) = pkg.returnNevraTuple()
            if not highdict.has_key((n, a)):
                highdict[(n, a)] = pkg
            else:
                pkg2 = highdict[(n, a)]
                (e2, v2, r2) = pkg2.returnEVR()
                rc = mdUtils.compareEVR((e,v,r), (e2, v2, r2))
                if rc > 0:
                    highdict[(n, a)] = pkg
        
        if naTup:
            if highdict.has_key(naTup):
                return highdict[naTup]
            else:
                raise PackageSackError, 'No Package Matching %s.%s' % naTup
        
        return highdict.values()
        
    def returnNewestByName(self, name=None):
        """return list of newest packages based on name matching
           this means(in name.arch form): foo.i386 and foo.noarch will
           be compared to each other for highest version"""
        highdict = {}
        for pkg in self.returnPackages():
            (n, e, v ,r, a) = pkg.returnNevraTuple()
            if not highdict.has_key(n):
                highdict[n] = []
                highdict[n].append(pkg)
            else:
                pkg2 = highdict[n][0]
                (e2, v2, r2) = pkg2.returnEVR()
                rc = mdUtils.compareEVR((e,v,r), (e2, v2, r2))
                if rc > 0:
                    highdict[n] = [pkg]
                elif rc == 0:
                    highdict[n].append(pkg)
                
        if name:
            if highdict.has_key(name):
                return highdict[name]
            else:
                raise PackageSackError, 'No Package Matching %s' % name
                
        return highdict.values()
           
    def simplePkgList(self, repoid=None):
        """returns a list of pkg tuples (n, a, e, v, r) optionally from a single repoid"""
        simplelist = []
        for pkg in self.returnPackages(repoid):
            simplelist.append(pkg.returnPackageTuple())
        return simplelist
                       
    def printPackages(self):
        for pkg in self.returnPackages():
            print pkg.returnNevraPrintable()

    def excludeArchs(self, archlist):
        """exclude incompatible arches. archlist is a list of compatible arches"""
        
        for pkg in self.returnPackages():
            if pkg.returnSimple('arch') not in archlist:
                self.delPackage(pkg)



# packageSack should be a base class
# two derived classes could be DBPackageSack and XMLPackageSack
# one for importing this data from the localdb 
# another from XML metadata files

class XMLPackageSack(PackageSack):
    """Derived class from PackageSack to build list from XML metadata file. 
       Needs the Package Object Class passed to it for the Sack"""
    def __init__(self, pkgObjectClass):
        PackageSack.__init__(self)
        self.repoStatus = {} #[repoid]= [primary, filelist, other] (so you can tell 
                             #what things have been loaded or not - b/c w/o primary, 
                             #filelist and other really can't be loaded
        self.pkgObjectClass = pkgObjectClass                           

                
    def addFile(self, repoid, file, callback=None):
        """takes a repository id and an xml file. It populates whatever it can, 
           if you try to populate with a filelist or other metadata file 
           before the primary metadata you'll not like the results"""
        try:
            reader = libxml2.newTextReaderFilename(file)
        except libxml2.treeError:
            raise PackageSackError, "Invalid or non-existent file: %s" % (file)

        else:
            reader.Read()
            xmlfiletype=reader.Name() # - first node should be the type
            if xmlfiletype == 'metadata':
                if not self._checkRepoStatus(repoid, itemcheck='primary'):
                    self.loadPrimaryMD(reader, repoid, callback)

            elif xmlfiletype == 'filelists':
                if not self._checkRepoStatus(repoid, itemcheck='filelists'):
                    self.loadFileMD(reader, repoid, callback)

            elif xmlfiletype == 'otherdata':
                if not self._checkRepoStatus(repoid, itemcheck='other'):
                    self.loadOtherMD(reader, repoid, callback)

            else:
                print 'Error: other unknown root element %s' % xmlfiletype 


    def _checkRepoStatus(self, repoid, itemcheck='primary'):
        """return 1 if itemcheck is in repo"""
        if self.repoStatus.has_key(repoid):
            if itemcheck in self.repoStatus[repoid]:
                return 1
        return 0
            
    def loadPrimaryMD(self, reader, repoid, callback=None):
        """load all the data from the primary metadata xml file"""
        
        pkgcount = 9999 # big number
        current = 1
        if reader.HasAttributes():
            pkgcount = int(reader.GetAttribute('packages'))
            

        
        ret = reader.Read()
        while ret:
            if reader.NodeType() == 14:
                ret = reader.Read()
                continue
            
            if reader.NodeType() == 1 and reader.Name() == 'package':
                if reader.HasAttributes():
                    if reader.GetAttribute('type') == 'rpm':
                        current+=1
                        po = self.pkgObjectClass(reader, repoid)
                        self.addPackage(po)
            if callback: callback(current, pkgcount, name=repoid)
            ret = reader.Read()
            continue

        # update the repoStatus                
        if not self.repoStatus.has_key(repoid):
            self.repoStatus[repoid] = []
        if not 'primary' in self.repoStatus[repoid]:
            self.repoStatus[repoid].append('primary')


    def loadFileMD(self, reader, repoid, callback=None):
        """load all the filelist metadata from the file"""

        pkgcount = 9999 # big number
        current = 1
        if reader.HasAttributes():
            pkgcount = int(reader.GetAttribute('packages'))

        ret = reader.Read()
        while ret:
            if reader.NodeType() == 14:
                ret = reader.Read()
                continue
            
            if reader.NodeType() == 1 and reader.Name() == 'package':
                if reader.HasAttributes():
                    pkgid = reader.GetAttribute('pkgid')
                    pkgs = self.searchID(pkgid)
                    pkgmatch = 0
                    mydepth = reader.Depth()
                    current+=1

                    for pkg in pkgs:
                        if pkg.returnSimple('repoid') == repoid: # check for matching repo
                            reader.Read()
                            pkgmatch+=1
                            
                            while 1:
                                if reader.NodeType() == 15 and reader.Depth() == mydepth:
                                    break
                                    
                                elif reader.NodeType() == 14:
                                    ret = reader.Read()
                                    continue

                                elif reader.NodeType() == 1:
                                    if reader.LocalName() == 'file':
                                        (ftype, file) = pkg.loadFileEntry(reader)
                                        #self._addToDictAsList(self.filenames, file, pkg)

                                ret = reader.Read()
                                continue        

                    if pkgmatch < 1:
                        # FIXME - raise a warning? Emit error? bitch? moan?
                        pass

                               
            ret = reader.Read()
            if callback: callback(current, pkgcount, name=repoid) # give us some pretty output
            continue

        # update the repostatus
        if not 'filelists' in self.repoStatus[repoid]:
            self.repoStatus[repoid].append('filelists')
        # we've just added file items - build up the indexes again
        self.buildIndexes()
        
            
    def loadOtherMD(self, reader, repoid, callback=None):
        """load the changelog, etc data from the other.xml file"""

        pkgcount = 9999 # big number
        current = 1
        if reader.HasAttributes():
            pkgcount = int(reader.GetAttribute('packages'))

        ret = reader.Read()
        while ret:
            if reader.NodeType() == 14:
                ret = reader.Read()
                continue
            
            if reader.NodeType() == 1 and reader.Name() == 'package':
                current+=1
                if reader.HasAttributes():
                    pkgid = reader.GetAttribute('pkgid')
                    pkgs = self.searchID(pkgid)
                    pkgmatch = 0
                    mydepth = reader.Depth()
                    #current+=1
                    

                    for pkg in pkgs:
                        if pkg.returnSimple('repoid') == repoid: # check for matching repo
                            reader.Read()
                            pkgmatch+=1
                            
                            while 1:
                                if reader.NodeType() == 15 and reader.Depth() == mydepth:
                                    break
                                    
                                elif reader.NodeType() == 14:
                                    ret = reader.Read()                                                        
                                    continue

                                elif reader.NodeType() == 1:
                                    if reader.LocalName() == 'changelog':
                                        pkg.loadChangeLogEntry(reader)

                                ret = reader.Read()
                                continue        

                    if pkgmatch < 1:
                        # FIXME - raise a warning? Emit error? bitch? moan?
                        pass
            if callback: callback(current, pkgcount, name=repoid)
            ret = reader.Read()
            continue
                                        
        if not 'other' in self.repoStatus[repoid]:
            self.repoStatus[repoid].append('other')
        # we've just added file items - build up the indexes again
        self.buildIndexes()
        

class ListPackageSack(PackageSack):
    """Derived class from PackageSack to build new Sack from list of
       pkgObjects - like one returned from self.returnNewestByNameArch()
       or self.returnNewestByName()"""
       
    def __init__(self, Objlist=None):
        PackageSack.__init__(self)
        if Objlist is not None:
            self.addList(Objlist)
    
    def addList(self, ObjList):
        for pkgobj in ObjList:
            self.addPackage(pkgobj)
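

# Illustrative usage (not part of the original yum module): ListPackageSack is
# handy for re-sacking just the newest packages.  Assuming 'pkgSack' is an
# already-populated XMLPackageSack (see test.py in this directory):
#
#     newestSack = ListPackageSack(pkgSack.returnNewestByNameArch())
#     for pkgtup in newestSack.simplePkgList():
#         print pkgtup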
    


--- NEW FILE repoMDObject.py ---
#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2003 Duke University


import libxml2
from mdErrors import RepoMDError


class RepoMD:
    """represents the repomd xml file"""
    def __init__(self, repoid, file):
        """takes a repoid and a filename for the repomd.xml"""
        
        self.repoid = repoid
        self.repoData = {}
        try:
            doc = libxml2.parseFile(file)
        except libxml2.parserError:
            raise RepoMDError, 'Error: could not parse file %s' % file
        root = doc.getRootElement()
        xmlfiletype = root.name
        node = root.children
        if xmlfiletype == 'repomd':
            self.loadRepoMD(node)
        else:
            raise RepoMDError, 'Error: other unknown root element %s' % xmlfiletype 
        doc.freeDoc()

    def _returnData(self, mdtype, request):
        """ return the data from the repository Data"""
        if self.repoData.has_key(mdtype):
            ds = self.repoData[mdtype]
            if ds.has_key(request):
                return ds[request]
            else:
                raise RepoMDError, "Error: request %s not in %s data" % (request, mdtype)
        else:
            raise RepoMDError, "Error: odd MDtype requested: %s" % mdtype
            
            
            
    
    def _storeRepoData(self, mdtype, dataname, data):
        """stores repository data
           mdtype = primary, filelists, other, group
           dataname = checksum, timestamp, basepath, relativepath
        """
        if self.repoData.has_key(mdtype):
            ds = self.repoData[mdtype]
            if not ds.has_key(dataname):
                ds[dataname] = data
            else:
                raise RepoMDError, "Warning: duplicate data of %s description inputted" % dataname
        else:
            raise RepoMDError, "Warning: odd mdtype being put in %s" % mdtype
            
                
                
                
    def loadRepoDataNode(self, node):
        """loads a repository data node into the class"""
        mdtype = node.prop('type') # get the 'type' property for the datanode
        if not self.repoData.has_key(mdtype):
            self.repoData[mdtype] = {}
            
        datanode = node.children            
        while datanode is not None:
            if datanode.type != 'element':
                datanode = datanode.next
                continue
            
            if datanode.name  == 'location':
                base = datanode.prop('base')
                relative = datanode.prop('href')    
                self._storeRepoData(mdtype, 'basepath', base)
                self._storeRepoData(mdtype, 'relativepath', relative)
            elif datanode.name == 'checksum':
                csumType = datanode.prop('type')
                csum = datanode.content
                self._storeRepoData(mdtype, 'checksum', (csumType, csum))
            elif datanode.name == 'timestamp':
                timestamp = datanode.content
                self._storeRepoData(mdtype, 'timestamp', timestamp)

            datanode = datanode.next    
            continue

    def loadRepoMD(self, node):
        """iterates through the data nodes and populates some simple data areas"""
                
        while node is not None:
            if node.type != 'element':
                node = node.next
                continue
            
            if node.name == 'data':
                self.loadRepoDataNode(node)
                    
            node = node.next
            continue
                
    def _checksum(self, mdtype):
        """returns a tuple of (checksum type, checksum) for the specified Metadata
           file"""
        return self._returnData(mdtype, 'checksum')
        
        
    def _location(self, mdtype):
        """returns location to specified metadata file, (base, relative)"""
        base = self._returnData(mdtype, 'basepath')
        relative = self._returnData(mdtype, 'relativepath')
        
        return (base, relative)
        
    def _timestamp(self, mdtype):
        """returns timestamp for specified metadata file"""
        return self._returnData(mdtype, 'timestamp')
        
    def otherChecksum(self):
        """returns a tuple of (checksum type, checksum) for the other Metadata file"""
        return self._checksum('other')
        
    def otherLocation(self):
        """returns location to other metadata file, (base, relative)"""
        return self._location('other')
        
    def otherTimestamp(self):
        """returns timestamp for other metadata file"""
        return self._timestamp('other')
        
    def primaryChecksum(self):
        """returns a tuple of (checksum type, checksum) for the primary Metadata file"""
        return self._checksum('primary')
        
    def primaryLocation(self):
        """returns location to primary metadata file, (base, relative)"""
        return self._location('primary')
        
    def primaryTimestamp(self):
        """returns timestamp for primary metadata file"""
        return self._timestamp('primary')

    def filelistsChecksum(self):
        """returns a tuple of (checksum type, checksum) for the filelists Metadata file"""
        return self._checksum('filelists')
        
    def filelistsLocation(self):
        """returns location to filelists metadata file, (base, relative)"""
        return self._location('filelists')
        
    def filelistsTimestamp(self):
        """returns timestamp for filelists metadata file"""
        return self._timestamp('filelists')

    def groupChecksum(self):
        """returns a tuple of (checksum type, checksum) for the group Metadata file"""
        return self._checksum('group')
        
    def groupLocation(self):
        """returns location to group metadata file, (base, relative)"""
        return self._location('group')
        
    def groupTimestamp(self):
        """returns timestamp for group metadata file"""
        return self._timestamp('group')

    def fileTypes(self):
        """return list of metadata file types available"""
        return self.repoData.keys()


--- NEW FILE test.py ---
#!/usr/bin/python -tt
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# Copyright 2003 Duke University

# classes for parsing the metadata files for the new metadata format


# used with python -i :)
import sys
import os
import time
import rpm
import packageSack
import packageObject
import repoMDObject
import mdUtils
import mdErrors


def process(current, total, name=None):
    sys.stdout.write('\r' + ' ' * 80)
    sys.stdout.write('\rNode %d of %d' % (current, total))
    sys.stdout.flush()

if len(sys.argv) < 4:
    print 'test.py: /path/to/repo /other/repo somepackagename'
    sys.exit(1)
   
print time.time()
repos = sys.argv[1:3]
pkgSack = packageSack.XMLPackageSack(packageObject.RpmXMLPackageObject)
numid = 0
for repo in repos:
    numid+=1
    basepath = repo
    repomdxmlfile = os.path.join(basepath, 'repodata/repomd.xml')
    repoid = repo

    try:
        repodata = repoMDObject.RepoMD(repoid, repomdxmlfile)
    except mdErrors.RepoMDError, e:
        print >> sys.stderr, e
        sys.exit(1)
    
    (pbase, phref) = repodata.primaryLocation()
    (fbase, fhref) = repodata.filelistsLocation()
    (obase, ohref) = repodata.otherLocation()
    
    
    processlist = [phref]
    for file in processlist:
        print time.time()
        print 'importing %s from %s' % (file, repoid)
        complete = basepath + '/' + file
        try:
            pkgSack.addFile(repoid, complete, process)
        except mdErrors.PackageSackError, e:
            print >> sys.stderr, e
            sys.exit(1)
            
    print ' '
    print time.time()

for pkg in pkgSack.searchNevra(sys.argv[3]):
    print pkg
    for reqtup in pkg.returnPrco('requires'):
        (reqn, reqf, (reqe,reqv,reqr)) = reqtup
        # rpmlib deps should be handled on their own
        if reqn[:6] == 'rpmlib':
            continue
        # kill self providers, too
        if pkg.checkPrco('provides', reqtup):
            continue
            
        # get a list of all pkgs that match the reqn
        providers = pkgSack.searchProvides(reqn)
        if len(providers) == 0:
            print 'unresolved: %s  %s %s:%s-%s' % (reqn, reqf, reqe, reqv, reqr)
            continue

        if len(providers) == 1:
            if reqf is None:
                print '%s: %s from %s' % (reqn, providers[0], providers[0].returnSimple('relativepath'))
                continue

            # only one entry, but we still need to match it against the requested range
            if providers[0].checkPrco('provides', reqtup):
                print '%s: %s from %s' % (reqn, providers[0], providers[0].returnSimple('relativepath'))
                continue

        
        output = '%s:' % reqn
        for prov in providers:
            if reqf is not None:
                if prov.checkPrco('provides', reqtup):
                    output = output + '||' + prov.__str__()
                else:
                    print '%s does not provide %s %s %s %s %s' % (prov, reqn, reqf, reqe, reqv, reqr)                
            else:
                output = output + '||' + prov.__str__()
                
        print output
print time.time()




