#!/usr/bin/env python
## echo_pull - pulls echo icons off the fedora wiki
## Copyright (C) 2006 Red Hat, Inc.
## Copyright (C) 2006 John (J5) Palmieri
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""Fetch the Echo icon attachments linked from a wiki page.

The page at ``base_url + echo_dir`` is downloaded, every ``<a href>`` whose
query string has a ``target=`` ending in ``.svg`` or ``.png`` is collected,
and each such attachment is saved into ``base_dir`` using ``wget``.
"""

import os
import re
import subprocess
import sys

try:  # Python 2 module names
    import urllib2
    import HTMLParser
except ImportError:  # Python 3: bind the same names to the successor modules
    import urllib.request as urllib2  # also exposes HTTPError and URLError
    import html.parser as HTMLParser

base_url = 'http://fedoraproject.org/'
echo_dir = 'wiki/Artwork/EchoDevelopment'
base_dir = 'echo_art'

# group(1): path before the query string; group(2): attachment file name.
# The extension is a real alternation.  The previous pattern used the
# character class [(\.svg)(\.png)], which accepted any target containing
# one of the characters "().svgpn" (e.g. "notes.txt" matched as "notes.").
href_re = re.compile(r'/(.*)\?.*target=(.*\.(?:svg|png))')


def _mkdir(newdir):
    """works the way a good mkdir should :)
        - already exists, silently complete
        - regular file in the way, raise an exception
        - parent directory(ies) does not exist, make them as well
    """
    if os.path.isdir(newdir):
        return
    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired "
                      "dir, '%s', already exists." % newdir)
    head, tail = os.path.split(newdir)
    if head and not os.path.isdir(head):
        _mkdir(head)
    # tail is empty when newdir ends with a separator; the recursive call
    # above has already created the directory in that case.
    if tail:
        os.mkdir(newdir)


class ArtParser(HTMLParser.HTMLParser):
    """HTML parser that downloads every artwork attachment it sees linked."""

    def handle_starttag(self, tag, attr):
        """Inspect ``<a>`` tags and download hrefs that match ``href_re``.

        ``attr`` is the list of ``(name, value)`` pairs supplied by the
        parser.
        """
        if tag != 'a':
            return
        for name, value in attr:
            # A bare attribute such as <a href> is reported with value None,
            # which the regex cannot be applied to.
            if name != 'href' or not value:
                continue
            match = href_re.match(value)
            if match:
                self.filter_and_download(base_url, value, match.group(2))

    def download(self, url, directory, filename):
        """Save ``url`` as ``directory/filename`` using wget.

        ``directory`` is created if needed.  Failures are reported on
        stderr; nothing is returned.  ``_mkdir`` may raise OSError when a
        regular file is in the way of ``directory``.
        """
        # The name comes from a remote page: keep only the last path
        # component so it cannot point outside ``directory``.
        safe_name = os.path.basename(filename)
        if not safe_name:
            sys.stderr.write('Error downloading %s: no usable file name '
                             'in %r\n' % (url, filename))
            return

        _mkdir(directory)
        file_path = os.path.join(directory, safe_name)

        # Argument list, no shell: URL and path text is never interpreted
        # as shell syntax.
        try:
            status = subprocess.call(['wget', '-O', file_path, url])
        except OSError:
            # wget could not be started (e.g. it is not installed).
            status = 1
        if status:
            sys.stderr.write('Error downloading %s to %s\n' % (url, file_path))

    def filter(self, file):
        """Decide where ``file`` should be stored.

        Returns ``(directory, file_name)``, or None when the file is to be
        skipped (names starting with ``image-missing``).
        """
        if file.startswith('image-missing'):
            return None
        return (base_dir, file)

    def filter_and_download(self, base_url, resource, file):
        """Download ``resource`` (relative to ``base_url``) unless filtered."""
        destination = self.filter(file)
        if not destination:
            return
        # Join with exactly one slash; base_url ends with "/" and matched
        # hrefs begin with "/", which used to yield "host//path".
        art_url = '%s/%s' % (base_url.rstrip('/'), resource.lstrip('/'))
        filtered_dir, filtered_file = destination
        self.download(art_url, filtered_dir, filtered_file)


def main():
    """Fetch the wiki page and download every linked icon.

    Exits with status 1 on an HTTP error and 2 on any other network error.
    """
    # get the main page
    url = '%s%s' % (base_url, echo_dir)
    try:
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
    except urllib2.HTTPError as e:  # must precede URLError (its base class)
        sys.stderr.write('HTTP error: %d\n' % e.code)
        sys.exit(1)
    except urllib2.URLError as e:
        sys.stderr.write('Network error: %s\n' % e)
        sys.exit(2)

    # On Python 3 the body is bytes, but the parser needs text.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')

    # pull out tags with a target graphics in the href
    parser = ArtParser()
    parser.feed(data)
    parser.close()


if __name__ == '__main__':
    main()