#!/bin/env python

## echo_pull - pulls echo icons off the fedora wiki 

## Copyright (C) 2006 Red Hat, Inc.
## Copyright (C) 2006 John (J5) Palmieri <johnp@redhat.com>

## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# Root of the wiki site; hrefs found on the page are appended to this.
base_url = 'http://fedoraproject.org/'
# Path (relative to base_url) of the wiki page listing the artwork.
echo_dir = 'wiki/Artwork/EchoDevelopment'
# Local directory the downloaded artwork is stored in.
base_dir = 'echo_art'

import sys
import os
import urllib2
import HTMLParser 

import re

# Matches attachment links of the form "/<page>?<query>target=<name>" where
# <name> ends in ".svg" or ".png".  Group 1 is the text between the leading
# slash and the "?"; group 2 is the attachment file name.
# The extension is an alternation: a character class such as
# "[(\.svg)(\.png)]" would accept any name merely ending in one of the
# characters "(", ")", ".", "s", "v", "g", "p" or "n".
href_re = re.compile(r'/(.*)\?.*target=(.*\.(?:svg|png))')

def _mkdir(newdir):
    """Create the directory newdir, including any missing parents.

        - if newdir is already a directory, do nothing
        - if a regular file occupies that name, raise OSError
        - missing parent directories are created first (recursively)
    """
    if os.path.isdir(newdir):
        return

    if os.path.isfile(newdir):
        raise OSError("a file with the same name as the desired " \
                      "dir, '%s', already exists." % newdir)

    parent, leaf = os.path.split(newdir)

    # Make sure the parent chain exists before creating the leaf.
    if parent and not os.path.isdir(parent):
        _mkdir(parent)

    # An empty leaf (trailing separator or empty path) leaves nothing
    # further to create at this level.
    if leaf:
        os.mkdir(newdir)

class ArtParser(HTMLParser.HTMLParser):
    """HTML parser that downloads the artwork linked from a wiki page.

    Every <a> tag whose href matches href_re is treated as an attachment
    link; the attachment is fetched with wget unless filter() rejects it.
    """

    def handle_starttag(self, tag, attr):
        """Look for artwork links in <a> tags and download them.

        tag  -- name of the start tag being handled
        attr -- list of (name, value) pairs for the tag's attributes
        """
        if tag != 'a':
            return

        for name, value in attr:
            # An attribute written without "=value" has a value of None,
            # which cannot be matched against the regular expression.
            if name != 'href' or not value:
                continue
            match = href_re.match(value)
            if match:
                self.filter_and_download(base_url, value, match.group(2))

    def download(self, url, directory, filename):
        """Fetch url with wget and store it as directory/filename.

        The directory is created first if it does not exist.  A failed
        wget run is reported on stderr rather than raised.

        url       -- address to download
        directory -- local directory to store the file in
        filename  -- name of the file inside directory
        """
        # Local import: this is the only place subprocess is needed.
        import subprocess

        # The name originates from the scraped page; keep only its final
        # component so it cannot point outside of directory.
        filename = os.path.basename(filename)
        if not filename:
            sys.stderr.write('Error downloading %s: no usable file name\n'
                             % url)
            return

        if not os.path.isdir(directory):
            _mkdir(directory)

        file_path = os.path.join(directory, filename)
        try:
            # Pass an argument list (no shell) so that characters in the
            # url or file name are never interpreted as shell syntax.
            error = subprocess.call(['wget', url, '-O', file_path])
        except OSError:
            # wget could not be started at all (e.g. it is not installed).
            error = True

        if error:
            sys.stderr.write('Error downloading %s to %s\n' % (url,
                                                              file_path))

    def filter(self, file):
        """Decide whether, and where, an attachment should be stored.

        file -- attachment file name taken from the link

        Returns None for names starting with 'image-missing' (these are
        skipped), otherwise a (directory, filename) tuple.
        """
        if file.startswith('image-missing'):
            return None
        return (base_dir, file)

    def filter_and_download(self, base_url, resource, file):
        """Download resource (relative to base_url) unless filter() rejects it.

        base_url -- site root the resource path is appended to
        resource -- href of the attachment as found in the page
        file     -- attachment file name extracted from the href
        """
        # Join with exactly one slash: base_url ends with "/" and the
        # matched hrefs start with "/", which would otherwise yield "//".
        art_url = "%s/%s" % (base_url.rstrip('/'), resource.lstrip('/'))

        target = self.filter(file)
        if target:
            (filtered_dir, filtered_file) = target
            self.download(art_url, filtered_dir, filtered_file)

def main():
    """Fetch the artwork wiki page and download the artwork it links to.

    The page at base_url + echo_dir is read and fed to an ArtParser, which
    performs the downloads.  Fetch failures are reported on stderr and end
    the program: exit status 1 for an HTTP error, 2 for any other URL error.
    """
    # get the main page
    url = '%s%s' % (base_url, echo_dir)
    try:
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
    except urllib2.HTTPError:
        # HTTPError is a subclass of URLError, so it must be handled first.
        # sys.exc_info() is used to obtain the exception so the handler does
        # not depend on version-specific "except" syntax.
        e = sys.exc_info()[1]
        sys.stderr.write("HTTP error: %d\n" % e.code)
        # sys.exit rather than the interactive-only exit() helper from site.
        sys.exit(1)
    except urllib2.URLError:
        e = sys.exc_info()[1]
        sys.stderr.write("Network error: %s\n" % e)
        sys.exit(2)

    # pull out <a > tags with a target graphics in the href
    p = ArtParser()
    p.feed(data)
    p.close()

# Run only when executed as a script, so that importing this module does not
# start fetching pages and downloading files.
if __name__ == '__main__':
    main()