extras-buildsys/common FileDownloader.py, NONE, 1.1 FileServer.py, NONE, 1.1

Daniel Williams (dcbw) fedora-extras-commits at redhat.com
Wed Jun 8 15:55:59 UTC 2005


Author: dcbw

Update of /cvs/fedora/extras-buildsys/common
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv761/common

Added Files:
	FileDownloader.py FileServer.py 
Log Message:
2005-06-08  Dan Williams <dcbw at redhat.com>

    * Refactor FileDownload.py and FileServer.py, moving them into a shared
      directory common/.  Fix client & server to point to these modules and
      implement callbacks in each that are required by FileDownload.py.  Since
      at this time we don't install anything to site-packages, the build-client
      and build-server scripts are necessary to properly set up PYTHONPATH to
      find stuff in the common/ directory.
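
Since nothing is installed to site-packages yet, the wrapper scripts have to put
common/ on the module search path themselves.  A minimal sketch of what such a
launcher could do -- the layout and names here are assumptions for illustration,
not the actual contents of build-client or build-server:

import os
import sys

# Assume the script lives at the top of the checkout, next to common/
topdir = os.path.dirname(os.path.abspath(sys.argv[0]))
commondir = os.path.join(topdir, 'common')

# Prepend common/ so FileDownloader and FileServer can be imported directly
if commondir not in sys.path:
    sys.path.insert(0, commondir)

import FileDownloader
import FileServer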




--- NEW FILE FileDownloader.py ---
#!/usr/bin/python -t
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Copyright 2005 Dan Williams and Red Hat, Inc.
#

import threading
import urlgrabber
import urlgrabber.grabber
import urllib
import os


def get_base_filename_from_url(url, legal_exts):
    """ Safely unquotes a URL and gets the base file name from it.
        We're not using urlparse here because it doesn't un-escape stuff """

    unquoted = url
    last_unquoted = None
    count = 5
    
    # Keep unquoting the string until the last two unquote operations
    # produce the same string
    while (unquoted != last_unquoted) and (count > 0):
        last_unquoted = unquoted
        unquoted = urllib.unquote_plus(unquoted)
        count = count - 1

    # If after 5 iterations of unquoting, the strings still aren't the same,
    # something is wrong.
    if (count == 0) and (unquoted != last_unquoted):
        return None

    # Try to grab the filename off the end of the URL
    index = url.rfind('/')
    if index == -1:
        return None
    filename = url[index+1:]

    # Only accept certain file extensions
    ext_ok = False
    for ext in legal_exts:
        if filename.endswith(ext):
            ext_ok = True
            break

    if not ext_ok:
        return None

    # FIXME: what other validation can we do here?
    for c in filename:
        # For now, legal characters are '_-.' plus alphanumeric
        if (c == '_') or (c == '-') or (c == '.') or c.isalnum():
            pass
        else:
            return None

    return filename
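
# Illustrative example: for a URL such as
# "http://buildserver/repo/foo-1.0-1.src.rpm" with legal_exts = ['.rpm'],
# get_base_filename_from_url() returns "foo-1.0-1.src.rpm".  A basename with
# a disallowed extension, or with characters outside alphanumerics and '_-.',
# makes the function return None.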


class FileDownloader(threading.Thread):

    def __init__(self, callback, cb_data, url, target_dir, legal_exts):
        self._callback = callback
        self._cb_data = cb_data
        self._url = url
        self._target_dir = target_dir
        self._filename = get_base_filename_from_url(self._url, legal_exts)
        if not self._filename:
            print "Couldn't get base filename from url!!  target_dir=%s, url=%s" % (target_dir, url)
        threading.Thread.__init__(self)

    def run(self):
        success = False
        if self._url and self._target_dir and self._filename:
            if not os.path.exists(self._target_dir):
                os.makedirs(self._target_dir)
            os.chdir(self._target_dir)
            target_file = os.path.join(self._target_dir, self._filename)
            try:
                result = urlgrabber.urlgrab(self._url, target_file)
                if result:
                    success = True
            except urlgrabber.grabber.URLGrabError:
                # Leave success as False; the failure is reported through
                # the callback below instead of killing the thread
                pass

        if success:
            self._callback('done', self._cb_data)
        else:
            self._callback('failed', self._cb_data)
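
A rough sketch of the calling convention the class above implies -- the callback
receives the string 'done' or 'failed' plus the opaque cb_data value.  The URL,
directory, extension list, and callback name below are made up for illustration;
the real callers are the client and server pieces mentioned in the log message:

import FileDownloader

def dl_callback(status, data):
    # status is 'done' or 'failed'; data is whatever was passed as cb_data
    print "download %s: %s" % (data, status)

url = 'http://buildserver.example.com/repo/foo-1.0-1.src.rpm'
dl = FileDownloader.FileDownloader(dl_callback, 'job-42', url,
                                   '/tmp/downloads', ['.rpm'])
dl.start()    # runs the download in its own thread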



--- NEW FILE FileServer.py ---
#!/usr/bin/python -t
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# Copyright 2005 Dan Williams and Red Hat, Inc.
#

import SimpleHTTPServer
import SocketServer
import threading
import os
import urllib
import posixpath

BaseRequestHandler = SimpleHTTPServer.SimpleHTTPRequestHandler

class HttpRequestHandler(BaseRequestHandler):

    def __init__(self, request, client_address, server):
        self._server = server
        BaseRequestHandler.__init__(self, request, client_address, server)

    def list_directory(self, path):
        self.send_error(404, "No permission to list directory")

    def log_request(self, code='-', size='-'):
        # Don't log requests
        pass

    def translate_path(self, path):
        """Translate a /-separated PATH to the local filename syntax.

        Components that mean special things to the local file system
        (e.g. drive or directory names) are ignored.  (XXX They should
        probably be diagnosed.)

        This code is lifted from SimpleHTTPRequestHandler so that we can
        make sure the request is always based in our download directory,
        not the current directory.
        """
        path = posixpath.normpath(urllib.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        path = self._server.http_dir
        for word in words:
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        return path

    def do_GET(self):
        BaseRequestHandler.do_GET(self)
#        try:
#            BaseRequestHandler.do_GET(self)
#        except Exception, e:
#            # We get an exception if the client drops the transfer
#            pass

class ThreadingHttpServer(SocketServer.ThreadingTCPServer):

    def __init__(self, server_address, RequestHandlerClass, http_dir):
        self.protocol_version = "HTTP/1.0"    # Don't want keepalive
        self.allow_reuse_address = 1
        self.http_dir = http_dir
        SocketServer.ThreadingTCPServer.__init__(self, server_address, RequestHandlerClass)


class FileServer(threading.Thread):

    def __init__(self, address_tuple, http_dir):
        self._server = ThreadingHttpServer(address_tuple, HttpRequestHandler, http_dir)
        threading.Thread.__init__(self)

    def run(self):
        self._server.serve_forever()
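
For completeness, a minimal sketch of how this thread might be started.  The
bind address, port, and directory are placeholders; the real values come from
the client/server configuration, which is not part of this commit:

import FileServer

# Serve files out of /tmp/http-downloads on port 8886 (example values only)
server = FileServer.FileServer(('', 8886), '/tmp/http-downloads')
server.setDaemon(True)    # don't keep the process alive for this thread
server.start()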



