#!/usr/bin/env python
# -*- coding: utf-8 -*-
########################################################################
#
# Project: Metalink Checker
# URL: http://www.nabber.org/projects/
# E-mail: webmaster@nabber.org
#
# Copyright: (C) 2007-2011, Neil McNab
# License: GNU General Public License Version 2
# (http://www.gnu.org/copyleft/gpl.html)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Filename: $URL$
# Last Updated: $Date$
# Author(s): Neil McNab
#
# Description:
# Download library that can handle metalink files, RFC 5854, RFC 6249.
# Also supports Instance Digests, RFC 3230.
#
# Library Instructions:
# - Call get() with the source (URL or file path) and a destination
#   directory; metalink, meta4, and jigdo sources are detected and
#   decoded automatically, anything else is a plain download.
#
# import download
#
# files = download.get("file.metalink", os.getcwd())
#
# Callback Definitions:
# def cancel():
#     Returns True to cancel, False otherwise
# def pause():
#     Returns True to pause, False to continue/resume
# def status(block_count, block_size, total_size):
#     Same format as urllib.urlretrieve reporthook
#     block_count - a count of blocks transferred so far
#     block_size - a block size in bytes
#     total_size - the total size of the file in bytes
# def bitrate(bitrate):
#     bitrate - kilobits per second (float)
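#
# Illustrative sketch of wiring callbacks into a download; the handler
# names below are the dictionary keys the library expects, the bodies
# are hypothetical:
#
#   def status(block_count, block_size, total_size):
#       if total_size > 0:
#           print "%.1f%%" % (100.0 * block_count * block_size / total_size)
#
#   def cancel():
#       return False
#
#   files = download.get("file.metalink", os.getcwd(),
#                        handlers={"status": status, "cancel": cancel})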
#
########################################################################
#import logging
#import utils
import urllib2
import urlparse
import hashlib
import os
import metalink
import locale
import threading
#import thread
import time
import copy
import socket
import ftplib
import httplib
#import logging
import base64
import sys
import gettext
import binascii
import random
try: import GPG
except ImportError: pass
import proxy
# for jython support
#try: import bz2
#except ImportError: pass
import BaseHTTPServer
try: import win32api
except ImportError: pass
try: import win32con
except ImportError: pass
USER_AGENT = "Metalink Checker/6.0 +http://www.nabber.org/projects/"
SEGMENTED = True
LIMIT_PER_HOST = 1
HOST_LIMIT = 5
MAX_REDIRECTS = 20
CONNECT_RETRY_COUNT = 3
MAX_CHUNKS = 256
DEFAULT_CHUNK_SIZE = 262144
LANG = []
OS = None
COUNTRY = None
lang = locale.getdefaultlocale()[0]
if lang == None:
lang = "LC_ALL"
lang = lang.replace("_", "-").lower()
LANG = [lang]
if len(lang) == 5:
COUNTRY = lang[-2:]
PGP_KEY_DIR="."
PGP_KEY_EXTS = (".gpg", ".asc")
PGP_KEY_STORE=None
# Streaming server settings to use
HOST = "localhost"
PORT = None
# Protocols to use for segmented downloads
PROTOCOLS=("http","https","ftp")
#PROTOCOLS=("ftp")
# See http://www.poeml.de/transmetalink-test/README
MIME_TYPE = "application/metalink+xml"
DIGESTS = "md5,sha,sha-256,sha-384,sha-512"
def translate():
    '''
    Set up the translation path
    '''
if __name__=="__main__":
base = ""
localedir = ""
try:
base = os.path.basename(__file__)[:-3]
localedir = os.path.join(os.path.dirname(__file__), "locale")
except NameError:
if sys.executable != None:
base = os.path.basename(sys.executable)[:-4]
localedir = os.path.join(os.path.dirname(sys.executable), "locale")
else:
temp = __name__.split(".")
base = temp[-1]
localedir = os.path.join("/".join(["%s" % k for k in temp[:-1]]), "locale")
#print base, localedir
localelang = locale.getdefaultlocale()[0]
if localelang == None:
localelang = "LC_ALL"
t = gettext.translation(base, localedir, [localelang], None, 'en')
return t.ugettext
_ = translate()
def urlopen(url, data = None, metalink_header=False, headers = {}):
#print "URLOPEN:", url, headers
url = complete_url(url)
req = urllib2.Request(url, data, headers)
req.add_header('User-agent', USER_AGENT)
req.add_header('Cache-Control', "no-cache")
req.add_header('Pragma', "no-cache")
req.add_header('Accept-Encoding', 'gzip')
req.add_header('Want-Digest', DIGESTS)
if metalink_header:
req.add_header('Accept', MIME_TYPE + ", */*")
fp = urllib2.urlopen(req)
try:
if fp.headers['Content-Encoding'] == "gzip":
return metalink.open_compressed(fp)
except KeyError: pass
return fp
def urlhead(url, metalink_header=False, headers = {}):
    '''
    Perform a HEAD request and return the response headers
    Raises IOError if, for example, the URL does not exist
    '''
#print "URLHEAD:", url, headers
url = complete_url(url)
req = urllib2.Request(url, None, headers)
req.add_header('User-agent', USER_AGENT)
req.add_header('Cache-Control', "no-cache")
req.add_header('Pragma', "no-cache")
req.add_header('Want-Digest', DIGESTS)
if metalink_header:
req.add_header('Accept', MIME_TYPE + ", */*")
req.get_method = lambda: "HEAD"
#logging.debug(url)
fp = urllib2.urlopen(req)
newheaders = fp.headers
fp.close()
return newheaders
def digest_parse(digest):
if digest is None:
return {}
hashes = digest.split(",")
digestsums = {}
for myhash in hashes:
parts = myhash.split("=", 1)
# create digest list here
if parts[0].strip() == 'sha':
digestsums['sha-1'] = binascii.hexlify(binascii.a2b_base64(parts[1].strip()))
else:
digestsums[parts[0].strip()] = binascii.hexlify(binascii.a2b_base64(parts[1].strip()))
return digestsums
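# Illustrative sketch of digest_parse() on an RFC 3230 Digest header value;
# the base64 strings below are example digests, not hashes of a real file:
#
#   digest_parse("md5=HUXZLQLMuI/KZ5KDcJPcOA==,sha=thvDyvhfIqlvFe+A9MYgxAfm1q5=")
#
# returns a dictionary of lowercase hex digests keyed by hash name, with
# "sha" normalized to "sha-1", e.g.
#   {"md5": "<32 hex chars>", "sha-1": "<40 hex chars>"}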
def get(src, path, checksums = {}, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
    '''
    Download a file, decoding metalink and jigdo files.
    First parameter, file to download, URL or file path to download from
    Second parameter, file path to save to
    Third parameter, optional, expected dictionary of checksums
    Fourth parameter, optional, force a new download even if a valid copy already exists
    Fifth parameter, optional, dictionary of progress handler callbacks
    Sixth parameter, optional, boolean to try using segmented downloads
    Returns list of file paths if download(s) is successful
    Returns False otherwise (checksum fails)
    Raises socket.error, e.g. "Operation timed out"
    '''
if src.endswith(".jigdo"):
return download_jigdo(src, path, force, handlers, segmented, headers)
# assume metalink if ends with .metalink
result = download_metalink(src, path, force, handlers, segmented, headers)
if result != None:
return result
# assume normal file download here
# parse out filename portion here
filename = os.path.basename(src)
result = download_file(src, os.path.join(path, filename),
0, checksums, force, handlers, segmented = segmented, headers = headers)
if result:
return [result]
return False
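# A minimal usage sketch for a plain (non-metalink) download; the URL is
# hypothetical and the sha1 shown is that of the string "The quick brown
# fox jumps over the lazy dog":
#
#   files = get("http://example.com/file.txt", os.getcwd(),
#               checksums={"sha1": "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12"},
#               segmented=False)
#   if files:
#       print "Downloaded:", files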
def download_file(url, local_file, size=0, checksums={}, force = False,
handlers = {}, segmented = SEGMENTED, chunksums = {}, chunk_size = 0, headers = {}):
    '''
    url         string, URL to download the file from
    local_file  string, local file name to save to
    size        int, expected file size in bytes (0 if unknown)
    checksums   dict, expected checksums keyed by hash name
    force       bool, force download even if a valid local copy exists
    handlers    dict, progress/cancel/pause callback functions
    segmented   bool, try segmented (multi-mirror) downloading
    chunksums   dict, expected piece checksums keyed by hash name
    chunk_size  int, piece length the chunksums apply to
    headers     dict, extra HTTP headers to send
    Returns the file path (unicode) if the download is successful.
    Returns False otherwise (checksum fails).
    '''
# convert string filename into something we can use
#urllist = {}
#urllist[url] = URL(url)
fileobj = metalink.MetalinkFile(local_file)
# Need to set this again for absolute file paths
fileobj.filename = local_file
fileobj.set_size(size)
fileobj.hashlist = checksums
fileobj.pieces = chunksums
fileobj.piecelength = chunk_size
fileobj.add_url(url)
return download_file_urls(fileobj, force, handlers, segmented, headers)
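# Sketch: download_file() wraps a single URL in a MetalinkFile and defers to
# download_file_urls(); the URL and size below are made up:
#
#   path = download_file("http://example.com/big.iso", "/tmp/big.iso",
#                        size=735420416, chunk_size=DEFAULT_CHUNK_SIZE,
#                        segmented=True)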
def download_file_urls(metalinkfile, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
    '''
    Download a file.
    First parameter, MetalinkFile object to download
    Second parameter, optional, force a new download even if a valid copy already exists
    Third parameter, optional, dictionary of progress handler callbacks
    Fourth parameter, optional, try to use segmented downloading
    Returns file path if download is successful
    Returns False otherwise (checksum fails)
    '''
    if os.path.exists(metalinkfile.filename) and (not force):
        actsize = os.stat(metalinkfile.filename).st_size
        if len(metalinkfile.hashlist) > 0:
            checksum = verify_checksum(metalinkfile.filename, metalinkfile.hashlist)
            if checksum:
                if "status" in handlers:
                    handlers["status"](1, actsize, actsize)
                print ""
                print _("Already downloaded %s.") % os.path.basename(metalinkfile.filename)
                return metalinkfile.filename
            else:
                print _("Checksum failed, retrying download of %s.") % os.path.basename(metalinkfile.filename)
        # fall back to a size comparison only when no checksums are available
        elif metalinkfile.size == actsize:
            if "status" in handlers:
                handlers["status"](1, actsize, actsize)
            print ""
            print _("Already downloaded %s.") % os.path.basename(metalinkfile.filename)
            return metalinkfile.filename
if os.path.exists(metalinkfile.filename + ".temp"):
print _("Resuming download of %s.") % os.path.basename(metalinkfile.filename)
directory = os.path.dirname(metalinkfile.filename)
if not os.path.isdir(directory):
os.makedirs(directory)
if metalinkfile.piecelength == 0:
metalinkfile.piecelength = DEFAULT_CHUNK_SIZE
print _("Downloading to %s.") % metalinkfile.filename
seg_result = False
if segmented:
manager = Segment_Manager(metalinkfile, headers)
manager.set_callbacks(handlers)
seg_result = manager.run()
if not seg_result:
#seg_result = verify_checksum(local_file, checksums)
print "\n" + _("Could not download all segments of the file, trying one mirror at a time.")
if (not segmented) or (not seg_result):
manager = NormalManager(metalinkfile, headers)
manager.set_callbacks(handlers)
manager.run()
if manager.get_status():
return metalinkfile.filename
return False
class Manager:
def __init__(self):
self.cancel_handler = None
self.pause_handler = None
self.status_handler = None
self.bitrate_handler = None
self.time_handler = None
self.status = True
self.size = -1
self.end_bitrate()
def set_time_callback(self, handler):
self.time_handler = handler
def set_cancel_callback(self, handler):
self.cancel_handler = handler
def set_pause_callback(self, handler):
self.pause_handler = handler
def set_status_callback(self, handler):
self.status_handler = handler
def set_bitrate_callback(self, handler):
self.bitrate_handler = handler
def set_callbacks(self, callbackdict):
for key in callbackdict.keys():
setattr(self, key + "_handler", callbackdict[key])
def run(self, wait=None):
result = self.status
while result:
if self.pause_handler != None and self.pause_handler():
self.end_bitrate()
time.sleep(1)
else:
if wait != None:
time.sleep(wait)
result = self.cycle()
return self.get_status()
def cycle(self):
pass
def get_status(self):
return self.status
def close_handler(self):
return
def start_bitrate(self, bytes):
'''
Pass in current byte count
'''
self.oldsize = bytes
self.oldtime = time.time()
def end_bitrate(self):
self.oldsize = 0
self.oldtime = None
def get_bitrate(self, bytes):
'''
Pass in current byte count
'''
if self.oldtime != None and (time.time() - self.oldtime) != 0:
return ((bytes - self.oldsize) * 8 / 1024)/(time.time() - self.oldtime)
return 0
def get_time(self, bytes):
bitrate = self.get_bitrate(bytes)
if bitrate == 0 or (self.size - bytes) < 0:
return "??:??"
secondsleft = (self.size - bytes)/(bitrate*1024/8)
hours = secondsleft / 3600
minutes = (secondsleft % 3600) / 60
seconds = (secondsleft % 60)
if int(hours) > 0:
return "%.2d:%.2d:%.2d" % (hours, minutes, seconds)
return "%.2d:%.2d" % (minutes, seconds)
class NormalManager(Manager):
def __init__(self, metalinkfile, headers = {}):
Manager.__init__(self)
self.local_file = metalinkfile.filename
self.size = metalinkfile.size
self.chunksums = metalinkfile.get_piece_dict()
self.checksums = metalinkfile.hashlist
self.urllist = start_sort(metalinkfile.get_url_dict())
self.start_number = 0
self.number = 0
self.count = 1
self.headers = headers.copy()
def random_start(self):
# do it the old way
# choose a random url tag to start with
#urllist = list(urllist)
#number = int(random.random() * len(urllist))
self.start_number = int(random.random() * len(self.urllist))
self.number = self.start_number
def cycle(self):
if self.cancel_handler != None and self.cancel_handler():
return False
try:
self.status = True
remote_file = complete_url(self.urllist[self.number])
manager = URLManager(remote_file, self.local_file, self.checksums, self.headers)
manager.set_status_callback(self.status_handler)
manager.set_cancel_callback(self.cancel_handler)
manager.set_pause_callback(self.pause_handler)
manager.set_bitrate_callback(self.bitrate_handler)
manager.set_time_callback(self.time_handler)
self.get_bitrate = manager.get_bitrate
self.status = manager.run()
self.number = (self.number + 1) % len(self.urllist)
self.count += 1
return self.count <= len(self.urllist)
except KeyboardInterrupt:
print "Download Interrupted!"
try:
manager.close_handler()
except: pass
return False
class URLManager(Manager):
def __init__(self, remote_file, filename, checksums = {}, headers = {}):
'''
modernized replacement for urllib.urlretrieve() for use with proxy
'''
Manager.__init__(self)
self.filename = filename
self.checksums = checksums
self.block_size = 1024
self.counter = 0
self.total = 0
### FIXME need to check contents from previous download here
self.resume = FileResume(filename + ".temp")
self.resume.add_block(0)
self.data = ThreadSafeFile(filename, 'wb+')
try:
self.temp = urlopen(remote_file, headers = headers)
except:
self.status = False
self.close_handler()
return
myheaders = self.temp.info()
if len(self.checksums) == 0:
try:
self.checksums = digest_parse(myheaders['Digest'])
except KeyError:
pass
try:
self.size = int(myheaders['Content-Length'])
except KeyError:
self.size = 0
self.streamserver = None
if PORT != None:
self.streamserver = StreamServer((HOST, PORT), StreamRequest)
self.streamserver.set_stream(self.data)
#thread.start_new_thread(self.streamserver.serve, ())
mythread = threading.Thread(target=self.streamserver.serve)
mythread.start()
def close_handler(self):
self.resume.complete()
try:
if PORT == None:
self.data.close()
self.temp.close()
except: pass
if self.status:
self.status = filecheck(self.filename, self.checksums, self.size)
def cycle(self):
if self.oldtime == None:
self.start_bitrate(self.counter * self.block_size)
if self.cancel_handler != None and self.cancel_handler():
self.close_handler()
return False
block = self.temp.read(self.block_size)
self.data.acquire()
self.data.write(block)
self.data.release()
self.counter += 1
self.total += len(block)
self.resume.set_block_size(self.counter * self.block_size)
if self.streamserver != None:
self.streamserver.set_length(self.counter * self.block_size)
if self.status_handler != None:
self.status_handler(self.total, 1, self.size)
if self.bitrate_handler != None:
self.bitrate_handler(self.get_bitrate(self.counter * self.block_size))
if self.time_handler != None:
self.time_handler(self.get_time(self.counter * self.block_size))
if not block:
self.close_handler()
#print self.get_bitrate(self.counter * self.block_size)
return bool(block)
def filecheck(local_file, checksums, size, handler = None):
if verify_checksum(local_file, checksums):
actsize = 0
try:
actsize = os.stat(local_file).st_size
except: pass
if handler != None:
tempsize = size
if size == 0:
tempsize = actsize
handler(1, actsize, tempsize)
if (int(actsize) == int(size) or size == 0):
return True
print "\n" + _("Checksum failed for %s.") % os.path.basename(local_file)
return False
def parse_metalink(src, headers = {}, nocheck = False, ver=3):
src = complete_url(src)
is_metalink = nocheck
# not all servers support HEAD where GET is also supported
# also a WindowsError is thrown if a local file does not exist
if src.endswith(".metalink") or src.endswith(".meta4"):
is_metalink = True
try:
# add head check for metalink type, if MIME_TYPE or application/xml? treat as metalink
myheaders = urlhead(src, metalink_header=True, headers = headers)
if myheaders["content-type"].startswith(MIME_TYPE):
print _("Metalink content-type detected.")
is_metalink = True
elif myheaders["link"]:
# Metalink HTTP Link headers implementation, RFC 6249
# does not check for describedby urls but we can't use any of those anyway
# TODO this should be more robust and ignore commas in <> for urls
links = myheaders['link'].split(",")
fileobj = metalink.MetalinkFile4(os.path.basename(src))
fileobj.set_size(myheaders["content-length"])
for link in links:
parts = link.split(";")
mydict = {}
for part in parts[1:]:
part1, part2 = part.split("=", 1)
mydict[part1.strip()] = part2.strip()
pri = ""
try:
pri = mydict["pri"]
except KeyError: pass
typestr = ""
try:
typestr = mydict["type"]
except KeyError: pass
try:
                    if mydict['rel'] == '"describedby"' and typestr == "application/metalink4+xml":
                        # TODO support metalink describedby type
                        #fileobj.add_url(parts[0].strip(" <>"), preference=pri)
                        pass
                    elif mydict['rel'] == '"describedby"' and typestr == "application/pgp-signature":
                        # support openpgp describedby type
                        fp = urlopen(parts[0].strip(" <>"), headers = {"referer": src})
                        fileobj.hashlist['pgp'] = fp.read()
                        fp.close()
                    elif mydict['rel'] == '"describedby"' or mydict['rel'] == '"duplicate"':
                        fileobj.add_url(parts[0].strip(" <>"), type=typestr, priority=pri)
except KeyError: pass
try:
fileobj.hashlist = digest_parse(myheaders['digest'])
except KeyError:
# RFC requires link headers to be ignored if no digest header, use standard download method
return False
print _("Using Metalink HTTP Link headers.")
mobj = metalink.Metalink4()
mobj.files.append(fileobj)
return metalink.convert(mobj, ver)
except KeyError:
pass
if not is_metalink:
return False
try:
datasource = urlopen(src, metalink_header=True, headers = headers)
except:
return False
metalinkobj = metalink.parsehandle(datasource, ver)
datasource.close()
return metalinkobj
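# Sketch (hypothetical URL): parse_metalink() returns a Metalink object
# converted to version `ver`, or False if the source is not a metalink:
#
#   mobj = parse_metalink("http://example.com/file.meta4", ver=4)
#   if mobj:
#       for fileobj in mobj.files:
#           print fileobj.filename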
def parse_rss(src, headers = {}):
src = complete_url(src)
try:
datasource = urlopen(src, headers = headers)
except:
return False
rssobj = metalink.RSSAtom()
rssobj.parsehandle(datasource)
datasource.close()
return rssobj
def download_rss(src, path, force = False, handlers = {}, segmented = SEGMENTED, headers = {}, nocheck = False):
rssobj = parse_rss(src, headers)
if not rssobj:
return False
urllist = rssobj.files
if len(urllist) == 0:
print _("No enclosures to download files from.")
return False
results = []
for rssitem in urllist:
result = download_file(rssitem.url, os.path.join(path, os.path.basename(rssitem.url)), rssitem.size, force=force, handlers=handlers, segmented=segmented)
if result:
results.append(result)
if len(results) == 0:
return False
return results
def download_metalink(src, path, force = False, handlers = {}, segmented = SEGMENTED, headers = {}, nocheck = False):
    '''
    Decode a metalink file, can be local or remote
    First parameter, file to download, URL, file path or Metalink object to download from
    Second parameter, file path to save to
    Third parameter, optional, force a new download even if a valid copy already exists
    Fourth parameter, optional, progress handler callback
    Returns list of file paths if download(s) is successful
    Returns None if the file is not a metalink file (parse_metalink output is False)
    Returns False otherwise (checksum fails)
    '''
myheaders = headers.copy()
metalinkobj = parse_metalink(src, myheaders, nocheck)
if not metalinkobj:
return None
if is_remote(src):
myheaders['referer'] = src
if metalinkobj.type == "dynamic":
origin = metalinkobj.origin
if origin != src and origin != "":
print _("Downloading update from %s") % origin
try:
return download_metalink(origin, path, force, handlers, segmented, myheaders)
except: pass
urllist = metalinkobj.files
if len(urllist) == 0:
print _("No urls to download file from.")
return False
results = []
for filenode in urllist:
ostag = filenode.os
langtag = filenode.language
if OS == None or len(ostag) == 0 or ostag[0].lower() == OS.lower():
if "any" in LANG or len(langtag) == 0 or langtag.lower() in LANG:
result = download_file_node(filenode, path, force, handlers, segmented, myheaders)
if result:
results.append(result)
if len(results) == 0:
return False
return results
def download_jigdo(src, path, force = False, handlers = {}, segmented = SEGMENTED, headers = {}):
'''
Decode a jigdo file, can be local or remote
First parameter, file to download, URL or file path to download from
Second parameter, file path to save to
Third parameter, optional, force a new download even if a valid copy already exists
    Fourth parameter, optional, progress handler callback
Returns list of file paths if download(s) is successful
Returns False otherwise (checksum fails)
'''
newsrc = complete_url(src)
try:
datasource = urlopen(newsrc, metalink_header=True, headers = headers)
except:
return False
jigdo = metalink.Jigdo()
jigdo.parsehandle(datasource)
datasource.close()
#print path_join(src, jigdo.template)
template = get(path_join(src, jigdo.template), path, {"md5": jigdo.template_md5}, force, handlers, segmented, headers)
if not template:
print _("Could not download template file!")
return False
urllist = jigdo.files
if len(urllist) == 0:
print _("No urls to download file from.")
return False
results = []
results.extend(template)
for filenode in urllist:
result = download_file_node(filenode, path, force, handlers, segmented, headers)
if result:
results.append(result)
if len(results) == 0:
return False
print _("Reconstituting file...")
md5 = jigdo.mkiso()
checksum = verify_checksum(jigdo.filename, {'md5': md5})
if not checksum:
print _("Checksum failed.")
return results
def convert_jigdo(src, headers = {}):
'''
Decode a jigdo file, can be local or remote
First parameter, file to download, URL or file path to download from
Returns metalink xml text, False on error
'''
newsrc = complete_url(src)
try:
datasource = urlopen(newsrc, metalink_header=True, headers = headers)
except:
return False
jigdo = metalink.Jigdo()
jigdo.parsehandle(datasource)
datasource.close()
fileobj = metalink.MetalinkFile(jigdo.template)
fileobj.add_url(os.path.dirname(src) + "/" + jigdo.template)
fileobj.add_checksum("md5", jigdo.template_md5)
jigdo.files.insert(0, fileobj)
urllist = jigdo.files
if len(urllist) == 0:
print _("No Jigdo data files!")
return False
return jigdo.generate()
def download_file_node(item, path, force = False, handler = None, segmented=SEGMENTED, headers = {}):
'''
First parameter, file XML node
Second parameter, file path to save to
Third parameter, optional, force a new download even if a valid copy already exists
    Fourth parameter, optional, progress handler callback
Returns list of file paths if download(s) is successful
Returns False otherwise (checksum fails)
raise socket.error e.g. "Operation timed out"
'''
urllist = []
for node in item.resources:
urllist.append(node.url)
if len(urllist) == 0:
print _("No urls to download file from.")
return False
local_file = item.filename
item.filename = path_join(path, local_file)
return download_file_urls(item, force, handler, segmented, headers)
def complete_url(url):
'''
If no transport is specified in typical URL form, we assume it is a local
file, perhaps only a relative path too.
First parameter, string to convert to URL format
Returns, string converted to URL format
'''
if get_transport(url) == "":
absfile = os.path.abspath(url)
if absfile[0] != "/":
absfile = "/" + absfile
return "file://" + absfile
return url
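# Examples:
#   complete_url("http://example.com/f.iso")  ->  "http://example.com/f.iso"  (unchanged)
#   complete_url("docs/readme.txt")           ->  "file:///<abs path>/docs/readme.txt"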
def urlretrieve(url, filename, reporthook = None, headers = {}):
'''
modernized replacement for urllib.urlretrieve() for use with proxy
'''
block_size = 1024
i = 0
counter = 0
temp = urlopen(url, headers = headers)
myheaders = temp.info()
try:
size = int(myheaders['Content-Length'])
except KeyError:
size = 0
data = open(filename, 'wb')
block = True
### FIXME need to check contents from previous download here
resume = FileResume(filename + ".temp")
resume.add_block(0)
while block:
block = temp.read(block_size)
data.write(block)
i += block_size
counter += 1
resume.set_block_size(counter * block_size)
if reporthook != None:
#print counter, block_size, size
reporthook(counter, block_size, size)
resume.complete()
data.close()
temp.close()
    # return the response headers, matching urllib.urlretrieve
    return (filename, myheaders)
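# Usage sketch, mirroring urllib.urlretrieve (hypothetical URL):
#
#   def reporthook(block_count, block_size, total_size):
#       print block_count * block_size, "of", total_size, "bytes"
#
#   urlretrieve("http://example.com/f.zip", "/tmp/f.zip", reporthook)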
class FileResume:
'''
Manages the resume data file
'''
def __init__(self, filename):
self.size = 0
self.blocks = []
self.filename = filename
self._read()
def set_block_size(self, size):
'''
Set the block size value without recomputing blocks
'''
self.size = int(size)
self._write()
def update_block_size(self, size):
'''
Recompute blocks based on new size
'''
if self.size == size:
return
newblocks = []
count = 0
total = 0
offset = None
for value in self.blocks:
value = int(value)
if value == count:
if offset == None:
offset = count
total += self.size
elif offset != None:
start = ((offset * self.size) / size)
newblocks.extend(map(str, range(start, start + (total / size))))
total = 0
offset = None
count += 1
if offset != None:
start = ((offset * self.size) / size)
newblocks.extend(map(str, range(start, start + (total / size))))
self.blocks = newblocks
self.set_block_size(size)
def start_byte(self):
'''
Returns byte to start at, all previous are OK
'''
if len(self.blocks) == 0:
return 0
count = 0
for value in self.blocks:
if int(value) != count:
                # block `count` is the first one missing, so start there
                return count * self.size
count += 1
return None
def add_block(self, block_id):
'''
Add a block to list of completed
'''
if str(block_id) not in self.blocks:
self.blocks.append(str(block_id))
self._write()
def remove_block(self, block_id):
'''
Remove a block from list of completed
'''
self.blocks.remove(str(block_id))
self._write()
def clear_blocks(self):
'''
Remove all blocks from completed list
'''
self.blocks = []
self._write()
def extend_blocks(self, blocks):
'''
Replace the list of block ids
'''
for block in blocks:
if str(block) not in self.blocks:
self.blocks.append(str(block))
self._write()
def _write(self):
filehandle = open(self.filename, "w")
filehandle.write("%s:" % str(self.size))
#for block_id in self.blocks:
#filehandle.write(str(block_id) + ",")
#print self.blocks
filehandle.write(",".join(self.blocks))
filehandle.close()
def _read(self):
try:
filehandle = open(self.filename, "r")
resumestr = filehandle.readline()
(size, blocks) = resumestr.split(":")
self.blocks = blocks.split(",")
self.size = int(size)
filehandle.close()
except (IOError, ValueError):
self.blocks = []
self.size = 0
def complete(self):
'''
Download completed, remove block count file
'''
os.remove(self.filename)
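# The resume file written by FileResume is a single line of the form
#   <block size>:<comma-separated completed block ids>
# e.g. "262144:0,1,2,5" records a 256 KiB block size with blocks 0, 1, 2
# and 5 already completed.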
def verify_chunk_checksum(chunkstring, checksums={}):
    '''
    Verify the checksum of a chunk of data
    First parameter, chunk data as a string
    Second parameter, optional, expected dictionary of checksums
    Returns True if first checksum provided is valid
    Returns True if no checksums are provided
    Returns False otherwise
    '''
try:
checksums["sha512"]
if hashlib.sha512(chunkstring).hexdigest() == checksums["sha512"].lower():
return True
else:
return False
except (KeyError, AttributeError): pass
try:
checksums["sha384"]
if hashlib.sha384(chunkstring).hexdigest() == checksums["sha384"].lower():
return True
else:
return False
except (KeyError, AttributeError): pass
try:
checksums["sha256"]
if hashlib.sha256(chunkstring).hexdigest() == checksums["sha256"].lower():
return True
else:
return False
except (KeyError, AttributeError): pass
try:
checksums["sha1"]
if hashlib.sha1(chunkstring).hexdigest() == checksums["sha1"].lower():
return True
else:
return False
except KeyError: pass
try:
checksums["md5"]
if hashlib.md5(chunkstring).hexdigest() == checksums["md5"].lower():
return True
else:
return False
except KeyError: pass
# No checksum provided, assume OK
return True
def verify_checksum(local_file, checksums={}):
'''
Verify the checksum of a file
First parameter, filename
Second parameter, optional, expected dictionary of checksums
Returns True if first checksum provided is valid
Returns True if no checksums are provided
Returns False otherwise
'''
try:
return pgp_verify_sig(local_file, checksums["pgp"])
except (KeyError, AttributeError, ValueError, AssertionError): pass
try:
checksums["sha512"]
if filehash(local_file, hashlib.sha512()) == checksums["sha512"].lower():
return True
else:
#print "\nERROR: sha512 checksum failed for %s." % os.path.basename(local_file)
return False
except (KeyError, AttributeError): pass
try:
checksums["sha384"]
if filehash(local_file, hashlib.sha384()) == checksums["sha384"].lower():
return True
else:
#print "\nERROR: sha384 checksum failed for %s." % os.path.basename(local_file)
return False
except (KeyError, AttributeError): pass
try:
checksums["sha256"]
if filehash(local_file, hashlib.sha256()) == checksums["sha256"].lower():
return True
else:
#print "\nERROR: sha256 checksum failed for %s." % os.path.basename(local_file)
return False
except (KeyError, AttributeError): pass
try:
checksums["sha1"]
if filehash(local_file, hashlib.sha1()) == checksums["sha1"].lower():
return True
else:
#print "\nERROR: sha1 checksum failed for %s." % os.path.basename(local_file)
return False
except KeyError: pass
try:
checksums["md5"]
if filehash(local_file, hashlib.md5()) == checksums["md5"].lower():
return True
else:
#print "\nERROR: md5 checksum failed for %s." % os.path.basename(local_file)
return False
except KeyError: pass
# No checksum provided, assume OK
return True
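# Sketch: only the strongest hash present is actually checked; the md5
# shown here is that of an empty file:
#
#   ok = verify_checksum("/tmp/empty.bin",
#                        {"md5": "d41d8cd98f00b204e9800998ecf8427e"})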
def pgp_verify_sig(filename, sig):
gpg = GPG.GPGSubprocess(keyring=PGP_KEY_STORE)
for root, dirs, files in os.walk(PGP_KEY_DIR):
for thisfile in files:
if thisfile[-4:] in PGP_KEY_EXTS:
                # os.walk yields bare names, so join with the directory
                gpg.import_key(open(os.path.join(root, thisfile)).read())
sign = gpg.verify_file_detached(filename, sig)
print "\n-----" + _("BEGIN PGP SIGNATURE INFORMATION") + "-----"
if sign.error != None:
print sign.error
else:
#print sig.creation_date
try:
print "" + _("timestamp") + ":", time.strftime("%a, %d %b %Y %H:%M:%S (%Z)", time.localtime(float(sign.timestamp)))
except TypeError: pass
print "" + _("fingerprint") + ":", sign.fingerprint
#print sig.signature_id
#print sign.key_id
print "" + _("uid") + ":", sign.username
print "-----" + _("END PGP SIGNATURE INFORMATION") + "-----\n"
if sign.error != None:
raise AssertionError, sign.error
if sign.is_valid():
return True
return False
def is_remote(name):
transport = get_transport(name)
if transport != "":
return True
return False
def is_local(name):
transport = get_transport(name)
if transport == "":
return True
return False
def get_transport(url):
'''
Gets transport type. This is more accurate than the urlparse module which
just does a split on colon.
First parameter, url
Returns the transport type
'''
url = str(url)
result = url.split("://", 1)
if len(result) == 1:
transport = ""
else:
transport = result[0]
return transport
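# Examples:
#   get_transport("http://example.com/f") -> "http"
#   get_transport("C:\\temp\\file.txt")   -> ""  (treated as a local file)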
def filehash(thisfile, filesha):
    '''
    First parameter, filename
    Second parameter, hash object to update (e.g. hashlib.sha1())
    Returns the file hash as a string of hex digits
    '''
    try:
        filehandle = open(thisfile, "rb")
    except IOError:
        return ""
chunksize = 1024*1024
data = filehandle.read(chunksize)
while(data != ""):
filesha.update(data)
data = filehandle.read(chunksize)
filehandle.close()
return filesha.hexdigest()
def path_join(first, second):
'''
A function that is called to join two paths, can be URLs or filesystem paths
Parameters, two paths to be joined
Returns new URL or filesystem path
'''
if first == "":
return second
if is_remote(second):
return second
if is_remote(first):
if is_local(second):
return urlparse.urljoin(first, second)
return second
return os.path.normpath(os.path.join(first, second))
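# Examples:
#   path_join("http://example.com/dir/", "file.iso") -> "http://example.com/dir/file.iso"
#   path_join("/tmp", "file.iso")                    -> "/tmp/file.iso"
#   path_join("/tmp", "ftp://example.com/f.iso")     -> "ftp://example.com/f.iso"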
def start_sort(urldict):
urls = copy.deepcopy(urldict)
localurls = {}
if COUNTRY != None:
for url in urls.keys():
if COUNTRY.lower() == urls[url].location.lower():
localurls[url] = urls[url]
urls.pop(url)
newurls = sort_prefs(localurls)
newurls.extend(sort_prefs(urls))
#for i in range(len(newurls)):
# print i, newurls[i]
return newurls
def sort_prefs(mydict):
newurls = []
for url in mydict.keys():
newurls.append((mydict[url].preference, mydict[url].url))
newurls.sort()
newurls.reverse()
result = []
for url in newurls:
result.append(url[1])
return result
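# Sketch: mirrors whose location matches COUNTRY are listed first, and each
# group is ordered by descending preference, so a mirror with preference 99
# sorts ahead of one with preference 10.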
############# segmented download functions #############
class ThreadSafeFile(file):
def __init__(self, *args):
file.__init__(self, *args)
self.lock = threading.Lock()
def acquire(self):
return self.lock.acquire()
def release(self):
return self.lock.release()
class Segment_Manager(Manager):
def __init__(self, metalinkfile, headers = {}):
Manager.__init__(self)
self.headers = headers.copy()
self.sockets = []
self.chunks = []
self.limit_per_host = LIMIT_PER_HOST
self.host_limit = HOST_LIMIT
#self.size = 0
#if metalinkfile.size != "":
self.size = metalinkfile.get_size()
self.orig_urls = metalinkfile.get_url_dict()
self.urls = self.orig_urls
self.chunk_size = int(metalinkfile.piecelength)
self.chunksums = metalinkfile.get_piece_dict()
self.checksums = metalinkfile.hashlist
self.localfile = metalinkfile.filename
self.filter_urls()
self.status = True
# Open the file.
try:
self.f = ThreadSafeFile(self.localfile, "rb+")
except IOError:
self.f = ThreadSafeFile(self.localfile, "wb+")
self.resume = FileResume(self.localfile + ".temp")
self.streamserver = None
if PORT != None:
self.streamserver = StreamServer((HOST, PORT), StreamRequest)
self.streamserver.set_stream(self.f)
#thread.start_new_thread(self.streamserver.serve, ())
mythread = threading.Thread(target=self.streamserver.serve)
mythread.start()
def get_chunksum(self, index):
mylist = {}
try:
for key in self.chunksums.keys():
mylist[key] = self.chunksums[key][index]
except: pass
return mylist
def get_size(self):
'''
Take a best guess at size based on first 3 matching servers
raise socket.error e.g. "Operation timed out"
'''
i = 0
sizes = []
checksums = []
urls = list(self.urls)
while (i < len(urls) and (len(sizes) < 3)):
url = urls[i]
protocol = get_transport(url)
if protocol == "http":
status = httplib.MOVED_PERMANENTLY
count = 0
size = None
checksum_dict = {}
while (status == httplib.MOVED_PERMANENTLY or status == httplib.FOUND) and count < MAX_REDIRECTS:
http = Http_Host(url)
if http.conn != None:
try:
headers = {}
headers['Want-Digest'] = DIGESTS
headers.update(self.headers)
http.conn.request("HEAD", url, headers = headers)
response = http.conn.getresponse()
status = response.status
url = response.getheader("Location")
size = response.getheader("content-length")
checksum_dict = digest_parse(response.getheader("Digest", None))
except: pass
http.close()
count += 1
if (status == httplib.OK) and (size != None):
sizes.append(size)
                    if len(checksum_dict) > 0:
checksums.append(checksum_dict)
elif protocol == "ftp":
ftp = Ftp_Host(url)
try:
size = ftp.conn.size(url)
except ftplib.error_perm:
size = None
if size != None:
sizes.append(size)
i += 1
        if len(self.checksums) == 0 and len(checksums) > 0:
            if len(checksums) == 1:
                self.checksums = checksums[0]
            elif checksums.count(checksums[0]) >= 2:
                self.checksums = checksums[0]
            elif checksums.count(checksums[1]) >= 2:
                self.checksums = checksums[1]
if len(sizes) == 0:
return None
if len(sizes) == 1:
return int(sizes[0])
if sizes.count(sizes[0]) >= 2:
return int(sizes[0])
if sizes.count(sizes[1]) >= 2:
return int(sizes[1])
return None
def filter_urls(self):
#print self.urls
newurls = {}
for item in self.urls.keys():
if (not item.endswith(".torrent")) and (get_transport(item) in PROTOCOLS):
newurls[item] = self.urls[item]
self.urls = newurls
return newurls
def run(self, wait=0.1):
        '''
        Determine the file size and chunk size, then run the download loop
        Returns True if the download completes and verifies, False otherwise
        '''
#try:
if self.size == "" or self.size == 0:
self.size = self.get_size()
if self.size == None:
#crap out and do it the old way
self.close_handler()
self.status = False
return False
# can't adjust chunk size if it has chunk hashes tied to that size
if len(self.chunksums) == 0 and self.size/self.chunk_size > MAX_CHUNKS:
self.chunk_size = self.size/MAX_CHUNKS
#print "Set chunk size to %s." % self.chunk_size
self.resume.update_block_size(self.chunk_size)
return Manager.run(self, wait)
def cycle(self):
'''
Runs one cycle
Returns True if still downloading, False otherwise
'''
try:
bytes = self.byte_total()
index = self.get_chunk_index()
if index != None and index > 0 and self.streamserver != None:
self.streamserver.set_length((index - 1) * self.chunk_size)
if self.oldtime == None:
self.start_bitrate(bytes)
# cancel was pressed here
if self.cancel_handler != None and self.cancel_handler():
self.status = False
self.close_handler()
return False
self.update()
self.resume.extend_blocks(self.chunk_list())
if bytes >= self.size and self.active_count() == 0:
self.resume.complete()
self.close_handler()
return False
#crap out and do it the old way
if len(self.urls) == 0:
self.status = False
self.close_handler()
return False
return True
except KeyboardInterrupt:
print "Download Interrupted!"
self.close_handler()
return False
def update(self):
if self.status_handler != None and self.size != None:
#count = int(self.byte_total()/self.chunk_size)
#if self.byte_total() % self.chunk_size:
# count += 1
#self.status_handler(count, self.chunk_size, self.size)
self.status_handler(self.byte_total(), 1, self.size)
if self.bitrate_handler != None:
self.bitrate_handler(self.get_bitrate(self.byte_total()))
if self.time_handler != None:
self.time_handler(self.get_time(self.byte_total()))
next = self.next_url()
if next == None:
return
index = self.get_chunk_index()
if index != None:
start = index * self.chunk_size
end = start + self.chunk_size
if end > self.size:
end = self.size
if next.protocol == "http" or next.protocol == "https":
segment = Http_Host_Segment(next, start, end, self.size, self.get_chunksum(index), self.headers)
segment.set_cancel_callback(self.cancel_handler)
self.chunks[index] = segment
self.segment_init(index)
if next.protocol == "ftp":
#print "allocated to:", index, next.url
segment = Ftp_Host_Segment(next, start, end, self.size, self.get_chunksum(index))
segment.set_cancel_callback(self.cancel_handler)
self.chunks[index] = segment
self.segment_init(index)
def segment_init(self, index):
segment = self.chunks[index]
if str(index) in self.resume.blocks:
segment.end()
if segment.error == None:
segment.bytes = segment.byte_count
else:
self.resume.remove_block(index)
else:
segment.start()
def get_chunk_index(self):
i = -1
for i in range(len(self.chunks)):
if (self.chunks[i] == None or self.chunks[i].error != None):
return i
# weed out dead segments that have temp errors and reassign
if (not self.chunks[i].isAlive() and self.chunks[i].bytes == 0):
return i
i += 1
if (i * self.chunk_size) < self.size:
self.chunks.append(None)
return i
return None
def gen_count_array(self):
temp = {}
for item in self.sockets:
try:
temp[item.url] += 1
except KeyError:
temp[item.url] = 1
return temp
    def active_count(self):
        count = 0
        for item in self.chunks:
            # chunks not yet assigned a segment are None
            if item != None and item.isAlive():
                count += 1
        return count
def next_url(self):
''' returns next socket to use or None if none available'''
self.remove_errors()
if (len(self.sockets) >= (self.host_limit * self.limit_per_host)) or (len(self.sockets) >= (self.limit_per_host * len(self.urls))):
# We can't create any more sockets, but we can see what's available
#print "existing sockets"
for item in self.sockets:
#print item.active, item.url
if not item.get_active():
return item
return None
count = self.gen_count_array()
# randomly start with a url index
#urls = list(self.urls)
#number = int(random.random() * len(self.urls))
urls = start_sort(self.urls)
number = 0
countvar = 1
while (countvar <= len(self.urls)):
try:
tempcount = count[urls[number]]
except KeyError:
tempcount = 0
# check against limits
if ((tempcount == 0) and (len(count) < self.host_limit)) or (0 < tempcount < self.limit_per_host):
# check protocol type here
protocol = get_transport(urls[number])
if (not urls[number].endswith(".torrent")) and (protocol == "http" or protocol == "https"):
host = Http_Host(urls[number], self.f)
self.sockets.append(host)
return host
if (protocol == "ftp"):
try:
host = Ftp_Host(urls[number], self.f)
except (socket.gaierror, socket.timeout, ftplib.error_temp, ftplib.error_perm, socket.error):
#print "FTP connect failed %s" % self.urls[number]
self.urls.pop(urls[number])
return None
self.sockets.append(host)
return host
number = (number + 1) % len(self.urls)
countvar += 1
return None
def remove_errors(self):
for item in self.chunks:
if item != None and item.error != None:
#print item.error
if item.error == httplib.MOVED_PERMANENTLY or item.error == httplib.FOUND:
#print "location:", item.location
try:
newitem = copy.deepcopy(self.urls[item.url])
newitem.url = item.location
self.urls[item.location] = newitem
except KeyError: pass
self.filter_urls()
#print "removed %s" % item.url
try:
self.urls.pop(item.url)
except KeyError: pass
        # iterate over a copy, since removal during iteration skips items
        for socketitem in self.sockets[:]:
            if socketitem.url not in self.urls.keys():
                #print socketitem.url
                #socketitem.close()
                self.sockets.remove(socketitem)
return
    def byte_total(self):
        total = 0
        for item in self.chunks:
            try:
                if item.error == None:
                    total += item.bytes
            except (AttributeError): pass
        return total
def chunk_list(self):
chunks = []
for i in range(len(self.chunks)):
#print i, self.chunks[i].bytes
try:
if self.chunks[i].bytes == self.chunk_size:
chunks.append(i)
except (AttributeError): pass
#print chunks
return chunks
def close_handler(self):
if PORT == None:
self.f.close()
for host in self.sockets:
host.close()
self.update()
#try:
size = int(os.stat(self.localfile).st_size)
if size == 0:
try:
os.remove(self.localfile)
os.remove(self.localfile + ".temp")
except: pass
self.status = False
elif self.status:
self.status = filecheck(self.localfile, self.checksums, size)
#except: pass
class Host_Base:
'''
Base class for various host protocol types. Not to be used directly.
'''
def __init__(self, url, memmap):
self.bytes = 0
self.ttime = 0
self.start_time = None
self.error = None
self.conn = None
self.active = False
self.url = url
self.mem = memmap
transport = get_transport(self.url)
self.protocol = transport
def import_stats(self, segment):
pass
def set_active(self, value):
self.active = value
def get_active(self):
return self.active
class Ftp_Host(Host_Base):
def __init__(self, url, memmap=None):
Host_Base.__init__(self, url, memmap)
self.connect()
def connect(self):
if self.protocol == "ftp":
urlparts = urlparse.urlsplit(self.url)
try:
username = urlparts.username
password = urlparts.password
except AttributeError:
# needed for python < 2.5
username = None
if username == None:
username = "anonymous"
password = "anonymous"
try:
port = urlparts.port
except:
port = ftplib.FTP_PORT
if port == None:
port = ftplib.FTP_PORT
self.conn = proxy.FTP()
self.conn.connect(urlparts[1], port)
try:
self.conn.login(username, password)
except:
#self.error = "login failed"
raise
return
# set to binary mode, only works when not proxied
try:
self.conn.voidcmd("TYPE I")
except: pass
else:
self.error = _("unsupported protocol")
raise AssertionError
#return
def close(self):
if self.conn != None:
try:
self.conn.quit()
except:
pass
def reconnect(self):
self.close()
self.connect()
class Http_Host(Host_Base):
def __init__(self, url, memmap=None):
Host_Base.__init__(self, url, memmap)
urlparts = urlparse.urlsplit(self.url)
if self.url.endswith(".torrent"):
self.error = _("unsupported protocol")
return
elif self.protocol == "http":
try:
port = urlparts.port
except:
port = httplib.HTTP_PORT
if port == None:
port = httplib.HTTP_PORT
try:
self.conn = proxy.HTTPConnection(urlparts[1], port)
except httplib.InvalidURL:
self.error = _("invalid url")
return
elif self.protocol == "https":
try:
port = urlparts.port
except:
port = httplib.HTTPS_PORT
if port == None:
port = httplib.HTTPS_PORT
try:
self.conn = proxy.HTTPSConnection(urlparts[1], port)
except httplib.InvalidURL:
self.error = _("invalid url")
return
else:
self.error = _("unsupported protocol")
return
def close(self):
if self.conn != None:
self.conn.close()
class Host_Segment:
'''
Base class for various segment protocol types. Not to be used directly.
'''
def __init__(self, host, start, end, filesize, checksums = {}, headers = {}):
        # threading.Thread.__init__ is called by the concrete subclasses
self.host = host
self.host.set_active(True)
self.byte_start = start
self.byte_end = end
self.byte_count = end - start
self.filesize = filesize
self.url = host.url
self.mem = host.mem
self.checksums = checksums
self.error = None
self.ttime = 0
self.response = None
self.bytes = 0
self.buffer = ""
self.temp = ""
self.cancel_handler = None
self.headers = headers.copy()
def set_cancel_callback(self, handler):
self.cancel_handler = handler
def check_cancel(self):
if self.cancel_handler == None:
return False
return self.cancel_handler()
    def avg_bitrate(self):
        if self.ttime == 0:
            return 0
        bits = self.bytes * 8
        return bits/self.ttime
def checksum(self):
if self.check_cancel():
return False
try:
self.mem.acquire()
self.mem.seek(self.byte_start, 0)
chunkstring = self.mem.read(self.byte_count)
self.mem.release()
except ValueError:
return False
return verify_chunk_checksum(chunkstring, self.checksums)
def close(self):
if self.error != None:
self.host.close()
self.host.set_active(False)
def end(self):
if self.error == None and not self.checksum():
self.error = _("Chunk checksum failed")
self.close()
class Ftp_Host_Segment(threading.Thread, Host_Segment):
def __init__(self, *args):
threading.Thread.__init__(self)
Host_Segment.__init__(self, *args)
def run(self):
# Finish early if checksum is OK
if self.checksum() and len(self.checksums) > 0:
self.bytes += self.byte_count
self.close()
return
# check for supported hosts/urls
urlparts = urlparse.urlsplit(self.url)
if self.host.conn == None:
#print "bad socket"
self.error = _("bad socket")
self.close()
return
size = None
retry = True
count = 0
while retry and count < CONNECT_RETRY_COUNT:
retry = False
try:
(self.response, size) = self.host.conn.ntransfercmd("RETR " + urlparts.path, self.byte_start, self.byte_end)
except (ftplib.error_perm), error:
self.error = error.message
self.close()
return
except (socket.gaierror, socket.timeout), error:
self.error = error.args
self.close()
return
except EOFError:
self.error = _("EOFError")
self.close()
return
except AttributeError:
self.error = _("AttributeError")
self.close()
return
except (socket.error), error:
#print "reconnect", self.host.url
try:
self.host.reconnect()
except:
pass
retry = True
count += 1
except (ftplib.error_temp), error:
                # not an error condition; the transfer TCP connection was most likely closed
#count += 1
#self.error = "error temp", error.message
self.temp = error.message
self.close()
return
except (ftplib.error_reply), error:
# this is likely just an extra chatty FTP server, ignore for now
pass
if count >= CONNECT_RETRY_COUNT:
self.error = _("socket reconnect attempts failed")
self.close()
return
        if size != None:
            if self.filesize != size:
                self.error = _("bad file size")
                self.close()
                return
self.start_time = time.time()
while True:
if self.readable():
self.handle_read()
self.ttime += (time.time() - self.start_time)
else:
self.end()
return
def readable(self):
if self.check_cancel():
return False
if self.response == None:
return False
return True
def handle_read(self):
try:
data = self.response.recv(1024)
except socket.timeout:
self.error = _("read timeout")
self.response = None
return
if len(data) == 0:
return
self.buffer += data
#print len(self.buffer), self.byte_count
if len(self.buffer) >= self.byte_count:
# When using a HTTP proxy there is no shutdown() call
try:
self.response.shutdown(socket.SHUT_RDWR)
except AttributeError:
pass
tempbuffer = self.buffer[:self.byte_count]
self.buffer = ""
self.bytes += len(tempbuffer)
try:
self.mem.acquire()
self.mem.seek(self.byte_start, 0)
self.mem.write(tempbuffer)
self.mem.flush()
self.mem.release()
except ValueError:
self.error = _("bad file handle")
self.response = None
# this method writes directly to file on each data grab, not working for some reason
## if (self.bytes + len(data)) >= self.byte_count:
## # When using a HTTP proxy there is no shutdown() call
## try:
## self.response.shutdown(socket.SHUT_RDWR)
## except AttributeError:
## pass
##
## index = self.byte_count - (self.bytes + len(data))
##
## writedata = data[:index]
##
## self.mem.acquire()
## self.mem.seek(self.byte_start + self.bytes, 0)
## self.mem.write(writedata)
## self.mem.flush()
##
## self.mem.release()
##
## self.response = None
## else:
## writedata = data
##
## lock = threading.Lock()
## lock.acquire()
##
## self.mem.seek(self.byte_start + self.bytes, 0)
## self.mem.write(writedata)
##
## lock.release()
##
## self.bytes += len(writedata)
class Http_Host_Segment(threading.Thread, Host_Segment):
def __init__(self, *args):
threading.Thread.__init__(self)
Host_Segment.__init__(self, *args)
def run(self):
#try:
# Finish early if checksum is OK
if self.checksum() and len(self.checksums) > 0:
self.bytes += self.byte_count
self.close()
return
if self.host.conn == None:
self.error = _("bad socket")
self.close()
return
try:
self.headers.update({"Range": "bytes=%lu-%lu\r\n" % (self.byte_start, self.byte_end - 1)})
self.host.conn.request("GET", self.url, "", self.headers)
except:
self.error = _("socket exception")
self.close()
return
self.start_time = time.time()
while True:
if self.readable():
self.handle_read()
self.ttime += (time.time() - self.start_time)
else:
self.end()
return
#except BaseException, e:
# self.error = utils.get_exception_message(e)
def readable(self):
if self.check_cancel():
return False
if self.response == None:
try:
self.response = self.host.conn.getresponse()
except socket.timeout:
self.error = _("timeout")
return False
# not an error state, connection closed, kicks us out of thread
except httplib.ResponseNotReady:
return False
except:
self.error = _("response error")
return False
if self.response.status == httplib.PARTIAL_CONTENT:
return True
elif self.response.status == httplib.MOVED_PERMANENTLY or self.response.status == httplib.FOUND:
self.location = self.response.getheader("Location")
self.error = self.response.status
self.response = None
return False
else:
self.error = self.response.status
self.response = None
return False
return False
def handle_read(self):
try:
data = self.response.read()
except socket.timeout:
self.error = _("timeout")
self.response = None
return
except httplib.IncompleteRead:
self.error = _("incomplete read")
self.response = None
return
except socket.error:
self.error = _("socket error")
self.response = None
return
except TypeError:
self.response = None
return
if len(data) == 0:
return
rangestring = self.response.getheader("Content-Range")
request_size = int(rangestring.split("/")[1])
if request_size != self.filesize:
self.error = _("bad file size")
self.response = None
return
# Check digest headers against expected
digest = self.response.getheader("Digest", None)
if digest is not None:
digestsums = digest_parse(digest)
# check digest here, skip if missing, return if mismatch
for hashtype in self.checksums.keys():
try:
digestsums[hashtype]
except KeyError:
continue
if self.checksums[hashtype] != digestsums[hashtype]:
return
body = data
size = len(body)
# write out body to file
try:
self.mem.acquire()
self.mem.seek(self.byte_start + self.bytes, 0)
self.mem.write(body)
self.mem.flush()
self.mem.release()
except ValueError:
self.error = _("bad file handle")
self.response = None
return
self.bytes += size
#print self.bytes, self.byte_count
if self.bytes >= self.byte_count:
self.response = None
class StreamRequest(BaseHTTPServer.BaseHTTPRequestHandler):
def do_GET(self):
self.send_response(200)
self.send_header("Content-Type", "application/octet-stream")
self.send_header("Cache-Control", "no-cache")
self.end_headers()
start = 0
while True:
if self.server.fileobj != None and (self.server.length - start) > 0:
try:
self.server.fileobj.acquire()
loc = self.server.fileobj.tell()
self.server.fileobj.seek(start, 0)
size = self.server.length - start
data = self.server.fileobj.read(size)
if len(data) > 0:
self.wfile.write(data)
self.server.fileobj.seek(loc, 0)
self.server.fileobj.release()
start += len(data)
except ValueError:
break
time.sleep(.1)
class StreamServer(BaseHTTPServer.HTTPServer):
def __init__(self, *args):
BaseHTTPServer.HTTPServer.__init__(self, *args)
self.fileobj = None
self.length = 0
# based on: http://code.activestate.com/recipes/425210/
def server_bind(self):
BaseHTTPServer.HTTPServer.server_bind(self)
self.socket.setblocking(0)
self.socket.settimeout(1)
self.run = True
def get_request(self):
while self.run:
try:
sock, addr = self.socket.accept()
sock.setblocking(0)
sock.settimeout(30)
return (sock, addr)
except socket.timeout:
pass
def stop(self):
self.run = False
def serve(self):
try:
while self.run:
self.handle_request()
except KeyboardInterrupt:
print "Server Interrupted!"
self.fileobj.close()
self.stop()
def set_stream(self, fileobj):
self.fileobj = fileobj
def set_length(self, length):
self.length = int(length)
# if __name__=="__main__":
# testurls = ("ftp://ftp.freebsd.org/pub/FreeBSD/README.TXT", "http://www.google.com", "https://encrypted.google.com")
# #testurls = ("http://www.google.com", "https://encrypted.google.com")
# # for testfile in testurls:
# # files = get(testfile, os.getcwd(), force=True, segmented=False)
# # print "Downloaded:", files
# # print "=" * 79
# # for testfile in testurls:
# # files = get(testfile, os.getcwd(), force=True, segmented=True)
# # print "Downloaded:", files
# # print "=" * 79
# proxystr = "http://localhost:8000"
# proxy.HTTP_PROXY = proxystr
# proxy.HTTPS_PROXY = proxystr
# proxy.FTP_PROXY = proxystr
# proxy.set_proxies()
# # for testfile in testurls:
# # files = get(testfile, os.getcwd(), force=True, segmented=False)
# # print "Downloaded:", files
# # print "=" * 79
# for testfile in testurls:
# files = get(testfile, os.getcwd(), force=True, segmented=True)
# print "Downloaded:", files
##myserver = StreamServer(("localhost", 8080), StreamRequest)
##myserver.set_stream(ThreadSafeFile("C:\\library\\avril\\Avril Lavigne - Complicated.mpg", "rb"))
##myserver.set_length(50000000)
##serverthread = threading.Thread(target=myserver.serve_forever)
##serverthread.start()
#myserver.serve_forever()