Source code for abpytools.utils.downloads

from urllib import request, error


class Download:

    def __init__(self, url='', verbose=False, timeout=5):
        self.url = url
        self.verbose = verbose
        self.html = ''
        self.error = False
        self.timeout = timeout

    def download(self, user_agent='wswp', num_retries=2):
        try:
            self.html = download(self.url, self.verbose, user_agent=user_agent,
                                 num_retries=num_retries, timeout=self.timeout)
        except IOError:
            raise ValueError("Could not download requested page.")
def download(url, verbose, user_agent='wswp', num_retries=2, decoding_format='utf-8', timeout=5):
    """
    Function to download contents from a given url

    Input:
        url: str
            string with the url to download from
        verbose: bool
            Print out url and errors
        user_agent: str
            User-agent header value, default 'wswp'
        num_retries: int
            Number of times to retry downloading after a 5XX server error
        decoding_format: str
            Encoding used to decode the response, default 'utf-8'
        timeout: int
            Timeout in seconds for the request

    Output:
        returns: str
            string with contents of given url

    Raises:
        IOError if the download fails after all retries
    """
    if verbose:
        print('Downloading:', url)
    headers = {'User-agent': user_agent}
    request_obj = request.Request(url, headers=headers)
    try:
        with request.urlopen(request_obj, timeout=timeout) as response:
            html = response.read()
    except error.URLError as e:
        if verbose:
            print('Download error:', e.reason)
        if num_retries > 0 and hasattr(e, 'code') and 500 <= e.code < 600:
            # retry 5XX HTTP errors
            return download(url, verbose, user_agent=user_agent,
                            num_retries=num_retries - 1,
                            decoding_format=decoding_format, timeout=timeout)
        raise IOError(e.reason)
    return html.decode(decoding_format)
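
# Usage sketch (illustrative only, not part of the original module): a minimal
# example of how the Download wrapper and the module-level download function
# might be called. The URL below is a hypothetical placeholder.
if __name__ == '__main__':
    # Object-oriented interface: downloaded contents are stored on self.html,
    # and a failed download surfaces as ValueError.
    downloader = Download(url='https://example.com', verbose=True, timeout=5)
    downloader.download(num_retries=2)
    print(downloader.html[:100])

    # Functional interface: returns the decoded page contents directly,
    # raising IOError if the request ultimately fails.
    page = download('https://example.com', verbose=False)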