YouCompleteMe/third_party/pythonfutures/crawl.py

"""Compare the speed of downloading URLs sequentially vs. using futures."""

import functools
import time
import timeit
import sys

try:
    from urllib2 import urlopen
except ImportError:
    from urllib.request import urlopen

from concurrent.futures import (as_completed, ThreadPoolExecutor,
                                ProcessPoolExecutor)

# URLs fetched by every download strategy that main() times.
# NOTE(review): 'thisurlprobablydoesnotexist' looks like a deliberately
# failing entry that exercises the skip-on-error path -- confirm.
URLS = ['http://www.google.com/',
        'http://www.apple.com/',
        'http://www.ibm.com',
        'http://www.thisurlprobablydoesnotexist.com',
        'http://www.slashdot.org/',
        'http://www.python.org/',
        'http://www.bing.com/',
        'http://www.facebook.com/',
        'http://www.yahoo.com/',
        'http://www.youtube.com/',
        'http://www.blogger.com/']

def load_url(url, timeout):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to download; passed straight to ``urlopen``.
        timeout: Timeout in seconds. Forwarded to ``urlopen`` as its
            ``timeout`` keyword on Python 2.6 and newer; ignored on older
            interpreters.

    Returns:
        Whatever ``read()`` on the opened response returns.

    Raises:
        Any exception raised by ``urlopen`` or ``read()`` propagates to the
        caller unchanged.
    """
    # Only pass ``timeout`` where the version check says it is supported.
    kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
    response = urlopen(url, **kwargs)
    try:
        return response.read()
    finally:
        # Close explicitly so the connection is released right away rather
        # than whenever the response object happens to be collected.
        response.close()

def download_urls_sequential(urls, timeout=60):
    """Download each URL one after another, skipping the ones that fail.

    Args:
        urls: Iterable of URLs to fetch, in order.
        timeout: Per-request timeout in seconds handed to ``load_url``.

    Returns:
        A dict mapping each successfully fetched URL to the content that
        ``load_url`` returned for it. URLs whose download raised an
        exception are left out.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            # Best effort: a failed download is simply not recorded.
            # KeyboardInterrupt/SystemExit are not caught here, so the
            # run can still be aborted.
            pass
    return url_to_content

def download_urls_with_executor(urls, executor, timeout=60):
    """Download all URLs through *executor*, skipping the ones that fail.

    Every URL is submitted to the executor as a ``load_url`` call and the
    results are collected in completion order.

    Args:
        urls: Iterable of URLs to fetch.
        executor: Object providing ``submit`` and ``shutdown`` (e.g. a
            ThreadPoolExecutor or ProcessPoolExecutor). It is shut down
            before this function returns, so it cannot be reused.
        timeout: Per-request timeout in seconds handed to ``load_url``.

    Returns:
        A dict mapping each successfully fetched URL to the content that
        ``load_url`` returned for it. URLs whose future raised an
        exception are left out.
    """
    try:
        url_to_content = {}
        # Map each future back to its URL so results can be keyed by URL.
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)

        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                # Best effort: a failed download is simply not recorded.
                # KeyboardInterrupt/SystemExit are not caught here.
                pass
        return url_to_content
    finally:
        # Always release the executor, even if submitting or collecting
        # raised.
        executor.shutdown()

def main():
    """Time each download strategy over URLS and print one line per strategy.

    The strategies run in this order: sequential, process pool, thread
    pool. Each output line shows the strategy name, the elapsed wall-clock
    seconds, and how many of the URLs were downloaded.

    Both executors (10 workers each) are constructed up front, when the
    strategy list is built, not when their strategy is about to run.
    """
    for name, fn in [('sequential',
                      functools.partial(download_urls_sequential, URLS)),
                     ('processes',
                      functools.partial(download_urls_with_executor,
                                        URLS,
                                        ProcessPoolExecutor(10))),
                     ('threads',
                      functools.partial(download_urls_with_executor,
                                        URLS,
                                        ThreadPoolExecutor(10)))]:
        sys.stdout.write('%s: ' % name.ljust(12))
        # The label has no trailing newline, so flush it explicitly;
        # otherwise buffered stdout hides it until the run finishes.
        sys.stdout.flush()
        start = time.time()
        url_map = fn()
        sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %
                         (time.time() - start, len(url_map), len(URLS)))

# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Adding python-futures to third_party. Download source: https://pythonfutures.googlecode.com/archive/05e0c9c1b3d493f0c7e5833723a1ea99d024bad4.zip (2013-10-06 21:24:52 -04:00)

			`"""Compare the speed of downloading URLs sequentially vs. using futures."""`

			`import functools`
			`import time`
			`import timeit`
			`import sys`

			`try:`
			`from urllib2 import urlopen`
			`except ImportError:`
			`from urllib.request import urlopen`

			`from concurrent.futures import (as_completed, ThreadPoolExecutor,`
			`ProcessPoolExecutor)`

			`URLS = ['http://www.google.com/',`
			`'http://www.apple.com/',`
			`'http://www.ibm.com',`
			`'http://www.thisurlprobablydoesnotexist.com',`
			`'http://www.slashdot.org/',`
			`'http://www.python.org/',`
			`'http://www.bing.com/',`
			`'http://www.facebook.com/',`
			`'http://www.yahoo.com/',`
			`'http://www.youtube.com/',`
			`'http://www.blogger.com/']`

			`def load_url(url, timeout):`
			`kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}`
			`return urlopen(url, **kwargs).read()`

			`def download_urls_sequential(urls, timeout=60):`
			`url_to_content = {}`
			`for url in urls:`
			`try:`
			`url_to_content[url] = load_url(url, timeout=timeout)`
			`except:`
			`pass`
			`return url_to_content`

			`def download_urls_with_executor(urls, executor, timeout=60):`
			`try:`
			`url_to_content = {}`
			`future_to_url = dict((executor.submit(load_url, url, timeout), url)`
			`for url in urls)`

			`for future in as_completed(future_to_url):`
			`try:`
			`url_to_content[future_to_url[future]] = future.result()`
			`except:`
			`pass`
			`return url_to_content`
			`finally:`
			`executor.shutdown()`

			`def main():`
			`for name, fn in [('sequential',`
			`functools.partial(download_urls_sequential, URLS)),`
			`('processes',`
			`functools.partial(download_urls_with_executor,`
			`URLS,`
			`ProcessPoolExecutor(10))),`
			`('threads',`
			`functools.partial(download_urls_with_executor,`
			`URLS,`
			`ThreadPoolExecutor(10)))]:`
			`sys.stdout.write('%s: ' % name.ljust(12))`
			`start = time.time()`
			`url_map = fn()`
			`sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %`
			`(time.time() - start, len(url_map), len(URLS)))`

			`if __name__ == '__main__':`
			`main()`