diff --git a/bitter/VERSION b/bitter/VERSION index 05e8a45..faef31a 100644 --- a/bitter/VERSION +++ b/bitter/VERSION @@ -1 +1 @@ -0.6.6 +0.7.0 diff --git a/bitter/__init__.py b/bitter/__init__.py index 1ef9dc4..8bc6daf 100644 --- a/bitter/__init__.py +++ b/bitter/__init__.py @@ -3,13 +3,6 @@ Bitter module. A library and cli for Twitter using python-twitter. http://github.com/balkian/bitter """ -try: - from future.standard_library import install_aliases - install_aliases() -except ImportError: - # Avoid problems at setup.py and py3.x - pass - import os from .version import __version__ diff --git a/bitter/cli.py b/bitter/cli.py index 566cd1e..8d205d8 100644 --- a/bitter/cli.py +++ b/bitter/cli.py @@ -391,8 +391,9 @@ def stream(ctx): @click.option('-l', '--locations', default=None) @click.option('-t', '--track', default=None) @click.option('-f', '--file', help='File to store the stream of tweets') +@click.option('-p', '--politelyretry', help='Politely retry after a hangup/connection error', is_flag=True, default=True) @click.pass_context -def get_stream(ctx, locations, track, file): +def get_stream(ctx, locations, track, file, politelyretry): wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1) query_args = {} @@ -400,17 +401,28 @@ def get_stream(ctx, locations, track, file): query_args['locations'] = locations if track: query_args['track'] = track - if not query_args: - iterator = wq.statuses.sample() - else: - iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75") - if not file: file = sys.stdout else: file = open(file, 'a') - for tweet in tqdm(iterator): + def insist(): + lasthangup = time.time() + while True: + if not query_args: + iterator = wq.statuses.sample() + else: + iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75") + for i in iterator: + yield i + if not politelyretry: + return + thishangup = time.time() + if thishangup - lasthangup < 60: + raise Exception('Too many hangups in a row.') + time.sleep(3) + + for tweet in tqdm(insist()): print(json.dumps(tweet), file=file) if file != sys.stdout: file.close() diff --git a/bitter/crawlers.py b/bitter/crawlers.py index f898a08..e69b223 100644 --- a/bitter/crawlers.py +++ b/bitter/crawlers.py @@ -10,6 +10,11 @@ from twitter import * from collections import OrderedDict from threading import Lock from itertools import islice +try: + import itertools.ifilter as filter +except ImportError: + pass + from . import utils from . import config @@ -178,9 +183,13 @@ class TwitterQueue(QueueMixin): patience -= 1 def get_wait(self, uriparts): - available = next(lambda x: not x.busy, self.queue) - first_worker = min(available, key=lambda x: x.get_wait(uriparts)) - diff = first_worker.get_wait(uriparts) + # Stop as soon as one is available to avoid initiating the rest + for i in self.queue: + if not i.busy and i.get_wait(uriparts) == 0: + return 0 + # If None is available, let's see how much we have to wait + available = filter(lambda x: not x.busy, self.queue) + diff = min(worker.get_wait(uriparts) for worker in self.queue if not worker.busy) return diff diff --git a/bitter/utils.py b/bitter/utils.py index 407a5c9..aa85d47 100644 --- a/bitter/utils.py +++ b/bitter/utils.py @@ -11,7 +11,7 @@ from multiprocessing.pool import ThreadPool from itertools import islice from contextlib import contextmanager -from itertools import zip_longest +from future.moves.itertools import zip_longest from collections import Counter from twitter import TwitterHTTPError diff --git a/setup.py b/setup.py index 890042d..287da84 100644 --- a/setup.py +++ b/setup.py @@ -43,5 +43,14 @@ setup( entry_points=""" [console_scripts] bitter=bitter.cli:main - """ + """, + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + ] ) diff --git a/tests/test_crawlers.py b/tests/test_crawlers.py index 165ac12..dc2e8f6 100644 --- a/tests/test_crawlers.py +++ b/tests/test_crawlers.py @@ -6,7 +6,7 @@ import datetime import time from bitter import utils -from bitter.crawlers import TwitterQueue, TwitterWorker, TwitterQueueException +from bitter.crawlers import TwitterQueue, TwitterWorker, QueueException from bitter import config as c class TestUtils(TestCase): @@ -64,12 +64,9 @@ class TestUtils(TestCase): try: # resp = self.wq.friends.list(screen_name='balkian') self.wq.next(['friends', 'list']) - except TwitterQueueException: + except QueueException: failed = True assert failed l2 = w1.get_limit(['friends', 'list']) assert self.wq.get_wait(['friends', 'list']) > (l2['reset']-time.time()) assert self.wq.get_wait(['friends', 'list']) < (l2['reset']-time.time()+2) - time.sleep(w1.get_wait(['friends', 'list'])) - -