1
0
mirror of https://github.com/balkian/bitter.git synced 2024-12-22 00:18:12 +00:00

Py2 compatibility and queue handling

* Removed install_aliases(), which caused problems with urllib2
* Better waiting time calculation (used in queue handling)
This commit is contained in:
J. Fernando Sánchez 2016-11-23 12:27:53 +01:00
parent 67ef307cce
commit e3a78968da
7 changed files with 45 additions and 25 deletions

View File

@ -1 +1 @@
0.6.6 0.7.0

View File

@ -3,13 +3,6 @@ Bitter module. A library and cli for Twitter using python-twitter.
http://github.com/balkian/bitter http://github.com/balkian/bitter
""" """
try:
from future.standard_library import install_aliases
install_aliases()
except ImportError:
# Avoid problems at setup.py and py3.x
pass
import os import os
from .version import __version__ from .version import __version__

View File

@ -391,8 +391,9 @@ def stream(ctx):
@click.option('-l', '--locations', default=None) @click.option('-l', '--locations', default=None)
@click.option('-t', '--track', default=None) @click.option('-t', '--track', default=None)
@click.option('-f', '--file', help='File to store the stream of tweets') @click.option('-f', '--file', help='File to store the stream of tweets')
@click.option('-p', '--politelyretry', help='Politely retry after a hangup/connection error', is_flag=True, default=True)
@click.pass_context @click.pass_context
def get_stream(ctx, locations, track, file): def get_stream(ctx, locations, track, file, politelyretry):
wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1) wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
query_args = {} query_args = {}
@ -400,17 +401,28 @@ def get_stream(ctx, locations, track, file):
query_args['locations'] = locations query_args['locations'] = locations
if track: if track:
query_args['track'] = track query_args['track'] = track
if not query_args:
iterator = wq.statuses.sample()
else:
iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
if not file: if not file:
file = sys.stdout file = sys.stdout
else: else:
file = open(file, 'a') file = open(file, 'a')
for tweet in tqdm(iterator): def insist():
lasthangup = time.time()
while True:
if not query_args:
iterator = wq.statuses.sample()
else:
iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
for i in iterator:
yield i
if not politelyretry:
return
thishangup = time.time()
if thishangup - lasthangup < 60:
raise Exception('Too many hangups in a row.')
time.sleep(3)
for tweet in tqdm(insist()):
print(json.dumps(tweet), file=file) print(json.dumps(tweet), file=file)
if file != sys.stdout: if file != sys.stdout:
file.close() file.close()

View File

@ -10,6 +10,11 @@ from twitter import *
from collections import OrderedDict from collections import OrderedDict
from threading import Lock from threading import Lock
from itertools import islice from itertools import islice
try:
import itertools.ifilter as filter
except ImportError:
pass
from . import utils from . import utils
from . import config from . import config
@ -178,9 +183,13 @@ class TwitterQueue(QueueMixin):
patience -= 1 patience -= 1
def get_wait(self, uriparts): def get_wait(self, uriparts):
available = next(lambda x: not x.busy, self.queue) # Stop as soon as one is available to avoid initiating the rest
first_worker = min(available, key=lambda x: x.get_wait(uriparts)) for i in self.queue:
diff = first_worker.get_wait(uriparts) if not i.busy and i.get_wait(uriparts) == 0:
return 0
# If None is available, let's see how much we have to wait
available = filter(lambda x: not x.busy, self.queue)
diff = min(worker.get_wait(uriparts) for worker in self.queue if not worker.busy)
return diff return diff

View File

@ -11,7 +11,7 @@ from multiprocessing.pool import ThreadPool
from itertools import islice from itertools import islice
from contextlib import contextmanager from contextlib import contextmanager
from itertools import zip_longest from future.moves.itertools import zip_longest
from collections import Counter from collections import Counter
from twitter import TwitterHTTPError from twitter import TwitterHTTPError

View File

@ -43,5 +43,14 @@ setup(
entry_points=""" entry_points="""
[console_scripts] [console_scripts]
bitter=bitter.cli:main bitter=bitter.cli:main
""" """,
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
]
) )

View File

@ -6,7 +6,7 @@ import datetime
import time import time
from bitter import utils from bitter import utils
from bitter.crawlers import TwitterQueue, TwitterWorker, TwitterQueueException from bitter.crawlers import TwitterQueue, TwitterWorker, QueueException
from bitter import config as c from bitter import config as c
class TestUtils(TestCase): class TestUtils(TestCase):
@ -64,12 +64,9 @@ class TestUtils(TestCase):
try: try:
# resp = self.wq.friends.list(screen_name='balkian') # resp = self.wq.friends.list(screen_name='balkian')
self.wq.next(['friends', 'list']) self.wq.next(['friends', 'list'])
except TwitterQueueException: except QueueException:
failed = True failed = True
assert failed assert failed
l2 = w1.get_limit(['friends', 'list']) l2 = w1.get_limit(['friends', 'list'])
assert self.wq.get_wait(['friends', 'list']) > (l2['reset']-time.time()) assert self.wq.get_wait(['friends', 'list']) > (l2['reset']-time.time())
assert self.wq.get_wait(['friends', 'list']) < (l2['reset']-time.time()+2) assert self.wq.get_wait(['friends', 'list']) < (l2['reset']-time.time()+2)
time.sleep(w1.get_wait(['friends', 'list']))