1
0
mirror of https://github.com/balkian/bitter.git synced 2024-12-22 08:28:12 +00:00

Py2 compatibility and queue handling

* Removed install_aliases(), which caused problems with urllib2
* Better waiting time calculation (used in queue handling)
This commit is contained in:
J. Fernando Sánchez 2016-11-23 12:27:53 +01:00
parent 67ef307cce
commit e3a78968da
7 changed files with 45 additions and 25 deletions

View File

@ -1 +1 @@
0.6.6
0.7.0

View File

@ -3,13 +3,6 @@ Bitter module. A library and cli for Twitter using python-twitter.
http://github.com/balkian/bitter
"""
try:
from future.standard_library import install_aliases
install_aliases()
except ImportError:
# Avoid problems at setup.py and py3.x
pass
import os
from .version import __version__

View File

@ -391,8 +391,9 @@ def stream(ctx):
@click.option('-l', '--locations', default=None)
@click.option('-t', '--track', default=None)
@click.option('-f', '--file', help='File to store the stream of tweets')
@click.option('-p', '--politelyretry', help='Politely retry after a hangup/connection error', is_flag=True, default=True)
@click.pass_context
def get_stream(ctx, locations, track, file):
def get_stream(ctx, locations, track, file, politelyretry):
wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
query_args = {}
@ -400,17 +401,28 @@ def get_stream(ctx, locations, track, file):
query_args['locations'] = locations
if track:
query_args['track'] = track
if not query_args:
iterator = wq.statuses.sample()
else:
iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
if not file:
file = sys.stdout
else:
file = open(file, 'a')
for tweet in tqdm(iterator):
def insist():
    """Yield items from the stream, re-opening it whenever it ends.

    Each pass of the outer loop builds a fresh iterator: ``sample()`` when
    ``query_args`` is empty, ``filter(**query_args)`` otherwise. When that
    iterator is exhausted (a "hangup"):

    * if ``politelyretry`` is falsy, the generator simply ends;
    * otherwise it waits 3 seconds and opens the stream again.

    Raises:
        Exception: if a hangup happens less than 60 seconds after the
            previous one (or after the generator started, for the first
            hangup), to avoid hammering the endpoint with reconnections.
    """
    lasthangup = time.time()
    while True:
        if not query_args:
            iterator = wq.statuses.sample()
        else:
            # NOTE(review): the original carried a commented-out sample
            # value here, presumably a `locations` bounding box:
            # "-4.25,40.16,-3.40,40.75" -- confirm.
            iterator = wq.statuses.filter(**query_args)
        for i in iterator:
            yield i
        if not politelyretry:
            return
        thishangup = time.time()
        if thishangup - lasthangup < 60:
            raise Exception('Too many hangups in a row.')
        # Remember this hangup so the next check measures the gap between
        # consecutive hangups, not the time since the generator started.
        lasthangup = thishangup
        time.sleep(3)
for tweet in tqdm(insist()):
print(json.dumps(tweet), file=file)
if file != sys.stdout:
file.close()

View File

@ -10,6 +10,11 @@ from twitter import *
from collections import OrderedDict
from threading import Lock
from itertools import islice
try:
    # Python 2: bind the lazy itertools.ifilter to the name ``filter``.
    # ``ifilter`` is a function, not a submodule, so it must be imported
    # with ``from ... import``; ``import itertools.ifilter`` always raises
    # ImportError and would silently leave the eager built-in in place.
    from itertools import ifilter as filter
except ImportError:
    # Python 3: itertools has no ifilter; keep the built-in ``filter``.
    pass
from . import utils
from . import config
@ -178,9 +183,13 @@ class TwitterQueue(QueueMixin):
patience -= 1
def get_wait(self, uriparts):
    """Return the shortest waiting time among the idle workers.

    Workers in ``self.queue`` whose ``busy`` attribute is truthy are
    ignored. Each remaining worker is asked once for
    ``worker.get_wait(uriparts)``.

    Args:
        uriparts: passed through unchanged to every worker's ``get_wait``.

    Returns:
        0 as soon as an idle worker reports a wait of 0 (later workers are
        not queried), otherwise the smallest wait reported by an idle
        worker.

    Raises:
        ValueError: if no worker in the queue is idle.
    """
    shortest = None
    for worker in self.queue:
        if worker.busy:
            continue
        wait = worker.get_wait(uriparts)
        # Stop as soon as one is available to avoid initiating the rest
        if wait == 0:
            return 0
        if shortest is None or wait < shortest:
            shortest = wait
    if shortest is None:
        # Same exception type min() raised on an empty sequence, but with
        # a message that explains the cause.
        raise ValueError('No idle worker available to compute the waiting time.')
    # If none is available right now, this is how long we have to wait
    return shortest

View File

@ -11,7 +11,7 @@ from multiprocessing.pool import ThreadPool
from itertools import islice
from contextlib import contextmanager
from itertools import zip_longest
from future.moves.itertools import zip_longest
from collections import Counter
from twitter import TwitterHTTPError

View File

@ -43,5 +43,14 @@ setup(
entry_points="""
[console_scripts]
bitter=bitter.cli:main
"""
""",
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
]
)

View File

@ -6,7 +6,7 @@ import datetime
import time
from bitter import utils
from bitter.crawlers import TwitterQueue, TwitterWorker, TwitterQueueException
from bitter.crawlers import TwitterQueue, TwitterWorker, QueueException
from bitter import config as c
class TestUtils(TestCase):
@ -64,12 +64,9 @@ class TestUtils(TestCase):
try:
# resp = self.wq.friends.list(screen_name='balkian')
self.wq.next(['friends', 'list'])
except TwitterQueueException:
except QueueException:
failed = True
assert failed
l2 = w1.get_limit(['friends', 'list'])
assert self.wq.get_wait(['friends', 'list']) > (l2['reset']-time.time())
assert self.wq.get_wait(['friends', 'list']) < (l2['reset']-time.time()+2)
time.sleep(w1.get_wait(['friends', 'list']))