mirror of
				https://github.com/balkian/bitter.git
				synced 2025-11-04 01:28:17 +00:00 
			
		
		
		
	Py2 compatibility and queue handling
* Removed install_aliases(), which caused problems with urllib2 * Better waiting time calculation (used in queue handling)
This commit is contained in:
		@@ -1 +1 @@
 | 
			
		||||
0.6.6
 | 
			
		||||
0.7.0
 | 
			
		||||
 
 | 
			
		||||
@@ -3,13 +3,6 @@ Bitter module. A library and cli for Twitter using python-twitter.
 | 
			
		||||
http://github.com/balkian/bitter
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    from future.standard_library import install_aliases
 | 
			
		||||
    install_aliases()
 | 
			
		||||
except ImportError:
 | 
			
		||||
    # Avoid problems at setup.py and py3.x
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
from .version import __version__
 | 
			
		||||
 
 | 
			
		||||
@@ -391,8 +391,9 @@ def stream(ctx):
 | 
			
		||||
@click.option('-l', '--locations', default=None)
 | 
			
		||||
@click.option('-t', '--track', default=None)
 | 
			
		||||
@click.option('-f', '--file', help='File to store the stream of tweets')
 | 
			
		||||
@click.option('-p', '--politelyretry', help='Politely retry after a hangup/connection error', is_flag=True, default=True)
 | 
			
		||||
@click.pass_context 
 | 
			
		||||
def get_stream(ctx, locations, track, file):
 | 
			
		||||
def get_stream(ctx, locations, track, file, politelyretry):
 | 
			
		||||
    wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
 | 
			
		||||
 | 
			
		||||
    query_args = {}
 | 
			
		||||
@@ -400,17 +401,28 @@ def get_stream(ctx, locations, track, file):
 | 
			
		||||
        query_args['locations'] = locations
 | 
			
		||||
    if track:
 | 
			
		||||
        query_args['track'] = track
 | 
			
		||||
    if not query_args:
 | 
			
		||||
        iterator = wq.statuses.sample()
 | 
			
		||||
    else:
 | 
			
		||||
        iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
 | 
			
		||||
 | 
			
		||||
    if not file:
 | 
			
		||||
        file = sys.stdout
 | 
			
		||||
    else:
 | 
			
		||||
        file = open(file, 'a')
 | 
			
		||||
 | 
			
		||||
    for tweet in tqdm(iterator):
 | 
			
		||||
    def insist():
 | 
			
		||||
        lasthangup = time.time()
 | 
			
		||||
        while True:
 | 
			
		||||
            if not query_args:
 | 
			
		||||
                iterator = wq.statuses.sample()
 | 
			
		||||
            else:
 | 
			
		||||
                iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
 | 
			
		||||
            for i in iterator:
 | 
			
		||||
                yield i
 | 
			
		||||
            if not politelyretry:
 | 
			
		||||
                return
 | 
			
		||||
            thishangup = time.time()
 | 
			
		||||
            if thishangup - lasthangup < 60:
 | 
			
		||||
                raise Exception('Too many hangups in a row.')
 | 
			
		||||
            time.sleep(3)
 | 
			
		||||
 | 
			
		||||
    for tweet in tqdm(insist()):
 | 
			
		||||
        print(json.dumps(tweet), file=file)
 | 
			
		||||
    if file != sys.stdout:
 | 
			
		||||
        file.close()
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,11 @@ from twitter import *
 | 
			
		||||
from collections import OrderedDict
 | 
			
		||||
from threading import Lock
 | 
			
		||||
from itertools import islice
 | 
			
		||||
try:
 | 
			
		||||
    import itertools.ifilter as filter
 | 
			
		||||
except ImportError:
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
from . import utils
 | 
			
		||||
from . import config
 | 
			
		||||
 | 
			
		||||
@@ -178,9 +183,13 @@ class TwitterQueue(QueueMixin):
 | 
			
		||||
                    patience -= 1
 | 
			
		||||
 | 
			
		||||
    def get_wait(self, uriparts):
 | 
			
		||||
        available = next(lambda x: not x.busy, self.queue)
 | 
			
		||||
        first_worker = min(available, key=lambda x: x.get_wait(uriparts))
 | 
			
		||||
        diff = first_worker.get_wait(uriparts)
 | 
			
		||||
        # Stop as soon as one is available to avoid initiating the rest
 | 
			
		||||
        for i in self.queue:
 | 
			
		||||
            if not i.busy and i.get_wait(uriparts) == 0:
 | 
			
		||||
                return 0
 | 
			
		||||
        # If None is available, let's see how much we have to wait
 | 
			
		||||
        available = filter(lambda x: not x.busy, self.queue)
 | 
			
		||||
        diff = min(worker.get_wait(uriparts) for worker in self.queue if not worker.busy)
 | 
			
		||||
        return diff
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ from multiprocessing.pool import ThreadPool
 | 
			
		||||
 | 
			
		||||
from itertools import islice
 | 
			
		||||
from contextlib import contextmanager
 | 
			
		||||
from itertools import zip_longest
 | 
			
		||||
from future.moves.itertools import zip_longest
 | 
			
		||||
from collections import Counter
 | 
			
		||||
 | 
			
		||||
from twitter import TwitterHTTPError
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										11
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								setup.py
									
									
									
									
									
								
							@@ -43,5 +43,14 @@ setup(
 | 
			
		||||
    entry_points="""
 | 
			
		||||
        [console_scripts]
 | 
			
		||||
        bitter=bitter.cli:main
 | 
			
		||||
    """
 | 
			
		||||
    """,
 | 
			
		||||
    classifiers=[
 | 
			
		||||
        'Development Status :: 4 - Beta',
 | 
			
		||||
        'Intended Audience :: Developers',
 | 
			
		||||
        'Intended Audience :: Science/Research',
 | 
			
		||||
        'License :: OSI Approved :: Apache 2 License',
 | 
			
		||||
        'Programming Language :: Python :: 2',
 | 
			
		||||
        'Programming Language :: Python :: 2.7',
 | 
			
		||||
        'Programming Language :: Python :: 3',
 | 
			
		||||
    ]
 | 
			
		||||
)
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ import datetime
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from bitter import utils
 | 
			
		||||
from bitter.crawlers import TwitterQueue, TwitterWorker, TwitterQueueException
 | 
			
		||||
from bitter.crawlers import TwitterQueue, TwitterWorker, QueueException
 | 
			
		||||
from bitter import config as c
 | 
			
		||||
 | 
			
		||||
class TestUtils(TestCase):
 | 
			
		||||
@@ -64,12 +64,9 @@ class TestUtils(TestCase):
 | 
			
		||||
        try:
 | 
			
		||||
            # resp = self.wq.friends.list(screen_name='balkian')
 | 
			
		||||
            self.wq.next(['friends', 'list'])
 | 
			
		||||
        except TwitterQueueException:
 | 
			
		||||
        except QueueException:
 | 
			
		||||
            failed = True
 | 
			
		||||
        assert failed
 | 
			
		||||
        l2 = w1.get_limit(['friends', 'list'])
 | 
			
		||||
        assert self.wq.get_wait(['friends', 'list']) > (l2['reset']-time.time())
 | 
			
		||||
        assert self.wq.get_wait(['friends', 'list']) < (l2['reset']-time.time()+2)
 | 
			
		||||
        time.sleep(w1.get_wait(['friends', 'list']))
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user