diff --git a/bitter/VERSION b/bitter/VERSION
index 09a3acf..7ceb040 100644
--- a/bitter/VERSION
+++ b/bitter/VERSION
@@ -1 +1 @@
-0.6.0
\ No newline at end of file
+0.6.1
\ No newline at end of file
diff --git a/bitter/cli.py b/bitter/cli.py
index 28a77e2..86d83cf 100644
--- a/bitter/cli.py
+++ b/bitter/cli.py
@@ -6,6 +6,7 @@ import time
 import sqlalchemy.types
 import threading
 import sqlite3
+from tqdm import tqdm
 
 from sqlalchemy import exists
 
@@ -333,14 +334,61 @@ def stream(ctx):
     pass
 
 @stream.command('get')
+@click.option('-l', '--locations', default=None)
+@click.option('-t', '--track', default=None)
+@click.option('-f', '--file', help='File to store the stream of tweets')
 @click.pass_context
-def get_stream(ctx):
+def get_stream(ctx, locations, track, file):
+    """Store a stream of tweets as JSON, one tweet per line.
+
+    The sample stream is used unless --locations and/or --track are
+    given, in which case they are passed to the filter stream. Tweets
+    are appended to FILE, or printed to stdout when no file is given.
+    """
     wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
 
-    iterator = wq.statuses.sample()
+    query_args = {}
+    if locations:
+        # Example value: "-4.25,40.16,-3.40,40.75"
+        query_args['locations'] = locations
+    if track:
+        query_args['track'] = track
+    if not query_args:
+        iterator = wq.statuses.sample()
+    else:
+        iterator = wq.statuses.filter(**query_args)
+
+    out = open(file, 'a') if file else sys.stdout
+    try:
+        for tweet in tqdm(iterator):
+            print(json.dumps(tweet), file=out)
+    finally:
+        # Close the file even if the loop is interrupted or fails.
+        if out is not sys.stdout:
+            out.close()
 
-    for tweet in iterator:
-        print(tweet)
+@stream.command('read')
+@click.option('-f', '--file', help='File to read the stream of tweets from')
+@click.pass_context
+def read_stream(ctx, file):
+    """Follow FILE and print the timestamp, author and text of each tweet."""
+    for tweet in utils.read_file(file, tail=True):
+        print('{timestamp_ms}- @{screen_name}: {text}'.format(
+            timestamp_ms=tweet['timestamp_ms'],
+            screen_name=tweet['user']['screen_name'],
+            text=tweet['text']))
+
+@stream.command('tags')
+@click.option('-f', '--file', help='File to read the stream of tweets from')
+@click.argument('limit', required=False, default=None, type=int)
+@click.pass_context
+def tags_stream(ctx, file, limit):
+    """Print the LIMIT most common hashtags in FILE (all if omitted)."""
+    c = utils.get_hashtags(utils.read_file(file))
+    # Counter.most_common() yields (element, count) pairs.
+    for tag, count in c.most_common(limit):
+        print('{} - {}'.format(tag, count))
+
 
 if __name__ == '__main__':
     main()
diff --git a/bitter/utils.py b/bitter/utils.py
index f762558..60bd74d 100644
--- a/bitter/utils.py
+++ b/bitter/utils.py
@@ -12,6 +12,7 @@ from multiprocessing.pool import ThreadPool
 from itertools import islice
 from contextlib import contextmanager
 from itertools import zip_longest
+from collections import Counter
 
 from twitter import TwitterHTTPError
 
@@ -86,6 +87,45 @@ def add_credentials(credfile=None, **creds):
             f.write('\n')
 
 
+def get_hashtags(iter_tweets, best=None):
+    """Count the hashtags found in an iterable of tweets.
+
+    Hashtags are read from tweet['entities']['hashtags']; tweets that
+    lack those keys contribute nothing. `best` is accepted but unused.
+    Returns a Counter mapping each hashtag text to its number of uses.
+    """
+    c = Counter()
+    for tweet in iter_tweets:
+        hashtags = tweet.get('entities', {}).get('hashtags', [])
+        c.update(tag['text'] for tag in hashtags)
+    return c
+
+def read_file(filename, tail=False):
+    """Yield the tweets stored in a file, one JSON document per line.
+
+    Blank lines are skipped. With tail=True the generator never ends:
+    at the end of the file it polls once per second for new lines,
+    and a line is only parsed once its trailing newline is written.
+    """
+    with open(filename) as f:
+        pending = ''
+        while True:
+            line = f.readline()
+            if line:
+                pending += line
+                if tail and not pending.endswith('\n'):
+                    # Partially written line: wait for the rest of it.
+                    continue
+                if pending.strip():
+                    yield json.loads(pending)
+                pending = ''
+            elif tail:
+                # readline() returns '' only at the end of the file.
+                time.sleep(1)
+            else:
+                return
+
+
 def get_users(wq, ulist, by_name=False, queue=None, max_users=100):
     t = 'name' if by_name else 'uid'
     logger.debug('Getting users by {}: {}'.format(t, ulist))
diff --git a/requirements.txt b/requirements.txt
index 63537f8..2bed540 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 sqlalchemy
 twitter
 click
+tqdm