Added stream to CLI

* Save stream to file * Parse file and get the most important hashtags
2026-02-14 13:28:17 +00:00 · 2016-11-19 20:16:56 +01:00
parent 738823c8a2
commit 38605ba2c8
4 changed files with 63 additions and 5 deletions
--- a/bitter/VERSION
+++ b/bitter/VERSION
@@ -1 +1 @@
-0.6.0
+0.6.1
--- a/bitter/cli.py
+++ b/bitter/cli.py
@@ -6,6 +6,7 @@ import time
 import sqlalchemy.types
 import threading
 import sqlite3
 from tqdm import tqdm
 from sqlalchemy import exists
@@ -333,14 +334,49 @@ def stream(ctx):
    pass
@stream.command('get')
@click.option('-l', '--locations', default=None)
@click.option('-t', '--track', default=None)
@click.option('-f', '--file', help='File to store the stream of tweets')
@click.pass_context 
-def get_stream(ctx):
+def get_stream(ctx, locations, track, file):
    wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
-    iterator = wq.statuses.sample()
+    query_args = {}
    if locations:
        query_args['locations'] = locations
    if track:
        query_args['track'] = track
    if not query_args:
        iterator = wq.statuses.sample()
    else:
        iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
    if not file:
        file = sys.stdout
    else:
        file = open(file, 'a')
    for tweet in tqdm(iterator):
        print(json.dumps(tweet), file=file)
    if file != sys.stdout:
        file.close()
@stream.command('read')
@click.option('-f', '--file', help='File to read the stream of tweets from')
@click.pass_context 
 def read_stream(ctx, file):
    for tweet in utils.read_file(file, tail=True):
        print('{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
@stream.command('tags')
@click.option('-f', '--file', help='File to read the stream of tweets from')
@click.argument('limit', required=False, default=None, type=int)
@click.pass_context 
 def tags_stream(ctx, file, limit):
    c = utils.get_hashtags(utils.read_file(file))
    for count, tag in c.most_common(limit):
        print('{} - {}'.format(count, tag))
    for tweet in iterator:
        print(tweet)
 if __name__ == '__main__':
    main()
--- a/bitter/utils.py
+++ b/bitter/utils.py
@@ -12,6 +12,7 @@ from multiprocessing.pool import ThreadPool
 from itertools import islice
 from contextlib import contextmanager
 from itertools import zip_longest
 from collections import Counter
 from twitter import TwitterHTTPError
@@ -86,6 +87,26 @@ def add_credentials(credfile=None, **creds):
            f.write('\n')
 def get_hashtags(iter_tweets, best=None):
    c = Counter()
    for tweet in iter_tweets:
        c.update(tag['text'] for tag in tweet.get('entities', {}).get('hashtags', {}))
    return c
 def read_file(filename, tail=False):
    with open(filename) as f:
        while True:
            line = f.readline()
            if line not in (None, '', '\n'):
                tweet = json.loads(line.strip())
                yield tweet
            else:
                if tail:
                    time.sleep(1)
                else:
                    return line
 def get_users(wq, ulist, by_name=False, queue=None, max_users=100):
    t = 'name' if by_name else 'uid'
    logger.debug('Getting users by {}: {}'.format(t, ulist))
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 sqlalchemy
 twitter
 click
 tqdm