Added stream to CLI

* Save stream to file * Parse file and get the most important hashtags
2026-06-18 13:31:58 +00:00 · 2016-11-19 20:16:56 +01:00
parent 738823c8a2
commit 38605ba2c8
4 changed files with 63 additions and 5 deletions
--- a/bitter/VERSION
+++ b/bitter/VERSION
@@ -1 +1 @@
-0.6.0
+0.6.1
--- a/bitter/cli.py
+++ b/bitter/cli.py
@@ -6,6 +6,7 @@ import time
 import sqlalchemy.types
 import threading
 import sqlite3
+from tqdm import tqdm

 from sqlalchemy import exists

@@ -333,14 +334,49 @@ def stream(ctx):
    pass

@stream.command('get')
+@click.option('-l', '--locations', default=None)
+@click.option('-t', '--track', default=None)
+@click.option('-f', '--file', help='File to store the stream of tweets')
@click.pass_context 
-def get_stream(ctx):
+def get_stream(ctx, locations, track, file):
    wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)

-    iterator = wq.statuses.sample()
+    query_args = {}
+    if locations:
+        query_args['locations'] = locations
+    if track:
+        query_args['track'] = track
+    if not query_args:
+        iterator = wq.statuses.sample()
+    else:
+        iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")

-    for tweet in iterator:
-        print(tweet)
+    if not file:
+        file = sys.stdout
+    else:
+        file = open(file, 'a')
+
+    for tweet in tqdm(iterator):
+        print(json.dumps(tweet), file=file)
+    if file != sys.stdout:
+        file.close()
+
+@stream.command('read')
+@click.option('-f', '--file', help='File to read the stream of tweets from')
+@click.pass_context 
+def read_stream(ctx, file):
+    for tweet in utils.read_file(file, tail=True):
+        print('{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
+
+@stream.command('tags')
+@click.option('-f', '--file', help='File to read the stream of tweets from')
+@click.argument('limit', required=False, default=None, type=int)
+@click.pass_context 
+def tags_stream(ctx, file, limit):
+    c = utils.get_hashtags(utils.read_file(file))
+    for count, tag in c.most_common(limit):
+        print('{} - {}'.format(count, tag))
+    

 if __name__ == '__main__':
    main()
--- a/bitter/utils.py
+++ b/bitter/utils.py
@@ -12,6 +12,7 @@ from multiprocessing.pool import ThreadPool
 from itertools import islice
 from contextlib import contextmanager
 from itertools import zip_longest
+from collections import Counter

 from twitter import TwitterHTTPError

@@ -86,6 +87,26 @@ def add_credentials(credfile=None, **creds):
            f.write('\n')


+def get_hashtags(iter_tweets, best=None):
+    c = Counter()
+    for tweet in iter_tweets:
+        c.update(tag['text'] for tag in tweet.get('entities', {}).get('hashtags', {}))
+    return c
+
+def read_file(filename, tail=False):
+    with open(filename) as f:
+        while True:
+            line = f.readline()
+            if line not in (None, '', '\n'):
+                tweet = json.loads(line.strip())
+                yield tweet
+            else:
+                if tail:
+                    time.sleep(1)
+                else:
+                    return line
+    
+
 def get_users(wq, ulist, by_name=False, queue=None, max_users=100):
    t = 'name' if by_name else 'uid'
    logger.debug('Getting users by {}: {}'.format(t, ulist))
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 sqlalchemy
 twitter
 click
+tqdm
@@ -1 +1 @@
 .6.0
 .6.1