mirror of
https://github.com/balkian/bitter.git
synced 2024-12-22 00:18:12 +00:00
Added stream to CLI
* Save stream to file * Parse file and get the most important hashtags
This commit is contained in:
parent
738823c8a2
commit
38605ba2c8
@ -1 +1 @@
|
||||
0.6.0
|
||||
0.6.1
|
@ -6,6 +6,7 @@ import time
|
||||
import sqlalchemy.types
|
||||
import threading
|
||||
import sqlite3
|
||||
from tqdm import tqdm
|
||||
|
||||
from sqlalchemy import exists
|
||||
|
||||
@ -333,14 +334,49 @@ def stream(ctx):
|
||||
pass
|
||||
|
||||
@stream.command('get')
|
||||
@click.option('-l', '--locations', default=None)
|
||||
@click.option('-t', '--track', default=None)
|
||||
@click.option('-f', '--file', help='File to store the stream of tweets')
|
||||
@click.pass_context
|
||||
def get_stream(ctx):
|
||||
def get_stream(ctx, locations, track, file):
|
||||
wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
|
||||
|
||||
iterator = wq.statuses.sample()
|
||||
query_args = {}
|
||||
if locations:
|
||||
query_args['locations'] = locations
|
||||
if track:
|
||||
query_args['track'] = track
|
||||
if not query_args:
|
||||
iterator = wq.statuses.sample()
|
||||
else:
|
||||
iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
|
||||
|
||||
if not file:
|
||||
file = sys.stdout
|
||||
else:
|
||||
file = open(file, 'a')
|
||||
|
||||
for tweet in tqdm(iterator):
|
||||
print(json.dumps(tweet), file=file)
|
||||
if file != sys.stdout:
|
||||
file.close()
|
||||
|
||||
@stream.command('read')
|
||||
@click.option('-f', '--file', help='File to read the stream of tweets from')
|
||||
@click.pass_context
|
||||
def read_stream(ctx, file):
|
||||
for tweet in utils.read_file(file, tail=True):
|
||||
print('{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
|
||||
|
||||
@stream.command('tags')
|
||||
@click.option('-f', '--file', help='File to read the stream of tweets from')
|
||||
@click.argument('limit', required=False, default=None, type=int)
|
||||
@click.pass_context
|
||||
def tags_stream(ctx, file, limit):
|
||||
c = utils.get_hashtags(utils.read_file(file))
|
||||
for count, tag in c.most_common(limit):
|
||||
print('{} - {}'.format(count, tag))
|
||||
|
||||
for tweet in iterator:
|
||||
print(tweet)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -12,6 +12,7 @@ from multiprocessing.pool import ThreadPool
|
||||
from itertools import islice
|
||||
from contextlib import contextmanager
|
||||
from itertools import zip_longest
|
||||
from collections import Counter
|
||||
|
||||
from twitter import TwitterHTTPError
|
||||
|
||||
@ -86,6 +87,26 @@ def add_credentials(credfile=None, **creds):
|
||||
f.write('\n')
|
||||
|
||||
|
||||
def get_hashtags(iter_tweets, best=None):
|
||||
c = Counter()
|
||||
for tweet in iter_tweets:
|
||||
c.update(tag['text'] for tag in tweet.get('entities', {}).get('hashtags', {}))
|
||||
return c
|
||||
|
||||
def read_file(filename, tail=False):
|
||||
with open(filename) as f:
|
||||
while True:
|
||||
line = f.readline()
|
||||
if line not in (None, '', '\n'):
|
||||
tweet = json.loads(line.strip())
|
||||
yield tweet
|
||||
else:
|
||||
if tail:
|
||||
time.sleep(1)
|
||||
else:
|
||||
return line
|
||||
|
||||
|
||||
def get_users(wq, ulist, by_name=False, queue=None, max_users=100):
|
||||
t = 'name' if by_name else 'uid'
|
||||
logger.debug('Getting users by {}: {}'.format(t, ulist))
|
||||
|
@ -1,3 +1,4 @@
|
||||
sqlalchemy
|
||||
twitter
|
||||
click
|
||||
tqdm
|
||||
|
Loading…
Reference in New Issue
Block a user