mirror of
https://github.com/balkian/bitter.git
synced 2024-12-22 08:28:12 +00:00
Added stream to CLI
* Save stream to file * Parse file and get the most important hashtags
This commit is contained in:
parent
738823c8a2
commit
38605ba2c8
@ -1 +1 @@
|
|||||||
0.6.0
|
0.6.1
|
@ -6,6 +6,7 @@ import time
|
|||||||
import sqlalchemy.types
|
import sqlalchemy.types
|
||||||
import threading
|
import threading
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
from sqlalchemy import exists
|
from sqlalchemy import exists
|
||||||
|
|
||||||
@ -333,14 +334,49 @@ def stream(ctx):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@stream.command('get')
|
@stream.command('get')
|
||||||
|
@click.option('-l', '--locations', default=None)
|
||||||
|
@click.option('-t', '--track', default=None)
|
||||||
|
@click.option('-f', '--file', help='File to store the stream of tweets')
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def get_stream(ctx):
|
def get_stream(ctx, locations, track, file):
|
||||||
wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
|
wq = crawlers.StreamQueue.from_credentials(bconf.CREDENTIALS, 1)
|
||||||
|
|
||||||
iterator = wq.statuses.sample()
|
query_args = {}
|
||||||
|
if locations:
|
||||||
|
query_args['locations'] = locations
|
||||||
|
if track:
|
||||||
|
query_args['track'] = track
|
||||||
|
if not query_args:
|
||||||
|
iterator = wq.statuses.sample()
|
||||||
|
else:
|
||||||
|
iterator = wq.statuses.filter(**query_args)#"-4.25,40.16,-3.40,40.75")
|
||||||
|
|
||||||
|
if not file:
|
||||||
|
file = sys.stdout
|
||||||
|
else:
|
||||||
|
file = open(file, 'a')
|
||||||
|
|
||||||
|
for tweet in tqdm(iterator):
|
||||||
|
print(json.dumps(tweet), file=file)
|
||||||
|
if file != sys.stdout:
|
||||||
|
file.close()
|
||||||
|
|
||||||
|
@stream.command('read')
|
||||||
|
@click.option('-f', '--file', help='File to read the stream of tweets from')
|
||||||
|
@click.pass_context
|
||||||
|
def read_stream(ctx, file):
|
||||||
|
for tweet in utils.read_file(file, tail=True):
|
||||||
|
print('{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
|
||||||
|
|
||||||
|
@stream.command('tags')
|
||||||
|
@click.option('-f', '--file', help='File to read the stream of tweets from')
|
||||||
|
@click.argument('limit', required=False, default=None, type=int)
|
||||||
|
@click.pass_context
|
||||||
|
def tags_stream(ctx, file, limit):
|
||||||
|
c = utils.get_hashtags(utils.read_file(file))
|
||||||
|
for count, tag in c.most_common(limit):
|
||||||
|
print('{} - {}'.format(count, tag))
|
||||||
|
|
||||||
for tweet in iterator:
|
|
||||||
print(tweet)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
@ -12,6 +12,7 @@ from multiprocessing.pool import ThreadPool
|
|||||||
from itertools import islice
|
from itertools import islice
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
from itertools import zip_longest
|
from itertools import zip_longest
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
from twitter import TwitterHTTPError
|
from twitter import TwitterHTTPError
|
||||||
|
|
||||||
@ -86,6 +87,26 @@ def add_credentials(credfile=None, **creds):
|
|||||||
f.write('\n')
|
f.write('\n')
|
||||||
|
|
||||||
|
|
||||||
|
def get_hashtags(iter_tweets, best=None):
|
||||||
|
c = Counter()
|
||||||
|
for tweet in iter_tweets:
|
||||||
|
c.update(tag['text'] for tag in tweet.get('entities', {}).get('hashtags', {}))
|
||||||
|
return c
|
||||||
|
|
||||||
|
def read_file(filename, tail=False):
|
||||||
|
with open(filename) as f:
|
||||||
|
while True:
|
||||||
|
line = f.readline()
|
||||||
|
if line not in (None, '', '\n'):
|
||||||
|
tweet = json.loads(line.strip())
|
||||||
|
yield tweet
|
||||||
|
else:
|
||||||
|
if tail:
|
||||||
|
time.sleep(1)
|
||||||
|
else:
|
||||||
|
return line
|
||||||
|
|
||||||
|
|
||||||
def get_users(wq, ulist, by_name=False, queue=None, max_users=100):
|
def get_users(wq, ulist, by_name=False, queue=None, max_users=100):
|
||||||
t = 'name' if by_name else 'uid'
|
t = 'name' if by_name else 'uid'
|
||||||
logger.debug('Getting users by {}: {}'.format(t, ulist))
|
logger.debug('Getting users by {}: {}'.format(t, ulist))
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
sqlalchemy
|
sqlalchemy
|
||||||
twitter
|
twitter
|
||||||
click
|
click
|
||||||
|
tqdm
|
||||||
|
Loading…
Reference in New Issue
Block a user