From 67ef307cce7fcdec3c910648ce84558f4ad30f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Wed, 23 Nov 2016 10:48:35 +0100 Subject: [PATCH] Improved tweet extraction @ CLI --- bitter/VERSION | 2 +- bitter/cli.py | 45 ++++++++++++++++++++++++++++++++++++--------- bitter/crawlers.py | 5 ++++- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/bitter/VERSION b/bitter/VERSION index ef5e445..05e8a45 100644 --- a/bitter/VERSION +++ b/bitter/VERSION @@ -1 +1 @@ -0.6.5 +0.6.6 diff --git a/bitter/cli.py b/bitter/cli.py index 72c61f3..566cd1e 100644 --- a/bitter/cli.py +++ b/bitter/cli.py @@ -46,18 +46,45 @@ def tweet(ctx): pass @tweet.command('get') +@click.option('-w', '--write', is_flag=True, default=False) +@click.option('-f', '--folder', default="tweets") +@click.option('-u', '--update', help="Update the file even if the tweet exists", is_flag=True, default=False) @click.argument('tweetid') -@click.pass_context -def get_tweet(ctx, tweetid): - wq = crawlers.TwitterQueue.from_credentials(bconf.CREDENTIALS, 1) - t = utils.get_tweet(wq, tweetid) - print(json.dumps(t, indent=2)) +def get_tweet(tweetid, write, folder, update): + wq = crawlers.TwitterQueue.from_credentials(bconf.CREDENTIALS) + if not write: + t = utils.get_tweet(wq, tweetid) + js = json.dumps(t, indent=2) + print(js) + return + if not os.path.exists(folder): + os.makedirs(folder) + file = os.path.join(folder, '%s.json' % tweetid) + if not update and os.path.exists(file) and os.path.isfile(file): + print('%s: Tweet exists' % tweetid) + return + try: + t = utils.get_tweet(wq, tweetid) + with open(file, 'w') as f: + js = json.dumps(t, indent=2) + print(js, file=f) + except Exception as ex: + print('%s: %s' % (tweetid, ex), file=sys.stderr) +@tweet.command('get_all') +@click.argument('tweetsfile', 'File with a list of tweets to look up') +@click.option('-f', '--folder', default="tweets") +@click.pass_context +def get_tweets(ctx, tweetsfile, folder): + with open(tweetsfile) as f: + for line in f: + tid = line.strip() + ctx.invoke(get_tweet, folder=folder, tweetid=tid, write=True) @tweet.command('search') @click.argument('query') @click.pass_context -def get_tweet(ctx, query): +def search(ctx, query): wq = crawlers.TwitterQueue.from_credentials(bconf.CREDENTIALS) t = utils.search_tweet(wq, query) print(json.dumps(t, indent=2)) @@ -65,7 +92,7 @@ def get_tweet(ctx, query): @tweet.command('timeline') @click.argument('user') @click.pass_context -def get_tweet(ctx, user): +def timeline(ctx, user): wq = crawlers.TwitterQueue.from_credentials(bconf.CREDENTIALS) t = utils.user_timeline(wq, user) print(json.dumps(t, indent=2)) @@ -86,7 +113,7 @@ def list_users(ctx, db): for j in i.__dict__: print('\t{}: {}'.format(j, getattr(i,j))) -@users.command('get_one') +@users.command('get') @click.argument('user') @click.option('-w', '--write', is_flag=True, default=False) @click.option('-f', '--folder', default="users") @@ -109,7 +136,7 @@ def get_user(user, write, folder, update): js = json.dumps(u, indent=2) print(js, file=f) -@users.command('get') +@users.command('get_all') @click.argument('usersfile', 'File with a list of users to look up') @click.option('-f', '--folder', default="users") @click.pass_context diff --git a/bitter/crawlers.py b/bitter/crawlers.py index 41b2dcd..f898a08 100644 --- a/bitter/crawlers.py +++ b/bitter/crawlers.py @@ -96,7 +96,10 @@ class RestWorker(TwitterWorker): def get_limit(self, uriparts): uri = '/'+'/'.join(uriparts) - return self.limits.get('resources', {}).get(uriparts[0], {}).get(uri, {}) + for (ix, i) in self.limits.get('resources', {}).get(uriparts[0], {}).items(): + if ix.startswith(uri): + return i + return {} def set_limit(self, uriparts, value): uri = '/'+'/'.join(uriparts)