1
0
mirror of https://github.com/balkian/bitter.git synced 2024-12-22 08:28:12 +00:00

Added cli and fixed dependencies

This commit is contained in:
J. Fernando Sánchez 2016-03-16 15:23:47 +01:00
parent d0de6c2ea9
commit 97028e38b1
6 changed files with 95 additions and 37 deletions

View File

@ -1,4 +0,0 @@
.*
env
*.egg-info
dist

View File

@ -1,5 +1,6 @@
import click import click
import json import json
import os
import logging import logging
import time import time
import sqlalchemy.types import sqlalchemy.types
@ -26,8 +27,14 @@ def main(ctx, verbose, logging_level, config, credentials):
ctx.obj = {} ctx.obj = {}
ctx.obj['VERBOSE'] = verbose ctx.obj['VERBOSE'] = verbose
ctx.obj['CONFIG'] = config ctx.obj['CONFIG'] = config
ctx.obj['CREDENTIALS'] = credentials if os.path.isfile(credentials):
ctx.obj['CREDENTIALS'] = credentials
else:
global_file = os.path.expanduser('~/.bitter-credentials.json')
if os.path.isfile(global_file):
ctx.obj['CREDENTIALS'] = global_file
else:
raise Exception('You need to provide a valid credentials file')
@main.group() @main.group()
@click.pass_context @click.pass_context
@ -53,6 +60,15 @@ def get_tweet(ctx, query):
t = utils.search_tweet(c.client, query) t = utils.search_tweet(c.client, query)
print(json.dumps(t, indent=2)) print(json.dumps(t, indent=2))
@tweet.command('timeline')
@click.argument('user')
@click.pass_context
def get_tweet(ctx, user):
    # CLI command `timeline`: fetch the timeline for USER and print it as
    # JSON indented by two spaces.
    # The credentials file path is read from ctx.obj['CREDENTIALS'].
    # NOTE(review): this function shares its Python name with another
    # `get_tweet` command function in this file; only the click command
    # name ('timeline') tells them apart. Consider renaming.
    credentials_path = ctx.obj['CREDENTIALS']
    worker = crawlers.TwitterQueue.from_credentials(credentials_path).next()
    timeline = utils.user_timeline(worker.client, user)
    print(json.dumps(timeline, indent=2))
@main.group() @main.group()
@click.pass_context @click.pass_context
def users(ctx): def users(ctx):
@ -147,7 +163,6 @@ def get_users(ctx, usersfile, skip, until, threads, db):
session = make_session(dburl) session = make_session(dburl)
q_iter = iter(ids_queue.get, None) q_iter = iter(ids_queue.get, None)
for user in utils.get_users(wq, q_iter): for user in utils.get_users(wq, q_iter):
user['entities'] = json.dumps(user['entities'])
dbuser = User(**user) dbuser = User(**user)
session.add(dbuser) session.add(dbuser)
local_collected += 1 local_collected += 1
@ -228,6 +243,36 @@ def status_extractor(ctx, with_followers, with_not_pending):
for j in i.__dict__: for j in i.__dict__:
print('\t{}: {}'.format(j, getattr(i,j))) print('\t{}: {}'.format(j, getattr(i,j)))
@extractor.command('network')
@click.option('--as_json', is_flag=True, default=False)
@click.pass_context
def network_extractor(ctx, as_json):
    # CLI command `network`: dump every Following row found through the
    # session stored in ctx.obj['SESSION'].
    # Without --as_json each relation is printed as "follower -> isfollowed";
    # with it, all relations are printed as a single JSON list.
    relations = ctx.obj['SESSION'].query(Following)

    if not as_json:
        for relation in relations:
            print('{} -> {}'.format(relation.follower, relation.isfollowed))
        return

    import json
    edges = [
        {'source_id': relation.follower,
         'target_id': relation.isfollowed,
         'following': True}
        for relation in relations
    ]
    print(json.dumps(edges, indent=4))
@extractor.command('users')
@click.pass_context
def users_extractor(ctx):
    # CLI command `users`: print each User row found through the session in
    # ctx.obj['SESSION'] as its own JSON document, indented by four spaces.
    import json
    for record in ctx.obj['SESSION'].query(User):
        print(json.dumps(record.as_dict(), indent=4))
@extractor.command() @extractor.command()
@click.option('--recursive', is_flag=True, help='Get following/follower/info recursively.', default=False) @click.option('--recursive', is_flag=True, help='Get following/follower/info recursively.', default=False)
@ -254,8 +299,6 @@ def reset_extractor(ctx):
session = make_session(db) session = make_session(db)
session.query(ExtractorEntry).filter(ExtractorEntry.pending==True).update({'pending':False}) session.query(ExtractorEntry).filter(ExtractorEntry.pending==True).update({'pending':False})
@api.command('limits') @api.command('limits')
@click.argument('url', required=False) @click.argument('url', required=False)
@click.pass_context @click.pass_context

View File

@ -1,4 +1,5 @@
import time import time
import json
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import BigInteger, Integer, Text, Boolean from sqlalchemy.types import BigInteger, Integer, Text, Boolean
@ -55,6 +56,18 @@ class User(Base):
verified = Column(Boolean) verified = Column(Boolean)
def as_dict(self):
    """Return the instance's public attributes as a plain dict.

    Attributes whose name starts with an underscore are left out. The
    ``entities`` value is decoded from JSON; when that is not possible
    (the key is absent, the value is not a JSON string, or the JSON is
    invalid) a message is printed and the dict is returned as it is.
    """
    public = {
        key: value
        for key, value in self.__dict__.items()
        if not key.startswith('_')
    }
    try:
        public['entities'] = json.loads(public['entities'])
    except (KeyError, TypeError, ValueError):
        # Best effort: keep whatever was stored rather than failing.
        print('Could not convert to dict')
    return public
class Following(Base): class Following(Base):
__tablename__ = 'followers' __tablename__ = 'followers'

View File

@ -88,37 +88,38 @@ def extract(wq, recursive=False, user=None, initfile=None, dburi=None, extractor
session = make_session(dburi) session = make_session(dburi)
if initfile: screen_names = []
screen_names = [] user_ids = []
user_ids = []
if not user:
logger.info("No user. I will open %s" % initfile)
with open(initfile, 'r') as f:
for line in f:
user = line.strip().split(',')[0]
try:
int(user)
user_ids.append(user)
except ValueError:
screen_names.append(user.split('@')[-1])
else:
try:
user_ids.append(int(user))
logger.info("Added id")
except Exception as ex:
logger.info("Exception: {}".format(ex))
logger.info("Added screen_name")
screen_names.append(user)
nusers = list(get_users(wq, screen_names, by_name=True))
if user_ids:
nusers += list(get_users(wq, user_ids, by_name=False))
for i in nusers: def classify_user(id_or_name):
add_user(session, i, enqueue=True) try:
int(user)
user_ids.append(user)
logger.info("Added user id")
except ValueError:
logger.info("Added screen_name")
screen_names.append(user.split('@')[-1])
if user:
classify_user(user)
elif initfile:
logger.info("No user. I will open %s" % initfile)
with open(initfile, 'r') as f:
for line in f:
user = line.strip().split(',')[0]
classify_user(user)
else: else:
logger.info('Using pending users from last session') logger.info('Using pending users from last session')
nusers = list(get_users(wq, screen_names, by_name=True))
if user_ids:
nusers += list(get_users(wq, user_ids, by_name=False))
for i in nusers:
add_user(session, i, enqueue=True)
total_users = session.query(sqlalchemy.func.count(User.id)).scalar() total_users = session.query(sqlalchemy.func.count(User.id)).scalar()
logging.info('Total users: {}'.format(total_users)) logging.info('Total users: {}'.format(total_users))
def pending_entries(): def pending_entries():
@ -201,6 +202,12 @@ def get_tweet(c, tid):
def search_tweet(c, query): def search_tweet(c, query):
return c.search.tweets(q=query) return c.search.tweets(q=query)
def user_timeline(c, query):
    """Fetch a user's timeline through client ``c``.

    ``query`` is treated as a numeric user id when it converts to ``int``,
    and as a screen name otherwise.

    Returns whatever ``c.statuses.user_timeline`` returns.
    """
    # Only the conversion is guarded: a ValueError raised by the API call
    # itself must not be mistaken for "query is a screen name".
    try:
        user_id = int(query)
    except ValueError:
        return c.statuses.user_timeline(screen_name=query)
    return c.statuses.user_timeline(user_id=user_id)
def get_user(c, user): def get_user(c, user):
try: try:
int(user) int(user)

View File

@ -1,4 +1,3 @@
sqlalchemy sqlalchemy
sqlite3
twitter twitter
click click

View File

@ -27,7 +27,7 @@ setup(
author='J. Fernando Sanchez', author='J. Fernando Sanchez',
author_email='balkian@gmail.com', author_email='balkian@gmail.com',
url="http://balkian.com", url="http://balkian.com",
version="0.2", version="0.3",
install_requires=install_reqs, install_requires=install_reqs,
tests_require=test_reqs, tests_require=test_reqs,
include_package_data=True, include_package_data=True,