diff --git a/.dockerignore b/.dockerignore
deleted file mode 100644
index 73e73a1..0000000
--- a/.dockerignore
+++ /dev/null
@@ -1,4 +0,0 @@
-.*
-env
-*.egg-info
-dist
diff --git a/bitter/cli.py b/bitter/cli.py
index db37e85..07468fc 100644
--- a/bitter/cli.py
+++ b/bitter/cli.py
@@ -1,5 +1,6 @@
 import click
 import json
+import os
 import logging
 import time
 import sqlalchemy.types
@@ -26,8 +27,14 @@ def main(ctx, verbose, logging_level, config, credentials):
     ctx.obj = {}
     ctx.obj['VERBOSE'] = verbose
     ctx.obj['CONFIG'] = config
-    ctx.obj['CREDENTIALS'] = credentials
-
+    if os.path.isfile(credentials):
+        ctx.obj['CREDENTIALS'] = credentials
+    else:
+        global_file = os.path.expanduser('~/.bitter-credentials.json')
+        if os.path.isfile(global_file):
+            ctx.obj['CREDENTIALS'] = global_file
+        else:
+            raise Exception('You need to provide a valid credentials file')
 
 @main.group()
 @click.pass_context
@@ -53,6 +60,15 @@ def get_tweet(ctx, query):
     t = utils.search_tweet(c.client, query)
     print(json.dumps(t, indent=2))
 
+@tweet.command('timeline')
+@click.argument('user')
+@click.pass_context
+def get_timeline(ctx, user):
+    wq = crawlers.TwitterQueue.from_credentials(ctx.obj['CREDENTIALS'])
+    c = wq.next()
+    t = utils.user_timeline(c.client, user)
+    print(json.dumps(t, indent=2))
+
 @main.group()
 @click.pass_context
 def users(ctx):
@@ -147,7 +163,6 @@ def get_users(ctx, usersfile, skip, until, threads, db):
             session = make_session(dburl)
             q_iter = iter(ids_queue.get, None)
             for user in utils.get_users(wq, q_iter):
-                user['entities'] = json.dumps(user['entities'])
                 dbuser = User(**user)
                 session.add(dbuser)
                 local_collected += 1
@@ -228,6 +243,36 @@ def status_extractor(ctx, with_followers, with_not_pending):
         for j in i.__dict__:
             print('\t{}: {}'.format(j, getattr(i,j)))
 
+@extractor.command('network')
+@click.option('--as_json', is_flag=True, default=False)
+@click.pass_context
+def network_extractor(ctx, as_json):
+    session = ctx.obj['SESSION']
+    followers = session.query(Following)
+    follower_map = []
+    for i in followers:
+        if not as_json:
+            print('{} -> {}'.format(i.follower, i.isfollowed))
+        else:
+            follower_map.append({'source_id': i.follower,
+                                 'target_id': i.isfollowed,
+                                 'following': True})
+    if as_json:
+        import json
+        print(json.dumps(follower_map, indent=4))
+
+
+@extractor.command('users')
+@click.pass_context
+def users_extractor(ctx):
+    session = ctx.obj['SESSION']
+    users = session.query(User)
+    import json
+    for i in users:
+        # print(json.dumps(i.as_dict(), indent=4))
+        dd = i.as_dict()
+        print(json.dumps(dd, indent=4))
+
 @extractor.command()
 @click.option('--recursive', is_flag=True,
               help='Get following/follower/info recursively.', default=False)
@@ -254,8 +299,6 @@ def reset_extractor(ctx):
     session = make_session(db)
     session.query(ExtractorEntry).filter(ExtractorEntry.pending==True).update({'pending':False})
 
-
-
 @api.command('limits')
 @click.argument('url', required=False)
 @click.pass_context
diff --git a/bitter/models.py b/bitter/models.py
index 6e5c9da..59698e9 100644
--- a/bitter/models.py
+++ b/bitter/models.py
@@ -1,4 +1,5 @@
 import time
+import json
 
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.types import BigInteger, Integer, Text, Boolean
@@ -55,6 +56,18 @@ class User(Base):
 
     verified = Column(Boolean)
 
+    def as_dict(self):
+        dcopy = self.__dict__.copy()
+        for k, v in self.__dict__.items():
+            if k[0] == '_':
+                del dcopy[k]
+        try:
+            dcopy['entities'] = json.loads(dcopy['entities'])
+        except Exception:
+            print('Could not convert entities to a dict')
+            pass
+        return dcopy
+
 class Following(Base):
     __tablename__ = 'followers'
 
diff --git a/bitter/utils.py b/bitter/utils.py
index c902691..78721ee 100644
--- a/bitter/utils.py
+++ b/bitter/utils.py
@@ -88,37 +88,38 @@ def extract(wq, recursive=False, user=None, initfile=None, dburi=None, extractor
     session = make_session(dburi)
 
-    if initfile:
-        screen_names = []
-        user_ids = []
-        if not user:
-            logger.info("No user. I will open %s" % initfile)
-            with open(initfile, 'r') as f:
-                for line in f:
-                    user = line.strip().split(',')[0]
-                    try:
-                        int(user)
-                        user_ids.append(user)
-                    except ValueError:
-                        screen_names.append(user.split('@')[-1])
-        else:
-            try:
-                user_ids.append(int(user))
-                logger.info("Added id")
-            except Exception as ex:
-                logger.info("Exception: {}".format(ex))
-                logger.info("Added screen_name")
-                screen_names.append(user)
-        nusers = list(get_users(wq, screen_names, by_name=True))
-        if user_ids:
-            nusers += list(get_users(wq, user_ids, by_name=False))
-
-        for i in nusers:
-            add_user(session, i, enqueue=True)
+    screen_names = []
+    user_ids = []
+
+    def classify_user(id_or_name):
+        try:
+            int(id_or_name)
+            user_ids.append(id_or_name)
+            logger.info("Added user id")
+        except ValueError:
+            logger.info("Added screen_name")
+            screen_names.append(id_or_name.split('@')[-1])
+
+    if user:
+        classify_user(user)
+
+    elif initfile:
+        logger.info("No user. I will open %s" % initfile)
+        with open(initfile, 'r') as f:
+            for line in f:
+                user = line.strip().split(',')[0]
+                classify_user(user)
     else:
         logger.info('Using pending users from last session')
 
+    nusers = list(get_users(wq, screen_names, by_name=True))
+    if user_ids:
+        nusers += list(get_users(wq, user_ids, by_name=False))
+
+    for i in nusers:
+        add_user(session, i, enqueue=True)
+
     total_users = session.query(sqlalchemy.func.count(User.id)).scalar()
     logging.info('Total users: {}'.format(total_users))
 
 
 def pending_entries():
@@ -201,6 +202,12 @@ def get_tweet(c, tid):
 def search_tweet(c, query):
     return c.search.tweets(q=query)
 
+def user_timeline(c, query):
+    try:
+        return c.statuses.user_timeline(user_id=int(query))
+    except ValueError:
+        return c.statuses.user_timeline(screen_name=query)
+
 def get_user(c, user):
     try:
         int(user)
diff --git a/requirements.txt b/requirements.txt
index e5ab260..63537f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
 sqlalchemy
-sqlite3
 twitter
 click
diff --git a/setup.py b/setup.py
index 83424b6..a410995 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@ setup(
     author='J. Fernando Sanchez',
     author_email='balkian@gmail.com',
     url="http://balkian.com",
-    version="0.2",
+    version="0.3",
     install_requires=install_reqs,
     tests_require=test_reqs,
     include_package_data=True,
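Note: the commit above relies twice on the same dispatch idiom -- call int() on the
argument, treat it as a numeric Twitter user id on success, and fall back to a
screen name on ValueError (see classify_user() in bitter/utils.py and the new
user_timeline() helper). A minimal standalone sketch of that idiom, with
hypothetical sample values; split_ids_and_names() is not part of the commit:

    def split_ids_and_names(values):
        """Separate numeric user ids from screen names, stripping a leading '@'."""
        user_ids, screen_names = [], []
        for value in values:
            try:
                int(value)  # raises ValueError for non-numeric input
                user_ids.append(value)
            except ValueError:
                screen_names.append(value.split('@')[-1])
        return user_ids, screen_names

    # Example:
    # split_ids_and_names(['19063240', '@balkian'])
    # -> (['19063240'], ['balkian'])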