diff --git a/Dockerfile-3.4 b/Dockerfile-3.4 index b1410bc..a1d5129 100644 --- a/Dockerfile-3.4 +++ b/Dockerfile-3.4 @@ -2,6 +2,6 @@ From python:3.4-onbuild Maintainer J. Fernando Sánchez @balkian -RUN pip install -e "/usr/src/app/[server]" +RUN pip install ".[server]" ENTRYPOINT ["bitter"] diff --git a/Dockerfile.template b/Dockerfile.template index 90b3611..81320a1 100644 --- a/Dockerfile.template +++ b/Dockerfile.template @@ -2,6 +2,6 @@ From python:{{PYVERSION}}-onbuild Maintainer J. Fernando Sánchez @balkian -RUN pip install -e "/usr/src/app/[server]" +RUN pip install ".[server]" ENTRYPOINT ["bitter"] diff --git a/bitter/VERSION b/bitter/VERSION index 879be8a..a3df0a6 100644 --- a/bitter/VERSION +++ b/bitter/VERSION @@ -1 +1 @@ -0.7.7 +0.8.0 diff --git a/bitter/cli.py b/bitter/cli.py index e4431b3..92d395a 100644 --- a/bitter/cli.py +++ b/bitter/cli.py @@ -333,6 +333,7 @@ def reset_extractor(ctx): @click.pass_context def get_limits(ctx, url): wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE) + total = {} for worker in wq.queue: resp = worker.client.application.rate_limit_status() print('#'*20) @@ -345,9 +346,15 @@ def get_limits(ctx, url): limit = resp['resources'][cat].get(url, None) or resp['resources'][cat] else: print('Cat {} not found'.format(cat)) - print('{}: {}'.format(url, limit)) + continue + for k in limit: + total[k] = total.get(k, 0) + limit[k] + print('{}: {}'.format(url, limit)) else: print(json.dumps(resp, indent=2)) + if url: + print('Total for {}: {}'.format(url, total)) + @main.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=False)) @@ -358,9 +365,14 @@ def get_limits(ctx, url): @click.pass_context def api(ctx, cmd, tweets, users, api_args): opts = {} + mappings = { + 'id': '_id' + } i = iter(api_args) for k, v in zip(i, i): k = k.replace('--', '') + if k in mappings: + k = mappings[k] opts[k] = v wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE) if tweets: @@ -441,7 +453,7 @@ def get_stream(ctx, locations, track, file, politelyretry): def read_stream(ctx, file, tail): for tweet in utils.read_file(file, tail=tail): try: - print(u'{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text'])) + print(u'{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['created_at'], screen_name=tweet['user']['screen_name'], text=tweet['text'])) except (KeyError, TypeError): print('Raw tweet: {}'.format(tweet)) diff --git a/bitter/crawlers.py b/bitter/crawlers.py index da6c7b9..8ebd0c8 100644 --- a/bitter/crawlers.py +++ b/bitter/crawlers.py @@ -113,7 +113,7 @@ class RestWorker(TwitterWorker): def get_wait(self, uriparts): limits = self.get_limit(uriparts) - if limits['remaining'] > 0: + if limits.get('remaining', 1) > 0: return 0 reset = limits.get('reset', 0) now = time.time() diff --git a/bitter/models.py b/bitter/models.py index c2216d4..3bd0537 100644 --- a/bitter/models.py +++ b/bitter/models.py @@ -3,11 +3,13 @@ import json from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.types import BigInteger, Integer, Text, Boolean +from sqlalchemy.schema import ForeignKey from sqlalchemy.pool import SingletonThreadPool from sqlalchemy import Column, Index from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker +from functools import wraps Base = declarative_base() @@ -90,6 +92,34 @@ class ExtractorEntry(Base): busy = Column(Boolean, default=False) +class Search(Base): + __tablename__ = 'search_queries' + + id = Column(Integer, primary_key=True, index=True, unique=True) + endpoint = Column(Text, comment="Endpoint URL") + attrs = Column(Text, comment="Text version of the dictionary of parameters") + count = Column(Integer) + current_count = Column(Integer) + current_id = Column(BigInteger, comment='Oldest ID retrieved (should match max_id when done)') + since_id = Column(BigInteger) + +class SearchResults(Base): + __tablename__ = 'search_results' + id = Column(Integer, primary_key=True, index=True, unique=True) + search_id = Column(ForeignKey('search_queries.id')) + resource_id = Column(Text) + +def memoize(f): + memo = {} + @wraps(f) + def helper(self, **kwargs): + st = dict_to_str(kwargs) + key = (self.__uriparts, st) + if key not in memo: + memo[key] = f(self, **kwargs) + return memo[key] + return helper + def make_session(url): if not isinstance(url, str): print(url) @@ -100,24 +130,6 @@ def make_session(url): session = Session() return session -def test(db='sqlite:///users.db'): - - from sqlalchemy import exists - session = make_session(db) - - our_user = session.query(User).first() - - print(our_user.name) - print(session.query(User).count()) - fake_user = User(name="Fake user") - session.add(fake_user) - session.commit() - print(session.query(User).count()) - print(session.query(exists().where(User.name == "Fake user")).scalar()) - fake_committed = session.query(User).filter_by(name="Fake user").first() - print(fake_committed.id) - print(fake_committed.name) - session.delete(fake_committed) - session.commit() - print(session.query(User).count()) - print(list(session.execute('SELECT 1 from users where id=\'%s\'' % 1548))) + +def dict_to_str(args): + return json.dumps(args, sort_keys=True) diff --git a/bitter/utils.py b/bitter/utils.py index 6830cba..d520e39 100644 --- a/bitter/utils.py +++ b/bitter/utils.py @@ -91,7 +91,9 @@ def config(conffile=None): def read_config(conffile): p = conffile and get_config_path(conffile) - if p and os.path.exists(p): + if p: + if not os.path.exists(p): + raise Exception('{} file does not exist.'.format(p)) f = open(p, 'r') elif 'BITTER_CONFIG' not in os.environ: raise Exception('No config file or BITTER_CONFIG env variable.') @@ -121,7 +123,7 @@ def create_config_file(conffile=None): with open(conffile, 'a'): pass - + def get_credentials(conffile=None, inverse=False, **kwargs): creds = [] for i in iter_credentials(conffile): diff --git a/setup.py b/setup.py index ebf267b..d7bcf5b 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ except AttributeError: test_reqs = parse_requirements("test-requirements.txt") import sys +import os import itertools if sys.version_info <= (3, 0): install_reqs = itertools.chain(install_reqs, py2_reqs) @@ -23,7 +24,8 @@ if sys.version_info <= (3, 0): install_reqs = [str(ir.req) for ir in install_reqs] test_reqs = [str(ir.req) for ir in test_reqs] -from bitter import __version__ +with open(os.path.join('bitter', 'VERSION'), 'r') as f: + __version__ = f.read().strip() setup( name="bitter", diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..d85b354 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,23 @@ +from unittest import TestCase + +import os +import types + +from bitter import utils +from bitter.models import * +from sqlalchemy import exists + +class TestModels(TestCase): + + def setUp(self): + self.session = make_session('sqlite://') + + def test_user(self): + fake_user = User(name="Fake user", id=1548) + self.session.add(fake_user) + self.session.commit() + fake_committed = self.session.query(User).filter_by(name="Fake user").first() + assert fake_committed + self.session.delete(fake_committed) + self.session.commit() + assert not list(self.session.execute('SELECT 1 from users where id=\'%s\'' % 1548))