1
0
mirror of https://github.com/balkian/bitter.git synced 2025-01-02 13:11:28 +00:00

Compose and bug fixes

This commit is contained in:
J. Fernando Sánchez 2018-03-19 14:36:05 +01:00
parent 53bb7edabc
commit 6259013978
9 changed files with 80 additions and 29 deletions

View File

@ -2,6 +2,6 @@
From python:3.4-onbuild From python:3.4-onbuild
Maintainer J. Fernando Sánchez @balkian Maintainer J. Fernando Sánchez @balkian
RUN pip install -e "/usr/src/app/[server]" RUN pip install ".[server]"
ENTRYPOINT ["bitter"] ENTRYPOINT ["bitter"]

View File

@ -2,6 +2,6 @@
From python:{{PYVERSION}}-onbuild From python:{{PYVERSION}}-onbuild
Maintainer J. Fernando Sánchez @balkian Maintainer J. Fernando Sánchez @balkian
RUN pip install -e "/usr/src/app/[server]" RUN pip install ".[server]"
ENTRYPOINT ["bitter"] ENTRYPOINT ["bitter"]

View File

@ -1 +1 @@
0.7.7 0.8.0

View File

@ -333,6 +333,7 @@ def reset_extractor(ctx):
@click.pass_context @click.pass_context
def get_limits(ctx, url): def get_limits(ctx, url):
wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE) wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE)
total = {}
for worker in wq.queue: for worker in wq.queue:
resp = worker.client.application.rate_limit_status() resp = worker.client.application.rate_limit_status()
print('#'*20) print('#'*20)
@ -345,9 +346,15 @@ def get_limits(ctx, url):
limit = resp['resources'][cat].get(url, None) or resp['resources'][cat] limit = resp['resources'][cat].get(url, None) or resp['resources'][cat]
else: else:
print('Cat {} not found'.format(cat)) print('Cat {} not found'.format(cat))
print('{}: {}'.format(url, limit)) continue
for k in limit:
total[k] = total.get(k, 0) + limit[k]
print('{}: {}'.format(url, limit))
else: else:
print(json.dumps(resp, indent=2)) print(json.dumps(resp, indent=2))
if url:
print('Total for {}: {}'.format(url, total))
@main.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=False)) @main.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=False))
@ -358,9 +365,14 @@ def get_limits(ctx, url):
@click.pass_context @click.pass_context
def api(ctx, cmd, tweets, users, api_args): def api(ctx, cmd, tweets, users, api_args):
opts = {} opts = {}
mappings = {
'id': '_id'
}
i = iter(api_args) i = iter(api_args)
for k, v in zip(i, i): for k, v in zip(i, i):
k = k.replace('--', '') k = k.replace('--', '')
if k in mappings:
k = mappings[k]
opts[k] = v opts[k] = v
wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE) wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE)
if tweets: if tweets:
@ -441,7 +453,7 @@ def get_stream(ctx, locations, track, file, politelyretry):
def read_stream(ctx, file, tail): def read_stream(ctx, file, tail):
for tweet in utils.read_file(file, tail=tail): for tweet in utils.read_file(file, tail=tail):
try: try:
print(u'{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text'])) print(u'{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['created_at'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
except (KeyError, TypeError): except (KeyError, TypeError):
print('Raw tweet: {}'.format(tweet)) print('Raw tweet: {}'.format(tweet))

View File

@ -113,7 +113,7 @@ class RestWorker(TwitterWorker):
def get_wait(self, uriparts): def get_wait(self, uriparts):
limits = self.get_limit(uriparts) limits = self.get_limit(uriparts)
if limits['remaining'] > 0: if limits.get('remaining', 1) > 0:
return 0 return 0
reset = limits.get('reset', 0) reset = limits.get('reset', 0)
now = time.time() now = time.time()

View File

@ -3,11 +3,13 @@ import json
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import BigInteger, Integer, Text, Boolean from sqlalchemy.types import BigInteger, Integer, Text, Boolean
from sqlalchemy.schema import ForeignKey
from sqlalchemy.pool import SingletonThreadPool from sqlalchemy.pool import SingletonThreadPool
from sqlalchemy import Column, Index from sqlalchemy import Column, Index
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
from functools import wraps
Base = declarative_base() Base = declarative_base()
@ -90,6 +92,34 @@ class ExtractorEntry(Base):
busy = Column(Boolean, default=False) busy = Column(Boolean, default=False)
class Search(Base):
    """ORM model mapped to the ``search_queries`` table.

    The ``comment`` strings below are attached to the columns themselves.
    """

    __tablename__ = 'search_queries'

    # Integer primary key, additionally flagged as indexed and unique.
    id = Column(Integer, unique=True, index=True, primary_key=True)
    endpoint = Column(Text, comment='Endpoint URL')
    attrs = Column(
        Text,
        comment='Text version of the dictionary of parameters',
    )
    count = Column(Integer)
    current_count = Column(Integer)
    current_id = Column(
        BigInteger,
        comment='Oldest ID retrieved (should match max_id when done)',
    )
    since_id = Column(BigInteger)
class SearchResults(Base):
    """ORM model mapped to the ``search_results`` table.

    Each row points at a ``search_queries`` row through ``search_id``.
    """

    __tablename__ = 'search_results'

    # Integer primary key, additionally flagged as indexed and unique.
    id = Column(Integer, unique=True, index=True, primary_key=True)
    search_id = Column(ForeignKey('search_queries.id'))
    resource_id = Column(Text)
def memoize(f):
    """Decorate a keyword-only method so repeated calls reuse earlier results.

    Results are stored in a dictionary private to each decorated function,
    keyed by the pair ``(self.__uriparts, dict_to_str(kwargs))``. Entries are
    never evicted.

    NOTE(review): if this function is defined at module level (as it appears
    to be), ``self.__uriparts`` is not name-mangled here -- confirm that the
    decorated objects expose an attribute with that literal name.
    """
    cache = {}

    @wraps(f)
    def helper(self, **kwargs):
        # Serialise the keyword arguments first so they can be part of a
        # hashable cache key.
        serialized = dict_to_str(kwargs)
        lookup = (self.__uriparts, serialized)
        if lookup in cache:
            return cache[lookup]
        result = f(self, **kwargs)
        cache[lookup] = result
        return result

    return helper
def make_session(url): def make_session(url):
if not isinstance(url, str): if not isinstance(url, str):
print(url) print(url)
@ -100,24 +130,6 @@ def make_session(url):
session = Session() session = Session()
return session return session
def test(db='sqlite:///users.db'):
from sqlalchemy import exists def dict_to_str(args):
session = make_session(db) return json.dumps(args, sort_keys=True)
our_user = session.query(User).first()
print(our_user.name)
print(session.query(User).count())
fake_user = User(name="Fake user")
session.add(fake_user)
session.commit()
print(session.query(User).count())
print(session.query(exists().where(User.name == "Fake user")).scalar())
fake_committed = session.query(User).filter_by(name="Fake user").first()
print(fake_committed.id)
print(fake_committed.name)
session.delete(fake_committed)
session.commit()
print(session.query(User).count())
print(list(session.execute('SELECT 1 from users where id=\'%s\'' % 1548)))

View File

@ -91,7 +91,9 @@ def config(conffile=None):
def read_config(conffile): def read_config(conffile):
p = conffile and get_config_path(conffile) p = conffile and get_config_path(conffile)
if p and os.path.exists(p): if p:
if not os.path.exists(p):
raise Exception('{} file does not exist.'.format(p))
f = open(p, 'r') f = open(p, 'r')
elif 'BITTER_CONFIG' not in os.environ: elif 'BITTER_CONFIG' not in os.environ:
raise Exception('No config file or BITTER_CONFIG env variable.') raise Exception('No config file or BITTER_CONFIG env variable.')
@ -121,7 +123,7 @@ def create_config_file(conffile=None):
with open(conffile, 'a'): with open(conffile, 'a'):
pass pass
def get_credentials(conffile=None, inverse=False, **kwargs): def get_credentials(conffile=None, inverse=False, **kwargs):
creds = [] creds = []
for i in iter_credentials(conffile): for i in iter_credentials(conffile):

View File

@ -14,6 +14,7 @@ except AttributeError:
test_reqs = parse_requirements("test-requirements.txt") test_reqs = parse_requirements("test-requirements.txt")
import sys import sys
import os
import itertools import itertools
if sys.version_info <= (3, 0): if sys.version_info <= (3, 0):
install_reqs = itertools.chain(install_reqs, py2_reqs) install_reqs = itertools.chain(install_reqs, py2_reqs)
@ -23,7 +24,8 @@ if sys.version_info <= (3, 0):
install_reqs = [str(ir.req) for ir in install_reqs] install_reqs = [str(ir.req) for ir in install_reqs]
test_reqs = [str(ir.req) for ir in test_reqs] test_reqs = [str(ir.req) for ir in test_reqs]
from bitter import __version__ with open(os.path.join('bitter', 'VERSION'), 'r') as f:
__version__ = f.read().strip()
setup( setup(
name="bitter", name="bitter",

23
tests/test_models.py Normal file
View File

@ -0,0 +1,23 @@
from unittest import TestCase
import os
import types
from bitter import utils
from bitter.models import *
from sqlalchemy import exists
class TestModels(TestCase):
    """Round-trip checks for the ORM models using a throwaway session."""

    def setUp(self):
        # 'sqlite://' (no path) is SQLAlchemy's in-memory SQLite URL;
        # presumably make_session hands it to create_engine -- TODO confirm.
        self.session = make_session('sqlite://')

    def tearDown(self):
        # Release the connection held by the session after every test.
        self.session.close()

    def test_user(self):
        """A user can be stored, found by name, and deleted again."""
        fake_user = User(name="Fake user", id=1548)
        self.session.add(fake_user)
        self.session.commit()

        fake_committed = (
            self.session.query(User).filter_by(name="Fake user").first()
        )
        self.assertIsNotNone(fake_committed)

        self.session.delete(fake_committed)
        self.session.commit()

        # Use an ORM EXISTS query instead of string-built raw SQL: it avoids
        # manual quoting and does not rely on Session.execute accepting a
        # plain string.
        still_present = self.session.query(
            exists().where(User.id == 1548)
        ).scalar()
        self.assertFalse(still_present)