1
0
mirror of https://github.com/balkian/bitter.git synced 2025-01-02 13:11:28 +00:00

Compose and bug fixes

This commit is contained in:
J. Fernando Sánchez 2018-03-19 14:36:05 +01:00
parent 53bb7edabc
commit 6259013978
9 changed files with 80 additions and 29 deletions

View File

@ -2,6 +2,6 @@
From python:3.4-onbuild
Maintainer J. Fernando Sánchez @balkian
RUN pip install -e "/usr/src/app/[server]"
RUN pip install ".[server]"
ENTRYPOINT ["bitter"]

View File

@ -2,6 +2,6 @@
From python:{{PYVERSION}}-onbuild
Maintainer J. Fernando Sánchez @balkian
RUN pip install -e "/usr/src/app/[server]"
RUN pip install ".[server]"
ENTRYPOINT ["bitter"]

View File

@ -1 +1 @@
0.7.7
0.8.0

View File

@ -333,6 +333,7 @@ def reset_extractor(ctx):
@click.pass_context
def get_limits(ctx, url):
wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE)
total = {}
for worker in wq.queue:
resp = worker.client.application.rate_limit_status()
print('#'*20)
@ -345,9 +346,15 @@ def get_limits(ctx, url):
limit = resp['resources'][cat].get(url, None) or resp['resources'][cat]
else:
print('Cat {} not found'.format(cat))
continue
for k in limit:
total[k] = total.get(k, 0) + limit[k]
print('{}: {}'.format(url, limit))
else:
print(json.dumps(resp, indent=2))
if url:
print('Total for {}: {}'.format(url, total))
@main.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=False))
@ -358,9 +365,14 @@ def get_limits(ctx, url):
@click.pass_context
def api(ctx, cmd, tweets, users, api_args):
opts = {}
mappings = {
'id': '_id'
}
i = iter(api_args)
for k, v in zip(i, i):
k = k.replace('--', '')
if k in mappings:
k = mappings[k]
opts[k] = v
wq = crawlers.TwitterQueue.from_config(bconf.CONFIG_FILE)
if tweets:
@ -441,7 +453,7 @@ def get_stream(ctx, locations, track, file, politelyretry):
def read_stream(ctx, file, tail):
for tweet in utils.read_file(file, tail=tail):
try:
print(u'{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['timestamp_ms'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
print(u'{timestamp_ms}- @{screen_name}: {text}'.format(timestamp_ms=tweet['created_at'], screen_name=tweet['user']['screen_name'], text=tweet['text']))
except (KeyError, TypeError):
print('Raw tweet: {}'.format(tweet))

View File

@ -113,7 +113,7 @@ class RestWorker(TwitterWorker):
def get_wait(self, uriparts):
limits = self.get_limit(uriparts)
if limits['remaining'] > 0:
if limits.get('remaining', 1) > 0:
return 0
reset = limits.get('reset', 0)
now = time.time()

View File

@ -3,11 +3,13 @@ import json
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.types import BigInteger, Integer, Text, Boolean
from sqlalchemy.schema import ForeignKey
from sqlalchemy.pool import SingletonThreadPool
from sqlalchemy import Column, Index
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from functools import wraps
Base = declarative_base()
@ -90,6 +92,34 @@ class ExtractorEntry(Base):
busy = Column(Boolean, default=False)
class Search(Base):
    """ORM mapping for rows of the ``search_queries`` table.

    A row stores the endpoint of a search together with a text rendering
    of its parameters, plus a handful of integer counters and IDs.
    """

    __tablename__ = 'search_queries'

    id = Column(
        Integer,
        primary_key=True,
        index=True,
        unique=True,
    )
    endpoint = Column(
        Text,
        comment="Endpoint URL",
    )
    attrs = Column(
        Text,
        comment="Text version of the dictionary of parameters",
    )
    # NOTE(review): presumably the requested total vs. the number retrieved
    # so far -- confirm against the code that fills these in.
    count = Column(Integer)
    current_count = Column(Integer)
    current_id = Column(
        BigInteger,
        comment='Oldest ID retrieved (should match max_id when done)',
    )
    since_id = Column(BigInteger)
class SearchResults(Base):
    """ORM mapping for rows of the ``search_results`` table.

    Every row points at one ``search_queries`` row through ``search_id``
    and carries a textual ``resource_id``.
    """

    __tablename__ = 'search_results'

    id = Column(
        Integer,
        primary_key=True,
        index=True,
        unique=True,
    )
    # Foreign key into search_queries.id.
    search_id = Column(ForeignKey('search_queries.id'))
    # NOTE(review): presumably the ID of the resource the search returned
    # -- confirm with the writer of this table.
    resource_id = Column(Text)
def memoize(f):
    """Decorate a keyword-only method so repeated calls reuse the first result.

    Results are cached per decorated function, keyed on the receiver's
    ``__uriparts`` attribute and the JSON form of the keyword arguments
    (as produced by ``dict_to_str``). The cache is never evicted.

    :param f: method taking ``self`` and keyword arguments only.
    :returns: wrapper with the same metadata as ``f`` (via ``functools.wraps``).
    """
    cache = {}

    @wraps(f)
    def helper(self, **kwargs):
        serialized = dict_to_str(kwargs)
        # NOTE(review): outside a class body this name is not mangled, so the
        # attribute looked up is literally ``__uriparts`` -- confirm the
        # receiver really exposes it under that name.
        cache_key = (self.__uriparts, serialized)
        try:
            return cache[cache_key]
        except KeyError:
            pass
        cache[cache_key] = f(self, **kwargs)
        return cache[cache_key]

    return helper
def make_session(url):
if not isinstance(url, str):
print(url)
@ -100,24 +130,6 @@ def make_session(url):
session = Session()
return session
def test(db='sqlite:///users.db'):
    """Manual smoke check of the ``User`` model; prints what it finds.

    Opens a session on ``db``, prints the first user's name and the user
    count, inserts a user named "Fake user", prints the count and an
    existence check, then deletes that user again and prints the count and
    the rows of a raw ``SELECT`` for id 1548.

    :param db: database URL handed to ``make_session``.
    """
    from sqlalchemy import exists

    session = make_session(db)

    # State before any change.
    first_user = session.query(User).first()
    print(first_user.name)
    print(session.query(User).count())

    # Insert the throwaway user and confirm it is visible.
    session.add(User(name="Fake user"))
    session.commit()
    print(session.query(User).count())
    fake_exists = session.query(exists().where(User.name == "Fake user")).scalar()
    print(fake_exists)

    # Read it back, then remove it again.
    stored = session.query(User).filter_by(name="Fake user").first()
    print(stored.id)
    print(stored.name)
    session.delete(stored)
    session.commit()
    print(session.query(User).count())

    rows = session.execute('SELECT 1 from users where id=\'%s\'' % 1548)
    print(list(rows))
def dict_to_str(args):
    """Serialize ``args`` to JSON text with keys sorted.

    Sorting the keys makes the output independent of insertion order, so
    equal dictionaries always produce the same string.
    """
    serialized = json.dumps(args, sort_keys=True)
    return serialized

View File

@ -91,7 +91,9 @@ def config(conffile=None):
def read_config(conffile):
p = conffile and get_config_path(conffile)
if p and os.path.exists(p):
if p:
if not os.path.exists(p):
raise Exception('{} file does not exist.'.format(p))
f = open(p, 'r')
elif 'BITTER_CONFIG' not in os.environ:
raise Exception('No config file or BITTER_CONFIG env variable.')

View File

@ -14,6 +14,7 @@ except AttributeError:
test_reqs = parse_requirements("test-requirements.txt")
import sys
import os
import itertools
if sys.version_info <= (3, 0):
install_reqs = itertools.chain(install_reqs, py2_reqs)
@ -23,7 +24,8 @@ if sys.version_info <= (3, 0):
install_reqs = [str(ir.req) for ir in install_reqs]
test_reqs = [str(ir.req) for ir in test_reqs]
from bitter import __version__
with open(os.path.join('bitter', 'VERSION'), 'r') as f:
__version__ = f.read().strip()
setup(
name="bitter",

23
tests/test_models.py Normal file
View File

@ -0,0 +1,23 @@
from unittest import TestCase
import os
import types
from bitter import utils
from bitter.models import *
from sqlalchemy import exists
class TestModels(TestCase):
    """Round-trip checks for the ORM models against a throwaway database."""

    def setUp(self):
        # 'sqlite://' carries no file path; SQLAlchemy treats that URL as an
        # in-memory SQLite database, so nothing is left on disk.
        self.session = make_session('sqlite://')

    def tearDown(self):
        # Release the connection held by the session opened in setUp.
        self.session.close()

    def test_user(self):
        """A user can be inserted, found by name, and deleted again."""
        fake_user = User(name="Fake user", id=1548)
        self.session.add(fake_user)
        self.session.commit()

        fake_committed = self.session.query(User).filter_by(name="Fake user").first()
        # unittest assertions survive `python -O` and report useful messages,
        # unlike bare `assert` statements.
        self.assertIsNotNone(fake_committed)

        self.session.delete(fake_committed)
        self.session.commit()

        # Check for leftovers through the expression API instead of a
        # %-formatted SQL string passed as plain text to execute().
        still_present = self.session.query(exists().where(User.id == 1548)).scalar()
        self.assertFalse(still_present)