1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-09-18 04:22:21 +00:00

Compare commits

...

6 Commits

Author SHA1 Message Date
J. Fernando Sánchez
1313853788 Several fixes and improvements
* Add Topic model
* Add PDB post-mortem debugging
* Add logger to plugins (`self.log`)
* Add NLTK resource auto-download
* Force installation of requirements even if adding doesn't work
* Add a method to find files in several possible locations. The plugin.open
method now tries these locations only when the file is opened in read mode;
otherwise only the SENPY_DATA folder is used (to avoid writing to the
package folder).
2018-06-14 15:10:16 +02:00
J. Fernando Sánchez
697e779767 Fix schema issues and parameter validation 2018-05-16 11:16:32 +02:00
J. Fernando Sánchez
48f5ffafa1 Defer plugin validation to init 2018-05-14 11:38:02 +02:00
J. Fernando Sánchez
73f7cbbe8a Add extra-requirements for pip 2018-04-25 11:01:17 +02:00
J. Fernando Sánchez
07a41236f8 Do not push image tag for latest 2018-04-25 10:52:30 +02:00
J. Fernando Sánchez
55db97cf62 Add basic evaluation and fix installation
* Merge branch '44-add-basic-evaluation-with-gsitk'
* Refactor requirements (add extra-requirements)
* Skip evaluation tests in Py2
* Fix installation with PIP
* Implement the evaluation service inside the Senpy API
* Connect Plugins to GSITK's evaluation module
* Add an evaluation method inside the Senpy Context
* Add the evaluation models and schemas
* Add Evaluation to the Playground, with a table view
* Add evaluation tests
2018-04-25 10:12:26 +02:00
15 changed files with 141 additions and 73 deletions

View File

@@ -77,7 +77,6 @@ push-latest: $(addprefix push-latest-,$(PYVERSIONS)) ## Push the "latest" tag to
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)' docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)'
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGENAME)' docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGENAME)'
docker push '$(IMAGENAME):latest' docker push '$(IMAGENAME):latest'
docker push '$(IMAGEWTAG)'
push-latest-%: build-% ## Push the latest image for a specific python version push-latest-%: build-% ## Push the latest image for a specific python version
docker tag $(IMAGENAME):$(VERSION)-python$* $(IMAGENAME):python$* docker tag $(IMAGENAME):$(VERSION)-python$* $(IMAGENAME):python$*

View File

@@ -6,8 +6,6 @@ RUN apt-get update && apt-get install -y \
libblas-dev liblapack-dev liblapacke-dev gfortran \ libblas-dev liblapack-dev liblapacke-dev gfortran \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir --upgrade numpy scipy scikit-learn
RUN mkdir /cache/ /senpy-plugins /data/ RUN mkdir /cache/ /senpy-plugins /data/
VOLUME /data/ VOLUME /data/

View File

@@ -1,5 +1,6 @@
include requirements.txt include requirements.txt
include test-requirements.txt include test-requirements.txt
include extra-requirements.txt
include README.rst include README.rst
include senpy/VERSION include senpy/VERSION
graft senpy/plugins graft senpy/plugins

View File

@@ -9,3 +9,6 @@ jsonref
PyYAML PyYAML
rdflib rdflib
rdflib-jsonld rdflib-jsonld
numpy
scipy
scikit-learn

View File

@@ -130,7 +130,7 @@ def main():
return return
sp.activate_all() sp.activate_all()
if args.only_test: if args.only_test:
easy_test(sp.plugins()) easy_test(sp.plugins(), debug=args.debug)
return return
print('Senpy version {}'.format(senpy.__version__)) print('Senpy version {}'.format(senpy.__version__))
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host, print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,

View File

@@ -147,7 +147,7 @@ def parse_params(indict, *specs):
for param, options in iteritems(spec): for param, options in iteritems(spec):
for alias in options.get("aliases", []): for alias in options.get("aliases", []):
# Replace each alias with the correct name of the parameter # Replace each alias with the correct name of the parameter
if alias in indict and alias is not param: if alias in indict and alias != param:
outdict[param] = indict[alias] outdict[param] = indict[alias]
del outdict[alias] del outdict[alias]
continue continue

View File

@@ -19,7 +19,7 @@ Blueprints for Senpy
""" """
from flask import (Blueprint, request, current_app, render_template, url_for, from flask import (Blueprint, request, current_app, render_template, url_for,
jsonify) jsonify)
from .models import Error, Response, Help, Plugins, read_schema, Datasets from .models import Error, Response, Help, Plugins, read_schema, dump_schema, Datasets
from . import api from . import api
from .version import __version__ from .version import __version__
from functools import wraps from functools import wraps
@@ -67,9 +67,9 @@ def index():
@api_blueprint.route('/schemas/<schema>') @api_blueprint.route('/schemas/<schema>')
def schema(schema="definitions"): def schema(schema="definitions"):
try: try:
return jsonify(read_schema(schema)) return dump_schema(read_schema(schema))
except Exception: # Should be FileNotFoundError, but it's missing from py2 except Exception as ex: # Should be FileNotFoundError, but it's missing from py2
return Error(message="Schema not found", status=404).flask() return Error(message="Schema not found: {}".format(ex), status=404).flask()
def basic_api(f): def basic_api(f):
@@ -133,6 +133,7 @@ def api_root():
req = api.parse_call(request.parameters) req = api.parse_call(request.parameters)
return current_app.senpy.analyse(req) return current_app.senpy.analyse(req)
@api_blueprint.route('/evaluate/', methods=['POST', 'GET']) @api_blueprint.route('/evaluate/', methods=['POST', 'GET'])
@basic_api @basic_api
def evaluate(): def evaluate():
@@ -145,6 +146,7 @@ def evaluate():
response = current_app.senpy.evaluate(params) response = current_app.senpy.evaluate(params)
return response return response
@api_blueprint.route('/plugins/', methods=['POST', 'GET']) @api_blueprint.route('/plugins/', methods=['POST', 'GET'])
@basic_api @basic_api
def plugins(): def plugins():
@@ -163,10 +165,10 @@ def plugin(plugin=None):
return sp.get_plugin(plugin) return sp.get_plugin(plugin)
@api_blueprint.route('/datasets/', methods=['POST','GET']) @api_blueprint.route('/datasets/', methods=['POST', 'GET'])
@basic_api @basic_api
def datasets(): def datasets():
sp = current_app.senpy sp = current_app.senpy
datasets = sp.datasets datasets = sp.datasets
dic = Datasets(datasets = list(datasets.values())) dic = Datasets(datasets=list(datasets.values()))
return dic return dic

View File

@@ -51,6 +51,10 @@ def read_schema(schema_file, absolute=False):
return jsonref.load(f, base_uri=schema_uri) return jsonref.load(f, base_uri=schema_uri)
def dump_schema(schema):
return jsonref.dumps(schema)
def load_context(context): def load_context(context):
logging.debug('Loading context: {}'.format(context)) logging.debug('Loading context: {}'.format(context))
if not context: if not context:
@@ -199,24 +203,27 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
context_uri=None, context_uri=None,
prefix=None, prefix=None,
expanded=False): expanded=False):
ser = self.serializable()
result = jsonld.compact( result = self.serializable()
ser, if context_uri or with_context:
self._context, result['@context'] = context_uri or self._context
options={
'base': prefix, # result = jsonld.compact(result,
'expandContext': self._context, # self._context,
'senpy': prefix # options={
}) # 'base': prefix,
if context_uri: # 'expandContext': self._context,
result['@context'] = context_uri # 'senpy': prefix
# })
if expanded: if expanded:
result = jsonld.expand( result = jsonld.expand(
result, options={'base': prefix, result, options={'base': prefix,
'expandContext': self._context}) 'expandContext': self._context})
if not with_context: if not with_context:
try:
del result['@context'] del result['@context']
except KeyError:
pass
return result return result
def validate(self, obj=None): def validate(self, obj=None):
@@ -319,7 +326,10 @@ def _add_class_from_schema(*args, **kwargs):
for i in [ for i in [
'aggregatedEvaluation',
'analysis', 'analysis',
'dataset',
'datasets',
'emotion', 'emotion',
'emotionConversion', 'emotionConversion',
'emotionConversionPlugin', 'emotionConversionPlugin',
@@ -327,19 +337,17 @@ for i in [
'emotionModel', 'emotionModel',
'emotionPlugin', 'emotionPlugin',
'emotionSet', 'emotionSet',
'evaluation',
'entity', 'entity',
'help', 'help',
'metric',
'plugin', 'plugin',
'plugins', 'plugins',
'response', 'response',
'results', 'results',
'sentimentPlugin', 'sentimentPlugin',
'suggestion', 'suggestion',
'aggregatedEvaluation', 'topic',
'evaluation',
'metric',
'dataset',
'datasets',
]: ]:
_add_class_from_schema(i) _add_class_from_schema(i)

View File

@@ -18,8 +18,7 @@ import subprocess
import importlib import importlib
import yaml import yaml
import threading import threading
import nltk
import numpy as np
from .. import models, utils from .. import models, utils
from .. import api from .. import api
@@ -49,11 +48,11 @@ class PluginMeta(models.BaseMeta):
attrs['name'] = alias attrs['name'] = alias
if 'description' not in attrs: if 'description' not in attrs:
doc = attrs.get('__doc__', None) doc = attrs.get('__doc__', None)
if not doc: if doc:
raise Exception(('Please, add a description or '
'documentation to class {}').format(name))
attrs['description'] = doc attrs['description'] = doc
attrs['name'] = alias else:
logger.warn(('Plugin {} does not have a description. '
'Please, add a short summary to help other developers').format(name))
cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs) cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs)
if alias in mcs._classes: if alias in mcs._classes:
@@ -97,6 +96,16 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
self.is_activated = False self.is_activated = False
self._lock = threading.Lock() self._lock = threading.Lock()
self.data_folder = data_folder or os.getcwd() self.data_folder = data_folder or os.getcwd()
self._directory = os.path.abspath(os.path.dirname(inspect.getfile(self.__class__)))
self._data_paths = ['',
self._directory,
os.path.join(self._directory, 'data'),
self.data_folder]
self._log = logging.getLogger('{}.{}'.format(__name__, self.name))
@property
def log(self):
return self._log
def validate(self): def validate(self):
missing = [] missing = []
@@ -125,9 +134,9 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
for case in test_cases: for case in test_cases:
try: try:
self.test_case(case) self.test_case(case)
logger.debug('Test case passed:\n{}'.format(pprint.pformat(case))) self.log.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
except Exception as ex: except Exception as ex:
logger.warn('Test case failed:\n{}'.format(pprint.pformat(case))) self.log.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
raise raise
def test_case(self, case): def test_case(self, case):
@@ -150,10 +159,22 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
raise raise
assert not should_fail assert not should_fail
def open(self, fpath, *args, **kwargs): def find_file(self, fname):
for p in self._data_paths:
alternative = os.path.join(p, fname)
if os.path.exists(alternative):
return alternative
raise IOError('File does not exist: {}'.format(fname))
def open(self, fpath, mode='r'):
if 'w' in mode:
# When writing, only use absolute paths or data_folder
if not os.path.isabs(fpath): if not os.path.isabs(fpath):
fpath = os.path.join(self.data_folder, fpath) fpath = os.path.join(self.data_folder, fpath)
return open(fpath, *args, **kwargs) else:
fpath = self.find_file(fpath)
return open(fpath, mode=mode)
def serve(self, debug=True, **kwargs): def serve(self, debug=True, **kwargs):
utils.easy(plugin_list=[self, ], plugin_folder=None, debug=debug, **kwargs) utils.easy(plugin_list=[self, ], plugin_folder=None, debug=debug, **kwargs)
@@ -188,7 +209,7 @@ class Analysis(Plugin):
def analyse_entries(self, entries, parameters): def analyse_entries(self, entries, parameters):
for entry in entries: for entry in entries:
logger.debug('Analysing entry with plugin {}: {}'.format(self, entry)) self.log.debug('Analysing entry with plugin {}: {}'.format(self, entry))
results = self.analyse_entry(entry, parameters) results = self.analyse_entry(entry, parameters)
if inspect.isgenerator(results): if inspect.isgenerator(results):
for result in results: for result in results:
@@ -291,7 +312,7 @@ class Box(AnalysisPlugin):
return self return self
def transform(self, X): def transform(self, X):
return np.array([self.predict_one(x) for x in X]) return [self.predict_one(x) for x in X]
def predict(self, X): def predict(self, X):
return self.transform(X) return self.transform(X)
@@ -377,7 +398,7 @@ class ShelfMixin(object):
with self.open(self.shelf_file, 'rb') as p: with self.open(self.shelf_file, 'rb') as p:
self._sh = pickle.load(p) self._sh = pickle.load(p)
except (IndexError, EOFError, pickle.UnpicklingError): except (IndexError, EOFError, pickle.UnpicklingError):
logger.warning('{} has a corrupted shelf file!'.format(self.id)) self.log.warning('Corrupted shelf file: {}'.format(self.shelf_file))
if not self.get('force_shelf', False): if not self.get('force_shelf', False):
raise raise
return self._sh return self._sh
@@ -404,32 +425,31 @@ class ShelfMixin(object):
self._shelf_file = value self._shelf_file = value
def save(self): def save(self):
logger.debug('saving pickle') self.log.debug('Saving pickle')
if hasattr(self, '_sh') and self._sh is not None: if hasattr(self, '_sh') and self._sh is not None:
with self.open(self.shelf_file, 'wb') as f: with self.open(self.shelf_file, 'wb') as f:
pickle.dump(self._sh, f) pickle.dump(self._sh, f)
def pfilter(plugins, **kwargs): def pfilter(plugins, plugin_type=Analysis, **kwargs):
""" Filter plugins by different criteria """ """ Filter plugins by different criteria """
if isinstance(plugins, models.Plugins): if isinstance(plugins, models.Plugins):
plugins = plugins.plugins plugins = plugins.plugins
elif isinstance(plugins, dict): elif isinstance(plugins, dict):
plugins = plugins.values() plugins = plugins.values()
ptype = kwargs.pop('plugin_type', Plugin)
logger.debug('#' * 100) logger.debug('#' * 100)
logger.debug('ptype {}'.format(ptype)) logger.debug('plugin_type {}'.format(plugin_type))
if ptype: if plugin_type:
if isinstance(ptype, PluginMeta): if isinstance(plugin_type, PluginMeta):
ptype = ptype.__name__ plugin_type = plugin_type.__name__
try: try:
ptype = ptype[0].upper() + ptype[1:] plugin_type = plugin_type[0].upper() + plugin_type[1:]
pclass = globals()[ptype] pclass = globals()[plugin_type]
logger.debug('Class: {}'.format(pclass)) logger.debug('Class: {}'.format(pclass))
candidates = filter(lambda x: isinstance(x, pclass), candidates = filter(lambda x: isinstance(x, pclass),
plugins) plugins)
except KeyError: except KeyError:
raise models.Error('{} is not a valid type'.format(ptype)) raise models.Error('{} is not a valid type'.format(plugin_type))
else: else:
candidates = plugins candidates = plugins
@@ -464,6 +484,7 @@ def _log_subprocess_output(process):
def install_deps(*plugins): def install_deps(*plugins):
installed = False installed = False
nltk_resources = set()
for info in plugins: for info in plugins:
requirements = info.get('requirements', []) requirements = info.get('requirements', [])
if requirements: if requirements:
@@ -479,6 +500,9 @@ def install_deps(*plugins):
installed = True installed = True
if exitcode != 0: if exitcode != 0:
raise models.Error("Dependencies not properly installed") raise models.Error("Dependencies not properly installed")
nltk_resources |= set(info.get('nltk_resources', []))
installed |= nltk.download(list(nltk_resources))
return installed return installed
@@ -575,12 +599,14 @@ def _instances_in_module(module):
def _from_module_name(module, root, info=None, install=True, **kwargs): def _from_module_name(module, root, info=None, install=True, **kwargs):
try: try:
module = load_module(module, root) module = load_module(module, root)
except ImportError: except (ImportError, LookupError):
if not install or not info: if not install or not info:
raise raise
install_deps(info) install_deps(info)
module = load_module(module, root) module = load_module(module, root)
for plugin in _from_loaded_module(module=module, root=root, info=info, **kwargs): for plugin in _from_loaded_module(module=module, root=root, info=info, **kwargs):
if install:
install_deps(plugin)
yield plugin yield plugin

View File

@@ -10,8 +10,10 @@
"wna": "http://www.gsi.dit.upm.es/ontologies/wnaffect/ns#", "wna": "http://www.gsi.dit.upm.es/ontologies/wnaffect/ns#",
"emoml": "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/emotionml/ns#", "emoml": "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/emotionml/ns#",
"xsd": "http://www.w3.org/2001/XMLSchema#", "xsd": "http://www.w3.org/2001/XMLSchema#",
"fam": "http://vocab.fusepool.info/fam#",
"topics": { "topics": {
"@id": "dc:subject" "@id": "nif:topic",
"@container": "@set"
}, },
"entities": { "entities": {
"@id": "me:hasEntities" "@id": "me:hasEntities"

View File

@@ -1,6 +1,6 @@
{ {
"$schema": "http://json-schema.org/draft-04/schema#", "$schema": "http://json-schema.org/draft-04/schema#",
"name": "Evalation", "name": "Evaluation",
"properties": { "properties": {
"@id": { "@id": {
"type": "string" "type": "string"

View File

@@ -1,6 +1,7 @@
from . import models, __version__ from . import models, __version__
from collections import MutableMapping from collections import MutableMapping
import pprint import pprint
import pdb
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -32,8 +33,8 @@ def check_template(indict, template):
if indict != template: if indict != template:
raise models.Error(('Differences found.\n' raise models.Error(('Differences found.\n'
'\tExpected: {}\n' '\tExpected: {}\n'
'\tFound: {}').format(pprint.pformat(indict), '\tFound: {}').format(pprint.pformat(template),
pprint.pformat(template))) pprint.pformat(indict)))
def convert_dictionary(original, mappings): def convert_dictionary(original, mappings):
@@ -67,9 +68,10 @@ def easy_load(app=None, plugin_list=None, plugin_folder=None, **kwargs):
return sp, app return sp, app
def easy_test(plugin_list=None): def easy_test(plugin_list=None, debug=True):
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.INFO) logging.getLogger().setLevel(logging.INFO)
try:
if not plugin_list: if not plugin_list:
import __main__ import __main__
logger.info('Loading classes from {}'.format(__main__)) logger.info('Loading classes from {}'.format(__main__))
@@ -77,8 +79,12 @@ def easy_test(plugin_list=None):
plugin_list = plugins.from_module(__main__) plugin_list = plugins.from_module(__main__)
for plug in plugin_list: for plug in plugin_list:
plug.test() plug.test()
logger.info('The tests for {} passed!'.format(plug.name)) plug.log.info('My tests passed!')
logger.info('All tests passed!') logger.info('All tests passed!')
except Exception:
if not debug:
raise
pdb.post_mortem()
def easy(host='0.0.0.0', port=5000, debug=True, **kwargs): def easy(host='0.0.0.0', port=5000, debug=True, **kwargs):

View File

@@ -32,7 +32,7 @@ class APITest(TestCase):
query = {} query = {}
plug_params = { plug_params = {
'hello': { 'hello': {
'aliases': ['hello', 'hiya'], 'aliases': ['hiya', 'hello'],
'required': True 'required': True
} }
} }
@@ -48,6 +48,26 @@ class APITest(TestCase):
assert 'hello' in p assert 'hello' in p
assert p['hello'] == 'dlrow' assert p['hello'] == 'dlrow'
def test_parameters2(self):
in1 = {
'meaningcloud-key': 5
}
in2 = {
'apikey': 25
}
extra_params = {
"apikey": {
"aliases": [
"apikey",
"meaningcloud-key"
],
"required": True
}
}
p1 = parse_params(in1, extra_params)
p2 = parse_params(in2, extra_params)
assert (p2['apikey'] / p1['apikey']) == 5
def test_default(self): def test_default(self):
spec = { spec = {
'hello': { 'hello': {

View File

@@ -47,7 +47,7 @@ class ExtensionsTest(TestCase):
def test_add_delete(self): def test_add_delete(self):
'''Should be able to add and delete new plugins. ''' '''Should be able to add and delete new plugins. '''
new = plugins.Plugin(name='new', description='new', version=0) new = plugins.Analysis(name='new', description='new', version=0)
self.senpy.add_plugin(new) self.senpy.add_plugin(new)
assert new in self.senpy.plugins() assert new in self.senpy.plugins()
self.senpy.delete_plugin(new) self.senpy.delete_plugin(new)

View File

@@ -8,6 +8,8 @@ from fnmatch import fnmatch
from jsonschema import RefResolver, Draft4Validator, ValidationError from jsonschema import RefResolver, Draft4Validator, ValidationError
from senpy.models import read_schema
root_path = path.join(path.dirname(path.realpath(__file__)), '..') root_path = path.join(path.dirname(path.realpath(__file__)), '..')
schema_folder = path.join(root_path, 'senpy', 'schemas') schema_folder = path.join(root_path, 'senpy', 'schemas')
examples_path = path.join(root_path, 'docs', 'examples') examples_path = path.join(root_path, 'docs', 'examples')
@@ -15,7 +17,8 @@ bad_examples_path = path.join(root_path, 'docs', 'bad-examples')
class JSONSchemaTests(unittest.TestCase): class JSONSchemaTests(unittest.TestCase):
pass def test_definitions(self):
read_schema('definitions.json')
def do_create_(jsfile, success): def do_create_(jsfile, success):