Several fixes and improvements

* Add Topic model
* Add PDB post-mortem debugging
* Add logger to plugins (`self.log`)
* Add NLTK resource auto-download
* Force installation of requirements even if adding doesn't work
* Add a method to find files in several possible locations. Now the plugin.open method will try these locations IF the file is to be opened in read mode. Otherwise only the SENPY_DATA folder will be used (to avoid writing to the package folder).
Fix schema issues and parameter validation
2025-09-16 19:42:21 +00:00 · 2018-06-14 15:10:16 +02:00 · 2018-05-16 11:16:32 +02:00 · 2018-05-14 11:38:02 +02:00 · 2018-04-25 11:01:17 +02:00 · 2018-04-25 10:52:30 +02:00
15 changed files with 141 additions and 73 deletions
--- a/.makefiles/python.mk
+++ b/.makefiles/python.mk
@@ -77,7 +77,6 @@ push-latest: $(addprefix push-latest-,$(PYVERSIONS)) ## Push the "latest" tag to
 	docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)'
 	docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGENAME)'
 	docker push '$(IMAGENAME):latest'
-	docker push '$(IMAGEWTAG)'

 push-latest-%: build-%  ## Push the latest image for a specific python version
 	docker tag $(IMAGENAME):$(VERSION)-python$* $(IMAGENAME):python$*
--- a/Dockerfile.template
+++ b/Dockerfile.template
@@ -6,8 +6,6 @@ RUN apt-get update && apt-get install -y \
 libblas-dev liblapack-dev liblapacke-dev gfortran \
 && rm -rf /var/lib/apt/lists/*

-RUN pip install --no-cache-dir --upgrade numpy scipy scikit-learn
-
 RUN mkdir /cache/ /senpy-plugins /data/

 VOLUME /data/
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include requirements.txt
 include test-requirements.txt
+include extra-requirements.txt
 include README.rst
 include senpy/VERSION
 graft senpy/plugins
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,6 @@ jsonref
 PyYAML
 rdflib
 rdflib-jsonld
+numpy
+scipy
+scikit-learn
--- a/senpy/main.py
+++ b/senpy/main.py
@@ -130,7 +130,7 @@ def main():
        return
    sp.activate_all()
    if args.only_test:
-        easy_test(sp.plugins())
+        easy_test(sp.plugins(), debug=args.debug)
        return
    print('Senpy version {}'.format(senpy.__version__))
    print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
--- a/senpy/api.py
+++ b/senpy/api.py
@@ -147,7 +147,7 @@ def parse_params(indict, *specs):
        for param, options in iteritems(spec):
            for alias in options.get("aliases", []):
                # Replace each alias with the correct name of the parameter
-                if alias in indict and alias is not param:
+                if alias in indict and alias != param:
                    outdict[param] = indict[alias]
                    del outdict[alias]
                    continue
--- a/senpy/blueprints.py
+++ b/senpy/blueprints.py
@@ -19,7 +19,7 @@ Blueprints for Senpy
 """
 from flask import (Blueprint, request, current_app, render_template, url_for,
                   jsonify)
-from .models import Error, Response, Help, Plugins, read_schema, Datasets
+from .models import Error, Response, Help, Plugins, read_schema, dump_schema, Datasets
 from . import api
 from .version import __version__
 from functools import wraps
@@ -67,9 +67,9 @@ def index():
@api_blueprint.route('/schemas/<schema>')
 def schema(schema="definitions"):
    try:
-        return jsonify(read_schema(schema))
-    except Exception:  # Should be FileNotFoundError, but it's missing from py2
-        return Error(message="Schema not found", status=404).flask()
+        return dump_schema(read_schema(schema))
+    except Exception as ex:  # Should be FileNotFoundError, but it's missing from py2
+        return Error(message="Schema not found: {}".format(ex), status=404).flask()


 def basic_api(f):
@@ -133,6 +133,7 @@ def api_root():
    req = api.parse_call(request.parameters)
    return current_app.senpy.analyse(req)

+
@api_blueprint.route('/evaluate/', methods=['POST', 'GET'])
@basic_api
 def evaluate():
@@ -145,6 +146,7 @@ def evaluate():
        response = current_app.senpy.evaluate(params)
        return response

+
@api_blueprint.route('/plugins/', methods=['POST', 'GET'])
@basic_api
 def plugins():
@@ -163,10 +165,10 @@ def plugin(plugin=None):
    return sp.get_plugin(plugin)


-@api_blueprint.route('/datasets/', methods=['POST','GET'])
+@api_blueprint.route('/datasets/', methods=['POST', 'GET'])
@basic_api
 def datasets():
    sp = current_app.senpy
    datasets = sp.datasets
-    dic = Datasets(datasets = list(datasets.values()))
-    return dic
+    dic = Datasets(datasets=list(datasets.values()))
+    return dic
--- a/senpy/models.py
+++ b/senpy/models.py
@@ -51,6 +51,10 @@ def read_schema(schema_file, absolute=False):
        return jsonref.load(f, base_uri=schema_uri)


+def dump_schema(schema):
+    return jsonref.dumps(schema)
+
+
 def load_context(context):
    logging.debug('Loading context: {}'.format(context))
    if not context:
@@ -199,24 +203,27 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
               context_uri=None,
               prefix=None,
               expanded=False):
-        ser = self.serializable()

-        result = jsonld.compact(
-            ser,
-            self._context,
-            options={
-                'base': prefix,
-                'expandContext': self._context,
-                'senpy': prefix
-            })
-        if context_uri:
-            result['@context'] = context_uri
+        result = self.serializable()
+        if context_uri or with_context:
+            result['@context'] = context_uri or self._context
+
+        # result = jsonld.compact(result,
+        #                         self._context,
+        #                         options={
+        #                             'base': prefix,
+        #                             'expandContext': self._context,
+        #                             'senpy': prefix
+        #                         })
        if expanded:
            result = jsonld.expand(
                result, options={'base': prefix,
                                 'expandContext': self._context})
        if not with_context:
-            del result['@context']
+            try:
+                del result['@context']
+            except KeyError:
+                pass
        return result

    def validate(self, obj=None):
@@ -319,7 +326,10 @@ def _add_class_from_schema(*args, **kwargs):


 for i in [
+        'aggregatedEvaluation',
        'analysis',
+        'dataset',
+        'datasets',
        'emotion',
        'emotionConversion',
        'emotionConversionPlugin',
@@ -327,19 +337,17 @@ for i in [
        'emotionModel',
        'emotionPlugin',
        'emotionSet',
+        'evaluation',
        'entity',
        'help',
+        'metric',
        'plugin',
        'plugins',
        'response',
        'results',
        'sentimentPlugin',
        'suggestion',
-        'aggregatedEvaluation',
-        'evaluation',
-        'metric',
-        'dataset',
-        'datasets',
+        'topic',

 ]:
    _add_class_from_schema(i)
--- a/senpy/plugins/__init__.py
+++ b/senpy/plugins/__init__.py
@@ -18,8 +18,7 @@ import subprocess
 import importlib
 import yaml
 import threading
-
-import numpy as np
+import nltk

 from .. import models, utils
 from .. import api
@@ -49,11 +48,11 @@ class PluginMeta(models.BaseMeta):
        attrs['name'] = alias
        if 'description' not in attrs:
            doc = attrs.get('__doc__', None)
-            if not doc:
-                raise Exception(('Please, add a description or '
-                                 'documentation to class {}').format(name))
-            attrs['description'] = doc
-            attrs['name'] = alias
+            if doc:
+                attrs['description'] = doc
+            else:
+                logger.warn(('Plugin {} does not have a description. '
+                             'Please, add a short summary to help other developers').format(name))
        cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs)

        if alias in mcs._classes:
@@ -97,6 +96,16 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
        self.is_activated = False
        self._lock = threading.Lock()
        self.data_folder = data_folder or os.getcwd()
+        self._directory = os.path.abspath(os.path.dirname(inspect.getfile(self.__class__)))
+        self._data_paths = ['',
+                            self._directory,
+                            os.path.join(self._directory, 'data'),
+                            self.data_folder]
+        self._log = logging.getLogger('{}.{}'.format(__name__, self.name))
+
+    @property
+    def log(self):
+        return self._log

    def validate(self):
        missing = []
@@ -125,9 +134,9 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
        for case in test_cases:
            try:
                self.test_case(case)
-                logger.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
+                self.log.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
            except Exception as ex:
-                logger.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
+                self.log.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
                raise

    def test_case(self, case):
@@ -150,10 +159,22 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
            raise
        assert not should_fail

-    def open(self, fpath, *args, **kwargs):
-        if not os.path.isabs(fpath):
-            fpath = os.path.join(self.data_folder, fpath)
-        return open(fpath, *args, **kwargs)
+    def find_file(self, fname):
+        for p in self._data_paths:
+            alternative = os.path.join(p, fname)
+            if os.path.exists(alternative):
+                return alternative
+        raise IOError('File does not exist: {}'.format(fname))
+
+    def open(self, fpath, mode='r'):
+        if 'w' in mode:
+            # When writing, only use absolute paths or data_folder
+            if not os.path.isabs(fpath):
+                fpath = os.path.join(self.data_folder, fpath)
+        else:
+            fpath = self.find_file(fpath)
+
+        return open(fpath, mode=mode)

    def serve(self, debug=True, **kwargs):
        utils.easy(plugin_list=[self, ], plugin_folder=None, debug=debug, **kwargs)
@@ -188,7 +209,7 @@ class Analysis(Plugin):

    def analyse_entries(self, entries, parameters):
        for entry in entries:
-            logger.debug('Analysing entry with plugin {}: {}'.format(self, entry))
+            self.log.debug('Analysing entry with plugin {}: {}'.format(self, entry))
            results = self.analyse_entry(entry, parameters)
            if inspect.isgenerator(results):
                for result in results:
@@ -291,7 +312,7 @@ class Box(AnalysisPlugin):
        return self

    def transform(self, X):
-        return np.array([self.predict_one(x) for x in X])
+        return [self.predict_one(x) for x in X]

    def predict(self, X):
        return self.transform(X)
@@ -377,7 +398,7 @@ class ShelfMixin(object):
                    with self.open(self.shelf_file, 'rb') as p:
                        self._sh = pickle.load(p)
                except (IndexError, EOFError, pickle.UnpicklingError):
-                    logger.warning('{} has a corrupted shelf file!'.format(self.id))
+                    self.log.warning('Corrupted shelf file: {}'.format(self.shelf_file))
                    if not self.get('force_shelf', False):
                        raise
        return self._sh
@@ -404,32 +425,31 @@ class ShelfMixin(object):
        self._shelf_file = value

    def save(self):
-        logger.debug('saving pickle')
+        self.log.debug('Saving pickle')
        if hasattr(self, '_sh') and self._sh is not None:
            with self.open(self.shelf_file, 'wb') as f:
                pickle.dump(self._sh, f)


-def pfilter(plugins, **kwargs):
+def pfilter(plugins, plugin_type=Analysis, **kwargs):
    """ Filter plugins by different criteria """
    if isinstance(plugins, models.Plugins):
        plugins = plugins.plugins
    elif isinstance(plugins, dict):
        plugins = plugins.values()
-    ptype = kwargs.pop('plugin_type', Plugin)
    logger.debug('#' * 100)
-    logger.debug('ptype {}'.format(ptype))
-    if ptype:
-        if isinstance(ptype, PluginMeta):
-            ptype = ptype.__name__
+    logger.debug('plugin_type {}'.format(plugin_type))
+    if plugin_type:
+        if isinstance(plugin_type, PluginMeta):
+            plugin_type = plugin_type.__name__
        try:
-            ptype = ptype[0].upper() + ptype[1:]
-            pclass = globals()[ptype]
+            plugin_type = plugin_type[0].upper() + plugin_type[1:]
+            pclass = globals()[plugin_type]
            logger.debug('Class: {}'.format(pclass))
            candidates = filter(lambda x: isinstance(x, pclass),
                                plugins)
        except KeyError:
-            raise models.Error('{} is not a valid type'.format(ptype))
+            raise models.Error('{} is not a valid type'.format(plugin_type))
    else:
        candidates = plugins

@@ -464,6 +484,7 @@ def _log_subprocess_output(process):

 def install_deps(*plugins):
    installed = False
+    nltk_resources = set()
    for info in plugins:
        requirements = info.get('requirements', [])
        if requirements:
@@ -479,6 +500,9 @@ def install_deps(*plugins):
            installed = True
            if exitcode != 0:
                raise models.Error("Dependencies not properly installed")
+        nltk_resources |= set(info.get('nltk_resources', []))
+
+    installed |= nltk.download(list(nltk_resources))
    return installed


@@ -575,12 +599,14 @@ def _instances_in_module(module):
 def _from_module_name(module, root, info=None, install=True, **kwargs):
    try:
        module = load_module(module, root)
-    except ImportError:
+    except (ImportError, LookupError):
        if not install or not info:
            raise
        install_deps(info)
        module = load_module(module, root)
    for plugin in _from_loaded_module(module=module, root=root, info=info, **kwargs):
+        if install:
+            install_deps(plugin)
        yield plugin


--- a/senpy/schemas/context.jsonld
+++ b/senpy/schemas/context.jsonld
@@ -10,8 +10,10 @@
    "wna": "http://www.gsi.dit.upm.es/ontologies/wnaffect/ns#",
    "emoml": "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/emotionml/ns#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "fam": "http://vocab.fusepool.info/fam#",
    "topics": {
-      "@id": "dc:subject"
+      "@id": "nif:topic",
+      "@container": "@set"
    },
    "entities": {
      "@id": "me:hasEntities"
--- a/senpy/schemas/evaluation.json
+++ b/senpy/schemas/evaluation.json
@@ -1,6 +1,6 @@
 {
  "$schema": "http://json-schema.org/draft-04/schema#",
-  "name": "Evalation",
+  "name": "Evaluation",
  "properties": {
    "@id": {
      "type": "string"
--- a/senpy/utils.py
+++ b/senpy/utils.py
@@ -1,6 +1,7 @@
 from . import models, __version__
 from collections import MutableMapping
 import pprint
+import pdb

 import logging
 logger = logging.getLogger(__name__)
@@ -32,8 +33,8 @@ def check_template(indict, template):
        if indict != template:
            raise models.Error(('Differences found.\n'
                                '\tExpected: {}\n'
-                                '\tFound: {}').format(pprint.pformat(indict),
-                                                      pprint.pformat(template)))
+                                '\tFound: {}').format(pprint.pformat(template),
+                                                      pprint.pformat(indict)))


 def convert_dictionary(original, mappings):
@@ -67,18 +68,23 @@ def easy_load(app=None, plugin_list=None, plugin_folder=None, **kwargs):
    return sp, app


-def easy_test(plugin_list=None):
+def easy_test(plugin_list=None, debug=True):
    logger.setLevel(logging.DEBUG)
    logging.getLogger().setLevel(logging.INFO)
-    if not plugin_list:
-        import __main__
-        logger.info('Loading classes from {}'.format(__main__))
-        from . import plugins
-        plugin_list = plugins.from_module(__main__)
-    for plug in plugin_list:
-        plug.test()
-        logger.info('The tests for {} passed!'.format(plug.name))
-    logger.info('All tests passed!')
+    try:
+        if not plugin_list:
+            import __main__
+            logger.info('Loading classes from {}'.format(__main__))
+            from . import plugins
+            plugin_list = plugins.from_module(__main__)
+        for plug in plugin_list:
+            plug.test()
+            plug.log.info('My tests passed!')
+            logger.info('All tests passed!')
+    except Exception:
+        if not debug:
+            raise
+        pdb.post_mortem()


 def easy(host='0.0.0.0', port=5000, debug=True, **kwargs):
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -32,7 +32,7 @@ class APITest(TestCase):
        query = {}
        plug_params = {
            'hello': {
-                'aliases': ['hello', 'hiya'],
+                'aliases': ['hiya', 'hello'],
                'required': True
            }
        }
@@ -48,6 +48,26 @@ class APITest(TestCase):
        assert 'hello' in p
        assert p['hello'] == 'dlrow'

+    def test_parameters2(self):
+        in1 = {
+            'meaningcloud-key': 5
+        }
+        in2 = {
+            'apikey': 25
+        }
+        extra_params = {
+            "apikey": {
+                "aliases": [
+                    "apikey",
+                    "meaningcloud-key"
+                ],
+                "required": True
+            }
+        }
+        p1 = parse_params(in1, extra_params)
+        p2 = parse_params(in2, extra_params)
+        assert (p2['apikey'] / p1['apikey']) == 5
+
    def test_default(self):
        spec = {
            'hello': {
--- a/tests/test_extensions.py
+++ b/tests/test_extensions.py
@@ -47,7 +47,7 @@ class ExtensionsTest(TestCase):

    def test_add_delete(self):
        '''Should be able to add and delete new plugins. '''
-        new = plugins.Plugin(name='new', description='new', version=0)
+        new = plugins.Analysis(name='new', description='new', version=0)
        self.senpy.add_plugin(new)
        assert new in self.senpy.plugins()
        self.senpy.delete_plugin(new)
--- a/tests/test_schemas.py
+++ b/tests/test_schemas.py
@@ -8,6 +8,8 @@ from fnmatch import fnmatch

 from jsonschema import RefResolver, Draft4Validator, ValidationError

+from senpy.models import read_schema
+
 root_path = path.join(path.dirname(path.realpath(__file__)), '..')
 schema_folder = path.join(root_path, 'senpy', 'schemas')
 examples_path = path.join(root_path, 'docs', 'examples')
@@ -15,7 +17,8 @@ bad_examples_path = path.join(root_path, 'docs', 'bad-examples')


 class JSONSchemaTests(unittest.TestCase):
-    pass
+    def test_definitions(self):
+        read_schema('definitions.json')


 def do_create_(jsfile, success):
Author	SHA1	Message	Date
J. Fernando Sánchez	1313853788	Several fixes and improvements * Add Topic model * Add PDB post-mortem debugging * Add logger to plugins (`self.log`) * Add NLTK resource auto-download * Force installation of requirements even if adding doesn't work * Add a method to find files in several possible locations. Now the plugin.open method will try these locations IF the file is to be opened in read mode. Otherwise only the SENPY_DATA folder will be used (to avoid writing to the package folder).	2018-06-14 15:10:16 +02:00
J. Fernando Sánchez	697e779767	Fix schema issues and parameter validation	2018-05-16 11:16:32 +02:00
J. Fernando Sánchez	48f5ffafa1	Defer plugin validation to init	2018-05-14 11:38:02 +02:00
J. Fernando Sánchez	73f7cbbe8a	Add extra-requirements for pip	2018-04-25 11:01:17 +02:00
J. Fernando Sánchez	07a41236f8	Do not push image tag for latest	2018-04-25 10:52:30 +02:00
J. Fernando Sánchez	55db97cf62	Add basic evaluation and fix installation * Merge branch '44-add-basic-evaluation-with-gsitk' * Refactor requirements (add extra-requirements) * Skip evaluation tests in Py2 * Fix installation with PIP * Implement the evaluation service inside the Senpy API * Connect Plugins to GSITK's evaluation module * Add an evaluation method inside the Senpy Context * Add the evaluation models and schemas * Add Evaluation to the Playground, with a table view * Add evaluation tests	2018-04-25 10:12:26 +02:00