diff --git a/senpy/api.py b/senpy/api.py index 3b2f605..ee35dad 100644 --- a/senpy/api.py +++ b/senpy/api.py @@ -187,22 +187,108 @@ def parse_params(indict, *specs): return outdict -def parse_extra_params(request, plugins=None): - plugins = plugins or [] - params = request.parameters.copy() - for plugin in plugins: - if plugin: - extra_params = parse_params(params, plugin.get('extra_params', {})) - for k, v in extra_params.items(): - if k not in BUILTIN_PARAMS: - if k in params: # Set by another plugin - del params[k] - else: - params[k] = v - params['{}.{}'.format(plugin.name, k)] = v +def get_all_params(plugins, *specs): + '''Return a list of parameters for a given set of specifications and plugins.''' + dic = {} + for s in specs: + dic.update(s) + dic.update(get_extra_params(plugins)) + return dic + + +def get_extra_params(plugins): + '''Get a list of possible parameters given a list of plugins''' + params = {} + extra_params = {} + for i, plugin in enumerate(plugins): + this_params = plugin.get('extra_params', {}) + for k, v in this_params.items(): + if k not in extra_params: + extra_params[k] = [] + extra_params[k].append(v) + params['{}.{}'.format(plugin.name, k)] = v + params['{}.{}'.format(i, k)] = v + for k, v in extra_params.items(): # Resolve conflicts + if len(v) == 1: # Add the extra options that do not collide + params[k] = v[0] + else: + required = False + aliases = None + options = None + default = None + nodefault = False # Set when defaults are not compatible + + for opt in v: + required = required or opt.get('required', False) + newaliases = set(opt.get('aliases', [])) + if aliases is None: + aliases = newaliases + else: + aliases = aliases & newaliases + if 'options' in opt: + newoptions = set(opt['options']) + options = newoptions if options is None else options & newoptions + if 'default' in opt: + newdefault = opt['default'] + if newdefault: + if default is None and not nodefault: + default = newdefault + elif newdefault != default: + nodefault = True + default = None + # Check for incompatibilities + if options != set(): + params[k] = { + 'default': default, + 'aliases': list(aliases), + 'required': required, + 'options': list(options) + } return params +def parse_extra_params(params, plugins): + ''' + Parse the given parameters individually for each plugin, and get a list of the parameters that + belong to each of the plugins. Each item can then be used in the plugin.analyse_entries method. + ''' + extra_params = [] + for i, plugin in enumerate(plugins): + this_params = filter_params(params, plugin, i) + parsed = parse_params(this_params, plugin.get('extra_params', {})) + extra_params.append(parsed) + return extra_params + + +def filter_params(params, plugin, ith=-1): + ''' + Get the values within params that apply to a plugin. + More specific names override more general names, in this order: + + .parameter > .parameter > parameter + + + Example: + + >>> filter_params({'0.hello': True, 'hello': False}, Plugin(), 0) + { '0.hello': True, 'hello': True} + + ''' + thisparams = {} + if ith >= 0: + ith = '{}.'.format(ith) + else: + ith = "" + for k, v in params.items(): + if ith and k.startswith(str(ith)): + thisparams[k[len(ith):]] = v + elif k.startswith(plugin.name): + thisparams[k[len(plugin.name) + 1:]] = v + elif k not in thisparams: + thisparams[k] = v + return thisparams + + def parse_call(params): '''Return a results object based on the parameters used in a call/request. ''' diff --git a/senpy/blueprints.py b/senpy/blueprints.py index f1cf538..0bc6e40 100644 --- a/senpy/blueprints.py +++ b/senpy/blueprints.py @@ -188,15 +188,20 @@ def basic_api(f): @api_blueprint.route('/', methods=['POST', 'GET']) @basic_api def api_root(plugin): + if plugin: + if 'algorithm' in request.parameters: + raise Error('You cannot specify the algorithm with a parameter and a URL variable.' + ' Please, remove one of them') + plugin = plugin.replace('+', '/') + request.parameters['algorithm'] = tuple(plugin.split('/')) + if request.parameters['help']: - dic = dict(api.API_PARAMS, **api.NIF_PARAMS) - response = Help(valid_parameters=dic) + sp = current_app.senpy + plugins = sp._get_plugins(request) + allparameters = api.get_all_params(plugins, api.WEB_PARAMS, api.API_PARAMS, api.NIF_PARAMS) + response = Help(valid_parameters=allparameters) return response req = api.parse_call(request.parameters) - if plugin: - plugin = plugin.replace('+', '/') - plugin = plugin.split('/') - req.parameters['algorithm'] = tuple(plugin) results = current_app.senpy.analyse(req) results.analysis = set(i.id for i in results.analysis) return results diff --git a/senpy/extensions.py b/senpy/extensions.py index 428186d..054bded 100644 --- a/senpy/extensions.py +++ b/senpy/extensions.py @@ -144,7 +144,7 @@ class Senpy(object): return plugins - def _process(self, req, pending, done=None): + def _process(self, req, parameters, pending, done=None): """ Recursively process the entries with the first plugin in the list, and pass the results to the rest of the plugins. @@ -154,10 +154,11 @@ class Senpy(object): return req plugin = pending[0] + req.parameters = parameters[0] results = plugin.process(req, conversions_applied=done) if plugin not in results.analysis: results.analysis.append(plugin) - return self._process(results, pending[1:], done) + return self._process(results, parameters[1:], pending[1:], done) def install_deps(self): plugins.install_deps(*self.plugins()) @@ -168,10 +169,11 @@ class Senpy(object): It takes a processed request, provided by the user, as returned by api.parse_call(). """ + logger.debug("analysing request: {}".format(request)) plugins = self._get_plugins(request) - request.parameters = api.parse_extra_params(request, plugins) - results = self._process(request, plugins) + parameters = api.parse_extra_params(request.parameters, plugins) + results = self._process(request, parameters, plugins) logger.debug("Got analysis result: {}".format(results)) results = self.postprocess(results) logger.debug("Returning post-processed result: {}".format(results)) diff --git a/senpy/plugins/misc/split_plugin.py b/senpy/plugins/misc/split_plugin.py index 4c11f3a..d54a6fb 100644 --- a/senpy/plugins/misc/split_plugin.py +++ b/senpy/plugins/misc/split_plugin.py @@ -9,7 +9,7 @@ class Split(AnalysisPlugin): '''description: A sample plugin that chunks input text''' author = ["@militarpancho", '@balkian'] - version = '0.2' + version = '0.3' url = "https://github.com/gsi-upm/senpy" extra_params = { @@ -33,12 +33,15 @@ class Split(AnalysisPlugin): if chunker_type == "paragraph": tokenizer = LineTokenizer() chars = list(tokenizer.span_tokenize(original_text)) - for i, chunk in enumerate(tokenizer.tokenize(original_text)): - print(chunk) + if len(chars) == 1: + # This sentence was already split + return + for i, chunk in enumerate(chars): + start, end = chunk e = Entry() - e['nif:isString'] = chunk + e['nif:isString'] = original_text[start:end] if entry.id: - e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1]) + e.id = entry.id + "#char={},{}".format(start, end) yield e test_cases = [ diff --git a/tests/test_api.py b/tests/test_api.py index 80e16bc..6b8f64f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -3,8 +3,9 @@ import logging logger = logging.getLogger(__name__) from unittest import TestCase -from senpy.api import parse_params, API_PARAMS, NIF_PARAMS, WEB_PARAMS -from senpy.models import Error +from senpy.api import (boolean, parse_params, get_extra_params, parse_extra_params, + API_PARAMS, NIF_PARAMS, WEB_PARAMS) +from senpy.models import Error, Plugin class APITest(TestCase): @@ -89,3 +90,157 @@ class APITest(TestCase): assert "Dummy" in p['algorithm'] assert 'input' in p assert p['input'] == 'Aloha my friend' + + def test_parse_extra_params(self): + '''The API should parse user parameters and return them in a format that plugins can use''' + plugins = [ + Plugin({ + 'name': 'plugin1', + 'extra_params': { + # Incompatible parameter + 'param0': { + 'aliases': ['p1', 'parameter1'], + 'options': ['option1', 'option2'], + 'default': 'option1', + 'required': True + }, + 'param1': { + 'aliases': ['p1', 'parameter1'], + 'options': ['en', 'es'], + + 'default': 'en', + 'required': False + }, + 'param2': { + 'aliases': ['p2', 'parameter2'], + 'required': False, + 'options': ['value2_1', 'value2_2', 'value3_3'] + } + } + }), Plugin({ + 'name': 'plugin2', + 'extra_params': { + 'param0': { + 'aliases': ['parameter1'], + 'options': ['new option', 'new option2'], + 'default': 'new option', + 'required': False + }, + 'param1': { + 'aliases': ['myparam1', 'p1'], + 'options': ['en', 'de', 'auto'], + 'default': 'de', + 'required': True + }, + 'param3': { + 'aliases': ['p3', 'parameter3'], + 'options': boolean, + 'default': True + } + } + }) + ] + call = { + 'param1': 'en', + '0.param0': 'option1', + '0.param1': 'en', + 'param2': 'value2_1', + 'param0': 'new option', + '1.param1': 'de', + 'param3': False, + } + expected = [ + { + 'param0': 'option1', + 'param1': 'en', + 'param2': 'value2_1', + }, { + 'param0': 'new option', + 'param1': 'de', + 'param3': False, + } + + ] + p = parse_extra_params(call, plugins) + for i, arg in enumerate(expected): + for k, v in arg.items(): + assert p[i][k] == v + + def test_get_extra_params(self): + '''The API should return the list of valid parameters for a set of plugins''' + plugins = [ + Plugin({ + 'name': 'plugin1', + 'extra_params': { + # Incompatible parameter + 'param0': { + 'aliases': ['p1', 'parameter1'], + 'options': ['option1', 'option2'], + 'default': 'option1', + 'required': True + }, + 'param1': { + 'aliases': ['p1', 'parameter1'], + 'options': ['en', 'es'], + 'default': 'en', + 'required': False + }, + 'param2': { + 'aliases': ['p2', 'parameter2'], + 'required': False, + 'options': ['value2_1', 'value2_2', 'value3_3'] + } + } + }), Plugin({ + 'name': 'plugin2', + 'extra_params': { + 'param0': { + 'aliases': ['parameter1'], + 'options': ['new option', 'new option2'], + 'default': 'new option', + 'required': False + }, + 'param1': { + 'aliases': ['myparam1', 'p1'], + 'options': ['en', 'de', 'auto'], + 'default': 'de', + 'required': True + }, + 'param3': { + 'aliases': ['p3', 'parameter3'], + 'options': boolean, + 'default': True + } + } + }) + ] + + expected = { + # Each plugin's parameters + '0.param0': plugins[0]['extra_params']['param0'], + '0.param1': plugins[0]['extra_params']['param1'], + '0.param2': plugins[0]['extra_params']['param2'], + '1.param0': plugins[1]['extra_params']['param0'], + '1.param1': plugins[1]['extra_params']['param1'], + '1.param3': plugins[1]['extra_params']['param3'], + + # Non-overlapping parameters + 'param2': plugins[0]['extra_params']['param2'], + 'param3': plugins[1]['extra_params']['param3'], + + # Intersection of overlapping parameters + 'param1': { + 'aliases': ['p1'], + 'options': ['en'], + 'default': None, + 'required': True + } + } + + result = get_extra_params(plugins) + + for ik, iv in expected.items(): + assert ik in result + for jk, jv in iv.items(): + assert jk in result[ik] + assert expected[ik][jk] == result[ik][jk] diff --git a/tests/test_blueprints.py b/tests/test_blueprints.py index 212ce61..633c443 100644 --- a/tests/test_blueprints.py +++ b/tests/test_blueprints.py @@ -107,6 +107,7 @@ class BlueprintsTest(TestCase): assert isinstance(js, models.Error) resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=notvalid") self.assertCode(resp, 400) + self.app.config['TESTING'] = True resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=a") self.assertCode(resp, 200) @@ -148,6 +149,53 @@ class BlueprintsTest(TestCase): assert len(js['analysis']) == 1 assert js['entries'][0]['nif:isString'] == 'My aloha mohame' + def test_requirements_chain_help(self): + '''The extra parameters of each plugin should be merged if they are in a chain ''' + resp = self.client.get("/api/split/DummyRequired?help=true") + self.assertCode(resp, 200) + js = parse_resp(resp) + assert 'valid_parameters' in js + vp = js['valid_parameters'] + assert 'example' in vp + + def test_requirements_chain_repeat_help(self): + ''' + If a plugin appears several times in a chain, there should be a way to set different + parameters for each. + ''' + resp = self.client.get("/api/split/split?help=true") + self.assertCode(resp, 200) + js = parse_resp(resp) + assert 'valid_parameters' in js + vp = js['valid_parameters'] + assert '0.delimiter' in vp + assert '1.delimiter' in vp + assert 'delimiter' in vp + + def test_requirements_chain(self): + """ + It should be possible to specify different parameters for each step in the chain. + """ + + # First, we split by sentence twice. Each call should generate 3 additional entries + # (one per sentence in the original). + resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.' + '\nA new paragraph&delimiter=sentence') + js = parse_resp(resp) + assert len(js['analysis']) == 2 + assert len(js['entries']) == 7 + + # Now, we split by sentence. This produces 3 additional entries. + # Then, we split by paragraph. This should create 2 additional entries (One per paragraph + # in the original text) + resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.' + '\nA new paragraph&0.delimiter=sentence&1.delimiter=paragraph') + # Calling dummy twice, should return the same string + self.assertCode(resp, 200) + js = parse_resp(resp) + assert len(js['analysis']) == 2 + assert len(js['entries']) == 6 + def test_error(self): """ The dummy plugin returns an empty response,\