From aa35e62a2724f9a3c33765d88de01fe8e44ba5f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Mon, 20 Aug 2018 14:07:33 +0200 Subject: [PATCH 1/2] Avoid duplication in split plugin --- senpy/plugins/misc/split_plugin.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/senpy/plugins/misc/split_plugin.py b/senpy/plugins/misc/split_plugin.py index 4c11f3a..d54a6fb 100644 --- a/senpy/plugins/misc/split_plugin.py +++ b/senpy/plugins/misc/split_plugin.py @@ -9,7 +9,7 @@ class Split(AnalysisPlugin): '''description: A sample plugin that chunks input text''' author = ["@militarpancho", '@balkian'] - version = '0.2' + version = '0.3' url = "https://github.com/gsi-upm/senpy" extra_params = { @@ -33,12 +33,15 @@ class Split(AnalysisPlugin): if chunker_type == "paragraph": tokenizer = LineTokenizer() chars = list(tokenizer.span_tokenize(original_text)) - for i, chunk in enumerate(tokenizer.tokenize(original_text)): - print(chunk) + if len(chars) == 1: + # This sentence was already split + return + for i, chunk in enumerate(chars): + start, end = chunk e = Entry() - e['nif:isString'] = chunk + e['nif:isString'] = original_text[start:end] if entry.id: - e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1]) + e.id = entry.id + "#char={},{}".format(start, end) yield e test_cases = [ From ca69bddc17122e08142e93180d687d9b91cb4f1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Mon, 20 Aug 2018 15:44:54 +0200 Subject: [PATCH 2/2] Improve extra requirement handling This commit adds a new mechanism to handle parameters beforehand in chained calls, and the ability to get help on available parameters in chained calls (through `?help`). It also includes tests for this feature. Closes #51 --- senpy/api.py | 108 ++++++++++++++++++++++++-- senpy/blueprints.py | 17 +++-- senpy/extensions.py | 16 ++-- tests/test_api.py | 159 ++++++++++++++++++++++++++++++++++++++- tests/test_blueprints.py | 47 ++++++++++++ 5 files changed, 326 insertions(+), 21 deletions(-) diff --git a/senpy/api.py b/senpy/api.py index 5d9b8b9..666eddb 100644 --- a/senpy/api.py +++ b/senpy/api.py @@ -175,23 +175,117 @@ def parse_params(indict, *specs): parameters=outdict, errors=wrong_params) raise message - if 'algorithm' in outdict and not isinstance(outdict['algorithm'], list): - outdict['algorithm'] = list(outdict['algorithm'].split(',')) return outdict -def parse_extra_params(request, plugin=None): - params = request.parameters.copy() - if plugin: - extra_params = parse_params(params, plugin.get('extra_params', {})) - params.update(extra_params) +def get_all_params(plugins, *specs): + '''Return a list of parameters for a given set of specifications and plugins.''' + dic = {} + for s in specs: + dic.update(s) + dic.update(get_extra_params(plugins)) + return dic + + +def get_extra_params(plugins): + '''Get a list of possible parameters given a list of plugins''' + params = {} + extra_params = {} + for i, plugin in enumerate(plugins): + this_params = plugin.get('extra_params', {}) + for k, v in this_params.items(): + if k not in extra_params: + extra_params[k] = [] + extra_params[k].append(v) + params['{}.{}'.format(plugin.name, k)] = v + params['{}.{}'.format(i, k)] = v + for k, v in extra_params.items(): # Resolve conflicts + if len(v) == 1: # Add the extra options that do not collide + params[k] = v[0] + else: + required = False + aliases = None + options = None + default = None + nodefault = False # Set when defaults are not compatible + + for opt in v: + required = required or opt.get('required', False) + newaliases = set(opt.get('aliases', [])) + if aliases is None: + aliases = newaliases + else: + aliases = aliases & newaliases + if 'options' in opt: + newoptions = set(opt['options']) + options = newoptions if options is None else options & newoptions + if 'default' in opt: + newdefault = opt['default'] + if newdefault: + if default is None and not nodefault: + default = newdefault + elif newdefault != default: + nodefault = True + default = None + # Check for incompatibilities + if options != set(): + params[k] = { + 'default': default, + 'aliases': list(aliases), + 'required': required, + 'options': list(options) + } return params +def parse_extra_params(params, plugins): + ''' + Parse the given parameters individually for each plugin, and get a list of the parameters that + belong to each of the plugins. Each item can then be used in the plugin.analyse_entries method. + ''' + extra_params = [] + for i, plugin in enumerate(plugins): + this_params = filter_params(params, plugin, i) + parsed = parse_params(this_params, plugin.get('extra_params', {})) + extra_params.append(parsed) + return extra_params + + +def filter_params(params, plugin, ith=-1): + ''' + Get the values within params that apply to a plugin. + More specific names override more general names, in this order: + + .parameter > .parameter > parameter + + + Example: + + >>> filter_params({'0.hello': True, 'hello': False}, Plugin(), 0) + { '0.hello': True, 'hello': True} + + ''' + thisparams = {} + if ith >= 0: + ith = '{}.'.format(ith) + else: + ith = "" + for k, v in params.items(): + if ith and k.startswith(str(ith)): + thisparams[k[len(ith):]] = v + elif k.startswith(plugin.name): + thisparams[k[len(plugin.name) + 1:]] = v + elif k not in thisparams: + thisparams[k] = v + return thisparams + + def parse_call(params): '''Return a results object based on the parameters used in a call/request. ''' params = parse_params(params, NIF_PARAMS) + if 'algorithm' in params and not isinstance(params['algorithm'], list): + params['algorithm'] = list(params['algorithm'].split(',')) if params['informat'] == 'text': results = Results() entry = Entry(nif__isString=params['input'], diff --git a/senpy/blueprints.py b/senpy/blueprints.py index 42e0c90..01b1546 100644 --- a/senpy/blueprints.py +++ b/senpy/blueprints.py @@ -188,15 +188,20 @@ def basic_api(f): @api_blueprint.route('/', methods=['POST', 'GET']) @basic_api def api_root(plugin): + if plugin: + if 'algorithm' in request.parameters: + raise Error('You cannot specify the algorithm with a parameter and a URL variable.' + ' Please, remove one of them') + plugin = plugin.replace('+', '/') + request.parameters['algorithm'] = plugin.split('/') + if request.parameters['help']: - dic = dict(api.API_PARAMS, **api.NIF_PARAMS) - response = Help(valid_parameters=dic) + sp = current_app.senpy + plugins = sp._get_plugins(request) + allparameters = api.get_all_params(plugins, api.WEB_PARAMS, api.API_PARAMS, api.NIF_PARAMS) + response = Help(valid_parameters=allparameters) return response req = api.parse_call(request.parameters) - if plugin: - plugin = plugin.replace('+', '/') - plugin = plugin.split('/') - req.parameters['algorithm'] = plugin return current_app.senpy.analyse(req) diff --git a/senpy/extensions.py b/senpy/extensions.py index d545485..ba7261b 100644 --- a/senpy/extensions.py +++ b/senpy/extensions.py @@ -143,7 +143,7 @@ class Senpy(object): plugins.append(self._plugins[algo]) return plugins - def _process_entries(self, entries, req, plugins): + def _process_entries(self, entries, req, plugins, extra, ith=0): """ Recursively process the entries with the first plugin in the list, and pass the results to the rest of the plugins. @@ -153,11 +153,11 @@ class Senpy(object): yield i return plugin = plugins[0] - specific_params = api.parse_extra_params(req, plugin) + specific_params = extra[ith] req.analysis.append({'plugin': plugin, 'parameters': specific_params}) results = plugin.analyse_entries(entries, specific_params) - for i in self._process_entries(results, req, plugins[1:]): + for i in self._process_entries(results, req, plugins[1:], extra, ith=ith + 1): yield i def install_deps(self): @@ -169,12 +169,16 @@ class Senpy(object): It takes a processed request, provided by the user, as returned by api.parse_call(). """ + logger.debug("analysing request: {}".format(request)) - entries = request.entries - request.entries = [] + plugins = self._get_plugins(request) + extra = api.parse_extra_params(request.parameters, plugins) + + entries = request.entries results = request - for i in self._process_entries(entries, results, plugins): + results.entries = [] + for i in self._process_entries(entries, results, plugins, extra): results.entries.append(i) self.convert_emotions(results) logger.debug("Returning analysis result: {}".format(results)) diff --git a/tests/test_api.py b/tests/test_api.py index 80e16bc..6b8f64f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -3,8 +3,9 @@ import logging logger = logging.getLogger(__name__) from unittest import TestCase -from senpy.api import parse_params, API_PARAMS, NIF_PARAMS, WEB_PARAMS -from senpy.models import Error +from senpy.api import (boolean, parse_params, get_extra_params, parse_extra_params, + API_PARAMS, NIF_PARAMS, WEB_PARAMS) +from senpy.models import Error, Plugin class APITest(TestCase): @@ -89,3 +90,157 @@ class APITest(TestCase): assert "Dummy" in p['algorithm'] assert 'input' in p assert p['input'] == 'Aloha my friend' + + def test_parse_extra_params(self): + '''The API should parse user parameters and return them in a format that plugins can use''' + plugins = [ + Plugin({ + 'name': 'plugin1', + 'extra_params': { + # Incompatible parameter + 'param0': { + 'aliases': ['p1', 'parameter1'], + 'options': ['option1', 'option2'], + 'default': 'option1', + 'required': True + }, + 'param1': { + 'aliases': ['p1', 'parameter1'], + 'options': ['en', 'es'], + + 'default': 'en', + 'required': False + }, + 'param2': { + 'aliases': ['p2', 'parameter2'], + 'required': False, + 'options': ['value2_1', 'value2_2', 'value3_3'] + } + } + }), Plugin({ + 'name': 'plugin2', + 'extra_params': { + 'param0': { + 'aliases': ['parameter1'], + 'options': ['new option', 'new option2'], + 'default': 'new option', + 'required': False + }, + 'param1': { + 'aliases': ['myparam1', 'p1'], + 'options': ['en', 'de', 'auto'], + 'default': 'de', + 'required': True + }, + 'param3': { + 'aliases': ['p3', 'parameter3'], + 'options': boolean, + 'default': True + } + } + }) + ] + call = { + 'param1': 'en', + '0.param0': 'option1', + '0.param1': 'en', + 'param2': 'value2_1', + 'param0': 'new option', + '1.param1': 'de', + 'param3': False, + } + expected = [ + { + 'param0': 'option1', + 'param1': 'en', + 'param2': 'value2_1', + }, { + 'param0': 'new option', + 'param1': 'de', + 'param3': False, + } + + ] + p = parse_extra_params(call, plugins) + for i, arg in enumerate(expected): + for k, v in arg.items(): + assert p[i][k] == v + + def test_get_extra_params(self): + '''The API should return the list of valid parameters for a set of plugins''' + plugins = [ + Plugin({ + 'name': 'plugin1', + 'extra_params': { + # Incompatible parameter + 'param0': { + 'aliases': ['p1', 'parameter1'], + 'options': ['option1', 'option2'], + 'default': 'option1', + 'required': True + }, + 'param1': { + 'aliases': ['p1', 'parameter1'], + 'options': ['en', 'es'], + 'default': 'en', + 'required': False + }, + 'param2': { + 'aliases': ['p2', 'parameter2'], + 'required': False, + 'options': ['value2_1', 'value2_2', 'value3_3'] + } + } + }), Plugin({ + 'name': 'plugin2', + 'extra_params': { + 'param0': { + 'aliases': ['parameter1'], + 'options': ['new option', 'new option2'], + 'default': 'new option', + 'required': False + }, + 'param1': { + 'aliases': ['myparam1', 'p1'], + 'options': ['en', 'de', 'auto'], + 'default': 'de', + 'required': True + }, + 'param3': { + 'aliases': ['p3', 'parameter3'], + 'options': boolean, + 'default': True + } + } + }) + ] + + expected = { + # Each plugin's parameters + '0.param0': plugins[0]['extra_params']['param0'], + '0.param1': plugins[0]['extra_params']['param1'], + '0.param2': plugins[0]['extra_params']['param2'], + '1.param0': plugins[1]['extra_params']['param0'], + '1.param1': plugins[1]['extra_params']['param1'], + '1.param3': plugins[1]['extra_params']['param3'], + + # Non-overlapping parameters + 'param2': plugins[0]['extra_params']['param2'], + 'param3': plugins[1]['extra_params']['param3'], + + # Intersection of overlapping parameters + 'param1': { + 'aliases': ['p1'], + 'options': ['en'], + 'default': None, + 'required': True + } + } + + result = get_extra_params(plugins) + + for ik, iv in expected.items(): + assert ik in result + for jk, jv in iv.items(): + assert jk in result[ik] + assert expected[ik][jk] == result[ik][jk] diff --git a/tests/test_blueprints.py b/tests/test_blueprints.py index 5648ece..659ff2f 100644 --- a/tests/test_blueprints.py +++ b/tests/test_blueprints.py @@ -133,6 +133,53 @@ class BlueprintsTest(TestCase): assert len(js['analysis']) == 2 assert js['entries'][0]['nif:isString'] == 'My aloha mohame' + def test_requirements_chain_help(self): + '''The extra parameters of each plugin should be merged if they are in a chain ''' + resp = self.client.get("/api/split/DummyRequired?help=true") + self.assertCode(resp, 200) + js = parse_resp(resp) + assert 'valid_parameters' in js + vp = js['valid_parameters'] + assert 'example' in vp + + def test_requirements_chain_repeat_help(self): + ''' + If a plugin appears several times in a chain, there should be a way to set different + parameters for each. + ''' + resp = self.client.get("/api/split/split?help=true") + self.assertCode(resp, 200) + js = parse_resp(resp) + assert 'valid_parameters' in js + vp = js['valid_parameters'] + assert '0.delimiter' in vp + assert '1.delimiter' in vp + assert 'delimiter' in vp + + def test_requirements_chain(self): + """ + It should be possible to specify different parameters for each step in the chain. + """ + + # First, we split by sentence twice. Each call should generate 3 additional entries + # (one per sentence in the original). + resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.' + '\nA new paragraph&delimiter=sentence') + js = parse_resp(resp) + assert len(js['analysis']) == 2 + assert len(js['entries']) == 7 + + # Now, we split by sentence. This produces 3 additional entries. + # Then, we split by paragraph. This should create 2 additional entries (One per paragraph + # in the original text) + resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.' + '\nA new paragraph&0.delimiter=sentence&1.delimiter=paragraph') + # Calling dummy twice, should return the same string + self.assertCode(resp, 200) + js = parse_resp(resp) + assert len(js['analysis']) == 2 + assert len(js['entries']) == 6 + def test_error(self): """ The dummy plugin returns an empty response,\