mirror of
https://github.com/gsi-upm/senpy
synced 2024-12-23 13:38:12 +00:00
Draft merge 51-improve parameters
There are some unresolved issues, such as how to represent the mix of analysis and parameters in a sensible way. I think each analysis task should be represented with a unique ID, and that it should contain the specific parameters that were used. Right now, results.parameters is a mix of a dict with the global parameters and a list with one dict of parameters per plugin.
This commit is contained in:
commit
6a1069780b
112
senpy/api.py
112
senpy/api.py
@ -187,22 +187,108 @@ def parse_params(indict, *specs):
|
||||
return outdict
|
||||
|
||||
|
||||
def parse_extra_params(request, plugins=None):
|
||||
plugins = plugins or []
|
||||
params = request.parameters.copy()
|
||||
for plugin in plugins:
|
||||
if plugin:
|
||||
extra_params = parse_params(params, plugin.get('extra_params', {}))
|
||||
for k, v in extra_params.items():
|
||||
if k not in BUILTIN_PARAMS:
|
||||
if k in params: # Set by another plugin
|
||||
del params[k]
|
||||
else:
|
||||
params[k] = v
|
||||
params['{}.{}'.format(plugin.name, k)] = v
|
||||
def get_all_params(plugins, *specs):
    '''
    Return a dictionary with every parameter that applies to a set of plugins.

    The result combines each of the given specifications (in order, so later
    ones override earlier ones) with the extra parameters computed by
    get_extra_params for the plugins, which are applied last.
    '''
    merged = {}
    for spec in specs:
        merged.update(spec)
    plugin_params = get_extra_params(plugins)
    merged.update(plugin_params)
    return merged
|
||||
|
||||
|
||||
def _merge_param_specs(specs):
    '''
    Merge several specifications of the same parameter into a single one.

    The merged parameter is required if any specification is, it only keeps
    the aliases and the options that are shared by all the specifications,
    and it only keeps a default value if the specifications do not disagree.

    Returns None when the specifications are incompatible, i.e. they restrict
    the valid options and there is no option that all of them accept.
    '''
    required = False
    aliases = None
    options = None  # None means that no specification restricts the options
    default = None
    nodefault = False  # Set when defaults are not compatible

    for opt in specs:
        required = required or opt.get('required', False)

        newaliases = set(opt.get('aliases', []))
        aliases = newaliases if aliases is None else aliases & newaliases

        if 'options' in opt:
            newoptions = set(opt['options'])
            options = newoptions if options is None else options & newoptions

        # Compare against None instead of truthiness, so that falsy defaults
        # (False, 0, '') also take part in the conflict detection.
        newdefault = opt.get('default')
        if newdefault is None:
            continue
        if default is None and not nodefault:
            default = newdefault
        elif newdefault != default:
            nodefault = True
            default = None

    if options is not None and not options:
        # Every valid option was discarded by some plugin: incompatible
        return None

    merged = {
        'default': default,
        'aliases': list(aliases or ()),
        'required': required,
    }
    if options is not None:
        # Only constrain the values when some specification did so
        merged['options'] = list(options)
    return merged


def get_extra_params(plugins):
    '''
    Get a dictionary of the possible parameters given a list of plugins.

    For every extra parameter of every plugin, the result contains:

    * '<plugin.name>.<parameter>' and '<position>.<parameter>', which map to
      the specification of the parameter in that plugin.
    * '<parameter>', which maps to the original specification if only one
      plugin defines the parameter. If several plugins define it, it maps to
      the merge of all the specifications, and it is left out when the
      specifications are incompatible (they have no valid option in common).
    '''
    params = {}
    specs_by_name = {}
    for i, plugin in enumerate(plugins):
        this_params = plugin.get('extra_params', {})
        for k, v in this_params.items():
            specs_by_name.setdefault(k, []).append(v)
            params['{}.{}'.format(plugin.name, k)] = v
            params['{}.{}'.format(i, k)] = v

    for k, specs in specs_by_name.items():  # Resolve conflicts
        if len(specs) == 1:  # Add the extra options that do not collide
            params[k] = specs[0]
            continue
        merged = _merge_param_specs(specs)
        if merged is not None:
            params[k] = merged
    return params
|
||||
|
||||
|
||||
def parse_extra_params(params, plugins):
    '''
    Parse the given parameters separately for every plugin in the list.

    For each plugin, the parameters are first narrowed down with filter_params (using the
    position of the plugin in the list) and then parsed with parse_params against the
    `extra_params` of that plugin. The result is a list with one item per plugin, in the
    same order, and each item can then be used in the plugin.analyse_entries method.
    '''
    return [
        parse_params(filter_params(params, plugin, position),
                     plugin.get('extra_params', {}))
        for position, plugin in enumerate(plugins)
    ]
|
||||
|
||||
|
||||
def filter_params(params, plugin, ith=-1):
    '''
    Get the values within params that apply to a plugin.

    More specific names override more general names, in this order,
    regardless of the order of the keys in params:

        <index_order>.parameter > <plugin.name>.parameter > parameter

    The prefix is removed from the keys that match the plugin. Any other key
    (including keys prefixed for other plugins) is returned untouched.

    :param params: dictionary with the parameters of the call.
    :param plugin: plugin to filter the parameters for. Only its name is used.
    :param ith: position of the plugin in the chain. Negative values disable
                the <index_order> prefix.
    :return: a new dictionary with the parameters for the plugin.

    Example:

    >>> filter_params({'0.hello': True, 'hello': False}, Plugin({'name': 'plugin1'}), 0)
    {'hello': True}

    '''
    index_prefix = '{}.'.format(ith) if ith >= 0 else ''
    name = getattr(plugin, 'name', None)
    # The separator is part of the prefix, so that a parameter that merely
    # starts with the name of the plugin is not mistaken for a prefixed one.
    name_prefix = '{}.'.format(name) if name else ''

    general = {}
    by_name = {}
    by_index = {}
    for k, v in params.items():
        if index_prefix and k.startswith(index_prefix):
            by_index[k[len(index_prefix):]] = v
        elif name_prefix and k.startswith(name_prefix):
            by_name[k[len(name_prefix):]] = v
        else:
            general[k] = v

    # Apply from the least to the most specific, so the documented precedence
    # does not depend on the iteration order of params.
    thisparams = general
    thisparams.update(by_name)
    thisparams.update(by_index)
    return thisparams
|
||||
|
||||
|
||||
def parse_call(params):
|
||||
'''Return a results object based on the parameters used in a call/request.
|
||||
'''
|
||||
|
@ -188,15 +188,20 @@ def basic_api(f):
|
||||
@api_blueprint.route('/<path:plugin>', methods=['POST', 'GET'])
|
||||
@basic_api
|
||||
def api_root(plugin):
|
||||
if plugin:
|
||||
if 'algorithm' in request.parameters:
|
||||
raise Error('You cannot specify the algorithm with a parameter and a URL variable.'
|
||||
' Please, remove one of them')
|
||||
plugin = plugin.replace('+', '/')
|
||||
request.parameters['algorithm'] = tuple(plugin.split('/'))
|
||||
|
||||
if request.parameters['help']:
|
||||
dic = dict(api.API_PARAMS, **api.NIF_PARAMS)
|
||||
response = Help(valid_parameters=dic)
|
||||
sp = current_app.senpy
|
||||
plugins = sp._get_plugins(request)
|
||||
allparameters = api.get_all_params(plugins, api.WEB_PARAMS, api.API_PARAMS, api.NIF_PARAMS)
|
||||
response = Help(valid_parameters=allparameters)
|
||||
return response
|
||||
req = api.parse_call(request.parameters)
|
||||
if plugin:
|
||||
plugin = plugin.replace('+', '/')
|
||||
plugin = plugin.split('/')
|
||||
req.parameters['algorithm'] = tuple(plugin)
|
||||
results = current_app.senpy.analyse(req)
|
||||
results.analysis = set(i.id for i in results.analysis)
|
||||
return results
|
||||
|
@ -144,7 +144,7 @@ class Senpy(object):
|
||||
|
||||
return plugins
|
||||
|
||||
def _process(self, req, pending, done=None):
|
||||
def _process(self, req, parameters, pending, done=None):
|
||||
"""
|
||||
Recursively process the entries with the first plugin in the list, and pass the results
|
||||
to the rest of the plugins.
|
||||
@ -154,10 +154,11 @@ class Senpy(object):
|
||||
return req
|
||||
|
||||
plugin = pending[0]
|
||||
req.parameters = parameters[0]
|
||||
results = plugin.process(req, conversions_applied=done)
|
||||
if plugin not in results.analysis:
|
||||
results.analysis.append(plugin)
|
||||
return self._process(results, pending[1:], done)
|
||||
return self._process(results, parameters[1:], pending[1:], done)
|
||||
|
||||
def install_deps(self):
|
||||
plugins.install_deps(*self.plugins())
|
||||
@ -168,10 +169,11 @@ class Senpy(object):
|
||||
It takes a processed request, provided by the user, as returned
|
||||
by api.parse_call().
|
||||
"""
|
||||
|
||||
logger.debug("analysing request: {}".format(request))
|
||||
plugins = self._get_plugins(request)
|
||||
request.parameters = api.parse_extra_params(request, plugins)
|
||||
results = self._process(request, plugins)
|
||||
parameters = api.parse_extra_params(request.parameters, plugins)
|
||||
results = self._process(request, parameters, plugins)
|
||||
logger.debug("Got analysis result: {}".format(results))
|
||||
results = self.postprocess(results)
|
||||
logger.debug("Returning post-processed result: {}".format(results))
|
||||
|
@ -9,7 +9,7 @@ class Split(AnalysisPlugin):
|
||||
'''description: A sample plugin that chunks input text'''
|
||||
|
||||
author = ["@militarpancho", '@balkian']
|
||||
version = '0.2'
|
||||
version = '0.3'
|
||||
url = "https://github.com/gsi-upm/senpy"
|
||||
|
||||
extra_params = {
|
||||
@ -33,12 +33,15 @@ class Split(AnalysisPlugin):
|
||||
if chunker_type == "paragraph":
|
||||
tokenizer = LineTokenizer()
|
||||
chars = list(tokenizer.span_tokenize(original_text))
|
||||
for i, chunk in enumerate(tokenizer.tokenize(original_text)):
|
||||
print(chunk)
|
||||
if len(chars) == 1:
|
||||
# This sentence was already split
|
||||
return
|
||||
for i, chunk in enumerate(chars):
|
||||
start, end = chunk
|
||||
e = Entry()
|
||||
e['nif:isString'] = chunk
|
||||
e['nif:isString'] = original_text[start:end]
|
||||
if entry.id:
|
||||
e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1])
|
||||
e.id = entry.id + "#char={},{}".format(start, end)
|
||||
yield e
|
||||
|
||||
test_cases = [
|
||||
|
@ -3,8 +3,9 @@ import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from unittest import TestCase
|
||||
from senpy.api import parse_params, API_PARAMS, NIF_PARAMS, WEB_PARAMS
|
||||
from senpy.models import Error
|
||||
from senpy.api import (boolean, parse_params, get_extra_params, parse_extra_params,
|
||||
API_PARAMS, NIF_PARAMS, WEB_PARAMS)
|
||||
from senpy.models import Error, Plugin
|
||||
|
||||
|
||||
class APITest(TestCase):
|
||||
@ -89,3 +90,157 @@ class APITest(TestCase):
|
||||
assert "Dummy" in p['algorithm']
|
||||
assert 'input' in p
|
||||
assert p['input'] == 'Aloha my friend'
|
||||
|
||||
def test_parse_extra_params(self):
    '''
    User parameters should be parsed into one set of parameters per plugin, in a format
    that the plugins can use. Prefixed parameters (e.g. `0.param0`) only apply to the
    plugin in that position of the chain.
    '''
    first_spec = {
        # Incompatible parameter
        'param0': {
            'aliases': ['p1', 'parameter1'],
            'options': ['option1', 'option2'],
            'default': 'option1',
            'required': True
        },
        'param1': {
            'aliases': ['p1', 'parameter1'],
            'options': ['en', 'es'],
            'default': 'en',
            'required': False
        },
        'param2': {
            'aliases': ['p2', 'parameter2'],
            'required': False,
            'options': ['value2_1', 'value2_2', 'value3_3']
        }
    }
    second_spec = {
        'param0': {
            'aliases': ['parameter1'],
            'options': ['new option', 'new option2'],
            'default': 'new option',
            'required': False
        },
        'param1': {
            'aliases': ['myparam1', 'p1'],
            'options': ['en', 'de', 'auto'],
            'default': 'de',
            'required': True
        },
        'param3': {
            'aliases': ['p3', 'parameter3'],
            'options': boolean,
            'default': True
        }
    }
    plugins = [
        Plugin({'name': 'plugin1', 'extra_params': first_spec}),
        Plugin({'name': 'plugin2', 'extra_params': second_spec}),
    ]
    call = {
        'param1': 'en',
        '0.param0': 'option1',
        '0.param1': 'en',
        'param2': 'value2_1',
        'param0': 'new option',
        '1.param1': 'de',
        'param3': False,
    }
    # One dictionary per plugin, in the same order as the chain
    expected = [
        {'param0': 'option1', 'param1': 'en', 'param2': 'value2_1'},
        {'param0': 'new option', 'param1': 'de', 'param3': False},
    ]

    parsed = parse_extra_params(call, plugins)
    for position, wanted in enumerate(expected):
        for key, value in wanted.items():
            assert parsed[position][key] == value
|
||||
|
||||
def test_get_extra_params(self):
    '''
    The API should return the valid parameters for a set of plugins: the prefixed
    parameters of each plugin, the parameters that only one plugin defines, and the
    intersection of the parameters that several plugins define.
    '''
    first_spec = {
        # Incompatible parameter
        'param0': {
            'aliases': ['p1', 'parameter1'],
            'options': ['option1', 'option2'],
            'default': 'option1',
            'required': True
        },
        'param1': {
            'aliases': ['p1', 'parameter1'],
            'options': ['en', 'es'],
            'default': 'en',
            'required': False
        },
        'param2': {
            'aliases': ['p2', 'parameter2'],
            'required': False,
            'options': ['value2_1', 'value2_2', 'value3_3']
        }
    }
    second_spec = {
        'param0': {
            'aliases': ['parameter1'],
            'options': ['new option', 'new option2'],
            'default': 'new option',
            'required': False
        },
        'param1': {
            'aliases': ['myparam1', 'p1'],
            'options': ['en', 'de', 'auto'],
            'default': 'de',
            'required': True
        },
        'param3': {
            'aliases': ['p3', 'parameter3'],
            'options': boolean,
            'default': True
        }
    }
    plugins = [
        Plugin({'name': 'plugin1', 'extra_params': first_spec}),
        Plugin({'name': 'plugin2', 'extra_params': second_spec}),
    ]

    expected = {
        # Each plugin's parameters
        '0.param0': plugins[0]['extra_params']['param0'],
        '0.param1': plugins[0]['extra_params']['param1'],
        '0.param2': plugins[0]['extra_params']['param2'],
        '1.param0': plugins[1]['extra_params']['param0'],
        '1.param1': plugins[1]['extra_params']['param1'],
        '1.param3': plugins[1]['extra_params']['param3'],

        # Non-overlapping parameters
        'param2': plugins[0]['extra_params']['param2'],
        'param3': plugins[1]['extra_params']['param3'],

        # Intersection of overlapping parameters
        'param1': {
            'aliases': ['p1'],
            'options': ['en'],
            'default': None,
            'required': True
        }
    }

    result = get_extra_params(plugins)

    for name, spec in expected.items():
        assert name in result
        for field, value in spec.items():
            assert field in result[name]
            assert value == result[name][field]
|
||||
|
@ -107,6 +107,7 @@ class BlueprintsTest(TestCase):
|
||||
assert isinstance(js, models.Error)
|
||||
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=notvalid")
|
||||
self.assertCode(resp, 400)
|
||||
self.app.config['TESTING'] = True
|
||||
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=a")
|
||||
self.assertCode(resp, 200)
|
||||
|
||||
@ -148,6 +149,53 @@ class BlueprintsTest(TestCase):
|
||||
assert len(js['analysis']) == 1
|
||||
assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
|
||||
|
||||
def test_requirements_chain_help(self):
    '''The help of a chain of plugins should include the extra parameters of its plugins'''
    response = self.client.get("/api/split/DummyRequired?help=true")
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert 'valid_parameters' in payload
    valid = payload['valid_parameters']
    assert 'example' in valid
|
||||
|
||||
def test_requirements_chain_repeat_help(self):
    '''
    When the same plugin is used more than once in a chain, the help should list its
    parameters prefixed by the position of each occurrence, as well as the plain name.
    '''
    response = self.client.get("/api/split/split?help=true")
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert 'valid_parameters' in payload
    valid = payload['valid_parameters']
    for name in ('0.delimiter', '1.delimiter', 'delimiter'):
        assert name in valid
|
||||
|
||||
def test_requirements_chain(self):
    """
    Every step in a chain of plugins should accept its own set of parameters.
    """
    text = ('The first sentence. The second sentence.'
            '\nA new paragraph')

    # An unprefixed delimiter applies to both steps, so the text is split by sentence
    # twice. Each step should add 3 entries (one per sentence in the original).
    response = self.client.get('/api/split/split?i=' + text + '&delimiter=sentence')
    payload = parse_resp(response)
    assert len(payload['analysis']) == 2
    assert len(payload['entries']) == 7

    # With prefixed delimiters, the first step splits by sentence (3 additional entries)
    # and the second one splits by paragraph (2 additional entries, one per paragraph
    # in the original text).
    response = self.client.get('/api/split/split?i=' + text +
                               '&0.delimiter=sentence&1.delimiter=paragraph')
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert len(payload['analysis']) == 2
    assert len(payload['entries']) == 6
|
||||
|
||||
def test_error(self):
|
||||
"""
|
||||
The dummy plugin returns an empty response,\
|
||||
|
Loading…
Reference in New Issue
Block a user