1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-12-23 13:38:12 +00:00

Draft merge 51-improve parameters

There are some unresolved issues, such as how to represent the mix of
analysis and parameters in a sensible way.
I think we should somehow represent each of the analysis tasks with a unique ID,
and it should contain the specific parameters used.

Right now results.parameters is a mix of a dict with global parameters and a
list with a dict of parameters per plugin.
This commit is contained in:
J. Fernando Sánchez 2018-11-22 18:50:21 +01:00
commit 6a1069780b
6 changed files with 329 additions and 30 deletions

View File

@ -187,22 +187,108 @@ def parse_params(indict, *specs):
return outdict
def parse_extra_params(request, plugins=None):
plugins = plugins or []
params = request.parameters.copy()
for plugin in plugins:
if plugin:
extra_params = parse_params(params, plugin.get('extra_params', {}))
for k, v in extra_params.items():
if k not in BUILTIN_PARAMS:
if k in params: # Set by another plugin
del params[k]
else:
params[k] = v
params['{}.{}'.format(plugin.name, k)] = v
def get_all_params(plugins, *specs):
    '''
    Return a dict with every parameter accepted by a set of plugins.

    The given specifications are merged in order (later ones override earlier
    ones) and the result of get_extra_params(plugins) is applied last.
    '''
    sources = list(specs)
    sources.append(get_extra_params(plugins))
    combined = {}
    for source in sources:
        combined.update(source)
    return combined
def _merge_param_specs(specs):
    '''
    Merge several specifications of the same parameter into a single one.

    Returns the merged specification, or None when the specifications declare
    options that have no value in common (i.e. they are incompatible).
    '''
    required = False
    aliases = None
    options = None  # None means that no specification restricts the values
    default = None
    conflicting_defaults = False
    for spec in specs:
        required = required or spec.get('required', False)
        spec_aliases = set(spec.get('aliases', []))
        aliases = spec_aliases if aliases is None else aliases & spec_aliases
        if 'options' in spec:
            spec_options = set(spec['options'])
            options = spec_options if options is None else options & spec_options
        # Compare against None instead of truthiness, so that falsy defaults
        # (False, 0, '') also take part in the conflict detection.
        spec_default = spec.get('default')
        if spec_default is None or conflicting_defaults:
            continue
        if default is None:
            default = spec_default
        elif spec_default != default:
            conflicting_defaults = True
            default = None
    if options is not None and not options:
        return None
    merged = {
        'default': default,
        'aliases': list(aliases or ()),
        'required': required,
    }
    if options is not None:
        merged['options'] = list(options)
    return merged


def get_extra_params(plugins):
    '''
    Get a dict of the possible parameters for a list (chain) of plugins.

    Every extra parameter of every plugin is exposed under two qualified names:
    ``<plugin.name>.<parameter>`` and ``<position in the chain>.<parameter>``.
    The plain parameter name is also exposed when:

    * only one plugin declares it (its specification is used verbatim), or
    * several plugins declare it and their specifications are compatible. The
      merged specification is required if any of them is, keeps only the
      aliases and options common to all of them, and only has a default when
      all the declared defaults agree.

    Parameters whose declared options have no value in common are only
    available through their qualified names.
    '''
    params = {}
    candidates = {}
    for i, plugin in enumerate(plugins):
        for k, v in plugin.get('extra_params', {}).items():
            candidates.setdefault(k, []).append(v)
            params['{}.{}'.format(plugin.name, k)] = v
            params['{}.{}'.format(i, k)] = v
    for k, specs in candidates.items():  # Resolve conflicts
        if len(specs) == 1:  # Parameters that do not collide are added as is
            params[k] = specs[0]
            continue
        merged = _merge_param_specs(specs)
        if merged is not None:
            params[k] = merged
    return params
def parse_extra_params(params, plugins):
    '''
    Parse the given parameters separately for each plugin.

    Returns a list with one dict of parsed parameters per plugin, in the same
    order as the plugins. Each item can then be used in the
    plugin.analyse_entries method.
    '''
    return [
        parse_params(filter_params(params, plugin, position),
                     plugin.get('extra_params', {}))
        for position, plugin in enumerate(plugins)
    ]
def filter_params(params, plugin, ith=-1):
    '''
    Get the values within params that apply to a plugin.

    Qualified names are returned without their prefix. More specific names
    override more general names, in this order, regardless of the order of
    the keys in params:

        <index_order>.parameter > <plugin.name>.parameter > parameter

    Keys that are not qualified for this plugin are passed through unchanged.
    The index prefix is only taken into account when ith is not negative.

    Example:

    >>> filter_params({'0.hello': True, 'hello': False}, Plugin(), 0)
    {'hello': True}
    '''
    index_prefix = '{}.'.format(ith) if ith >= 0 else ''
    # The separator is part of the prefix, so that a plugin named "split"
    # does not capture parameters such as "splitter".
    name_prefix = '{}.'.format(plugin.name)
    general = {}
    by_name = {}
    by_index = {}
    for k, v in params.items():
        if index_prefix and k.startswith(index_prefix):
            by_index[k[len(index_prefix):]] = v
        elif k.startswith(name_prefix):
            by_name[k[len(name_prefix):]] = v
        else:
            general[k] = v
    # Apply from the least to the most specific, so the latter always win
    thisparams = general
    thisparams.update(by_name)
    thisparams.update(by_index)
    return thisparams
def parse_call(params):
'''Return a results object based on the parameters used in a call/request.
'''

View File

@ -188,15 +188,20 @@ def basic_api(f):
@api_blueprint.route('/<path:plugin>', methods=['POST', 'GET'])
@basic_api
def api_root(plugin):
if plugin:
if 'algorithm' in request.parameters:
raise Error('You cannot specify the algorithm with a parameter and a URL variable.'
' Please, remove one of them')
plugin = plugin.replace('+', '/')
request.parameters['algorithm'] = tuple(plugin.split('/'))
if request.parameters['help']:
dic = dict(api.API_PARAMS, **api.NIF_PARAMS)
response = Help(valid_parameters=dic)
sp = current_app.senpy
plugins = sp._get_plugins(request)
allparameters = api.get_all_params(plugins, api.WEB_PARAMS, api.API_PARAMS, api.NIF_PARAMS)
response = Help(valid_parameters=allparameters)
return response
req = api.parse_call(request.parameters)
if plugin:
plugin = plugin.replace('+', '/')
plugin = plugin.split('/')
req.parameters['algorithm'] = tuple(plugin)
results = current_app.senpy.analyse(req)
results.analysis = set(i.id for i in results.analysis)
return results

View File

@ -144,7 +144,7 @@ class Senpy(object):
return plugins
def _process(self, req, pending, done=None):
def _process(self, req, parameters, pending, done=None):
"""
Recursively process the entries with the first plugin in the list, and pass the results
to the rest of the plugins.
@ -154,10 +154,11 @@ class Senpy(object):
return req
plugin = pending[0]
req.parameters = parameters[0]
results = plugin.process(req, conversions_applied=done)
if plugin not in results.analysis:
results.analysis.append(plugin)
return self._process(results, pending[1:], done)
return self._process(results, parameters[1:], pending[1:], done)
def install_deps(self):
plugins.install_deps(*self.plugins())
@ -168,10 +169,11 @@ class Senpy(object):
It takes a processed request, provided by the user, as returned
by api.parse_call().
"""
logger.debug("analysing request: {}".format(request))
plugins = self._get_plugins(request)
request.parameters = api.parse_extra_params(request, plugins)
results = self._process(request, plugins)
parameters = api.parse_extra_params(request.parameters, plugins)
results = self._process(request, parameters, plugins)
logger.debug("Got analysis result: {}".format(results))
results = self.postprocess(results)
logger.debug("Returning post-processed result: {}".format(results))

View File

@ -9,7 +9,7 @@ class Split(AnalysisPlugin):
'''description: A sample plugin that chunks input text'''
author = ["@militarpancho", '@balkian']
version = '0.2'
version = '0.3'
url = "https://github.com/gsi-upm/senpy"
extra_params = {
@ -33,12 +33,15 @@ class Split(AnalysisPlugin):
if chunker_type == "paragraph":
tokenizer = LineTokenizer()
chars = list(tokenizer.span_tokenize(original_text))
for i, chunk in enumerate(tokenizer.tokenize(original_text)):
print(chunk)
if len(chars) == 1:
# This sentence was already split
return
for i, chunk in enumerate(chars):
start, end = chunk
e = Entry()
e['nif:isString'] = chunk
e['nif:isString'] = original_text[start:end]
if entry.id:
e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1])
e.id = entry.id + "#char={},{}".format(start, end)
yield e
test_cases = [

View File

@ -3,8 +3,9 @@ import logging
logger = logging.getLogger(__name__)
from unittest import TestCase
from senpy.api import parse_params, API_PARAMS, NIF_PARAMS, WEB_PARAMS
from senpy.models import Error
from senpy.api import (boolean, parse_params, get_extra_params, parse_extra_params,
API_PARAMS, NIF_PARAMS, WEB_PARAMS)
from senpy.models import Error, Plugin
class APITest(TestCase):
@ -89,3 +90,157 @@ class APITest(TestCase):
assert "Dummy" in p['algorithm']
assert 'input' in p
assert p['input'] == 'Aloha my friend'
def test_parse_extra_params(self):
    '''User parameters should be parsed into a format that each plugin can use'''
    plugin1_params = {
        # Incompatible parameter
        'param0': {
            'aliases': ['p1', 'parameter1'],
            'options': ['option1', 'option2'],
            'default': 'option1',
            'required': True
        },
        'param1': {
            'aliases': ['p1', 'parameter1'],
            'options': ['en', 'es'],
            'default': 'en',
            'required': False
        },
        'param2': {
            'aliases': ['p2', 'parameter2'],
            'required': False,
            'options': ['value2_1', 'value2_2', 'value3_3']
        }
    }
    plugin2_params = {
        'param0': {
            'aliases': ['parameter1'],
            'options': ['new option', 'new option2'],
            'default': 'new option',
            'required': False
        },
        'param1': {
            'aliases': ['myparam1', 'p1'],
            'options': ['en', 'de', 'auto'],
            'default': 'de',
            'required': True
        },
        'param3': {
            'aliases': ['p3', 'parameter3'],
            'options': boolean,
            'default': True
        }
    }
    plugins = [
        Plugin({'name': 'plugin1', 'extra_params': plugin1_params}),
        Plugin({'name': 'plugin2', 'extra_params': plugin2_params}),
    ]
    # Mix of general parameters and parameters qualified by position
    call = {
        'param1': 'en',
        '0.param0': 'option1',
        '0.param1': 'en',
        'param2': 'value2_1',
        'param0': 'new option',
        '1.param1': 'de',
        'param3': False,
    }
    expected_plugin1 = {
        'param0': 'option1',
        'param1': 'en',
        'param2': 'value2_1',
    }
    expected_plugin2 = {
        'param0': 'new option',
        'param1': 'de',
        'param3': False,
    }
    expected = [expected_plugin1, expected_plugin2]

    p = parse_extra_params(call, plugins)

    for position, wanted in enumerate(expected):
        parsed = p[position]
        for name, value in wanted.items():
            assert parsed[name] == value
def test_get_extra_params(self):
    '''The list of valid parameters for a set of plugins should be returned'''
    plugin1_params = {
        # Incompatible parameter
        'param0': {
            'aliases': ['p1', 'parameter1'],
            'options': ['option1', 'option2'],
            'default': 'option1',
            'required': True
        },
        'param1': {
            'aliases': ['p1', 'parameter1'],
            'options': ['en', 'es'],
            'default': 'en',
            'required': False
        },
        'param2': {
            'aliases': ['p2', 'parameter2'],
            'required': False,
            'options': ['value2_1', 'value2_2', 'value3_3']
        }
    }
    plugin2_params = {
        'param0': {
            'aliases': ['parameter1'],
            'options': ['new option', 'new option2'],
            'default': 'new option',
            'required': False
        },
        'param1': {
            'aliases': ['myparam1', 'p1'],
            'options': ['en', 'de', 'auto'],
            'default': 'de',
            'required': True
        },
        'param3': {
            'aliases': ['p3', 'parameter3'],
            'options': boolean,
            'default': True
        }
    }
    plugins = [
        Plugin({'name': 'plugin1', 'extra_params': plugin1_params}),
        Plugin({'name': 'plugin2', 'extra_params': plugin2_params}),
    ]
    first = plugins[0]['extra_params']
    second = plugins[1]['extra_params']

    expected = {
        # Each plugin's parameters
        '0.param0': first['param0'],
        '0.param1': first['param1'],
        '0.param2': first['param2'],
        '1.param0': second['param0'],
        '1.param1': second['param1'],
        '1.param3': second['param3'],
        # Non-overlapping parameters
        'param2': first['param2'],
        'param3': second['param3'],
        # Intersection of overlapping parameters
        'param1': {
            'aliases': ['p1'],
            'options': ['en'],
            'default': None,
            'required': True
        }
    }

    result = get_extra_params(plugins)

    for name, spec in expected.items():
        assert name in result
        for field, _ in spec.items():
            assert field in result[name]
            assert expected[name][field] == result[name][field]

View File

@ -107,6 +107,7 @@ class BlueprintsTest(TestCase):
assert isinstance(js, models.Error)
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=notvalid")
self.assertCode(resp, 400)
self.app.config['TESTING'] = True
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=a")
self.assertCode(resp, 200)
@ -148,6 +149,53 @@ class BlueprintsTest(TestCase):
assert len(js['analysis']) == 1
assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
def test_requirements_chain_help(self):
    '''The help for a chain of plugins should include the extra parameters of each plugin'''
    response = self.client.get("/api/split/DummyRequired?help=true")
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert 'valid_parameters' in payload
    # 'example' is asserted here as a parameter that comes from the chain
    assert 'example' in payload['valid_parameters']
def test_requirements_chain_repeat_help(self):
    '''
    When the same plugin is used more than once in a chain, the help should offer
    a way to set different parameters for each occurrence.
    '''
    response = self.client.get("/api/split/split?help=true")
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert 'valid_parameters' in payload
    valid = payload['valid_parameters']
    # One qualified name per position in the chain, plus the general name
    for name in ('0.delimiter', '1.delimiter', 'delimiter'):
        assert name in valid
def test_requirements_chain(self):
    """
    It should be possible to specify different parameters for each step in the chain.
    """
    # First, both steps of the chain split by sentence, because the general
    # "delimiter" parameter applies to every plugin in the chain.
    # Each step should generate 3 additional entries (one per sentence in the original).
    resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
                           '\nA new paragraph&delimiter=sentence')
    # Fail early with a clear status-code error instead of a parsing/assertion error
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    assert len(js['analysis']) == 2
    assert len(js['entries']) == 7

    # Now, the first step splits by sentence. This produces 3 additional entries.
    # Then, the second step splits by paragraph. This should create 2 additional
    # entries (one per paragraph in the original text).
    resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
                           '\nA new paragraph&0.delimiter=sentence&1.delimiter=paragraph')
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    assert len(js['analysis']) == 2
    assert len(js['entries']) == 6
def test_error(self):
"""
The dummy plugin returns an empty response,\