1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-23 08:32:29 +00:00

Draft merge 51-improve parameters

There are some unsolved issues, like representing the mix of analysis+parameters
in a sensible way.
I think we should somehow represent each of the analysis tasks with a unique ID,
and it should contain the specific parameters used.

Right now results.parameters is a mix of a dict with global parameters and a
list with a dict of parameters per plugin.
This commit is contained in:
J. Fernando Sánchez 2018-11-22 18:50:21 +01:00
commit 6a1069780b
6 changed files with 329 additions and 30 deletions

View File

@ -187,22 +187,108 @@ def parse_params(indict, *specs):
return outdict return outdict
def get_all_params(plugins, *specs):
    """Collect every parameter definition that applies to a request.

    The positional ``specs`` dictionaries are merged in the order given, so a
    later specification overrides an earlier one when both define the same
    key. The result of ``get_extra_params(plugins)`` is merged last and
    therefore overrides any of the specifications.

    Returns a single dictionary (not a list).
    """
    merged = {}
    for spec in specs:
        merged.update(spec)
    plugin_params = get_extra_params(plugins)
    merged.update(plugin_params)
    return merged
def _ordered_intersection(first, second):
    """Return the items of ``first`` that are also in ``second``, deduplicated, in order."""
    common = []
    for item in first:
        if item in second and item not in common:
            common.append(item)
    return common


def _merge_param_specs(specs):
    """Merge several definitions of one parameter into a single compatible definition.

    Returns the merged definition, or None when the definitions restrict the
    accepted options and those restrictions have no value in common.
    """
    required = False
    aliases = None
    options = None  # Stays None while no definition restricts the options
    default = None
    conflicting_defaults = False  # Set when defaults are not compatible
    for spec in specs:
        required = required or bool(spec.get('required', False))

        new_aliases = spec.get('aliases', [])
        aliases = _ordered_intersection(new_aliases if aliases is None else aliases,
                                        new_aliases)

        if 'options' in spec:
            new_options = spec['options']
            options = _ordered_intersection(new_options if options is None else options,
                                            new_options)

        # Compare against None (not truthiness) so that defaults such as
        # False or 0 are taken into account.
        new_default = spec.get('default')
        if new_default is not None and not conflicting_defaults:
            if default is None:
                default = new_default
            elif new_default != default:
                conflicting_defaults = True
                default = None

    if options is not None and not options:
        # The definitions accept disjoint sets of values: no shared parameter
        return None

    merged = {
        'default': default,
        'aliases': aliases,
        'required': required,
    }
    if options is not None:
        merged['options'] = options
    return merged


def get_extra_params(plugins):
    """Get a dictionary of the possible parameters for a chain of plugins.

    Every extra parameter of every plugin is registered under two qualified
    names: ``<plugin.name>.<parameter>`` and ``<position>.<parameter>``, where
    position is the index of the plugin in ``plugins``. If the same plugin
    name appears more than once, the name-qualified key holds the definition
    of the last occurrence.

    The bare parameter name is registered as well:

    * If only one plugin defines it, its definition is used as is.
    * If several plugins define it, the definitions are merged: it is required
      if any plugin requires it, aliases and options are intersected (keeping
      the order of the first definition), and the default is kept only when
      all plugins that provide one agree. When the option sets have nothing in
      common, the bare name is omitted and only the qualified names remain.
      When no plugin restricts the options, the merged definition has no
      ``options`` key.

    ``plugins`` is an iterable of objects exposing ``.get('extra_params', {})``
    and a ``name`` attribute.
    """
    params = {}
    definitions = {}  # bare parameter name -> definitions from each plugin
    for position, plugin in enumerate(plugins):
        for key, spec in plugin.get('extra_params', {}).items():
            definitions.setdefault(key, []).append(spec)
            params['{}.{}'.format(plugin.name, key)] = spec
            params['{}.{}'.format(position, key)] = spec

    for key, specs in definitions.items():  # Resolve conflicts
        if len(specs) == 1:  # Add the extra options that do not collide
            params[key] = specs[0]
            continue
        merged = _merge_param_specs(specs)
        if merged is not None:
            params[key] = merged
    return params
def parse_extra_params(params, plugins):
    """Split the request parameters into one parsed dictionary per plugin.

    For each plugin, in chain order, the parameters that apply to it are
    selected with ``filter_params`` (passing the plugin's position in the
    chain) and then parsed with ``parse_params`` against the plugin's
    ``extra_params`` specification.

    Returns a list with one entry per plugin, in the same order as
    ``plugins``. Each entry can then be used in the plugin.analyse_entries
    method.
    """
    return [
        parse_params(filter_params(params, plugin, position),
                     plugin.get('extra_params', {}))
        for position, plugin in enumerate(plugins)
    ]
def filter_params(params, plugin, ith=-1):
    """Get the values within params that apply to a plugin.

    Keys may be qualified with the position of the plugin in the chain
    (``0.parameter``) or with the plugin name (``name.parameter``). The
    qualifier is stripped in the result. More specific names override more
    general names, regardless of the order of the keys in ``params``:

        INDEX.parameter > PLUGIN_NAME.parameter > parameter

    A negative ``ith`` (the default) disables the index-qualified form. Keys
    qualified for a different index or plugin are passed through unchanged.

    Example, for the plugin at position 0:

        {'0.hello': True, 'hello': False}  ->  {'hello': True}
    """
    # The dot is part of the prefix, so a plugin called "plugin1" does not
    # capture keys such as "plugin10.x" or "plugin1_extra".
    name_prefix = '{}.'.format(plugin.name)
    index_prefix = '{}.'.format(ith) if ith >= 0 else None

    general = {}
    by_name = {}
    by_index = {}
    for key, value in params.items():
        if index_prefix is not None and key.startswith(index_prefix):
            by_index[key[len(index_prefix):]] = value
        elif key.startswith(name_prefix):
            by_name[key[len(name_prefix):]] = value
        else:
            general[key] = value

    # Apply from least to most specific so precedence never depends on the
    # iteration order of params.
    thisparams = general
    thisparams.update(by_name)
    thisparams.update(by_index)
    return thisparams
def parse_call(params): def parse_call(params):
'''Return a results object based on the parameters used in a call/request. '''Return a results object based on the parameters used in a call/request.
''' '''

View File

@ -188,15 +188,20 @@ def basic_api(f):
@api_blueprint.route('/<path:plugin>', methods=['POST', 'GET'])
@basic_api
def api_root(plugin):
    """Entry point of the analysis API.

    ``plugin`` is the optional algorithm chain taken from the URL path; its
    steps may be separated by ``/`` or ``+``. When it is given, it is stored
    as a tuple in ``request.parameters['algorithm']``; an Error is raised if
    the request parameters already contain an ``algorithm`` entry.

    If the ``help`` parameter is set, a Help response listing the valid
    parameters for the selected plugins is returned. Otherwise the request is
    parsed and analysed, and the results are returned with their ``analysis``
    replaced by the set of ids of the analyses.
    """
    if plugin:
        if 'algorithm' in request.parameters:
            raise Error('You cannot specify the algorithm with a parameter and a URL variable.'
                        ' Please, remove one of them')
        steps = plugin.replace('+', '/').split('/')
        request.parameters['algorithm'] = tuple(steps)

    if request.parameters['help']:
        chain = current_app.senpy._get_plugins(request)
        valid = api.get_all_params(chain, api.WEB_PARAMS, api.API_PARAMS, api.NIF_PARAMS)
        return Help(valid_parameters=valid)

    parsed_request = api.parse_call(request.parameters)
    results = current_app.senpy.analyse(parsed_request)
    results.analysis = {step.id for step in results.analysis}
    return results

View File

@ -144,7 +144,7 @@ class Senpy(object):
return plugins return plugins
def _process(self, req, pending, done=None): def _process(self, req, parameters, pending, done=None):
""" """
Recursively process the entries with the first plugin in the list, and pass the results Recursively process the entries with the first plugin in the list, and pass the results
to the rest of the plugins. to the rest of the plugins.
@ -154,10 +154,11 @@ class Senpy(object):
return req return req
plugin = pending[0] plugin = pending[0]
req.parameters = parameters[0]
results = plugin.process(req, conversions_applied=done) results = plugin.process(req, conversions_applied=done)
if plugin not in results.analysis: if plugin not in results.analysis:
results.analysis.append(plugin) results.analysis.append(plugin)
return self._process(results, pending[1:], done) return self._process(results, parameters[1:], pending[1:], done)
def install_deps(self): def install_deps(self):
plugins.install_deps(*self.plugins()) plugins.install_deps(*self.plugins())
@ -168,10 +169,11 @@ class Senpy(object):
It takes a processed request, provided by the user, as returned It takes a processed request, provided by the user, as returned
by api.parse_call(). by api.parse_call().
""" """
logger.debug("analysing request: {}".format(request)) logger.debug("analysing request: {}".format(request))
plugins = self._get_plugins(request) plugins = self._get_plugins(request)
request.parameters = api.parse_extra_params(request, plugins) parameters = api.parse_extra_params(request.parameters, plugins)
results = self._process(request, plugins) results = self._process(request, parameters, plugins)
logger.debug("Got analysis result: {}".format(results)) logger.debug("Got analysis result: {}".format(results))
results = self.postprocess(results) results = self.postprocess(results)
logger.debug("Returning post-processed result: {}".format(results)) logger.debug("Returning post-processed result: {}".format(results))

View File

@ -9,7 +9,7 @@ class Split(AnalysisPlugin):
'''description: A sample plugin that chunks input text''' '''description: A sample plugin that chunks input text'''
author = ["@militarpancho", '@balkian'] author = ["@militarpancho", '@balkian']
version = '0.2' version = '0.3'
url = "https://github.com/gsi-upm/senpy" url = "https://github.com/gsi-upm/senpy"
extra_params = { extra_params = {
@ -33,12 +33,15 @@ class Split(AnalysisPlugin):
if chunker_type == "paragraph": if chunker_type == "paragraph":
tokenizer = LineTokenizer() tokenizer = LineTokenizer()
chars = list(tokenizer.span_tokenize(original_text)) chars = list(tokenizer.span_tokenize(original_text))
for i, chunk in enumerate(tokenizer.tokenize(original_text)): if len(chars) == 1:
print(chunk) # This sentence was already split
return
for i, chunk in enumerate(chars):
start, end = chunk
e = Entry() e = Entry()
e['nif:isString'] = chunk e['nif:isString'] = original_text[start:end]
if entry.id: if entry.id:
e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1]) e.id = entry.id + "#char={},{}".format(start, end)
yield e yield e
test_cases = [ test_cases = [

View File

@ -3,8 +3,9 @@ import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from unittest import TestCase from unittest import TestCase
from senpy.api import parse_params, API_PARAMS, NIF_PARAMS, WEB_PARAMS from senpy.api import (boolean, parse_params, get_extra_params, parse_extra_params,
from senpy.models import Error API_PARAMS, NIF_PARAMS, WEB_PARAMS)
from senpy.models import Error, Plugin
class APITest(TestCase): class APITest(TestCase):
@ -89,3 +90,157 @@ class APITest(TestCase):
assert "Dummy" in p['algorithm'] assert "Dummy" in p['algorithm']
assert 'input' in p assert 'input' in p
assert p['input'] == 'Aloha my friend' assert p['input'] == 'Aloha my friend'
def test_parse_extra_params(self):
    """The API should turn user parameters into one dictionary per plugin.

    Two plugins share ``param0`` and ``param1`` with different definitions.
    The call mixes bare names with index-qualified names (``0.`` / ``1.``),
    and each plugin must receive the value that applies to it.
    """
    first_spec = {
        # Incompatible parameter
        'param0': {
            'aliases': ['p1', 'parameter1'],
            'options': ['option1', 'option2'],
            'default': 'option1',
            'required': True
        },
        'param1': {
            'aliases': ['p1', 'parameter1'],
            'options': ['en', 'es'],
            'default': 'en',
            'required': False
        },
        'param2': {
            'aliases': ['p2', 'parameter2'],
            'required': False,
            'options': ['value2_1', 'value2_2', 'value3_3']
        }
    }
    second_spec = {
        'param0': {
            'aliases': ['parameter1'],
            'options': ['new option', 'new option2'],
            'default': 'new option',
            'required': False
        },
        'param1': {
            'aliases': ['myparam1', 'p1'],
            'options': ['en', 'de', 'auto'],
            'default': 'de',
            'required': True
        },
        'param3': {
            'aliases': ['p3', 'parameter3'],
            'options': boolean,
            'default': True
        }
    }
    plugins = [
        Plugin({'name': 'plugin1', 'extra_params': first_spec}),
        Plugin({'name': 'plugin2', 'extra_params': second_spec}),
    ]
    call = {
        'param1': 'en',
        '0.param0': 'option1',
        '0.param1': 'en',
        'param2': 'value2_1',
        'param0': 'new option',
        '1.param1': 'de',
        'param3': False,
    }
    # Expected values, in the same order as the plugins
    expected = [
        {'param0': 'option1', 'param1': 'en', 'param2': 'value2_1'},
        {'param0': 'new option', 'param1': 'de', 'param3': False},
    ]

    p = parse_extra_params(call, plugins)
    for position, wanted in enumerate(expected):
        parsed = p[position]
        for key, value in wanted.items():
            assert parsed[key] == value
def test_get_extra_params(self):
    """The API should return the valid parameters for a set of plugins.

    Every parameter must be available under its index-qualified name.
    Parameters defined by a single plugin are also available under their bare
    name, and parameters defined by both plugins are exposed as the
    intersection of their definitions.
    """
    first_spec = {
        # Incompatible parameter
        'param0': {
            'aliases': ['p1', 'parameter1'],
            'options': ['option1', 'option2'],
            'default': 'option1',
            'required': True
        },
        'param1': {
            'aliases': ['p1', 'parameter1'],
            'options': ['en', 'es'],
            'default': 'en',
            'required': False
        },
        'param2': {
            'aliases': ['p2', 'parameter2'],
            'required': False,
            'options': ['value2_1', 'value2_2', 'value3_3']
        }
    }
    second_spec = {
        'param0': {
            'aliases': ['parameter1'],
            'options': ['new option', 'new option2'],
            'default': 'new option',
            'required': False
        },
        'param1': {
            'aliases': ['myparam1', 'p1'],
            'options': ['en', 'de', 'auto'],
            'default': 'de',
            'required': True
        },
        'param3': {
            'aliases': ['p3', 'parameter3'],
            'options': boolean,
            'default': True
        }
    }
    plugins = [
        Plugin({'name': 'plugin1', 'extra_params': first_spec}),
        Plugin({'name': 'plugin2', 'extra_params': second_spec}),
    ]

    expected = {
        # Each plugin's parameters
        '0.param0': plugins[0]['extra_params']['param0'],
        '0.param1': plugins[0]['extra_params']['param1'],
        '0.param2': plugins[0]['extra_params']['param2'],
        '1.param0': plugins[1]['extra_params']['param0'],
        '1.param1': plugins[1]['extra_params']['param1'],
        '1.param3': plugins[1]['extra_params']['param3'],
        # Non-overlapping parameters
        'param2': plugins[0]['extra_params']['param2'],
        'param3': plugins[1]['extra_params']['param3'],
        # Intersection of overlapping parameters
        'param1': {
            'aliases': ['p1'],
            'options': ['en'],
            'default': None,
            'required': True
        }
    }

    result = get_extra_params(plugins)
    for name, wanted_spec in expected.items():
        assert name in result
        for field in wanted_spec.keys():
            assert field in result[name]
            assert expected[name][field] == result[name][field]

View File

@ -107,6 +107,7 @@ class BlueprintsTest(TestCase):
assert isinstance(js, models.Error) assert isinstance(js, models.Error)
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=notvalid") resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=notvalid")
self.assertCode(resp, 400) self.assertCode(resp, 400)
self.app.config['TESTING'] = True
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=a") resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=a")
self.assertCode(resp, 200) self.assertCode(resp, 200)
@ -148,6 +149,53 @@ class BlueprintsTest(TestCase):
assert len(js['analysis']) == 1 assert len(js['analysis']) == 1
assert js['entries'][0]['nif:isString'] == 'My aloha mohame' assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
def test_requirements_chain_help(self):
    """The help of a chain should include the extra parameters of its plugins."""
    response = self.client.get("/api/split/DummyRequired?help=true")
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert 'valid_parameters' in payload
    valid = payload['valid_parameters']
    assert 'example' in valid
def test_requirements_chain_repeat_help(self):
    """
    When the same plugin is used more than once in a chain, the help should
    offer its parameters both per position in the chain and under the bare
    name, so that each occurrence can be configured separately.
    """
    response = self.client.get("/api/split/split?help=true")
    self.assertCode(response, 200)
    payload = parse_resp(response)
    assert 'valid_parameters' in payload
    valid = payload['valid_parameters']
    for name in ('0.delimiter', '1.delimiter', 'delimiter'):
        assert name in valid
def test_requirements_chain(self):
    """
    Each step of a chain should accept its own parameters.
    """
    # A bare parameter applies to both steps: the text is split by sentence
    # twice. The assertions expect 7 entries, i.e. the original entry plus
    # 3 additional entries per step.
    shared = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
                             '\nA new paragraph&delimiter=sentence')
    shared_js = parse_resp(shared)
    assert len(shared_js['analysis']) == 2
    assert len(shared_js['entries']) == 7

    # Index-qualified parameters configure each step separately: the first
    # step splits by sentence (3 additional entries) and the second one by
    # paragraph (2 additional entries, one per paragraph of the original text).
    separate = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
                               '\nA new paragraph&0.delimiter=sentence&1.delimiter=paragraph')
    self.assertCode(separate, 200)
    separate_js = parse_resp(separate)
    assert len(separate_js['analysis']) == 2
    assert len(separate_js['entries']) == 6
def test_error(self): def test_error(self):
""" """
The dummy plugin returns an empty response,\ The dummy plugin returns an empty response,\