mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 08:12:27 +00:00
426 lines
13 KiB
Python
426 lines
13 KiB
Python
#
|
|
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
from future.utils import iteritems
|
|
from .models import Error, Results, Entry, from_string
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
boolean = [True, False]
|
|
|
|
processors = {
|
|
'string_to_tuple': lambda p: p if isinstance(p, (tuple, list)) else tuple(p.split(','))
|
|
}
|
|
|
|
API_PARAMS = {
|
|
"algorithm": {
|
|
"aliases": ["algorithms", "a", "algo"],
|
|
"required": True,
|
|
"default": 'default',
|
|
"processor": 'string_to_tuple',
|
|
"description": ("Algorithms that will be used to process the request."
|
|
"It may be a list of comma-separated names."),
|
|
},
|
|
"expanded-jsonld": {
|
|
"@id": "expanded-jsonld",
|
|
"description": "use JSON-LD expansion to get full URIs",
|
|
"aliases": ["expanded", "expanded_jsonld"],
|
|
"options": boolean,
|
|
"required": True,
|
|
"default": False
|
|
},
|
|
"with-parameters": {
|
|
"aliases": ['withparameters',
|
|
'with_parameters'],
|
|
"description": "include initial parameters in the response",
|
|
"options": boolean,
|
|
"default": False,
|
|
"required": True
|
|
},
|
|
"outformat": {
|
|
"@id": "outformat",
|
|
"aliases": ["o"],
|
|
"default": "json-ld",
|
|
"description": """The data can be semantically formatted (JSON-LD, turtle or n-triples),
|
|
given as a list of comma-separated fields (see the fields option) or constructed from a Jinja2
|
|
template (see the template option).""",
|
|
"required": True,
|
|
"options": ["json-ld", "turtle", "ntriples"],
|
|
},
|
|
"template": {
|
|
"@id": "template",
|
|
"required": False,
|
|
"description": """Jinja2 template for the result. The input data for the template will
|
|
be the results as a dictionary.
|
|
For example:
|
|
|
|
Consider the results before templating:
|
|
|
|
```
|
|
[{
|
|
"@type": "entry",
|
|
"onyx:hasEmotionSet": [],
|
|
"nif:isString": "testing the template",
|
|
"marl:hasOpinion": [
|
|
{
|
|
"@type": "sentiment",
|
|
"marl:hasPolarity": "marl:Positive"
|
|
}
|
|
]
|
|
}]
|
|
```
|
|
|
|
|
|
And the template:
|
|
|
|
```
|
|
{% for entry in entries %}
|
|
{{ entry["nif:isString"] | upper }},{{entry.sentiments[0]["marl:hasPolarity"].split(":")[1]}}
|
|
{% endfor %}
|
|
```
|
|
|
|
The final result would be:
|
|
|
|
```
|
|
TESTING THE TEMPLATE,Positive
|
|
```
|
|
"""
|
|
|
|
},
|
|
"fields": {
|
|
"@id": "fields",
|
|
"required": False,
|
|
"description": """A jmespath selector, that can be used to extract a new dictionary, array or value
|
|
from the results.
|
|
jmespath is a powerful query language for json and/or dictionaries.
|
|
It allows you to change the structure (and data) of your objects through queries.
|
|
|
|
e.g., the following expression gets a list of `[emotion label, intensity]` for each entry:
|
|
`entries[]."onyx:hasEmotionSet"[]."onyx:hasEmotion"[]["onyx:hasEmotionCategory","onyx:hasEmotionIntensity"]`
|
|
|
|
For more information, see: https://jmespath.org
|
|
|
|
"""
|
|
},
|
|
"help": {
|
|
"@id": "help",
|
|
"description": "Show additional help to know more about the possible parameters",
|
|
"aliases": ["h"],
|
|
"required": True,
|
|
"options": boolean,
|
|
"default": False
|
|
},
|
|
"verbose": {
|
|
"@id": "verbose",
|
|
"description": "Show all properties in the result",
|
|
"aliases": ["v"],
|
|
"required": True,
|
|
"options": boolean,
|
|
"default": False
|
|
},
|
|
"aliases": {
|
|
"@id": "aliases",
|
|
"description": "Replace JSON properties with their aliases",
|
|
"aliases": [],
|
|
"required": True,
|
|
"options": boolean,
|
|
"default": False
|
|
},
|
|
"emotion-model": {
|
|
"@id": "emotionModel",
|
|
"description": """Emotion model to use in the response.
|
|
Senpy will try to convert the output to this model automatically.
|
|
|
|
Examples: `wna:liking` and `emoml:big6`.
|
|
""",
|
|
"aliases": ["emoModel", "emotionModel"],
|
|
"required": False
|
|
},
|
|
"conversion": {
|
|
"@id": "conversion",
|
|
"description": """How to show the elements that have (not) been converted.
|
|
|
|
* full: converted and original elements will appear side-by-side
|
|
* filtered: only converted elements will be shown
|
|
* nested: converted elements will be shown, and they will include a link to the original element
|
|
(using `prov:wasGeneratedBy`).
|
|
""",
|
|
"required": True,
|
|
"options": ["filtered", "nested", "full"],
|
|
"default": "full"
|
|
}
|
|
}
|
|
|
|
EVAL_PARAMS = {
|
|
"algorithm": {
|
|
"aliases": ["plug", "p", "plugins", "algorithms", 'algo', 'a', 'plugin'],
|
|
"description": "Plugins to evaluate",
|
|
"required": True,
|
|
"help": "See activated plugins in /plugins",
|
|
"processor": API_PARAMS['algorithm']['processor']
|
|
},
|
|
"dataset": {
|
|
"aliases": ["datasets", "data", "d"],
|
|
"description": "Datasets to be evaluated",
|
|
"required": True,
|
|
"help": "See avalaible datasets in /datasets"
|
|
}
|
|
}
|
|
|
|
PLUGINS_PARAMS = {
|
|
"plugin-type": {
|
|
"@id": "pluginType",
|
|
"description": 'What kind of plugins to list',
|
|
"aliases": ["pluginType", "plugin_type"],
|
|
"required": True,
|
|
"default": 'analysisPlugin'
|
|
}
|
|
}
|
|
|
|
WEB_PARAMS = {
|
|
"in-headers": {
|
|
"aliases": ["headers", "inheaders", "inHeaders", "in-headers", "in_headers"],
|
|
"description": "Only include the JSON-LD context in the headers",
|
|
"required": True,
|
|
"default": False,
|
|
"options": boolean
|
|
},
|
|
}
|
|
|
|
CLI_PARAMS = {
|
|
"plugin-folder": {
|
|
"aliases": ["folder", "plugin_folder"],
|
|
"required": True,
|
|
"default": "."
|
|
},
|
|
}
|
|
|
|
NIF_PARAMS = {
|
|
"input": {
|
|
"@id": "input",
|
|
"aliases": ["i"],
|
|
"required": True,
|
|
"help": "Input text"
|
|
},
|
|
"intype": {
|
|
"@id": "intype",
|
|
"description": "input type",
|
|
"aliases": ["t"],
|
|
"required": False,
|
|
"default": "direct",
|
|
"options": ["direct", "url", "file"],
|
|
},
|
|
"informat": {
|
|
"@id": "informat",
|
|
"description": "input format",
|
|
"aliases": ["f"],
|
|
"required": False,
|
|
"default": "text",
|
|
"options": ["text", "json-ld"],
|
|
},
|
|
"language": {
|
|
"@id": "language",
|
|
"description": "language of the input",
|
|
"aliases": ["l"],
|
|
"required": False,
|
|
},
|
|
"prefix": {
|
|
"@id": "prefix",
|
|
"description": "prefix to use for new entities",
|
|
"aliases": ["p"],
|
|
"required": True,
|
|
"default": "",
|
|
},
|
|
"urischeme": {
|
|
"@id": "urischeme",
|
|
"description": "scheme for NIF URIs",
|
|
"aliases": ["u"],
|
|
"required": False,
|
|
"default": "RFC5147String",
|
|
"options": ["RFC5147String", ]
|
|
}
|
|
}
|
|
|
|
BUILTIN_PARAMS = {}
|
|
|
|
for d in [
|
|
NIF_PARAMS, CLI_PARAMS, WEB_PARAMS, PLUGINS_PARAMS, EVAL_PARAMS,
|
|
API_PARAMS
|
|
]:
|
|
for k, v in d.items():
|
|
BUILTIN_PARAMS[k] = v
|
|
|
|
|
|
def parse_params(indict, *specs):
|
|
if not specs:
|
|
specs = [NIF_PARAMS]
|
|
logger.debug("Parsing: {}\n{}".format(indict, specs))
|
|
outdict = indict.copy()
|
|
wrong_params = {}
|
|
for spec in specs:
|
|
for param, options in iteritems(spec):
|
|
for alias in options.get("aliases", []):
|
|
# Replace each alias with the correct name of the parameter
|
|
if alias in indict and alias != param:
|
|
outdict[param] = indict[alias]
|
|
del outdict[alias]
|
|
break
|
|
if param not in outdict:
|
|
if "default" in options:
|
|
# We assume the default is correct
|
|
outdict[param] = options["default"]
|
|
elif options.get("required", False):
|
|
wrong_params[param] = spec[param]
|
|
continue
|
|
if 'processor' in options:
|
|
outdict[param] = processors[options['processor']](outdict[param])
|
|
if "options" in options:
|
|
if options["options"] == boolean:
|
|
outdict[param] = str(outdict[param]).lower() in ['true', '1', '']
|
|
elif outdict[param] not in options["options"]:
|
|
wrong_params[param] = spec[param]
|
|
if wrong_params:
|
|
logger.debug("Error parsing: %s", wrong_params)
|
|
message = Error(
|
|
status=400,
|
|
message='Missing or invalid parameters',
|
|
parameters=outdict,
|
|
errors=wrong_params)
|
|
raise message
|
|
return outdict
|
|
|
|
|
|
def get_all_params(plugins, *specs):
|
|
'''Return a list of parameters for a given set of specifications and plugins.'''
|
|
dic = {}
|
|
for s in specs:
|
|
dic.update(s)
|
|
dic.update(get_extra_params(plugins))
|
|
return dic
|
|
|
|
|
|
def get_extra_params(plugins):
|
|
'''Get a list of possible parameters given a list of plugins'''
|
|
params = {}
|
|
extra_params = {}
|
|
for plugin in plugins:
|
|
this_params = plugin.get('extra_params', {})
|
|
for k, v in this_params.items():
|
|
if k not in extra_params:
|
|
extra_params[k] = {}
|
|
extra_params[k][plugin.name] = v
|
|
for k, v in extra_params.items(): # Resolve conflicts
|
|
if len(v) == 1: # Add the extra options that do not collide
|
|
params[k] = list(v.values())[0]
|
|
else:
|
|
required = False
|
|
aliases = None
|
|
options = None
|
|
default = None
|
|
nodefault = False # Set when defaults are not compatible
|
|
|
|
for plugin, opt in v.items():
|
|
params['{}.{}'.format(plugin, k)] = opt
|
|
required = required or opt.get('required', False)
|
|
newaliases = set(opt.get('aliases', []))
|
|
if aliases is None:
|
|
aliases = newaliases
|
|
else:
|
|
aliases = aliases & newaliases
|
|
if 'options' in opt:
|
|
newoptions = set(opt['options'])
|
|
options = newoptions if options is None else options & newoptions
|
|
if 'default' in opt:
|
|
newdefault = opt['default']
|
|
if newdefault:
|
|
if default is None and not nodefault:
|
|
default = newdefault
|
|
elif newdefault != default:
|
|
nodefault = True
|
|
default = None
|
|
# Check for incompatibilities
|
|
if options != set():
|
|
params[k] = {
|
|
'default': default,
|
|
'aliases': list(aliases),
|
|
'required': required,
|
|
'options': list(options)
|
|
}
|
|
return params
|
|
|
|
|
|
def parse_analyses(params, plugins):
|
|
'''
|
|
Parse the given parameters individually for each plugin, and get a list of the parameters that
|
|
belong to each of the plugins. Each item can then be used in the plugin.analyse_entries method.
|
|
'''
|
|
analysis_list = []
|
|
for i, plugin in enumerate(plugins):
|
|
if not plugin:
|
|
continue
|
|
this_params = filter_params(params, plugin, i)
|
|
parsed = parse_params(this_params, plugin.get('extra_params', {}))
|
|
analysis = plugin.activity(parsed)
|
|
analysis_list.append(analysis)
|
|
return analysis_list
|
|
|
|
|
|
def filter_params(params, plugin, ith=-1):
|
|
'''
|
|
Get the values within params that apply to a plugin.
|
|
More specific names override more general names, in this order:
|
|
|
|
<index_order>.parameter > <plugin.name>.parameter > parameter
|
|
|
|
|
|
Example:
|
|
|
|
>>> filter_params({'0.hello': True, 'hello': False}, Plugin(), 0)
|
|
{ '0.hello': True, 'hello': True}
|
|
|
|
'''
|
|
thisparams = {}
|
|
if ith >= 0:
|
|
ith = '{}.'.format(ith)
|
|
else:
|
|
ith = ""
|
|
for k, v in params.items():
|
|
if ith and k.startswith(str(ith)):
|
|
thisparams[k[len(ith):]] = v
|
|
elif k.startswith(plugin.name):
|
|
thisparams[k[len(plugin.name) + 1:]] = v
|
|
elif k not in thisparams:
|
|
thisparams[k] = v
|
|
return thisparams
|
|
|
|
|
|
def parse_call(params):
|
|
'''
|
|
Return a results object based on the parameters used in a call/request.
|
|
'''
|
|
params = parse_params(params, NIF_PARAMS)
|
|
if params['informat'] == 'text':
|
|
results = Results()
|
|
entry = Entry(nif__isString=params['input'], id='prefix:') # Use @base
|
|
results.entries.append(entry)
|
|
elif params['informat'] == 'json-ld':
|
|
results = from_string(params['input'], cls=Results)
|
|
else: # pragma: no cover
|
|
raise NotImplementedError('Informat {} is not implemented'.format(
|
|
params['informat']))
|
|
results.parameters = params
|
|
return results
|