1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-21 07:42:28 +00:00

New schema for parameters

* Improve extra requirement handling
* New mechanism to handle parameters beforehand in chained
  calls, and the ability to get help on available parameters in chained
  calls (through `?help`).
* Redefined Analysis, to reflect the new ontology
* Add parameters as an entity in the schema
* Update examples to include analyses and parameters
* Add processing plugins, with an interface similar to analysis plugins
* Update tests
* Avoid duplication in split plugin

Closes #51

Squashed commit of the following:

commit d145a852e7
commit 6a1069780b
commit ca69bddc17
commit aa35e62a27
This commit is contained in:
J. Fernando Sánchez 2018-12-07 18:30:05 +01:00
parent 41aa142ce0
commit 4ba30304a4
30 changed files with 717 additions and 353 deletions

View File

@ -6,13 +6,9 @@
],
"entries": [
{
"@type": [
"nif:RFC5147String",
"nif:Context"
],
"nif:beginIndex": 0,
"nif:endIndex": 40,
"nif:isString": "My favourite actress is Natalie Portman"
"text": "An entry should have a nif:isString key"
}
]
}

View File

@ -3,10 +3,21 @@
"@id": "me:Result1",
"@type": "results",
"analysis": [
"me:SAnalysis1",
"me:SgAnalysis1",
"me:EmotionAnalysis1",
"me:NER1"
{
"@id": "_:SAnalysis1_Activity",
"@type": "marl:SentimentAnalysis",
"prov:wasAssociatedWith": "me:SAnalysis1"
},
{
"@id": "_:EmotionAnalysis1_Activity",
"@type": "onyx:EmotionAnalysis",
"prov:wasAssociatedWith": "me:EmotionAnalysis1"
},
{
"@id": "_:NER1_Activity",
"@type": "me:NER",
"prov:wasAssociatedWith": "me:NER1"
}
],
"entries": [
{
@ -23,7 +34,7 @@
"nif:endIndex": 13,
"nif:anchorOf": "Microsoft",
"me:references": "http://dbpedia.org/page/Microsoft",
"prov:wasGeneratedBy": "me:NER1"
"prov:wasGeneratedBy": "_:NER1_Activity"
},
{
"@id": "http://micro.blog/status1#char=25,37",
@ -31,7 +42,7 @@
"nif:endIndex": 37,
"nif:anchorOf": "Windows Phone",
"me:references": "http://dbpedia.org/page/Windows_Phone",
"prov:wasGeneratedBy": "me:NER1"
"prov:wasGeneratedBy": "_:NER1_Activity"
}
],
"suggestions": [
@ -40,7 +51,7 @@
"nif:beginIndex": 16,
"nif:endIndex": 77,
"nif:anchorOf": "put your Windows Phone on your newest #open technology program",
"prov:wasGeneratedBy": "me:SgAnalysis1"
"prov:wasGeneratedBy": "_:SgAnalysis1_Activity"
}
],
"sentiments": [
@ -51,14 +62,14 @@
"nif:anchorOf": "You'll be awesome.",
"marl:hasPolarity": "marl:Positive",
"marl:polarityValue": 0.9,
"prov:wasGeneratedBy": "me:SAnalysis1"
"prov:wasGeneratedBy": "_:SAnalysis1_Activity"
}
],
"emotions": [
{
"@id": "http://micro.blog/status1#char=0,109",
"nif:anchorOf": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"prov:wasGeneratedBy": "me:EAnalysis1",
"prov:wasGeneratedBy": "_:EmotionAnalysis1_Activity",
"onyx:hasEmotion": [
{
"onyx:hasEmotionCategory": "wna:liking"

View File

@ -1,78 +0,0 @@
{
"@context": "http://mixedemotions-project.eu/ns/context.jsonld",
"@id": "me:Result1",
"@type": "results",
"analysis": [
"me:SAnalysis1",
"me:SgAnalysis1",
"me:EmotionAnalysis1",
"me:NER1",
{
"@type": "analysis",
"@id": "anonymous"
}
],
"entries": [
{
"@id": "http://micro.blog/status1",
"@type": [
"nif:RFC5147String",
"nif:Context"
],
"nif:isString": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"entities": [
{
"@id": "http://micro.blog/status1#char=5,13",
"nif:beginIndex": 5,
"nif:endIndex": 13,
"nif:anchorOf": "Microsoft",
"me:references": "http://dbpedia.org/page/Microsoft",
"prov:wasGeneratedBy": "me:NER1"
},
{
"@id": "http://micro.blog/status1#char=25,37",
"nif:beginIndex": 25,
"nif:endIndex": 37,
"nif:anchorOf": "Windows Phone",
"me:references": "http://dbpedia.org/page/Windows_Phone",
"prov:wasGeneratedBy": "me:NER1"
}
],
"suggestions": [
{
"@id": "http://micro.blog/status1#char=16,77",
"nif:beginIndex": 16,
"nif:endIndex": 77,
"nif:anchorOf": "put your Windows Phone on your newest #open technology program",
"prov:wasGeneratedBy": "me:SgAnalysis1"
}
],
"sentiments": [
{
"@id": "http://micro.blog/status1#char=80,97",
"nif:beginIndex": 80,
"nif:endIndex": 97,
"nif:anchorOf": "You'll be awesome.",
"marl:hasPolarity": "marl:Positive",
"marl:polarityValue": 0.9,
"prov:wasGeneratedBy": "me:SAnalysis1"
}
],
"emotions": [
{
"@id": "http://micro.blog/status1#char=0,109",
"nif:anchorOf": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"prov:wasGeneratedBy": "me:EAnalysis1",
"onyx:hasEmotion": [
{
"onyx:hasEmotionCategory": "wna:liking"
},
{
"onyx:hasEmotionCategory": "wna:excitement"
}
]
}
]
}
]
}

View File

@ -1,19 +1,18 @@
{
"@context": "http://mixedemotions-project.eu/ns/context.jsonld",
"@id": "http://example.com#NIFExample",
"@type": "results",
"analysis": [
],
"entries": [
{
"@id": "http://example.org#char=0,40",
"@type": [
"nif:RFC5147String",
"nif:Context"
],
"nif:beginIndex": 0,
"nif:endIndex": 40,
"nif:isString": "My favourite actress is Natalie Portman"
}
]
"@context": "http://mixedemotions-project.eu/ns/context.jsonld",
"@id": "me:Result1",
"@type": "results",
"analysis": [ ],
"entries": [
{
"@id": "http://example.org#char=0,40",
"@type": [
"nif:RFC5147String",
"nif:Context"
],
"nif:beginIndex": 0,
"nif:endIndex": 40,
"nif:isString": "My favourite actress is Natalie Portman"
}
]
}

View File

@ -1,88 +1,100 @@
{
"@context": "http://mixedemotions-project.eu/ns/context.jsonld",
"@id": "me:Result1",
"@type": "results",
"analysis": [
{
"@id": "me:SAnalysis1",
"@type": "marl:SentimentAnalysis",
"marl:maxPolarityValue": 1,
"marl:minPolarityValue": 0
},
{
"@id": "me:SgAnalysis1",
"@type": "me:SuggestionAnalysis"
},
{
"@id": "me:EmotionAnalysis1",
"@type": "me:EmotionAnalysis"
},
{
"@id": "me:NER1",
"@type": "me:NER"
}
],
"entries": [
{
"@id": "http://micro.blog/status1",
"@type": [
"nif:RFC5147String",
"nif:Context"
],
"nif:isString": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"entities": [
"@context": "http://mixedemotions-project.eu/ns/context.jsonld",
"@id": "me:Result1",
"@type": "results",
"analysis": [
{
"@id": "http://micro.blog/status1#char=5,13",
"nif:beginIndex": 5,
"nif:endIndex": 13,
"nif:anchorOf": "Microsoft",
"me:references": "http://dbpedia.org/page/Microsoft",
"prov:wasGeneratedBy": "me:NER1"
"@id": "_:SAnalysis1_Activity",
"@type": "marl:SentimentAnalysis",
"prov:wasAssociatedWith": "me:SentimentAnalysis",
"prov:used": [
{
"name": "marl:maxPolarityValue",
"prov:value": "1"
},
{
"name": "marl:minPolarityValue",
"prov:value": "0"
}
]
},
{
"@id": "http://micro.blog/status1#char=25,37",
"nif:beginIndex": 25,
"nif:endIndex": 37,
"nif:anchorOf": "Windows Phone",
"me:references": "http://dbpedia.org/page/Windows_Phone",
"prov:wasGeneratedBy": "me:NER1"
}
],
"suggestions": [
"@id": "_:SgAnalysis1_Activity",
"prov:wasAssociatedWith": "me:SgAnalysis1",
"@type": "me:SuggestionAnalysis"
},
{
"@id": "http://micro.blog/status1#char=16,77",
"nif:beginIndex": 16,
"nif:endIndex": 77,
"nif:anchorOf": "put your Windows Phone on your newest #open technology program",
"prov:wasGeneratedBy": "me:SgAnalysis1"
}
],
"sentiments": [
"@id": "_:EmotionAnalysis1_Activity",
"@type": "me:EmotionAnalysis",
"prov:wasAssociatedWith": "me:EmotionAnalysis1"
},
{
"@id": "http://micro.blog/status1#char=80,97",
"nif:beginIndex": 80,
"nif:endIndex": 97,
"nif:anchorOf": "You'll be awesome.",
"marl:hasPolarity": "marl:Positive",
"marl:polarityValue": 0.9,
"prov:wasGeneratedBy": "me:SAnalysis1"
"@id": "_:NER1_Activity",
"@type": "me:NER",
"prov:wasAssociatedWith": "me:NER1"
}
],
"emotions": [
],
"entries": [
{
"@id": "http://micro.blog/status1#char=0,109",
"nif:anchorOf": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"prov:wasGeneratedBy": "me:EAnalysis1",
"onyx:hasEmotion": [
{
"onyx:hasEmotionCategory": "wna:liking"
},
{
"onyx:hasEmotionCategory": "wna:excitement"
}
]
"@id": "http://micro.blog/status1",
"@type": [
"nif:RFC5147String",
"nif:Context"
],
"nif:isString": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"entities": [
{
"@id": "http://micro.blog/status1#char=5,13",
"nif:beginIndex": 5,
"nif:endIndex": 13,
"nif:anchorOf": "Microsoft",
"me:references": "http://dbpedia.org/page/Microsoft",
"prov:wasGeneratedBy": "me:NER1"
},
{
"@id": "http://micro.blog/status1#char=25,37",
"nif:beginIndex": 25,
"nif:endIndex": 37,
"nif:anchorOf": "Windows Phone",
"me:references": "http://dbpedia.org/page/Windows_Phone",
"prov:wasGeneratedBy": "me:NER1"
}
],
"suggestions": [
{
"@id": "http://micro.blog/status1#char=16,77",
"nif:beginIndex": 16,
"nif:endIndex": 77,
"nif:anchorOf": "put your Windows Phone on your newest #open technology program",
"prov:wasGeneratedBy": "me:SgAnalysis1"
}
],
"sentiments": [
{
"@id": "http://micro.blog/status1#char=80,97",
"nif:beginIndex": 80,
"nif:endIndex": 97,
"nif:anchorOf": "You'll be awesome.",
"marl:hasPolarity": "marl:Positive",
"marl:polarityValue": 0.9,
"prov:wasGeneratedBy": "me:SAnalysis1"
}
],
"emotions": [
{
"@id": "http://micro.blog/status1#char=0,109",
"nif:anchorOf": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"prov:wasGeneratedBy": "me:EAnalysis1",
"onyx:hasEmotion": [
{
"onyx:hasEmotionCategory": "wna:liking"
},
{
"onyx:hasEmotionCategory": "wna:excitement"
}
]
}
]
}
]
}
]
]
}

View File

@ -4,8 +4,9 @@
"@type": "results",
"analysis": [
{
"@id": "me:EmotionAnalysis1",
"@type": "onyx:EmotionAnalysis"
"@id": "_:EmotionAnalysis1_Activity",
"@type": "onyx:EmotionAnalysis",
"prov:wasAssociatedWith": "me:EmotionAnalysis1"
}
],
"entries": [
@ -26,7 +27,7 @@
{
"@id": "http://micro.blog/status1#char=0,109",
"nif:anchorOf": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"prov:wasGeneratedBy": "me:EmotionAnalysis1",
"prov:wasGeneratedBy": "_:EmotionAnalysis1_Activity",
"onyx:hasEmotion": [
{
"onyx:hasEmotionCategory": "wna:liking"

View File

@ -4,8 +4,9 @@
"@type": "results",
"analysis": [
{
"@id": "me:NER1",
"@type": "me:NERAnalysis"
"@id": "_:NER1_Activity",
"@type": "me:NERAnalysis",
"prov:wasAssociatedWith": "me:NER1"
}
],
"entries": [

View File

@ -9,9 +9,15 @@
"@type": "results",
"analysis": [
{
"@id": "me:HesamsAnalysis",
"@id": "me:HesamsAnalysis_Activity",
"@type": "onyx:EmotionAnalysis",
"onyx:usesEmotionModel": "emovoc:pad-dimensions"
"prov:wasAssociatedWith": "me:HesamsAnalysis",
"prov:used": [
{
"name": "emotion-model",
"prov:value": "emovoc:pad-dimensions"
}
]
}
],
"entries": [
@ -32,7 +38,7 @@
{
"@id": "Entry1#char=0,21",
"nif:anchorOf": "This is a test string",
"prov:wasGeneratedBy": "me:HesamAnalysis",
"prov:wasGeneratedBy": "me:HesamsAnalysis_Activity",
"onyx:hasEmotion": [
{
"emovoc:pleasure": 0.5,

View File

@ -4,10 +4,9 @@
"@type": "results",
"analysis": [
{
"@id": "me:SAnalysis1",
"@id": "_:SAnalysis1_Activity",
"@type": "marl:SentimentAnalysis",
"marl:maxPolarityValue": 1,
"marl:minPolarityValue": 0
"prov:wasAssociatedWith": "me:SAnalysis1"
}
],
"entries": [
@ -30,7 +29,7 @@
"nif:anchorOf": "You'll be awesome.",
"marl:hasPolarity": "marl:Positive",
"marl:polarityValue": 0.9,
"prov:wasGeneratedBy": "me:SAnalysis1"
"prov:wasGeneratedBy": "_:SAnalysis1_Activity"
}
],
"emotionSets": [

View File

@ -3,7 +3,11 @@
"@id": "me:Result1",
"@type": "results",
"analysis": [
"me:SgAnalysis1"
{
"@id": "_:SgAnalysis1_Activity",
"@type": "me:SuggestionAnalysis",
"prov:wasAssociatedWith": "me:SgAnalysis1"
}
],
"entries": [
{
@ -12,7 +16,6 @@
"nif:RFC5147String",
"nif:Context"
],
"prov:wasGeneratedBy": "me:SAnalysis1",
"nif:isString": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. #opensource",
"entities": [
],
@ -22,7 +25,7 @@
"nif:beginIndex": 16,
"nif:endIndex": 77,
"nif:anchorOf": "put your Windows Phone on your newest #open technology program",
"prov:wasGeneratedBy": "me:SgAnalysis1"
"prov:wasGeneratedBy": "_:SgAnalysis1_Activity"
}
],
"sentiments": [

View File

@ -8,7 +8,8 @@ boolean = [True, False]
API_PARAMS = {
"algorithm": {
"aliases": ["algorithms", "a", "algo"],
"required": False,
"required": True,
"default": 'default',
"description": ("Algorithms that will be used to process the request."
"It may be a list of comma-separated names."),
},
@ -41,6 +42,15 @@ API_PARAMS = {
"options": boolean,
"default": False
},
"verbose": {
"@id": "verbose",
"description": ("Show all help, including the common API parameters, or "
"only plugin-related info"),
"aliases": ["v"],
"required": True,
"options": boolean,
"default": True
},
"emotionModel": {
"@id": "emotionModel",
"aliases": ["emoModel"],
@ -168,8 +178,7 @@ def parse_params(indict, *specs):
outdict[param] = options["default"]
elif options.get("required", False):
wrong_params[param] = spec[param]
continue
if "options" in options:
elif "options" in options:
if options["options"] == boolean:
outdict[param] = str(outdict[param]).lower() in ['true', '1']
elif outdict[param] not in options["options"]:
@ -182,29 +191,116 @@ def parse_params(indict, *specs):
parameters=outdict,
errors=wrong_params)
raise message
if 'algorithm' in outdict and not isinstance(outdict['algorithm'], tuple):
outdict['algorithm'] = tuple(outdict['algorithm'].split(','))
return outdict
def parse_extra_params(request, plugins=None):
plugins = plugins or []
params = request.parameters.copy()
def get_all_params(plugins, *specs):
    '''
    Build a single dictionary with every parameter that applies to a call.

    The specifications in `specs` are merged in the order given (later ones
    win on duplicate keys), and the extra parameters of `plugins`, as returned
    by get_extra_params, are merged last.
    '''
    merged = {}
    for spec in specs:
        merged.update(spec)
    plugin_params = get_extra_params(plugins)
    merged.update(plugin_params)
    return merged
def get_extra_params(plugins):
'''Get a list of possible parameters given a list of plugins'''
params = {}
extra_params = {}
for plugin in plugins:
if plugin:
extra_params = parse_params(params, plugin.get('extra_params', {}))
for k, v in extra_params.items():
if k not in BUILTIN_PARAMS:
if k in params: # Set by another plugin
del params[k]
else:
params[k] = v
params['{}.{}'.format(plugin.name, k)] = v
this_params = plugin.get('extra_params', {})
for k, v in this_params.items():
if k not in extra_params:
extra_params[k] = {}
extra_params[k][plugin.name] = v
for k, v in extra_params.items(): # Resolve conflicts
if len(v) == 1: # Add the extra options that do not collide
params[k] = list(v.values())[0]
else:
required = False
aliases = None
options = None
default = None
nodefault = False # Set when defaults are not compatible
for plugin, opt in v.items():
params['{}.{}'.format(plugin, k)] = opt
required = required or opt.get('required', False)
newaliases = set(opt.get('aliases', []))
if aliases is None:
aliases = newaliases
else:
aliases = aliases & newaliases
if 'options' in opt:
newoptions = set(opt['options'])
options = newoptions if options is None else options & newoptions
if 'default' in opt:
newdefault = opt['default']
if newdefault:
if default is None and not nodefault:
default = newdefault
elif newdefault != default:
nodefault = True
default = None
# Check for incompatibilities
if options != set():
params[k] = {
'default': default,
'aliases': list(aliases),
'required': required,
'options': list(options)
}
return params
def parse_analysis(params, plugins):
    '''
    Build one analysis (activity) per plugin from the parameters of a call.

    For every non-empty plugin, the parameters that apply to it are selected
    with filter_params (using its position in `plugins`), validated against the
    plugin's `extra_params` with parse_params, and turned into an activity
    through plugin.activity. Falsy plugins are skipped.

    Returns the list of activities, in the same order as the plugins.
    '''
    activities = []
    for position, plugin in enumerate(plugins):
        if plugin:
            own_params = filter_params(params, plugin, position)
            spec = plugin.get('extra_params', {})
            validated = parse_params(own_params, spec)
            activities.append(plugin.activity(validated))
    return activities
def filter_params(params, plugin, ith=-1):
    '''
    Get the values within params that apply to a plugin.

    Keys may be prefixed with the position of the plugin in the chain
    (`<index_order>.parameter`) or with its name (`<plugin.name>.parameter`).
    The prefix is removed in the result. More specific names override more
    general names, in this order, regardless of the order of the keys in
    `params`:

        <index_order>.parameter > <plugin.name>.parameter > parameter

    Keys that are not prefixed for this plugin (including keys prefixed for
    other plugins) are kept unchanged.

    Example, for the plugin in position 0:

        filter_params({'0.hello': True, 'hello': False}, plugin, 0)
        returns {'hello': True}

    :param params: dictionary with the parameters of the call.
    :param plugin: object with a `name` attribute.
    :param ith: position of the plugin in the chain. Negative values disable
                the index prefix.
    :return: a new dictionary; `params` is not modified.
    '''
    index_prefix = '{}.'.format(ith) if ith >= 0 else ''
    # The separator is part of the prefix, so a parameter whose name merely
    # starts with the plugin name (e.g. "split" and "splitter") is not mangled
    name_prefix = '{}.'.format(plugin.name)

    general = {}
    by_name = {}
    by_index = {}
    for k, v in params.items():
        if index_prefix and k.startswith(index_prefix):
            by_index[k[len(index_prefix):]] = v
        elif k.startswith(name_prefix):
            by_name[k[len(name_prefix):]] = v
        else:
            general[k] = v

    # Apply from least to most specific, so precedence does not depend on the
    # iteration order of params
    thisparams = general
    thisparams.update(by_name)
    thisparams.update(by_index)
    return thisparams
def parse_call(params):
'''Return a results object based on the parameters used in a call/request.
'''
Return a results object based on the parameters used in a call/request.
'''
params = parse_params(params, NIF_PARAMS)
if params['informat'] == 'text':

View File

@ -188,17 +188,27 @@ def basic_api(f):
@api_blueprint.route('/<path:plugin>', methods=['POST', 'GET'])
@basic_api
def api_root(plugin):
if plugin:
if request.parameters['algorithm'] != api.API_PARAMS['algorithm']['default']:
raise Error('You cannot specify the algorithm with a parameter and a URL variable.'
' Please, remove one of them')
request.parameters['algorithm'] = tuple(plugin.replace('+', '/').split('/'))
plugin = request.parameters['algorithm']
sp = current_app.senpy
plugins = sp.get_plugins(plugin)
if request.parameters['help']:
dic = dict(api.API_PARAMS, **api.NIF_PARAMS)
response = Help(valid_parameters=dic)
apis = []
if request.parameters['verbose']:
apis.append(api.BUILTIN_PARAMS)
allparameters = api.get_all_params(plugins, *apis)
response = Help(valid_parameters=allparameters)
return response
req = api.parse_call(request.parameters)
if plugin:
plugin = plugin.replace('+', '/')
plugin = plugin.split('/')
req.parameters['algorithm'] = tuple(plugin)
results = current_app.senpy.analyse(req)
results.analysis = set(i.id for i in results.analysis)
analysis = api.parse_analysis(req.parameters, plugins)
results = current_app.senpy.analyse(req, analysis)
return results

View File

@ -31,10 +31,10 @@ def main_function(argv):
default_plugins = params.get('default-plugins', False)
sp = Senpy(default_plugins=default_plugins, plugin_folder=plugin_folder)
request = api.parse_call(params)
algos = request.parameters.get('algorithm', None)
algos = sp.get_plugins(request.parameters.get('algorithm', None))
if algos:
for algo in algos:
sp.activate_plugin(algo)
sp.activate_plugin(algo.name)
else:
sp.activate_all()
res = sp.analyse(request)

View File

@ -78,27 +78,47 @@ class Senpy(object):
def delete_plugin(self, plugin):
del self._plugins[plugin.name.lower()]
def plugins(self, **kwargs):
def plugins(self, plugin_type=None, is_activated=True, **kwargs):
""" Return the plugins registered for a given application. Filtered by criteria """
return list(plugins.pfilter(self._plugins, **kwargs))
return list(plugins.pfilter(self._plugins, plugin_type=plugin_type,
is_activated=is_activated, **kwargs))
def get_plugin(self, name, default=None):
if name == 'default':
return self.default_plugin
plugin = name.lower()
if plugin in self._plugins:
return self._plugins[plugin]
elif name == 'conversion':
return None
results = self.plugins(id='endpoint:plugins/{}'.format(name))
if name.lower() in self._plugins:
return self._plugins[name.lower()]
if not results:
return Error(message="Plugin not found", status=404)
return results[0]
results = self.plugins(id='endpoint:plugins/{}'.format(name.lower()),
plugin_type=None)
if results:
return results[0]
results = self.plugins(id=name,
plugin_type=None)
if results:
return results[0]
msg = ("Plugin not found: '{}'\n"
"Make sure it is ACTIVATED\n"
"Valid algorithms: {}").format(name,
self._plugins.keys())
raise Error(message=msg, status=404)
def get_plugins(self, name):
    """
    Return a tuple with the plugin for each of the given names.

    `name` may be a string of comma-separated names, which is split, or any
    other iterable of names (anything without a `split` method is iterated
    as it is). Every name is resolved with self.get_plugin.
    """
    try:
        name = name.split(',')
    except AttributeError:
        pass  # Assume it is a tuple or a list
    return tuple(self.get_plugin(n) for n in name)
@property
def analysis_plugins(self):
""" Return only the analysis plugins """
return self.plugins(plugin_type='analysisPlugin')
""" Return only the analysis plugins that are active"""
return self.plugins(plugin_type='analysisPlugin', is_activated=True)
def add_folder(self, folder, from_root=False):
""" Find plugins in this folder and add them to this instance """
@ -113,37 +133,6 @@ class Senpy(object):
else:
raise AttributeError("Not a folder or does not exist: %s", folder)
def _get_plugins(self, request):
'''Get a list of plugins that should be run for a specific request'''
if not self.analysis_plugins:
raise Error(
status=404,
message=("No plugins found."
" Please install one."))
algos = request.parameters.get('algorithm', None)
if not algos:
if self.default_plugin:
algos = [self.default_plugin.name, ]
else:
raise Error(
status=404,
message="No default plugin found, and None provided")
plugins = list()
for algo in algos:
algo = algo.lower()
if algo == 'conversion':
continue # Allow 'conversion' as a virtual plugin, which does nothing
if algo not in self._plugins:
msg = ("The algorithm '{}' is not valid\n"
"Valid algorithms: {}").format(algo,
self._plugins.keys())
logger.debug(msg)
raise Error(status=404, message=msg)
plugins.append(self._plugins[algo])
return plugins
def _process(self, req, pending, done=None):
"""
Recursively process the entries with the first plugin in the list, and pass the results
@ -153,25 +142,31 @@ class Senpy(object):
if not pending:
return req
plugin = pending[0]
results = plugin.process(req, conversions_applied=done)
if plugin not in results.analysis:
results.analysis.append(plugin)
analysis = pending[0]
results = analysis.run(req)
results.analysis.append(analysis)
done += analysis
return self._process(results, pending[1:], done)
def install_deps(self):
plugins.install_deps(*self.plugins())
def analyse(self, request):
def analyse(self, request, analysis=None):
"""
Main method that analyses a request, either from CLI or HTTP.
It takes a processed request, provided by the user, as returned
by api.parse_call().
"""
if not self.plugins():
raise Error(
status=404,
message=("No plugins found."
" Please install one."))
if analysis is None:
plugins = self.get_plugins(request.parameters['algorithm'])
analysis = api.parse_analysis(request.parameters, plugins)
logger.debug("analysing request: {}".format(request))
plugins = self._get_plugins(request)
request.parameters = api.parse_extra_params(request, plugins)
results = self._process(request, plugins)
results = self._process(request, analysis)
logger.debug("Got analysis result: {}".format(results))
results = self.postprocess(results)
logger.debug("Returning post-processed result: {}".format(results))
@ -187,7 +182,10 @@ class Senpy(object):
"""
plugins = resp.analysis
params = resp.parameters
if 'parameters' not in resp:
return resp
params = resp['parameters']
toModel = params.get('emotionModel', None)
if not toModel:
return resp
@ -288,7 +286,10 @@ class Senpy(object):
results = AggregatedEvaluation()
results.parameters = params
datasets = self._get_datasets(results)
plugins = self._get_plugins(results)
plugins = []
for plugname in params.algorithm:
plugins = self.get_plugin(plugname)
for eval in plugins.evaluate(plugins, datasets):
results.evaluations.append(eval)
if 'with_parameters' not in results.parameters:

View File

@ -85,7 +85,8 @@ class BaseMeta(ABCMeta):
schema = json.load(f)
resolver = jsonschema.RefResolver(schema_path, schema)
attrs['@type'] = "".join((name[0].lower(), name[1:]))
if '@type' not in attrs:
attrs['@type'] = "".join((name[0].lower(), name[1:]))
attrs['_schema_file'] = schema_file
attrs['schema'] = schema
attrs['_validator'] = jsonschema.Draft4Validator(schema, resolver=resolver)
@ -244,10 +245,10 @@ class CustomDict(MutableMapping, object):
return key[0] == '_'
def __str__(self):
return str(self.serializable())
return json.dumps(self.serializable(), sort_keys=True, indent=4)
def __repr__(self):
return str(self.serializable())
return json.dumps(self.serializable(), sort_keys=True, indent=4)
_Alias = namedtuple('Alias', 'indict')

View File

@ -121,11 +121,11 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
'''
schema_file = DEFINITIONS_FILE
# schema_file = DEFINITIONS_FILE
_context = base_context["@context"]
def __init__(self, *args, **kwargs):
auto_id = kwargs.pop('_auto_id', True)
auto_id = kwargs.pop('_auto_id', False)
super(BaseModel, self).__init__(*args, **kwargs)
@ -133,7 +133,7 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
self.id
if '@type' not in self:
logger.warn('Created an instance of an unknown model')
logger.warning('Created an instance of an unknown model')
@property
def id(self):
@ -325,7 +325,6 @@ def _add_class_from_schema(*args, **kwargs):
for i in [
'aggregatedEvaluation',
'analysis',
'dataset',
'datasets',
'emotion',
@ -339,7 +338,7 @@ for i in [
'entity',
'help',
'metric',
'plugin',
'parameter',
'plugins',
'response',
'results',
@ -349,3 +348,54 @@ for i in [
]:
_add_class_from_schema(i)
class Analysis(BaseModel):
    '''
    Model of an analysis activity: a plugin together with the parameters used to run it.

    The id of the plugin is stored under the 'prov:wasAssociatedWith' key, and
    the parameters are accessed through `parameters`, declared below as an
    alias of 'prov:used'.
    '''
    schema = 'analysis'

    # Declared with the project's alias helper, pointing at the 'prov:used' key
    parameters = alias('prov:used')

    @property
    def params(self):
        '''Return a dictionary with the algorithm and a name -> value item for each parameter.'''
        outdict = {}
        outdict['algorithm'] = self.algorithm
        for param in self.parameters:
            # NOTE(review): the value is read with the 'value' key, whereas the schema and
            # examples use 'prov:value' -- presumably Parameter maps one to the other; confirm.
            outdict[param['name']] = param['value']
        return outdict

    @params.setter
    def params(self, value):
        '''Update the parameters from a dictionary, adding those that are not present yet.'''
        for k, v in value.items():
            for param in self.parameters:
                if param.name == k:
                    param.value = v
                    break
            else:
                # for/else: only reached when no existing parameter has this name
                self.parameters.append(Parameter(name=k, value=v))  # noqa: F821

    @property
    def algorithm(self):
        '''Value of 'prov:wasAssociatedWith', which the plugin setter fills with the plugin id.'''
        return self['prov:wasAssociatedWith']

    @property
    def plugin(self):
        '''The plugin object assigned to this analysis through the setter.'''
        return self._plugin

    @plugin.setter
    def plugin(self, value):
        '''Store the plugin object and record its id under 'prov:wasAssociatedWith'.'''
        self._plugin = value
        self['prov:wasAssociatedWith'] = value.id

    def run(self, request):
        '''Process the request with the plugin, passing the parameters of this analysis.'''
        return self.plugin.process(request, self.params)
class Plugin(BaseModel):
    '''Model for plugins, which uses the 'plugin' schema.'''
    schema = 'plugin'

    def activity(self, parameters):
        '''
        Generate a prov:Activity (an Analysis instance) from this plugin and the
        given parameters.
        '''
        a = Analysis()
        a.plugin = self
        a.params = parameters
        return a

View File

@ -17,7 +17,7 @@ import subprocess
import importlib
import yaml
import threading
import nltk
from nltk import download
from .. import models, utils
from .. import api
@ -132,12 +132,12 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
def deactivate(self):
pass
def process(self, request, **kwargs):
def process(self, request, parameters, **kwargs):
"""
An implemented plugin should override this method.
Here, we assume that a process_entries method exists."""
newentries = list(
self.process_entries(request.entries, request.parameters))
self.process_entries(request.entries, parameters))
request.entries = newentries
return request
@ -194,13 +194,13 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
try:
request = models.Response()
request.parameters = api.parse_params(given_parameters,
self.extra_params)
parameters = api.parse_params(given_parameters,
self.extra_params)
request.entries = [
entry,
]
method = partial(self.process, request)
method = partial(self.process, request, parameters)
if mock:
res = method()
@ -249,14 +249,14 @@ class Analysis(Plugin):
'''
def analyse(self, request, parameters):
return super(Analysis, self).process(request)
return super(Analysis, self).process(request, parameters)
def analyse_entries(self, entries, parameters):
for i in super(Analysis, self).process_entries(entries, parameters):
yield i
def process(self, request, **kwargs):
return self.analyse(request, request.parameters)
def process(self, request, parameters, **kwargs):
return self.analyse(request, parameters)
def process_entries(self, entries, parameters):
for i in self.analyse_entries(entries, parameters):
@ -279,12 +279,12 @@ class Conversion(Plugin):
e.g. a conversion of emotion models, or normalization of sentiment values.
'''
def process(self, response, plugins=None, **kwargs):
def process(self, response, parameters, plugins=None, **kwargs):
plugins = plugins or []
newentries = []
for entry in response.entries:
newentries.append(
self.convert_entry(entry, response.parameters, plugins))
self.convert_entry(entry, parameters, plugins))
response.entries = newentries
return response
@ -574,7 +574,7 @@ def install_deps(*plugins):
"Dependencies not properly installed: {}".format(pip_args))
nltk_resources |= set(info.get('nltk_resources', []))
installed |= nltk.download(list(nltk_resources))
installed |= download(list(nltk_resources))
return installed

View File

@ -9,7 +9,7 @@ class Split(AnalysisPlugin):
'''description: A sample plugin that chunks input text'''
author = ["@militarpancho", '@balkian']
version = '0.2'
version = '0.3'
url = "https://github.com/gsi-upm/senpy"
extra_params = {
@ -33,12 +33,15 @@ class Split(AnalysisPlugin):
if chunker_type == "paragraph":
tokenizer = LineTokenizer()
chars = list(tokenizer.span_tokenize(original_text))
for i, chunk in enumerate(tokenizer.tokenize(original_text)):
print(chunk)
if len(chars) == 1:
# This sentence was already split
return
for i, chunk in enumerate(chars):
start, end = chunk
e = Entry()
e['nif:isString'] = chunk
e['nif:isString'] = original_text[start:end]
if entry.id:
e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1])
e.id = entry.id + "#char={},{}".format(start, end)
yield e
test_cases = [

View File

@ -9,7 +9,20 @@
"@type": {
"type": "string",
"description": "Type of the analysis. e.g. marl:SentimentAnalysis"
},
"prov:wasAssociatedWith": {
"@type": "string",
"description": "Algorithm/plugin that was used"
},
"prov:used": {
"description": "Parameters of the algorithm",
"@type": "array",
"default": [],
"type": "array",
"items": {
"$ref": "parameter.json"
}
}
},
"required": ["@id", "@type"]
"required": ["@type", "prov:wasAssociatedWith"]
}

View File

@ -41,7 +41,7 @@
"@container": "@set"
},
"analysis": {
"@id": "AnalysisInvolved",
"@id": "prov:wasInformedBy",
"@type": "@id",
"@container": "@set"
},

View File

@ -20,5 +20,5 @@
"description": "The ID of the analysis that generated this Emotion. The full object should be included in the \"analysis\" property of the root object"
}
},
"required": ["@id", "prov:wasGeneratedBy", "onyx:hasEmotion"]
"required": ["prov:wasGeneratedBy", "onyx:hasEmotion"]
}

View File

@ -35,5 +35,5 @@
"default": []
}
},
"required": ["@id", "nif:isString"]
"required": ["nif:isString"]
}

View File

@ -0,0 +1,16 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "Parameters for a senpy analysis",
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Name of the parameter"
},
"prov:value": {
"@type": "any",
"description": "Value of the parameter"
}
},
"required": ["name", "prov:value"]
}

View File

@ -21,13 +21,7 @@
"default": [],
"type": "array",
"items": {
"anyOf": [
{
"$ref": "analysis.json"
},{
"type": "string"
}
]
"$ref": "analysis.json"
}
},
"entries": {

View File

@ -19,5 +19,5 @@
"description": "The ID of the analysis that generated this Sentiment. The full object should be included in the \"analysis\" property of the root object"
}
},
"required": ["@id", "prov:wasGeneratedBy"]
"required": ["prov:wasGeneratedBy"]
}

View File

@ -12,6 +12,7 @@ max-line-length = 100
universal=1
[tool:pytest]
addopts = --cov=senpy --cov-report term-missing
filterwarnings =
ignore:the matrix subclass:PendingDeprecationWarning
[coverage:report]
omit = senpy/__main__.py

View File

@ -3,8 +3,9 @@ import logging
logger = logging.getLogger(__name__)
from unittest import TestCase
from senpy.api import parse_params, API_PARAMS, NIF_PARAMS, WEB_PARAMS
from senpy.models import Error
from senpy.api import (boolean, parse_params, get_extra_params, parse_analysis,
API_PARAMS, NIF_PARAMS, WEB_PARAMS)
from senpy.models import Error, Plugin
class APITest(TestCase):
@ -89,3 +90,156 @@ class APITest(TestCase):
assert "Dummy" in p['algorithm']
assert 'input' in p
assert p['input'] == 'Aloha my friend'
def test_parse_analysis(self):
    '''The API should parse user parameters and return them in a format that plugins can use

    The fixture declares two plugins whose ``extra_params`` overlap in name
    (``param0``, ``param1``) and in aliases.  The ``call`` dictionary mixes
    un-prefixed keys (``param1``) with keys prefixed by the position of the
    plugin in the list (``0.param0``, ``1.param1``).  The test expects
    ``parse_analysis`` to return one item per plugin, each exposing a
    ``params`` mapping with the values resolved for that plugin.
    '''
    plugins = [
        Plugin({
            'name': 'plugin1',
            'extra_params': {
                # Incompatible parameter
                # (param0 and param1 of this plugin declare the same aliases)
                'param0': {
                    'aliases': ['p1', 'parameter1'],
                    'options': ['option1', 'option2'],
                    'default': 'option1',
                    'required': True
                },
                'param1': {
                    'aliases': ['p1', 'parameter1'],
                    'options': ['en', 'es'],
                    'default': 'en',
                    'required': False
                },
                'param2': {
                    'aliases': ['p2', 'parameter2'],
                    'required': False,
                    'options': ['value2_1', 'value2_2', 'value3_3']
                }
            }
        }), Plugin({
            'name': 'plugin2',
            'extra_params': {
                'param0': {
                    'aliases': ['parameter1'],
                    'options': ['new option', 'new option2'],
                    'default': 'new option',
                    'required': False
                },
                'param1': {
                    'aliases': ['myparam1', 'p1'],
                    'options': ['en', 'de', 'auto'],
                    'default': 'de',
                    'required': True
                },
                'param3': {
                    'aliases': ['p3', 'parameter3'],
                    'options': boolean,
                    'default': True
                }
            }
        })
    ]
    # Un-prefixed keys and "<index>.<name>" keys are both present for param0
    # and param1, so the expectations below pin which one wins per plugin.
    call = {
        'param1': 'en',
        '0.param0': 'option1',
        '0.param1': 'en',
        'param2': 'value2_1',
        'param0': 'new option',
        '1.param1': 'de',
        'param3': False,
    }
    # expected[i] holds the values the i-th plugin should receive.  Note that
    # the second plugin is expected to get param1 == 'de' (from '1.param1')
    # even though the un-prefixed 'param1' is 'en'.
    expected = [
        {
            'param0': 'option1',
            'param1': 'en',
            'param2': 'value2_1',
        }, {
            'param0': 'new option',
            'param1': 'de',
            'param3': False,
        }
    ]
    p = parse_analysis(call, plugins)
    # Only the keys listed in `expected` are checked; any additional entries
    # in the parsed params are not asserted on.
    for i, arg in enumerate(expected):
        params = p[i].params
        for k, v in arg.items():
            assert params[k] == v
def test_get_extra_params(self):
    '''The API should return the list of valid parameters for a set of plugins

    Two plugins with partially overlapping ``extra_params`` are passed to
    ``get_extra_params``.  The test expects the result to contain
    plugin-prefixed entries (``plugin1.param0``, ...) for the overlapping
    names, un-prefixed entries for the names that only one plugin declares,
    and a merged un-prefixed ``param1`` entry.
    '''
    plugins = [
        Plugin({
            'name': 'plugin1',
            'extra_params': {
                # Incompatible parameter
                # (param0 and param1 of this plugin declare the same aliases)
                'param0': {
                    'aliases': ['p1', 'parameter1'],
                    'options': ['option1', 'option2'],
                    'default': 'option1',
                    'required': True
                },
                'param1': {
                    'aliases': ['p1', 'parameter1'],
                    'options': ['en', 'es'],
                    'default': 'en',
                    'required': False
                },
                'param2': {
                    'aliases': ['p2', 'parameter2'],
                    'required': False,
                    'options': ['value2_1', 'value2_2', 'value3_3']
                }
            }
        }), Plugin({
            'name': 'plugin2',
            'extra_params': {
                'param0': {
                    'aliases': ['parameter1'],
                    'options': ['new option', 'new option2'],
                    'default': 'new option',
                    'required': False
                },
                'param1': {
                    'aliases': ['myparam1', 'p1'],
                    'options': ['en', 'de', 'auto'],
                    'default': 'de',
                    'required': True
                },
                'param3': {
                    'aliases': ['p3', 'parameter3'],
                    'options': boolean,
                    'default': True
                }
            }
        })
    ]
    expected = {
        # Overlapping parameters
        'plugin1.param0': plugins[0]['extra_params']['param0'],
        'plugin1.param1': plugins[0]['extra_params']['param1'],
        'plugin2.param0': plugins[1]['extra_params']['param0'],
        'plugin2.param1': plugins[1]['extra_params']['param1'],
        # Non-overlapping parameters
        'param2': plugins[0]['extra_params']['param2'],
        'param3': plugins[1]['extra_params']['param3'],
        # Intersection of overlapping parameters
        # (aliases/options common to both plugins; required because plugin2
        # requires it).  No expectation is stated for an un-prefixed param0.
        'param1': {
            'aliases': ['p1'],
            'options': ['en'],
            'default': None,
            'required': True
        }
    }
    result = get_extra_params(plugins)
    # Subset check: every expected parameter and every expected field of it
    # must be present in the result with the same value.  Extra keys in the
    # result are allowed.
    for ik, iv in expected.items():
        assert ik in result
        for jk, jv in iv.items():
            assert jk in result[ik]
            assert expected[ik][jk] == result[ik][jk]

View File

@ -26,8 +26,7 @@ class BlueprintsTest(TestCase):
cls.senpy.init_app(cls.app)
cls.dir = os.path.join(os.path.dirname(__file__), "..")
cls.senpy.add_folder(cls.dir)
cls.senpy.activate_plugin("Dummy", sync=True)
cls.senpy.activate_plugin("DummyRequired", sync=True)
cls.senpy.activate_all()
cls.senpy.default_plugin = 'Dummy'
def setUp(self):
@ -107,6 +106,7 @@ class BlueprintsTest(TestCase):
assert isinstance(js, models.Error)
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=notvalid")
self.assertCode(resp, 400)
self.app.config['TESTING'] = True
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired&example=a")
self.assertCode(resp, 200)
@ -138,16 +138,77 @@ class BlueprintsTest(TestCase):
# Calling dummy twice, should return the same string
self.assertCode(resp, 200)
js = parse_resp(resp)
assert len(js['analysis']) == 1
assert len(js['analysis']) == 2
assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
resp = self.client.get("/api/Dummy+Dummy?i=My aloha mohame")
# Same with pluses instead of slashes
self.assertCode(resp, 200)
js = parse_resp(resp)
assert len(js['analysis']) == 1
assert len(js['analysis']) == 2
assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
def test_analysis_chain_required(self):
    """
    If a parameter is required and duplicated (because two plugins require it), specifying
    it once should suffice
    """
    resp = self.client.get("/api/DummyRequired/DummyRequired?i=My aloha mohame&example=a")
    # Check the status first, like the sibling chain tests do, so a rejected
    # request is reported as a wrong HTTP code rather than a missing key below.
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    # One analysis per step of the chain, even though the plugin is repeated.
    assert len(js['analysis']) == 2
    assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
    # NOTE(review): presumably DummyRequired bumps 'reversed' once per run,
    # hence 2 after two steps -- confirm against the plugin.
    assert js['entries'][0]['reversed'] == 2
def test_requirements_chain_help(self):
    '''Asking for help on a chain should list the extra parameters of all its plugins'''
    response = self.client.get("/api/split/DummyRequired?help=true")
    self.assertCode(response, 200)
    body = parse_resp(response)
    assert 'valid_parameters' in body
    valid = body['valid_parameters']
    # Both names must show up in the merged help for the split/DummyRequired chain.
    for name in ('example', 'delimiter'):
        assert name in valid
def test_requirements_chain_repeat_help(self):
    '''
    If a plugin appears several times in a chain, the help output should still
    list its parameters, and the non-verbose help should list each parameter
    only once.
    '''
    resp = self.client.get("/api/split/split?help=true")
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    assert 'valid_parameters' in js
    vp = js['valid_parameters']
    assert 'delimiter' in vp

    resp = self.client.get("/api/split/split?help=true&verbose=false")
    # Same status check as for the first request, so a rejected call is
    # reported as such instead of failing on the key lookup below.
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    vp = js['valid_parameters']
    # With verbose=false exactly one parameter is expected for this chain.
    assert len(vp.keys()) == 1
def test_requirements_chain(self):
    """
    It should be possible to specify different parameters for each step in the chain.

    An un-prefixed parameter (``delimiter``) is used for the whole chain in the
    first request, whereas ``0.delimiter`` / ``1.delimiter`` target the first and
    second step respectively in the second request.
    """
    # First, we split by sentence twice. Each call should generate 3 additional entries
    # (one per sentence in the original).
    resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
                           '\nA new paragraph&delimiter=sentence')
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    assert len(js['analysis']) == 2
    # 1 original entry + 3 sentences from each of the two steps
    assert len(js['entries']) == 7

    # Now, we split by sentence. This produces 3 additional entries.
    # Then, we split by paragraph. This should create 2 additional entries (One per paragraph
    # in the original text)
    resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
                           '\nA new paragraph&0.delimiter=sentence&1.delimiter=paragraph')
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    assert len(js['analysis']) == 2
    # 1 original entry + 3 sentences + 2 paragraphs
    assert len(js['entries']) == 6
def test_error(self):
"""
The dummy plugin returns an empty response,\

View File

@ -3,10 +3,6 @@ import os
from copy import deepcopy
import logging
try:
from unittest import mock
except ImportError:
import mock
from functools import partial
from senpy.extensions import Senpy
@ -18,7 +14,8 @@ from unittest import TestCase
def analyse(instance, **kwargs):
request = api.parse_call(kwargs)
basic = api.parse_params(kwargs, api.API_PARAMS)
request = api.parse_call(basic)
return instance.analyse(request)
@ -49,9 +46,9 @@ class ExtensionsTest(TestCase):
'''Should be able to add and delete new plugins. '''
new = plugins.Analysis(name='new', description='new', version=0)
self.senpy.add_plugin(new)
assert new in self.senpy.plugins()
assert new in self.senpy.plugins(is_activated=False)
self.senpy.delete_plugin(new)
assert new not in self.senpy.plugins()
assert new not in self.senpy.plugins(is_activated=False)
def test_adding_folder(self):
""" It should be possible for senpy to look for plugins in more folders. """
@ -60,7 +57,7 @@ class ExtensionsTest(TestCase):
default_plugins=False)
assert not senpy.analysis_plugins
senpy.add_folder(self.examples_dir)
assert senpy.analysis_plugins
assert senpy.plugins(plugin_type=plugins.AnalysisPlugin, is_activated=False)
self.assertRaises(AttributeError, senpy.add_folder, 'DOES NOT EXIST')
def test_installing(self):
@ -121,8 +118,8 @@ class ExtensionsTest(TestCase):
# Leaf (defaultdict with __setattr__ and __getattr__.
r1 = analyse(self.senpy, algorithm="Dummy", input="tupni", output="tuptuo")
r2 = analyse(self.senpy, input="tupni", output="tuptuo")
assert r1.analysis[0].id == "endpoint:plugins/Dummy_0.1"
assert r2.analysis[0].id == "endpoint:plugins/Dummy_0.1"
assert r1.analysis[0].algorithm == "endpoint:plugins/Dummy_0.1"
assert r2.analysis[0].algorithm == "endpoint:plugins/Dummy_0.1"
assert r1.entries[0]['nif:isString'] == 'input'
def test_analyse_empty(self):
@ -130,7 +127,7 @@ class ExtensionsTest(TestCase):
senpy = Senpy(plugin_folder=None,
app=self.app,
default_plugins=False)
self.assertRaises(Error, senpy.analyse, Results())
self.assertRaises(Error, senpy.analyse, Results(), [])
def test_analyse_wrong(self):
""" Trying to analyse with a non-existent plugin should raise an error."""
@ -156,29 +153,32 @@ class ExtensionsTest(TestCase):
r2 = analyse(self.senpy,
input="tupni",
output="tuptuo")
assert r1.analysis[0].id == "endpoint:plugins/Dummy_0.1"
assert r2.analysis[0].id == "endpoint:plugins/Dummy_0.1"
assert r1.analysis[0].algorithm == "endpoint:plugins/Dummy_0.1"
assert r2.analysis[0].algorithm == "endpoint:plugins/Dummy_0.1"
assert r1.entries[0]['nif:isString'] == 'input'
def test_analyse_error(self):
mm = mock.MagicMock()
mm.id = 'magic_mock'
mm.name = 'mock'
mm.is_activated = True
mm.process.side_effect = Error('error in analysis', status=500)
self.senpy.add_plugin(mm)
class ErrorPlugin(plugins.Analysis):
author = 'nobody'
version = 0
ex = Error()
def process(self, *args, **kwargs):
raise self.ex
m = ErrorPlugin(ex=Error('error in analysis', status=500))
self.senpy.add_plugin(m)
try:
analyse(self.senpy, input='nothing', algorithm='MOCK')
analyse(self.senpy, input='nothing', algorithm='ErrorPlugin')
assert False
except Error as ex:
assert 'error in analysis' in ex['message']
assert ex['status'] == 500
ex = Exception('generic exception on analysis')
mm.process.side_effect = ex
m.ex = Exception('generic exception on analysis')
try:
analyse(self.senpy, input='nothing', algorithm='MOCK')
analyse(self.senpy, input='nothing', algorithm='ErrorPlugin')
assert False
except Exception as ex:
assert 'generic exception on analysis' in str(ex)
@ -194,7 +194,7 @@ class ExtensionsTest(TestCase):
def test_load_default_plugins(self):
senpy = Senpy(plugin_folder=self.examples_dir, default_plugins=True)
assert len(senpy.plugins()) > 1
assert len(senpy.plugins(is_activated=False)) > 1
def test_convert_emotions(self):
self.senpy.activate_all(sync=True)

View File

@ -5,7 +5,8 @@ import jsonschema
import json
import rdflib
from unittest import TestCase
from senpy.models import (Emotion,
from senpy.models import (Analysis,
Emotion,
EmotionAnalysis,
EmotionSet,
Entry,
@ -61,7 +62,7 @@ class ModelsTest(TestCase):
def test_id(self):
""" Adding the id after creation should overwrite the automatic ID
"""
r = Entry()
r = Entry(_auto_id=True)
j = r.jsonld()
assert '@id' in j
r.id = "test"
@ -189,6 +190,19 @@ class ModelsTest(TestCase):
assert isinstance(js['plugins'], list)
assert js['plugins'][0]['@type'] == 'sentimentPlugin'
def test_parameters(self):
    '''Assigning ``params`` to an Analysis should expose one parameter object per key'''
    wanted = {'param1': 1, 'param2': 2}
    analysis = Analysis()
    analysis.params = {'param1': 1, 'param2': 2}
    assert len(analysis.parameters) == 2
    for item in analysis.parameters:
        # Any name outside the two that were set is an error.
        if item.name not in wanted:
            raise Exception('Unknown value %s' % item)
        assert item.value == wanted[item.name]
def test_from_string(self):
results = {
'@type': 'results',