mirror of https://github.com/gsi-upm/senpy synced 2024-11-23 08:32:29 +00:00

Refactored conversion and postprocessing

J. Fernando Sánchez 2018-11-22 17:27:43 +01:00
parent b48730137d
commit 41aa142ce0
13 changed files with 486 additions and 199 deletions

View File

@@ -3,10 +3,8 @@ from .models import Error, Results, Entry, from_string
import logging

logger = logging.getLogger(__name__)

boolean = [True, False]

API_PARAMS = {
    "algorithm": {
        "aliases": ["algorithms", "a", "algo"],

@@ -140,6 +138,15 @@ NIF_PARAMS = {
    }
}

+BUILTIN_PARAMS = {}
+
+for d in [
+        NIF_PARAMS, CLI_PARAMS, WEB_PARAMS, PLUGINS_PARAMS, EVAL_PARAMS,
+        API_PARAMS
+]:
+    for k, v in d.items():
+        BUILTIN_PARAMS[k] = v
+

def parse_params(indict, *specs):
    if not specs:

@@ -164,7 +171,7 @@ def parse_params(indict, *specs):
                continue
            if "options" in options:
                if options["options"] == boolean:
-                   outdict[param] = outdict[param] in [None, True, 'true', '1']
+                   outdict[param] = str(outdict[param]).lower() in ['true', '1']
                elif outdict[param] not in options["options"]:
                    wrong_params[param] = spec[param]
    if wrong_params:
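
The new boolean handling goes through the value's string representation, so True, 'true' and '1' are accepted (case-insensitively) and anything else, including None, now maps to False. A minimal standalone sketch of the rule (not part of the patch itself):

    def parse_boolean(value):
        # mirrors the replaced line: only 'true' and '1' (case-insensitive) are truthy
        return str(value).lower() in ['true', '1']

    assert parse_boolean(True) is True
    assert parse_boolean('1') is True
    assert parse_boolean(None) is False  # previously None was treated as True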
@@ -180,11 +187,19 @@ def parse_params(indict, *specs):
    return outdict


-def parse_extra_params(request, plugin=None):
+def parse_extra_params(request, plugins=None):
+    plugins = plugins or []
    params = request.parameters.copy()
+    for plugin in plugins:
        if plugin:
            extra_params = parse_params(params, plugin.get('extra_params', {}))
-           params.update(extra_params)
+           for k, v in extra_params.items():
+               if k not in BUILTIN_PARAMS:
+                   if k in params:  # Set by another plugin
+                       del params[k]
+                   else:
+                       params[k] = v
+               params['{}.{}'.format(plugin.name, k)] = v
    return params
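
With the rewritten parse_extra_params, extra parameters are resolved per plugin and also stored under a namespaced '<plugin>.<parameter>' key; when several plugins in the pipeline set the same non-built-in parameter, only the namespaced copies survive. A rough illustration with hypothetical plugin names (plugin_a, plugin_b) that both declare an extra 'language' parameter with a default of 'en':

    # The user only sent 'input'; after parse_extra_params(request, [plugin_a, plugin_b])
    # the parameters would look roughly like this:
    expected = {
        'input': 'hello',
        'plugin_a.language': 'en',  # per-plugin namespaced copy
        'plugin_b.language': 'en',
        # the bare 'language' key is dropped because both plugins set it
    }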
@@ -194,12 +209,12 @@ def parse_call(params):
    params = parse_params(params, NIF_PARAMS)
    if params['informat'] == 'text':
        results = Results()
-       entry = Entry(nif__isString=params['input'],
-                     id='#')  # Use @base
+       entry = Entry(nif__isString=params['input'], id='#')  # Use @base
        results.entries.append(entry)
    elif params['informat'] == 'json-ld':
        results = from_string(params['input'], cls=Results)
    else:  # pragma: no cover
-       raise NotImplementedError('Informat {} is not implemented'.format(params['informat']))
+       raise NotImplementedError('Informat {} is not implemented'.format(
+           params['informat']))
    results.parameters = params
    return results

View File

@@ -197,7 +197,9 @@ def api_root(plugin):
        plugin = plugin.replace('+', '/')
        plugin = plugin.split('/')
        req.parameters['algorithm'] = tuple(plugin)
-       return current_app.senpy.analyse(req)
+       results = current_app.senpy.analyse(req)
+       results.analysis = set(i.id for i in results.analysis)
+       return results


@api_blueprint.route('/evaluate/', methods=['POST', 'GET'])
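
This is the endpoint that accepts plugin pipelines, so a call such as /api/Dummy+Dummy now reports each distinct plugin id once in the analysis field (see the updated blueprint test further down). A minimal client-side sketch, assuming a local instance on the default port:

    import requests

    # '+' (or '/') separates pipeline steps; Dummy is the plugin used in the test suite
    resp = requests.get('http://localhost:5000/api/Dummy+Dummy',
                        params={'i': 'My aloha mohame'})
    print(resp.json().get('analysis'))  # per the updated test, a single id for Dummy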

View File

@@ -6,7 +6,6 @@ from future import standard_library
standard_library.install_aliases()

from . import plugins, api
-from .plugins import Plugin, evaluate
from .models import Error, AggregatedEvaluation
from .blueprints import api_blueprint, demo_blueprint, ns_blueprint

@@ -17,7 +16,6 @@ import copy
import errno
import logging

from . import gsitk_compat

logger = logging.getLogger(__name__)

@@ -25,6 +23,7 @@ logger = logging.getLogger(__name__)
class Senpy(object):
    """ Default Senpy extension for Flask """

    def __init__(self,
                 app=None,
                 plugin_folder=".",

@@ -50,7 +49,7 @@ class Senpy(object):
            self.add_folder('plugins', from_root=True)
        else:
            # Add only conversion plugins
-           self.add_folder(os.path.join('plugins', 'conversion'),
+           self.add_folder(os.path.join('plugins', 'postprocessing'),
                            from_root=True)
        self.app = app
        if app is not None:

@@ -115,6 +114,7 @@ class Senpy(object):
            raise AttributeError("Not a folder or does not exist: %s", folder)

    def _get_plugins(self, request):
+       '''Get a list of plugins that should be run for a specific request'''
        if not self.analysis_plugins:
            raise Error(
                status=404,

@@ -132,33 +132,32 @@ class Senpy(object):
        plugins = list()
        for algo in algos:
            algo = algo.lower()
+           if algo == 'conversion':
+               continue  # Allow 'conversion' as a virtual plugin, which does nothing
            if algo not in self._plugins:
                msg = ("The algorithm '{}' is not valid\n"
                       "Valid algorithms: {}").format(algo,
                                                      self._plugins.keys())
                logger.debug(msg)
-               raise Error(
-                   status=404,
-                   message=msg)
+               raise Error(status=404, message=msg)
            plugins.append(self._plugins[algo])
        return plugins

-   def _process_entries(self, entries, req, plugins):
+   def _process(self, req, pending, done=None):
        """
        Recursively process the entries with the first plugin in the list, and pass the results
        to the rest of the plugins.
        """
-       if not plugins:
-           for i in entries:
-               yield i
-           return
-       plugin = plugins[0]
-       specific_params = api.parse_extra_params(req, plugin)
-       req.analysis.append({'plugin': plugin,
-                            'parameters': specific_params})
-       results = plugin.analyse_entries(entries, specific_params)
-       for i in self._process_entries(results, req, plugins[1:]):
-           yield i
+       done = done or []
+       if not pending:
+           return req
+       plugin = pending[0]
+       results = plugin.process(req, conversions_applied=done)
+       if plugin not in results.analysis:
+           results.analysis.append(plugin)
+       return self._process(results, pending[1:], done)

    def install_deps(self):
        plugins.install_deps(*self.plugins())
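
The recursive _process replaces the old per-entry generator: every plugin now receives and returns the whole request and registers itself in results.analysis. A simplified standalone sketch of the control flow (plugin objects are assumed to provide the new process() method):

    def run_pipeline(request, pending, done=None):
        # same shape as Senpy._process: consume the plugin list head-first
        done = done or []
        if not pending:
            return request
        plugin = pending[0]
        result = plugin.process(request, conversions_applied=done)
        if plugin not in result.analysis:
            result.analysis.append(plugin)
        return run_pipeline(result, pending[1:], done)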
@@ -170,17 +169,88 @@ class Senpy(object):
        by api.parse_call().
        """
        logger.debug("analysing request: {}".format(request))
-       entries = request.entries
-       request.entries = []
        plugins = self._get_plugins(request)
-       results = request
-       for i in self._process_entries(entries, results, plugins):
-           results.entries.append(i)
-       self.convert_emotions(results)
-       logger.debug("Returning analysis result: {}".format(results))
-       results.analysis = [i['plugin'].id for i in results.analysis]
+       request.parameters = api.parse_extra_params(request, plugins)
+       results = self._process(request, plugins)
+       logger.debug("Got analysis result: {}".format(results))
+       results = self.postprocess(results)
+       logger.debug("Returning post-processed result: {}".format(results))
        return results

+   def convert_emotions(self, resp):
+       """
+       Conversion of all emotions in a response **in place**.
+       In addition to converting from one model to another, it has
+       to include the conversion plugin to the analysis list.
+       Needless to say, this is far from an elegant solution, but it works.
+       @todo refactor and clean up
+       """
+       plugins = resp.analysis
+       params = resp.parameters
+       toModel = params.get('emotionModel', None)
+       if not toModel:
+           return resp
+
+       logger.debug('Asked for model: {}'.format(toModel))
+       output = params.get('conversion', None)
+       candidates = {}
+       for plugin in plugins:
+           try:
+               fromModel = plugin.get('onyx:usesEmotionModel', None)
+               candidates[plugin.id] = next(self._conversion_candidates(fromModel, toModel))
+               logger.debug('Analysis plugin {} uses model: {}'.format(
+                   plugin.id, fromModel))
+           except StopIteration:
+               e = Error(('No conversion plugin found for: '
+                          '{} -> {}'.format(fromModel, toModel)),
+                         status=404)
+               e.original_response = resp
+               e.parameters = params
+               raise e
+       newentries = []
+       done = []
+       for i in resp.entries:
+           if output == "full":
+               newemotions = copy.deepcopy(i.emotions)
+           else:
+               newemotions = []
+           for j in i.emotions:
+               plugname = j['prov:wasGeneratedBy']
+               candidate = candidates[plugname]
+               done.append({'plugin': candidate, 'parameters': params})
+               for k in candidate.convert(j, fromModel, toModel, params):
+                   k.prov__wasGeneratedBy = candidate.id
+                   if output == 'nested':
+                       k.prov__wasDerivedFrom = j
+                   newemotions.append(k)
+           i.emotions = newemotions
+           newentries.append(i)
+       resp.entries = newentries
+       return resp
+
+   def _conversion_candidates(self, fromModel, toModel):
+       candidates = self.plugins(plugin_type=plugins.EmotionConversion)
+       for candidate in candidates:
+           for pair in candidate.onyx__doesConversion:
+               logging.debug(pair)
+               if candidate.can_convert(fromModel, toModel):
+                   yield candidate
+
+   def postprocess(self, response):
+       '''
+       Transform the results from the analysis plugins.
+       It has some pre-defined post-processing like emotion conversion,
+       and it also allows plugins to auto-select themselves.
+       '''
+       response = self.convert_emotions(response)
+       for plug in self.plugins(plugin_type=plugins.PostProcessing):
+           if plug.check(response, response.analysis):
+               response = plug.process(response)
+       return response
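
The conversion parameter controls how converted emotions are attached: 'full' keeps the original annotations next to the converted ones, 'nested' links each converted emotion to its source via prov:wasDerivedFrom, and any other value (e.g. 'filtered') keeps only the converted set. A small sketch of the request parameters that drive this stage, mirroring the updated extensions test below:

    params = {
        'emotionModel': 'emoml:big6',  # target model for convert_emotions()
        'algorithm': ['conversion'],   # 'conversion' is accepted as a virtual plugin
        'conversion': 'full',          # or 'nested' / 'filtered'
    }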

    def _get_datasets(self, request):
        if not self.datasets:
            raise Error(

@@ -191,8 +261,8 @@ class Senpy(object):
        for dataset in datasets_name:
            if dataset not in self.datasets:
                logger.debug(("The dataset '{}' is not valid\n"
-                             "Valid datasets: {}").format(dataset,
-                                                          self.datasets.keys()))
+                             "Valid datasets: {}").format(
+                                 dataset, self.datasets.keys()))
                raise Error(
                    status=404,
                    message="The dataset '{}' is not valid".format(dataset))

@@ -219,77 +289,18 @@ class Senpy(object):
        results.parameters = params
        datasets = self._get_datasets(results)
        plugins = self._get_plugins(results)
-       for eval in evaluate(plugins, datasets):
+       for eval in plugins.evaluate(plugins, datasets):
            results.evaluations.append(eval)
        if 'with_parameters' not in results.parameters:
            del results.parameters
        logger.debug("Returning evaluation result: {}".format(results))
        return results

-   def _conversion_candidates(self, fromModel, toModel):
-       candidates = self.plugins(plugin_type='emotionConversionPlugin')
-       for candidate in candidates:
-           for pair in candidate.onyx__doesConversion:
-               logging.debug(pair)
-               if pair['onyx:conversionFrom'] == fromModel \
-                  and pair['onyx:conversionTo'] == toModel:
-                   yield candidate
-
-   def convert_emotions(self, resp):
-       """
-       Conversion of all emotions in a response **in place**.
-       In addition to converting from one model to another, it has
-       to include the conversion plugin to the analysis list.
-       Needless to say, this is far from an elegant solution, but it works.
-       @todo refactor and clean up
-       """
-       plugins = [i['plugin'] for i in resp.analysis]
-       params = resp.parameters
-       toModel = params.get('emotionModel', None)
-       if not toModel:
-           return
-       logger.debug('Asked for model: {}'.format(toModel))
-       output = params.get('conversion', None)
-       candidates = {}
-       for plugin in plugins:
-           try:
-               fromModel = plugin.get('onyx:usesEmotionModel', None)
-               candidates[plugin.id] = next(self._conversion_candidates(fromModel, toModel))
-               logger.debug('Analysis plugin {} uses model: {}'.format(plugin.id, fromModel))
-           except StopIteration:
-               e = Error(('No conversion plugin found for: '
-                          '{} -> {}'.format(fromModel, toModel)),
-                         status=404)
-               e.original_response = resp
-               e.parameters = params
-               raise e
-       newentries = []
-       for i in resp.entries:
-           if output == "full":
-               newemotions = copy.deepcopy(i.emotions)
-           else:
-               newemotions = []
-           for j in i.emotions:
-               plugname = j['prov:wasGeneratedBy']
-               candidate = candidates[plugname]
-               resp.analysis.append({'plugin': candidate,
-                                     'parameters': params})
-               for k in candidate.convert(j, fromModel, toModel, params):
-                   k.prov__wasGeneratedBy = candidate.id
-                   if output == 'nested':
-                       k.prov__wasDerivedFrom = j
-                   newemotions.append(k)
-           i.emotions = newemotions
-           newentries.append(i)
-       resp.entries = newentries

    @property
    def default_plugin(self):
        if not self._default or not self._default.is_activated:
-           candidates = self.plugins(plugin_type='analysisPlugin',
-                                     is_activated=True)
+           candidates = self.plugins(
+               plugin_type='analysisPlugin', is_activated=True)
            if len(candidates) > 0:
                self._default = candidates[0]
            else:

@@ -299,7 +310,7 @@ class Senpy(object):
    @default_plugin.setter
    def default_plugin(self, value):
-       if isinstance(value, Plugin):
+       if isinstance(value, plugins.Plugin):
            if not value.is_activated:
                raise AttributeError('The default plugin has to be activated.')
            self._default = value

@@ -351,7 +362,8 @@ class Senpy(object):
        logger.info("Activating plugin: {}".format(plugin.name))
-       if sync or not getattr(plugin, 'async', True) or getattr(plugin, 'sync', False):
+       if sync or not getattr(plugin, 'async', True) or getattr(
+               plugin, 'sync', False):
            return self._activate(plugin)
        else:
            th = Thread(target=partial(self._activate, plugin))

@@ -374,7 +386,8 @@ class Senpy(object):
        self._set_active(plugin, False)
-       if sync or not getattr(plugin, 'async', True) or not getattr(plugin, 'sync', False):
+       if sync or not getattr(plugin, 'async', True) or not getattr(
+               plugin, 'sync', False):
            self._deactivate(plugin)
        else:
            th = Thread(target=partial(self._deactivate, plugin))

View File

@@ -1,7 +1,6 @@
from future import standard_library
standard_library.install_aliases()

from future.utils import with_metaclass
from functools import partial

@@ -10,7 +9,6 @@ import os
import re
import pickle
import logging
-import copy
import pprint
import inspect

@@ -26,7 +24,6 @@ from .. import api
from .. import gsitk_compat
from .. import testing

logger = logging.getLogger(__name__)

@@ -46,16 +43,19 @@ class PluginMeta(models.BaseMeta):
            if doc:
                attrs['description'] = doc
            else:
-               logger.warn(('Plugin {} does not have a description. '
-                            'Please, add a short summary to help other developers').format(name))
+               logger.warning(
+                   ('Plugin {} does not have a description. '
+                    'Please, add a short summary to help other developers'
+                    ).format(name))
        cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs)

        if alias in mcs._classes:
            if os.environ.get('SENPY_TESTING', ""):
-               raise Exception(('The type of plugin {} already exists. '
-                                'Please, choose a different name').format(name))
+               raise Exception(
+                   ('The type of plugin {} already exists. '
+                    'Please, choose a different name').format(name))
            else:
-               logger.warn('Overloading plugin class: {}'.format(alias))
+               logger.warning('Overloading plugin class: {}'.format(alias))
        mcs._classes[alias] = cls
        return cls

@@ -87,10 +87,12 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
        if info:
            self.update(info)
        self.validate()
-       self.id = 'endpoint:plugins/{}_{}'.format(self['name'], self['version'])
+       self.id = 'endpoint:plugins/{}_{}'.format(self['name'],
+                                                 self['version'])
        self.is_activated = False
        self._lock = threading.Lock()
-       self._directory = os.path.abspath(os.path.dirname(inspect.getfile(self.__class__)))
+       self._directory = os.path.abspath(
+           os.path.dirname(inspect.getfile(self.__class__)))

        data_folder = data_folder or os.getcwd()
        subdir = os.path.join(data_folder, self.name)

@@ -118,7 +120,8 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
            if x not in self:
                missing.append(x)
        if missing:
-           raise models.Error('Missing configuration parameters: {}'.format(missing))
+           raise models.Error(
+               'Missing configuration parameters: {}'.format(missing))

    def get_folder(self):
        return os.path.dirname(inspect.getfile(self.__class__))

@@ -129,22 +132,60 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
    def deactivate(self):
        pass

+   def process(self, request, **kwargs):
+       """
+       An implemented plugin should override this method.
+       Here, we assume that a process_entries method exists."""
+       newentries = list(
+           self.process_entries(request.entries, request.parameters))
+       request.entries = newentries
+       return request
+
+   def process_entries(self, entries, parameters):
+       for entry in entries:
+           self.log.debug('Processing entry with plugin {}: {}'.format(
+               self, entry))
+           results = self.process_entry(entry, parameters)
+           if inspect.isgenerator(results):
+               for result in results:
+                   yield result
+           else:
+               yield results
+
+   def process_entry(self, entry, parameters):
+       """
+       This base method is here to adapt plugins which only
+       implement the *process* function.
+       Note that this method may yield an annotated entry or a list of
+       entries (e.g. in a tokenizer)
+       """
+       raise NotImplementedError(
+           'You need to implement process, process_entries or process_entry in your plugin'
+       )
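
With these base methods a plugin only needs to override one of process, process_entries or process_entry. A minimal hypothetical example (class name and annotation key invented for illustration):

    from senpy import AnalysisPlugin

    class WordCount(AnalysisPlugin):
        '''Hypothetical plugin: annotates each entry with its word count.'''
        author = 'example'
        version = '0.1'

        def process_entry(self, entry, parameters):
            # entries behave like dicts, so an extra key can be attached directly
            entry['word_count'] = len(entry['nif:isString'].split())
            yield entry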
    def test(self, test_cases=None):
        if not test_cases:
            if not hasattr(self, 'test_cases'):
-               raise AttributeError(('Plugin {} [{}] does not have any defined '
-                                     'test cases').format(self.id,
-                                                          inspect.getfile(self.__class__)))
+               raise AttributeError(
+                   ('Plugin {} [{}] does not have any defined '
+                    'test cases').format(self.id,
+                                         inspect.getfile(self.__class__)))
            test_cases = self.test_cases
        for case in test_cases:
            try:
                self.test_case(case)
-               self.log.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
+               self.log.debug('Test case passed:\n{}'.format(
+                   pprint.pformat(case)))
            except Exception as ex:
-               self.log.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
+               self.log.warning('Test case failed:\n{}'.format(
+                   pprint.pformat(case)))
                raise

    def test_case(self, case, mock=testing.MOCK_REQUESTS):
+       if 'entry' not in case and 'input' in case:
+           entry = models.Entry(_auto_id=False)
+           entry.nif__isString = case['input']
+           case['entry'] = entry
        entry = models.Entry(case['entry'])
        given_parameters = case.get('params', case.get('parameters', {}))
        expected = case.get('expected', None)

@@ -152,21 +193,25 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
        responses = case.get('responses', [])

        try:
-           params = api.parse_params(given_parameters, self.extra_params)
+           request = models.Response()
+           request.parameters = api.parse_params(given_parameters,
+                                                 self.extra_params)
+           request.entries = [
+               entry,
+           ]

-           method = partial(self.analyse_entries, [entry, ], params)
+           method = partial(self.process, request)

            if mock:
-               res = list(method())
+               res = method()
            else:
                with testing.patch_all_requests(responses):
-                   res = list(method())
+                   res = method()

            if not isinstance(expected, list):
                expected = [expected]
-           utils.check_template(res, expected)
-           for r in res:
-               r.validate()
+           utils.check_template(res.entries, expected)
+           res.validate()
        except models.Error:
            if should_fail:
                return
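
Since test_case now builds the entry automatically when only 'input' is given (the shortcut that used to live in Analysis.test_case), plugin test cases can be written compactly. A hedged example of the expected shape:

    test_cases = [{
        'input': 'hello world',                       # expanded into an Entry by test_case()
        'params': {},                                 # optional extra parameters
        'expected': {'nif:isString': 'hello world'},  # template checked against the result entries
    }]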
@@ -203,40 +248,26 @@ class Analysis(Plugin):
    A subclass of Plugin that analyses text and provides an annotation.
    '''

-   def analyse(self, *args, **kwargs):
-       raise NotImplementedError(
-           'Your plugin should implement either analyse or analyse_entry')
-
-   def analyse_entry(self, entry, parameters):
-       """ An implemented plugin should override this method.
-       This base method is here to adapt old style plugins which only
-       implement the *analyse* function.
-       Note that this method may yield an annotated entry or a list of
-       entries (e.g. in a tokenizer)
-       """
-       text = entry['nif:isString']
-       params = copy.copy(parameters)
-       params['input'] = text
-       results = self.analyse(**params)
-       for i in results.entries:
-           yield i
+   def analyse(self, request, parameters):
+       return super(Analysis, self).process(request)

    def analyse_entries(self, entries, parameters):
-       for entry in entries:
-           self.log.debug('Analysing entry with plugin {}: {}'.format(self, entry))
-           results = self.analyse_entry(entry, parameters)
-           if inspect.isgenerator(results):
-               for result in results:
-                   yield result
-           else:
-               yield results
+       for i in super(Analysis, self).process_entries(entries, parameters):
+           yield i

-   def test_case(self, case):
-       if 'entry' not in case and 'input' in case:
-           entry = models.Entry(_auto_id=False)
-           entry.nif__isString = case['input']
-           case['entry'] = entry
-       super(Analysis, self).test_case(case)
+   def process(self, request, **kwargs):
+       return self.analyse(request, request.parameters)
+
+   def process_entries(self, entries, parameters):
+       for i in self.analyse_entries(entries, parameters):
+           yield i
+
+   def process_entry(self, entry, parameters, **kwargs):
+       if hasattr(self, 'analyse_entry'):
+           for i in self.analyse_entry(entry, parameters):
+               yield i
+       else:
+           super(Analysis, self).process_entry(entry, parameters, **kwargs)


AnalysisPlugin = Analysis

@@ -247,7 +278,20 @@ class Conversion(Plugin):
    A subclass of Plugins that convert between different annotation models.
    e.g. a conversion of emotion models, or normalization of sentiment values.
    '''
-   pass
+
+   def process(self, response, plugins=None, **kwargs):
+       plugins = plugins or []
+       newentries = []
+       for entry in response.entries:
+           newentries.append(
+               self.convert_entry(entry, response.parameters, plugins))
+       response.entries = newentries
+       return response
+
+   def convert_entry(self, entry, parameters, conversions_applied):
+       raise NotImplementedError(
+           'You should implement a way to convert each entry, or a custom process method'
+       )
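
A conversion plugin therefore only has to implement convert_entry; process takes care of iterating over the entries. A minimal hypothetical subclass (not part of this commit):

    from senpy.plugins import Conversion

    class IdentityConversion(Conversion):
        '''Hypothetical converter that returns entries unchanged.'''
        author = 'example'
        version = '0.1'

        def convert_entry(self, entry, parameters, conversions_applied):
            # a real converter would rewrite the emotion/sentiment annotations here
            return entry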
ConversionPlugin = Conversion
@@ -284,12 +328,28 @@ class EmotionConversion(Conversion):
    '''
    A subclass of Conversion that converts emotion annotations using different models
    '''
-   pass
+
+   def can_convert(self, fromModel, toModel):
+       '''
+       Whether this plugin can convert from fromModel to toModel.
+       If fromModel is None, it is interpreted as "any Model"
+       '''
+       for pair in self.onyx__doesConversion:
+           if (pair['onyx:conversionTo'] == toModel) and \
+              ((fromModel is None) or (pair['onyx:conversionFrom'] == fromModel)):
+               return True
+       return False
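
can_convert works off the declared onyx:doesConversion pairs, so a conversion plugin advertises its capabilities declaratively. A rough sketch of that structure (the model URIs here are illustrative):

    onyx__doesConversion = [{
        'onyx:conversionFrom': 'emoml:big6',
        'onyx:conversionTo': 'emoml:fsre-dimensions',
    }]
    # can_convert('emoml:big6', 'emoml:fsre-dimensions') -> True
    # can_convert(None, 'emoml:fsre-dimensions')         -> True  (None means "any model")
    # can_convert('emoml:big6', 'some:other-model')      -> False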
EmotionConversionPlugin = EmotionConversion

+
+class PostProcessing(Plugin):
+   def check(self, request, plugins):
+       '''Should this plugin be run for this request?'''
+       return False
+

class Box(AnalysisPlugin):
    '''
    Black box plugins delegate analysis to a function.

@@ -314,9 +374,10 @@ class Box(AnalysisPlugin):
        return output

    def predict_one(self, input):
-       raise NotImplementedError('You should define the behavior of this plugin')
+       raise NotImplementedError(
+           'You should define the behavior of this plugin')

-   def analyse_entries(self, entries, params):
+   def process_entries(self, entries, params):
        for entry in entries:
            input = self.input(entry=entry, params=params)
            results = self.predict_one(input=input)

@@ -385,7 +446,6 @@ class EmotionBox(TextBox, EmotionPlugin):
class MappingMixin(object):

    @property
    def mappings(self):
        return self._mappings

@@ -395,11 +455,10 @@ class MappingMixin(object):
        self._mappings = value

    def output(self, output, entry, params):
-       output = self.mappings.get(output,
-                                  self.mappings.get('default', output))
-       return super(MappingMixin, self).output(output=output,
-                                               entry=entry,
-                                               params=params)
+       output = self.mappings.get(output, self.mappings.get(
+           'default', output))
+       return super(MappingMixin, self).output(
+           output=output, entry=entry, params=params)


class ShelfMixin(object):

@@ -412,7 +471,8 @@ class ShelfMixin(object):
                with self.open(self.shelf_file, 'rb') as p:
                    self._sh = pickle.load(p)
            except (IndexError, EOFError, pickle.UnpicklingError):
-               self.log.warning('Corrupted shelf file: {}'.format(self.shelf_file))
+               self.log.warning('Corrupted shelf file: {}'.format(
+                   self.shelf_file))
                if not self.get('force_shelf', False):
                    raise
        return self._sh

@@ -460,8 +520,7 @@ def pfilter(plugins, plugin_type=Analysis, **kwargs):
            plugin_type = plugin_type[0].upper() + plugin_type[1:]
            pclass = globals()[plugin_type]
            logger.debug('Class: {}'.format(pclass))
-           candidates = filter(lambda x: isinstance(x, pclass),
-                               plugins)
+           candidates = filter(lambda x: isinstance(x, pclass), plugins)
        except KeyError:
            raise models.Error('{} is not a valid type'.format(plugin_type))
    else:

@@ -471,8 +530,7 @@ def pfilter(plugins, plugin_type=Analysis, **kwargs):
    def matches(plug):
        res = all(getattr(plug, k, None) == v for (k, v) in kwargs.items())
-       logger.debug(
-           "matching {} with {}: {}".format(plug.name, kwargs, res))
+       logger.debug("matching {} with {}: {}".format(plug.name, kwargs, res))
        return res

    if kwargs:

@@ -506,14 +564,14 @@ def install_deps(*plugins):
        for req in requirements:
            pip_args.append(req)
        logger.info('Installing requirements: ' + str(requirements))
-       process = subprocess.Popen(pip_args,
-                                  stdout=subprocess.PIPE,
-                                  stderr=subprocess.PIPE)
+       process = subprocess.Popen(
+           pip_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _log_subprocess_output(process)
        exitcode = process.wait()
        installed = True
        if exitcode != 0:
-           raise models.Error("Dependencies not properly installed: {}".format(pip_args))
+           raise models.Error(
+               "Dependencies not properly installed: {}".format(pip_args))
    nltk_resources |= set(info.get('nltk_resources', []))

    installed |= nltk.download(list(nltk_resources))

@@ -556,7 +614,7 @@ def from_folder(folders, loader=from_path, **kwargs):
def from_info(info, root=None, install_on_fail=True, **kwargs):
-   if any(x not in info for x in ('module',)):
+   if any(x not in info for x in ('module', )):
        raise ValueError('Plugin info is not valid: {}'.format(info))
    module = info["module"]

@@ -593,7 +651,8 @@ def one_from_module(module, root, info, **kwargs):
    if '@type' in info:
        cls = PluginMeta.from_type(info['@type'])
        return cls(info=info, **kwargs)
-   instance = next(from_module(module=module, root=root, info=info, **kwargs), None)
+   instance = next(
+       from_module(module=module, root=root, info=info, **kwargs), None)
    if not instance:
        raise Exception("No valid plugin for: {}".format(module))
    return instance

@@ -617,7 +676,8 @@ def _instances_in_module(module):
def _from_module_name(module, root, info=None, **kwargs):
    module = load_module(module, root)
-   for plugin in _from_loaded_module(module=module, root=root, info=info, **kwargs):
+   for plugin in _from_loaded_module(
+           module=module, root=root, info=info, **kwargs):
        yield plugin

@@ -629,7 +689,8 @@ def _from_loaded_module(module, info=None, **kwargs):
def evaluate(plugins, datasets, **kwargs):
-   ev = gsitk_compat.Eval(tuples=None,
+   ev = gsitk_compat.Eval(
+       tuples=None,
        datasets=datasets,
        pipelines=[plugin.as_pipe() for plugin in plugins])
    ev.evaluate()

View File

@@ -1,6 +1,6 @@
---
name: Ekman2FSRE
-module: senpy.plugins.conversion.emotion.centroids
+module: senpy.plugins.postprocessing.emotion.centroids
description: Plugin to convert emotion sets from Ekman to VAD
version: 0.2
# No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction

View File

@@ -1,6 +1,6 @@
---
name: Ekman2PAD
-module: senpy.plugins.conversion.emotion.centroids
+module: senpy.plugins.postprocessing.emotion.centroids
description: Plugin to convert emotion sets from Ekman to VAD
version: 0.2
# No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction

View File

@@ -0,0 +1,196 @@
from senpy import PostProcessing, easy_test


class MaxEmotion(PostProcessing):
    '''Plugin to extract the emotion with highest value from an EmotionSet'''
    author = '@dsuarezsouto'
    version = '0.1'

    def process_entry(self, entry, params):
        if len(entry.emotions) < 1:
            yield entry
            return

        set_emotions = entry.emotions[0]['onyx:hasEmotion']

        # If there is only one emotion, do not modify it
        if len(set_emotions) < 2:
            yield entry
            return

        max_emotion = set_emotions[0]

        # Extract max emotion from the set emotions (emotion with highest intensity)
        for tmp_emotion in set_emotions:
            if tmp_emotion['onyx:hasEmotionIntensity'] > max_emotion[
                    'onyx:hasEmotionIntensity']:
                max_emotion = tmp_emotion

        if max_emotion['onyx:hasEmotionIntensity'] == 0:
            max_emotion['onyx:hasEmotionCategory'] = "neutral"
            max_emotion['onyx:hasEmotionIntensity'] = 1.0

        entry.emotions[0]['onyx:hasEmotion'] = [max_emotion]
        entry.emotions[0]['prov:wasGeneratedBy'] = "maxSentiment"
        yield entry

    def check(self, request, plugins):
        return 'maxemotion' in request.parameters and self not in plugins
    # Test Cases:
    #   1 Normal Situation.
    #   2 Case to return a Neutral Emotion.
    test_cases = [
        {
            "name": "If there are several emotions within an emotion set, reduce it to one.",
            "entry": {
                "@type": "entry",
                "nif:isString": "Test",
                "emotions": [{
                    "@id": "Emotions0",
                    "@type": "emotionSet",
                    "onyx:hasEmotion": [
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "anger",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "joy",
                         "onyx:hasEmotionIntensity": 0.3333333333333333},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "negative-fear",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "sadness",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "disgust",
                         "onyx:hasEmotionIntensity": 0}
                    ]
                }]
            },
            "expected": {
                "@type": "entry",
                "nif:isString": "Test",
                "emotions": [{
                    "@id": "Emotions0",
                    "@type": "emotionSet",
                    "onyx:hasEmotion": [
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "joy",
                         "onyx:hasEmotionIntensity": 0.3333333333333333}
                    ],
                    "prov:wasGeneratedBy": "maxSentiment"
                }]
            }
        },
        {
            "name": "If the maximum emotion has an intensity of 0, return a neutral emotion.",
            "entry": {
                "@type": "entry",
                "nif:isString": "Test",
                "emotions": [{
                    "@id": "Emotions0",
                    "@type": "emotionSet",
                    "onyx:hasEmotion": [
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "anger",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "joy",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "negative-fear",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "sadness",
                         "onyx:hasEmotionIntensity": 0},
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "disgust",
                         "onyx:hasEmotionIntensity": 0}
                    ]
                }]
            },
            "expected": {
                "@type": "entry",
                "nif:isString": "Test",
                "emotions": [{
                    "@id": "Emotions0",
                    "@type": "emotionSet",
                    "onyx:hasEmotion": [
                        {"@id": "_:Emotion_1538121033.74", "@type": "emotion",
                         "onyx:hasEmotionCategory": "neutral",
                         "onyx:hasEmotionIntensity": 1}
                    ],
                    "prov:wasGeneratedBy": "maxSentiment"
                }]
            }
        }
    ]


if __name__ == '__main__':
    easy_test()
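
The reduction rule implemented by MaxEmotion can be summarised with plain dictionaries, independent of the senpy types; this standalone sketch only illustrates the behaviour that the test cases above check:

    def max_emotion(emotions):
        # keep the emotion with the highest intensity; ties keep the first one
        best = max(emotions, key=lambda e: e['onyx:hasEmotionIntensity'])
        if best['onyx:hasEmotionIntensity'] == 0:
            best = {'onyx:hasEmotionCategory': 'neutral', 'onyx:hasEmotionIntensity': 1.0}
        return [best]

    assert max_emotion([
        {'onyx:hasEmotionCategory': 'joy', 'onyx:hasEmotionIntensity': 0.3},
        {'onyx:hasEmotionCategory': 'anger', 'onyx:hasEmotionIntensity': 0.0},
    ])[0]['onyx:hasEmotionCategory'] == 'joy'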

View File

@@ -138,14 +138,14 @@ class BlueprintsTest(TestCase):
        # Calling dummy twice, should return the same string
        self.assertCode(resp, 200)
        js = parse_resp(resp)
-       assert len(js['analysis']) == 2
+       assert len(js['analysis']) == 1
        assert js['entries'][0]['nif:isString'] == 'My aloha mohame'
        resp = self.client.get("/api/Dummy+Dummy?i=My aloha mohame")
        # Same with pluses instead of slashes
        self.assertCode(resp, 200)
        js = parse_resp(resp)
-       assert len(js['analysis']) == 2
+       assert len(js['analysis']) == 1
        assert js['entries'][0]['nif:isString'] == 'My aloha mohame'

    def test_error(self):

View File

@@ -121,8 +121,8 @@ class ExtensionsTest(TestCase):
        # Leaf (defaultdict with __setattr__ and __getattr__.
        r1 = analyse(self.senpy, algorithm="Dummy", input="tupni", output="tuptuo")
        r2 = analyse(self.senpy, input="tupni", output="tuptuo")
-       assert r1.analysis[0] == "endpoint:plugins/Dummy_0.1"
-       assert r2.analysis[0] == "endpoint:plugins/Dummy_0.1"
+       assert r1.analysis[0].id == "endpoint:plugins/Dummy_0.1"
+       assert r2.analysis[0].id == "endpoint:plugins/Dummy_0.1"
        assert r1.entries[0]['nif:isString'] == 'input'

    def test_analyse_empty(self):

@@ -156,8 +156,8 @@ class ExtensionsTest(TestCase):
        r2 = analyse(self.senpy,
                     input="tupni",
                     output="tuptuo")
-       assert r1.analysis[0] == "endpoint:plugins/Dummy_0.1"
-       assert r2.analysis[0] == "endpoint:plugins/Dummy_0.1"
+       assert r1.analysis[0].id == "endpoint:plugins/Dummy_0.1"
+       assert r2.analysis[0].id == "endpoint:plugins/Dummy_0.1"
        assert r1.entries[0]['nif:isString'] == 'input'

    def test_analyse_error(self):

@@ -165,7 +165,7 @@ class ExtensionsTest(TestCase):
        mm.id = 'magic_mock'
        mm.name = 'mock'
        mm.is_activated = True
-       mm.analyse_entries.side_effect = Error('error in analysis', status=500)
+       mm.process.side_effect = Error('error in analysis', status=500)
        self.senpy.add_plugin(mm)
        try:
            analyse(self.senpy, input='nothing', algorithm='MOCK')

@@ -175,8 +175,7 @@ class ExtensionsTest(TestCase):
            assert ex['status'] == 500

        ex = Exception('generic exception on analysis')
-       mm.analyse.side_effect = ex
-       mm.analyse_entries.side_effect = ex
+       mm.process.side_effect = ex

        try:
            analyse(self.senpy, input='nothing', algorithm='MOCK')

@@ -211,27 +210,28 @@ class ExtensionsTest(TestCase):
            'emoml:valence': 0
        }))
        response = Results({
-           'analysis': [{'plugin': plugin}],
+           'analysis': [plugin],
            'entries': [Entry({
                'nif:isString': 'much ado about nothing',
                'emotions': [eSet1]
            })]
        })
        params = {'emotionModel': 'emoml:big6',
+                 'algorithm': ['conversion'],
                  'conversion': 'full'}
        r1 = deepcopy(response)
        r1.parameters = params
-       self.senpy.convert_emotions(r1)
+       self.senpy.analyse(r1)
        assert len(r1.entries[0].emotions) == 2
        params['conversion'] = 'nested'
        r2 = deepcopy(response)
        r2.parameters = params
-       self.senpy.convert_emotions(r2)
+       self.senpy.analyse(r2)
        assert len(r2.entries[0].emotions) == 1
        assert r2.entries[0].emotions[0]['prov:wasDerivedFrom'] == eSet1
        params['conversion'] = 'filtered'
        r3 = deepcopy(response)
        r3.parameters = params
-       self.senpy.convert_emotions(r3)
+       self.senpy.analyse(r3)
        assert len(r3.entries[0].emotions) == 1
        r3.jsonld()

View File

@@ -8,7 +8,7 @@ import tempfile
from unittest import TestCase, skipIf
from senpy.models import Results, Entry, EmotionSet, Emotion, Plugins
from senpy import plugins
-from senpy.plugins.conversion.emotion.centroids import CentroidConversion
+from senpy.plugins.postprocessing.emotion.centroids import CentroidConversion
from senpy.gsitk_compat import GSITK_AVAILABLE

import pandas as pd