mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 00:02:28 +00:00
WIP simpler pipeline
This commit is contained in:
parent
fca0ac00c4
commit
a243f68bfc
133
senpy/api.py
133
senpy/api.py
@ -1,66 +1,76 @@
|
||||
from future.utils import iteritems
|
||||
from .models import Error
|
||||
from .models import Error, Results, Entry, from_string
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
API_PARAMS = {
|
||||
"algorithm": {
|
||||
"aliases": ["algorithm", "a", "algo"],
|
||||
"aliases": ["algorithms", "a", "algo"],
|
||||
"required": False,
|
||||
},
|
||||
"outformat": {
|
||||
"@id": "outformat",
|
||||
"aliases": ["outformat", "o"],
|
||||
"default": "json-ld",
|
||||
"required": True,
|
||||
"options": ["json-ld", "turtle"],
|
||||
"description": ("Algorithms that will be used to process the request."
|
||||
"It may be a list of comma-separated names."),
|
||||
},
|
||||
"expanded-jsonld": {
|
||||
"@id": "expanded-jsonld",
|
||||
"aliases": ["expanded", "expanded-jsonld"],
|
||||
"aliases": ["expanded"],
|
||||
"required": True,
|
||||
"default": 0
|
||||
},
|
||||
"emotionModel": {
|
||||
"@id": "emotionModel",
|
||||
"aliases": ["emotionModel", "emoModel"],
|
||||
"required": False
|
||||
"with_parameters": {
|
||||
"aliases": ['withparameters',
|
||||
'with-parameters'],
|
||||
"options": "boolean",
|
||||
"default": False,
|
||||
"required": True
|
||||
},
|
||||
"plugin_type": {
|
||||
"@id": "pluginType",
|
||||
"description": 'What kind of plugins to list',
|
||||
"aliases": ["pluginType", "plugin_type"],
|
||||
"aliases": ["pluginType"],
|
||||
"required": True,
|
||||
"default": "analysisPlugin"
|
||||
},
|
||||
"outformat": {
|
||||
"@id": "outformat",
|
||||
"aliases": ["o"],
|
||||
"default": "json-ld",
|
||||
"required": True,
|
||||
"options": ["json-ld", "turtle"],
|
||||
},
|
||||
"help": {
|
||||
"@id": "help",
|
||||
"description": "Show additional help to know more about the possible parameters",
|
||||
"aliases": ["h"],
|
||||
"required": True,
|
||||
"options": "boolean",
|
||||
"default": False
|
||||
},
|
||||
"emotionModel": {
|
||||
"@id": "emotionModel",
|
||||
"aliases": ["emoModel"],
|
||||
"required": False
|
||||
},
|
||||
"conversion": {
|
||||
"@id": "conversion",
|
||||
"description": "How to show the elements that have (not) been converted",
|
||||
"required": True,
|
||||
"options": ["filtered", "nested", "full"],
|
||||
"default": "full"
|
||||
},
|
||||
"help": {
|
||||
"@id": "help",
|
||||
"description": "Show additional help to know more about the possible parameters",
|
||||
"aliases": ["help", "h"],
|
||||
"required": True,
|
||||
"options": ["True", "False"],
|
||||
"default": "False"
|
||||
}
|
||||
}
|
||||
|
||||
WEB_PARAMS = {
|
||||
"inHeaders": {
|
||||
"aliases": ["inHeaders", "headers"],
|
||||
"aliases": ["headers"],
|
||||
"required": True,
|
||||
"default": "0"
|
||||
"default": False,
|
||||
"options": "boolean"
|
||||
},
|
||||
}
|
||||
|
||||
CLI_PARAMS = {
|
||||
"plugin_folder": {
|
||||
"aliases": ["plugin_folder", "folder"],
|
||||
"aliases": ["folder"],
|
||||
"required": True,
|
||||
"default": "."
|
||||
},
|
||||
@ -69,63 +79,70 @@ CLI_PARAMS = {
|
||||
NIF_PARAMS = {
|
||||
"input": {
|
||||
"@id": "input",
|
||||
"aliases": ["i", "input"],
|
||||
"aliases": ["i"],
|
||||
"required": True,
|
||||
"help": "Input text"
|
||||
},
|
||||
"informat": {
|
||||
"@id": "informat",
|
||||
"aliases": ["f", "informat"],
|
||||
"required": False,
|
||||
"default": "text",
|
||||
"options": ["turtle", "text", "json-ld"],
|
||||
},
|
||||
"intype": {
|
||||
"@id": "intype",
|
||||
"aliases": ["intype", "t"],
|
||||
"aliases": ["t"],
|
||||
"required": False,
|
||||
"default": "direct",
|
||||
"options": ["direct", "url", "file"],
|
||||
},
|
||||
"informat": {
|
||||
"@id": "informat",
|
||||
"aliases": ["f"],
|
||||
"required": False,
|
||||
"default": "text",
|
||||
"options": ["turtle", "text", "json-ld"],
|
||||
},
|
||||
"language": {
|
||||
"@id": "language",
|
||||
"aliases": ["language", "l"],
|
||||
"aliases": ["l"],
|
||||
"required": False,
|
||||
},
|
||||
"prefix": {
|
||||
"@id": "prefix",
|
||||
"aliases": ["prefix", "p"],
|
||||
"aliases": ["p"],
|
||||
"required": True,
|
||||
"default": "",
|
||||
},
|
||||
"urischeme": {
|
||||
"@id": "urischeme",
|
||||
"aliases": ["urischeme", "u"],
|
||||
"aliases": ["u"],
|
||||
"required": False,
|
||||
"default": "RFC5147String",
|
||||
"options": "RFC5147String"
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def parse_params(indict, spec=NIF_PARAMS):
|
||||
logger.debug("Parsing: {}\n{}".format(indict, spec))
|
||||
def parse_params(indict, *specs):
|
||||
if not specs:
|
||||
specs = [NIF_PARAMS]
|
||||
logger.debug("Parsing: {}\n{}".format(indict, specs))
|
||||
outdict = indict.copy()
|
||||
wrong_params = {}
|
||||
for spec in specs:
|
||||
for param, options in iteritems(spec):
|
||||
if param[0] != "@": # Exclude json-ld properties
|
||||
for alias in options.get("aliases", []):
|
||||
if alias in indict:
|
||||
# Replace each alias with the correct name of the parameter
|
||||
if alias in indict and alias is not param:
|
||||
outdict[param] = indict[alias]
|
||||
del indict[alias]
|
||||
continue
|
||||
if param not in outdict:
|
||||
if options.get("required", False) and "default" not in options:
|
||||
wrong_params[param] = spec[param]
|
||||
else:
|
||||
if "default" in options:
|
||||
outdict[param] = options["default"]
|
||||
else:
|
||||
if "options" in spec[param] and \
|
||||
outdict[param] not in spec[param]["options"]:
|
||||
elif "options" in spec[param]:
|
||||
if spec[param]["options"] == "boolean":
|
||||
outdict[param] = outdict[param] in [None, True, 'true', '1']
|
||||
elif outdict[param] not in spec[param]["options"]:
|
||||
wrong_params[param] = spec[param]
|
||||
if wrong_params:
|
||||
logger.debug("Error parsing: %s", wrong_params)
|
||||
@ -136,4 +153,30 @@ def parse_params(indict, spec=NIF_PARAMS):
|
||||
errors={param: error
|
||||
for param, error in iteritems(wrong_params)})
|
||||
raise message
|
||||
if 'algorithm' in outdict and isinstance(outdict['algorithm'], str):
|
||||
outdict['algorithm'] = outdict['algorithm'].split(',')
|
||||
return outdict
|
||||
|
||||
|
||||
def get_extra_params(request, plugin=None):
    '''Return a copy of the request parameters, extended for one plugin.

    The copy of ``request.parameters`` is returned untouched when no plugin
    is given. Otherwise it is parsed against the plugin's ``extra_params``
    spec (an empty spec if the plugin declares none) and updated with the
    outcome of that parsing.
    '''
    merged = request.parameters.copy()
    if not plugin:
        return merged
    plugin_spec = plugin.get('extra_params', {})
    merged.update(parse_params(merged, plugin_spec))
    return merged
|
||||
|
||||
|
||||
def parse_call(params):
    '''Return a results object based on the parameters used in a call/request.

    The parameters are first parsed against NIF_PARAMS. Depending on the
    resulting ``informat``:

    * ``text``: a new Results is created holding a single Entry whose
      ``nif__isString`` is the ``input`` parameter.
    * ``json-ld``: the ``input`` parameter is deserialized into a Results
      with ``from_string``.

    Any other ``informat`` raises NotImplementedError. The parsed
    parameters are attached to the returned object as ``parameters``.
    '''
    params = parse_params(params, NIF_PARAMS)
    informat = params['informat']
    if informat == 'text':
        results = Results()
        results.entries.append(Entry(nif__isString=params['input']))
    elif informat == 'json-ld':
        results = from_string(params['input'], cls=Results)
    else:
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception meant for this case.
        raise NotImplementedError(
            'Informat {} is not implemented'.format(informat))
    results.parameters = params
    return results
|
||||
|
@ -19,8 +19,8 @@ Blueprints for Senpy
|
||||
"""
|
||||
from flask import (Blueprint, request, current_app, render_template, url_for,
|
||||
jsonify)
|
||||
from .models import Error, Response, Plugins, read_schema
|
||||
from .api import WEB_PARAMS, API_PARAMS, CLI_PARAMS, NIF_PARAMS, parse_params
|
||||
from .models import Error, Response, Help, Plugins, read_schema
|
||||
from . import api
|
||||
from .version import __version__
|
||||
from functools import wraps
|
||||
|
||||
@ -43,6 +43,7 @@ def get_params(req):
|
||||
raise Error(message="Invalid data")
|
||||
return indict
|
||||
|
||||
|
||||
@demo_blueprint.route('/')
|
||||
def index():
|
||||
return render_template("index.html", version=__version__)
|
||||
@ -75,20 +76,16 @@ def basic_api(f):
|
||||
def decorated_function(*args, **kwargs):
|
||||
raw_params = get_params(request)
|
||||
headers = {'X-ORIGINAL-PARAMS': json.dumps(raw_params)}
|
||||
# Get defaults
|
||||
web_params = parse_params({}, spec=WEB_PARAMS)
|
||||
api_params = parse_params({}, spec=API_PARAMS)
|
||||
|
||||
outformat = 'json-ld'
|
||||
try:
|
||||
print('Getting request:')
|
||||
print(request)
|
||||
web_params = parse_params(raw_params, spec=WEB_PARAMS)
|
||||
api_params = parse_params(raw_params, spec=API_PARAMS)
|
||||
if hasattr(request, 'params'):
|
||||
request.params.update(api_params)
|
||||
params = api.parse_params(raw_params, api.WEB_PARAMS, api.API_PARAMS)
|
||||
if hasattr(request, 'parameters'):
|
||||
request.parameters.update(params)
|
||||
else:
|
||||
request.params = api_params
|
||||
request.parameters = params
|
||||
response = f(*args, **kwargs)
|
||||
except Error as ex:
|
||||
response = ex
|
||||
@ -96,14 +93,14 @@ def basic_api(f):
|
||||
if current_app.debug:
|
||||
raise
|
||||
|
||||
in_headers = web_params['inHeaders'] != "0"
|
||||
expanded = api_params['expanded-jsonld']
|
||||
outformat = api_params['outformat']
|
||||
in_headers = params['inHeaders']
|
||||
expanded = params['expanded-jsonld']
|
||||
outformat = params['outformat']
|
||||
|
||||
return response.flask(
|
||||
in_headers=in_headers,
|
||||
headers=headers,
|
||||
prefix=url_for('.api', _external=True),
|
||||
prefix=url_for('.api_root', _external=True),
|
||||
context_uri=url_for('api.context',
|
||||
entity=type(response).__name__,
|
||||
_external=True),
|
||||
@ -115,14 +112,14 @@ def basic_api(f):
|
||||
|
||||
@api_blueprint.route('/', methods=['POST', 'GET'])
|
||||
@basic_api
|
||||
def api():
|
||||
phelp = request.params.get('help')
|
||||
if phelp == "True":
|
||||
dic = dict(API_PARAMS, **NIF_PARAMS)
|
||||
response = Response(dic)
|
||||
def api_root():
|
||||
if request.parameters['help']:
|
||||
dic = dict(api.API_PARAMS, **api.NIF_PARAMS)
|
||||
response = Help(parameters=dic)
|
||||
return response
|
||||
else:
|
||||
response = current_app.senpy.analyse(**request.params)
|
||||
req = api.parse_call(request.parameters)
|
||||
response = current_app.senpy.analyse(req)
|
||||
return response
|
||||
|
||||
|
||||
@ -130,7 +127,7 @@ def api():
|
||||
@basic_api
|
||||
def plugins():
|
||||
sp = current_app.senpy
|
||||
ptype = request.params.get('plugin_type')
|
||||
ptype = request.parameters.get('plugin_type')
|
||||
plugins = sp.filter_plugins(plugin_type=ptype)
|
||||
dic = Plugins(plugins=list(plugins.values()))
|
||||
return dic
|
||||
|
22
senpy/cli.py
22
senpy/cli.py
@ -1,7 +1,7 @@
|
||||
import sys
|
||||
from .models import Error
|
||||
from .api import parse_params, CLI_PARAMS
|
||||
from .extensions import Senpy
|
||||
from . import api
|
||||
|
||||
|
||||
def argv_to_dict(argv):
|
||||
@ -13,27 +13,25 @@ def argv_to_dict(argv):
|
||||
if argv[i][0] == '-':
|
||||
key = argv[i].strip('-')
|
||||
value = argv[i + 1] if len(argv) > i + 1 else None
|
||||
if value and value[0] == '-':
|
||||
cli_dict[key] = ""
|
||||
if not value or value[0] == '-':
|
||||
cli_dict[key] = True
|
||||
else:
|
||||
cli_dict[key] = value
|
||||
return cli_dict
|
||||
|
||||
|
||||
def parse_cli(argv):
|
||||
cli_dict = argv_to_dict(argv)
|
||||
cli_params = parse_params(cli_dict, spec=CLI_PARAMS)
|
||||
return cli_params, cli_dict
|
||||
|
||||
|
||||
def main_function(argv):
|
||||
'''This is the method for unit testing
|
||||
'''
|
||||
cli_params, cli_dict = parse_cli(argv)
|
||||
plugin_folder = cli_params['plugin_folder']
|
||||
params = api.parse_params(argv_to_dict(argv),
|
||||
api.CLI_PARAMS,
|
||||
api.API_PARAMS,
|
||||
api.NIF_PARAMS)
|
||||
plugin_folder = params['plugin_folder']
|
||||
sp = Senpy(default_plugins=False, plugin_folder=plugin_folder)
|
||||
sp.activate_all(sync=True)
|
||||
res = sp.analyse(**cli_dict)
|
||||
request = api.parse_call(params)
|
||||
res = sp.analyse(request)
|
||||
return res
|
||||
|
||||
|
||||
|
@ -5,11 +5,10 @@ It orchestrates plugin (de)activation and analysis.
|
||||
from future import standard_library
|
||||
standard_library.install_aliases()
|
||||
|
||||
from . import plugins
|
||||
from . import plugins, api
|
||||
from .plugins import SenpyPlugin
|
||||
from .models import Error, Entry, Results, from_string
|
||||
from .models import Error
|
||||
from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
|
||||
from .api import API_PARAMS, NIF_PARAMS, parse_params
|
||||
|
||||
from threading import Thread
|
||||
|
||||
@ -72,17 +71,15 @@ class Senpy(object):
|
||||
else:
|
||||
logger.debug("Not a folder: %s", folder)
|
||||
|
||||
def _find_plugins(self, params):
|
||||
def _get_plugins(self, request):
|
||||
if not self.analysis_plugins:
|
||||
raise Error(
|
||||
status=404,
|
||||
message=("No plugins found."
|
||||
" Please install one."))
|
||||
api_params = parse_params(params, spec=API_PARAMS)
|
||||
algos = None
|
||||
if "algorithm" in api_params and api_params["algorithm"]:
|
||||
algos = api_params["algorithm"].split(',')
|
||||
elif self.default_plugin:
|
||||
algos = request.parameters.get('algorithm', None)
|
||||
if not algos:
|
||||
if self.default_plugin:
|
||||
algos = [self.default_plugin.name, ]
|
||||
else:
|
||||
raise Error(
|
||||
@ -108,66 +105,46 @@ class Senpy(object):
|
||||
plugins.append(self.plugins[algo])
|
||||
return plugins
|
||||
|
||||
def _get_params(self, params, plugin=None):
|
||||
nif_params = parse_params(params, spec=NIF_PARAMS)
|
||||
if plugin:
|
||||
extra_params = plugin.get('extra_params', {})
|
||||
specific_params = parse_params(params, spec=extra_params)
|
||||
nif_params.update(specific_params)
|
||||
return nif_params
|
||||
|
||||
def _get_entries(self, params):
|
||||
if params['informat'] == 'text':
|
||||
results = Results()
|
||||
entry = Entry(text=params['input'])
|
||||
results.entries.append(entry)
|
||||
elif params['informat'] == 'json-ld':
|
||||
results = from_string(params['input'], cls=Results)
|
||||
else:
|
||||
raise NotImplemented('Informat {} is not implemented'.format(params['informat']))
|
||||
return results
|
||||
|
||||
def _process_entries(self, entries, plugins, nif_params):
|
||||
def _process_entries(self, entries, req, plugins):
|
||||
if not plugins:
|
||||
for i in entries:
|
||||
yield i
|
||||
return
|
||||
plugin = plugins[0]
|
||||
specific_params = self._get_params(nif_params, plugin)
|
||||
specific_params = api.get_extra_params(req, plugin)
|
||||
req.analysis.append({'plugin': plugin,
|
||||
'parameters': specific_params})
|
||||
results = plugin.analyse_entries(entries, specific_params)
|
||||
for i in self._process_entries(results, plugins[1:], nif_params):
|
||||
for i in self._process_entries(results, req, plugins[1:]):
|
||||
yield i
|
||||
|
||||
def _process_response(self, resp, plugins, nif_params):
|
||||
entries = resp.entries
|
||||
resp.entries = []
|
||||
for plug in plugins:
|
||||
resp.analysis.append(plug.id)
|
||||
for i in self._process_entries(entries, plugins, nif_params):
|
||||
resp.entries.append(i)
|
||||
return resp
|
||||
|
||||
def analyse(self, **api_params):
|
||||
def analyse(self, request):
|
||||
"""
|
||||
Main method that analyses a request, either from CLI or HTTP.
|
||||
It uses a dictionary of parameters, provided by the user.
|
||||
It takes a processed request, provided by the user, as returned
|
||||
by api.parse_call().
|
||||
"""
|
||||
logger.debug("analysing with params: {}".format(api_params))
|
||||
plugins = self._find_plugins(api_params)
|
||||
nif_params = self._get_params(api_params)
|
||||
resp = self._get_entries(nif_params)
|
||||
if 'with_parameters' in api_params:
|
||||
resp.parameters = nif_params
|
||||
logger.debug("analysing request: {}".format(request))
|
||||
try:
|
||||
resp = self._process_response(resp, plugins, nif_params)
|
||||
self.convert_emotions(resp, plugins, nif_params)
|
||||
logger.debug("Returning analysis result: {}".format(resp))
|
||||
entries = request.entries
|
||||
request.entries = []
|
||||
plugins = self._get_plugins(request)
|
||||
results = request
|
||||
for i in self._process_entries(entries, results, plugins):
|
||||
results.entries.append(i)
|
||||
self.convert_emotions(results)
|
||||
if 'with_parameters' not in results.parameters:
|
||||
del results.parameters
|
||||
logger.debug("Returning analysis result: {}".format(results))
|
||||
except (Error, Exception) as ex:
|
||||
if not isinstance(ex, Error):
|
||||
ex = Error(message=str(ex), status=500)
|
||||
msg = "Error during analysis: {} \n\t{}".format(ex,
|
||||
traceback.format_exc())
|
||||
ex = Error(message=msg, status=500)
|
||||
logger.exception('Error returning analysis result')
|
||||
raise ex
|
||||
return resp
|
||||
results.analysis = [i['plugin'].id for i in results.analysis]
|
||||
return results
|
||||
|
||||
def _conversion_candidates(self, fromModel, toModel):
|
||||
candidates = self.filter_plugins(plugin_type='emotionConversionPlugin')
|
||||
@ -180,7 +157,7 @@ class Senpy(object):
|
||||
# logging.debug('Found candidate: {}'.format(candidate))
|
||||
yield candidate
|
||||
|
||||
def convert_emotions(self, resp, plugins, params):
|
||||
def convert_emotions(self, resp):
|
||||
"""
|
||||
Conversion of all emotions in a response **in place**.
|
||||
In addition to converting from one model to another, it has
|
||||
@ -188,6 +165,8 @@ class Senpy(object):
|
||||
Needless to say, this is far from an elegant solution, but it works.
|
||||
@todo refactor and clean up
|
||||
"""
|
||||
plugins = [i['plugin'] for i in resp.analysis]
|
||||
params = resp.parameters
|
||||
toModel = params.get('emotionModel', None)
|
||||
if not toModel:
|
||||
return
|
||||
@ -215,7 +194,8 @@ class Senpy(object):
|
||||
for j in i.emotions:
|
||||
plugname = j['prov:wasGeneratedBy']
|
||||
candidate = candidates[plugname]
|
||||
resp.analysis.append(candidate.id)
|
||||
resp.analysis.append({'plugin': candidate,
|
||||
'parameters': params})
|
||||
for k in candidate.convert(j, fromModel, toModel, params):
|
||||
k.prov__wasGeneratedBy = candidate.id
|
||||
if output == 'nested':
|
||||
@ -224,7 +204,6 @@ class Senpy(object):
|
||||
i.emotions = newemotions
|
||||
newentries.append(i)
|
||||
resp.entries = newentries
|
||||
resp.analysis = list(set(resp.analysis))
|
||||
|
||||
@property
|
||||
def default_plugin(self):
|
||||
|
@ -181,7 +181,7 @@ class SenpyMixin(object):
|
||||
obj = self
|
||||
if hasattr(obj, "jsonld"):
|
||||
obj = obj.jsonld()
|
||||
jsonschema.validate(obj, self.schema)
|
||||
self._validator.validate(obj)
|
||||
|
||||
def __str__(self):
|
||||
return str(self.serialize())
|
||||
@ -246,13 +246,13 @@ class BaseModel(SenpyMixin, dict):
|
||||
return d
|
||||
|
||||
|
||||
_subtypes = {}
|
||||
|
||||
|
||||
def register(rsubclass, rtype=None):
|
||||
_subtypes[rtype or rsubclass.__name__] = rsubclass
|
||||
|
||||
|
||||
_subtypes = {}
|
||||
|
||||
|
||||
def from_dict(indict, cls=None):
|
||||
if not cls:
|
||||
target = indict.get('@type', None)
|
||||
@ -286,15 +286,31 @@ def from_json(injson):
|
||||
return from_dict(indict)
|
||||
|
||||
|
||||
def from_schema(name, schema=None, schema_file=None, base_classes=None):
    '''Create and register a model class backed by a JSON schema.

    name: value stored as the class' ``@type``; the class name is ``name``
        with its first letter upper-cased.
    schema: already-loaded schema; when omitted it is read from
        ``schema_file``.
    schema_file: schema file name, ``<name>.json`` by default. A name
        without any ``/`` is looked up in the ``schemas`` folder next to
        this module.
    base_classes: extra base classes; BaseModel is always appended.

    Returns the new class, after registering it under ``name``.
    '''
    # Work on a copy so the list passed by the caller is never mutated.
    bases = list(base_classes or [])
    bases.append(BaseModel)
    schema_file = schema_file or '{}.json'.format(name)
    class_name = '{}{}'.format(name[0].upper(), name[1:])
    # Test the variable, not the literal 'schema_file' (which never
    # contains a slash, so the check used to be always true).
    if '/' not in schema_file:
        schema_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'schemas',
                                   schema_file)

    # Base URI handed to the resolver together with the schema.
    schema_path = 'file://' + schema_file

    # Honour an explicitly provided schema instead of overwriting it.
    if schema is None:
        with open(schema_file) as f:
            schema = json.load(f)

    resolver = jsonschema.RefResolver(schema_path, schema)
    dct = {
        '@type': name,
        '_schema_file': schema_file,
        'schema': schema,
        '_validator': jsonschema.Draft4Validator(schema, resolver=resolver),
    }

    newclass = type(class_name, tuple(bases), dct)

    register(newclass, name)
    return newclass
|
||||
|
||||
@ -315,6 +331,7 @@ for i in [
|
||||
'emotionPlugin',
|
||||
'emotionSet',
|
||||
'entry',
|
||||
'help',
|
||||
'plugin',
|
||||
'plugins',
|
||||
'response',
|
||||
@ -334,6 +351,9 @@ class Error(SenpyMixin, Exception):
|
||||
self._error = _ErrorModel(message=message, *args, **kwargs)
|
||||
self.message = message
|
||||
|
||||
def validate(self, obj=None):
|
||||
self._error.validate()
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._error[key]
|
||||
|
||||
|
@ -76,7 +76,7 @@ class AnalysisPlugin(Plugin):
|
||||
Note that this method may yield an annotated entry or a list of
|
||||
entries (e.g. in a tokenizer)
|
||||
"""
|
||||
text = entry['text']
|
||||
text = entry['nif:isString']
|
||||
params = copy.copy(parameters)
|
||||
params['input'] = text
|
||||
results = self.analyse(**params)
|
||||
|
64
senpy/plugins/misc/split.py
Normal file
64
senpy/plugins/misc/split.py
Normal file
@ -0,0 +1,64 @@
|
||||
from senpy.plugins import AnalysisPlugin
|
||||
from senpy.models import Entry
|
||||
from nltk.tokenize.punkt import PunktSentenceTokenizer
|
||||
from nltk.tokenize.simple import LineTokenizer
|
||||
import nltk
|
||||
|
||||
|
||||
class SplitPlugin(AnalysisPlugin):
    '''Analysis plugin that splits the text of an entry into several entries.

    The ``delimiter`` parameter selects the tokenizer: ``sentence`` uses
    PunktSentenceTokenizer and ``paragraph`` uses LineTokenizer.
    '''

    def activate(self):
        '''Download the NLTK ``punkt`` resource.'''
        nltk.download('punkt')

    def analyse_entry(self, entry, params):
        '''Yield one new Entry per chunk of the entry's ``nif:isString``.

        When the original entry has an id, each chunk gets the id
        ``<entry id>#char=<start>,<end>`` built from the chunk's span.
        Raises ValueError if ``delimiter`` is not a supported value.
        '''
        chunker_type = params.get("delimiter", "sentence")
        original_text = entry.get('nif:isString', None)
        if chunker_type == "sentence":
            tokenizer = PunktSentenceTokenizer()
        elif chunker_type == "paragraph":
            tokenizer = LineTokenizer()
        else:
            # Previously this fell through and failed later with an
            # unbound ``tokenizer`` variable.
            raise ValueError(
                'Unknown delimiter: {}'.format(chunker_type))
        # span_tokenize may return a generator, which cannot be indexed;
        # materialize it so the spans can be looked up by position.
        chars = list(tokenizer.span_tokenize(original_text))
        for i, chunk in enumerate(tokenizer.tokenize(original_text)):
            e = Entry()
            e['nif:isString'] = chunk
            if entry.id:
                e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1])
            yield e

    # Sample entries and parameters with the entries expected for each.
    test_cases = [
        {
            'entry': {
                'nif:isString': 'Hello. World.'
            },
            'params': {
                'delimiter': 'sentence',
            },
            'expected': [
                {
                    'nif:isString': 'Hello.'
                },
                {
                    'nif:isString': 'World.'
                }
            ]
        },
        {
            'entry': {
                "id": ":test",
                'nif:isString': 'Hello. World.'
            },
            'params': {
                'delimiter': 'sentence',
            },
            'expected': [
                {
                    "@id": ":test#char=0,6",
                    'nif:isString': 'Hello.'
                },
                {
                    "@id": ":test#char=7,13",
                    'nif:isString': 'World.'
                }
            ]
        }
    ]
|
19
senpy/plugins/misc/split.senpy
Normal file
19
senpy/plugins/misc/split.senpy
Normal file
@ -0,0 +1,19 @@
|
||||
---
|
||||
name: split
|
||||
module: senpy.plugins.misc.split
|
||||
description: A sample plugin that chunks input text
|
||||
author: "@militarpancho"
|
||||
version: '0.2'
|
||||
url: "https://github.com/gsi-upm/senpy"
|
||||
requirements:
|
||||
- nltk
|
||||
extra_params:
|
||||
delimiter:
|
||||
aliases:
|
||||
- type
|
||||
- t
|
||||
required: false
|
||||
default: sentence
|
||||
options:
|
||||
- sentence
|
||||
- paragraph
|
@ -12,7 +12,7 @@ class Sentiment140Plugin(SentimentPlugin):
|
||||
json.dumps({
|
||||
"language": lang,
|
||||
"data": [{
|
||||
"text": entry.nif__isString
|
||||
"text": entry['nif:isString']
|
||||
}]
|
||||
}))
|
||||
p = params.get("prefix", None)
|
||||
|
@ -27,6 +27,9 @@
|
||||
"@id": "onyx:hasEmotionSet",
|
||||
"@container": "@set"
|
||||
},
|
||||
"onyx:hasEmotion": {
|
||||
"@container": "@set"
|
||||
},
|
||||
"sentiments": {
|
||||
"@id": "marl:hasOpinion",
|
||||
"@container": "@set"
|
||||
|
@ -6,7 +6,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"nif:isString": {
|
||||
"description": "String contained in this Context",
|
||||
"description": "String contained in this Context. Alternative: nif:isString",
|
||||
"type": "string"
|
||||
},
|
||||
"sentiments": {
|
||||
|
17
senpy/schemas/help.json
Normal file
17
senpy/schemas/help.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
  "$schema": "http://json-schema.org/draft-04/schema#",
  "allOf": [
    {"$ref": "response.json"},
    {
      "title": "Help",
      "description": "Help containing accepted parameters",
      "type": "object",
      "properties": {
        "parameters": {
          "type": "object"
        }
      },
      "required": ["parameters"]
    }
  ]
}
|
@ -2,7 +2,12 @@
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@type": {"type": "string"}
|
||||
"@type": {"type": "string"},
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"default": {}
|
||||
|
||||
}
|
||||
},
|
||||
"required": ["@type"]
|
||||
|
||||
|
@ -3,7 +3,7 @@ from senpy.plugins import SentimentPlugin
|
||||
|
||||
class DummyPlugin(SentimentPlugin):
|
||||
def analyse_entry(self, entry, params):
|
||||
entry.text = entry.text[::-1]
|
||||
entry['nif:iString'] = entry['nif:isString'][::-1]
|
||||
entry.reversed = entry.get('reversed', 0) + 1
|
||||
yield entry
|
||||
|
||||
|
@ -11,24 +11,24 @@ class APITest(TestCase):
|
||||
|
||||
def test_api_params(self):
|
||||
"""The API should not define any required parameters without a default"""
|
||||
parse_params({}, spec=API_PARAMS)
|
||||
parse_params({}, API_PARAMS)
|
||||
|
||||
def test_web_params(self):
|
||||
"""The WEB should not define any required parameters without a default"""
|
||||
parse_params({}, spec=WEB_PARAMS)
|
||||
parse_params({}, WEB_PARAMS)
|
||||
|
||||
def test_basic(self):
|
||||
a = {}
|
||||
try:
|
||||
parse_params(a, spec=NIF_PARAMS)
|
||||
parse_params(a, NIF_PARAMS)
|
||||
raise AssertionError()
|
||||
except Error:
|
||||
pass
|
||||
a = {'input': 'hello'}
|
||||
p = parse_params(a, spec=NIF_PARAMS)
|
||||
p = parse_params(a, NIF_PARAMS)
|
||||
assert 'input' in p
|
||||
b = {'i': 'hello'}
|
||||
p = parse_params(b, spec=NIF_PARAMS)
|
||||
p = parse_params(b, NIF_PARAMS)
|
||||
assert 'input' in p
|
||||
|
||||
def test_plugin(self):
|
||||
@ -40,18 +40,18 @@ class APITest(TestCase):
|
||||
}
|
||||
}
|
||||
try:
|
||||
parse_params(query, spec=plug_params)
|
||||
parse_params(query, plug_params)
|
||||
raise AssertionError()
|
||||
except Error:
|
||||
pass
|
||||
query['hello'] = 'world'
|
||||
p = parse_params(query, spec=plug_params)
|
||||
p = parse_params(query, plug_params)
|
||||
assert 'hello' in p
|
||||
assert p['hello'] == 'world'
|
||||
del query['hello']
|
||||
|
||||
query['hiya'] = 'dlrow'
|
||||
p = parse_params(query, spec=plug_params)
|
||||
p = parse_params(query, plug_params)
|
||||
assert 'hello' in p
|
||||
assert 'hiya' in p
|
||||
assert p['hello'] == 'dlrow'
|
||||
@ -63,6 +63,6 @@ class APITest(TestCase):
|
||||
'default': 1
|
||||
}
|
||||
}
|
||||
p = parse_params({}, spec=spec)
|
||||
p = parse_params({}, spec)
|
||||
assert 'hello' in p
|
||||
assert p['hello'] == 1
|
||||
|
@ -38,6 +38,7 @@ class BlueprintsTest(TestCase):
|
||||
"""
|
||||
Calling with no arguments should ask the user for more arguments
|
||||
"""
|
||||
self.app.debug = False
|
||||
resp = self.client.get("/api/")
|
||||
self.assertCode(resp, 400)
|
||||
js = parse_resp(resp)
|
||||
@ -54,7 +55,7 @@ class BlueprintsTest(TestCase):
|
||||
The dummy plugin returns an empty response,\
|
||||
it should contain the context
|
||||
"""
|
||||
resp = self.client.get("/api/?i=My aloha mohame")
|
||||
resp = self.client.get("/api/?i=My aloha mohame&with_parameters=True")
|
||||
self.assertCode(resp, 200)
|
||||
js = parse_resp(resp)
|
||||
logging.debug("Got response: %s", js)
|
||||
@ -77,6 +78,7 @@ class BlueprintsTest(TestCase):
|
||||
Extra params that have a required argument that does not
|
||||
have a default should raise an error.
|
||||
"""
|
||||
self.app.debug = False
|
||||
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired")
|
||||
self.assertCode(resp, 400)
|
||||
js = parse_resp(resp)
|
||||
@ -88,6 +90,7 @@ class BlueprintsTest(TestCase):
|
||||
The dummy plugin returns an empty response,\
|
||||
it should contain the context
|
||||
"""
|
||||
self.app.debug = False
|
||||
resp = self.client.get("/api/?i=My aloha mohame&algo=DOESNOTEXIST")
|
||||
self.assertCode(resp, 404)
|
||||
js = parse_resp(resp)
|
||||
@ -154,3 +157,10 @@ class BlueprintsTest(TestCase):
|
||||
self.assertCode(resp, 200)
|
||||
js = parse_resp(resp)
|
||||
assert "$schema" in js
|
||||
|
||||
def test_help(self):
|
||||
resp = self.client.get("/api/?help=true")
|
||||
self.assertCode(resp, 200)
|
||||
js = parse_resp(resp)
|
||||
assert "parameters" in js
|
||||
assert "help" in js["parameters"]
|
||||
|
@ -1,11 +1,6 @@
|
||||
import logging
|
||||
from functools import partial
|
||||
|
||||
try:
|
||||
from unittest.mock import patch
|
||||
except ImportError:
|
||||
from mock import patch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from unittest import TestCase
|
||||
@ -17,11 +12,7 @@ class CLITest(TestCase):
|
||||
def test_basic(self):
|
||||
self.assertRaises(Error, partial(main_function, []))
|
||||
|
||||
with patch('senpy.extensions.Senpy.analyse') as patched:
|
||||
main_function(['--input', 'test'])
|
||||
|
||||
patched.assert_called_with(input='test')
|
||||
with patch('senpy.extensions.Senpy.analyse') as patched:
|
||||
main_function(['--input', 'test', '--algo', 'rand'])
|
||||
|
||||
patched.assert_called_with(input='test', algo='rand')
|
||||
res = main_function(['--input', 'test', '--algo', 'rand', '--with-parameters'])
|
||||
assert res.parameters['input'] == 'test'
|
||||
assert 'rand' in res.parameters['algorithm']
|
||||
assert res.parameters['input'] == 'test'
|
||||
|
@ -12,10 +12,16 @@ from functools import partial
|
||||
from senpy.extensions import Senpy
|
||||
from senpy import plugins
|
||||
from senpy.models import Error, Results, Entry, EmotionSet, Emotion, Plugin
|
||||
from senpy import api
|
||||
from flask import Flask
|
||||
from unittest import TestCase
|
||||
|
||||
|
||||
def analyse(instance, **kwargs):
|
||||
request = api.parse_call(kwargs)
|
||||
return instance.analyse(request)
|
||||
|
||||
|
||||
class ExtensionsTest(TestCase):
|
||||
def setUp(self):
|
||||
self.app = Flask('test_extensions')
|
||||
@ -91,10 +97,11 @@ class ExtensionsTest(TestCase):
|
||||
def test_noplugin(self):
|
||||
""" Don't analyse if there isn't any plugin installed """
|
||||
self.senpy.deactivate_all(sync=True)
|
||||
self.assertRaises(Error, partial(self.senpy.analyse, input="tupni"))
|
||||
self.assertRaises(Error, partial(analyse, self.senpy, input="tupni"))
|
||||
self.assertRaises(Error,
|
||||
partial(
|
||||
self.senpy.analyse,
|
||||
analyse,
|
||||
self.senpy,
|
||||
input="tupni",
|
||||
algorithm='Dummy'))
|
||||
|
||||
@ -102,12 +109,11 @@ class ExtensionsTest(TestCase):
|
||||
""" Using a plugin """
|
||||
# I was using mock until plugin started inheriting
|
||||
# Leaf (defaultdict with __setattr__ and __getattr__.
|
||||
r1 = self.senpy.analyse(
|
||||
algorithm="Dummy", input="tupni", output="tuptuo")
|
||||
r2 = self.senpy.analyse(input="tupni", output="tuptuo")
|
||||
r1 = analyse(self.senpy, algorithm="Dummy", input="tupni", output="tuptuo")
|
||||
r2 = analyse(self.senpy, input="tupni", output="tuptuo")
|
||||
assert r1.analysis[0] == "plugins/Dummy_0.1"
|
||||
assert r2.analysis[0] == "plugins/Dummy_0.1"
|
||||
assert r1.entries[0].text == 'input'
|
||||
assert r1.entries[0]['nif:iString'] == 'input'
|
||||
|
||||
def test_analyse_jsonld(self):
|
||||
""" Using a plugin with JSON-LD input"""
|
||||
@ -116,30 +122,33 @@ class ExtensionsTest(TestCase):
|
||||
"@type": "results",
|
||||
"entries": [
|
||||
{"@id": "entry1",
|
||||
"text": "tupni",
|
||||
"nif:isString": "tupni",
|
||||
"@type": "entry"
|
||||
}
|
||||
]
|
||||
}'''
|
||||
r1 = self.senpy.analyse(algorithm="Dummy",
|
||||
r1 = analyse(self.senpy,
|
||||
algorithm="Dummy",
|
||||
input=js_input,
|
||||
informat="json-ld",
|
||||
output="tuptuo")
|
||||
r2 = self.senpy.analyse(input="tupni", output="tuptuo")
|
||||
r2 = analyse(self.senpy,
|
||||
input="tupni",
|
||||
output="tuptuo")
|
||||
assert r1.analysis[0] == "plugins/Dummy_0.1"
|
||||
assert r2.analysis[0] == "plugins/Dummy_0.1"
|
||||
assert r1.entries[0].text == 'input'
|
||||
assert r1.entries[0]['nif:iString'] == 'input'
|
||||
|
||||
def test_analyse_error(self):
|
||||
mm = mock.MagicMock()
|
||||
mm.id = 'magic_mock'
|
||||
mm.analyse_entries.side_effect = Error('error on analysis', status=500)
|
||||
mm.analyse_entries.side_effect = Error('error in analysis', status=500)
|
||||
self.senpy.plugins['MOCK'] = mm
|
||||
try:
|
||||
self.senpy.analyse(input='nothing', algorithm='MOCK')
|
||||
analyse(self.senpy, input='nothing', algorithm='MOCK')
|
||||
assert False
|
||||
except Error as ex:
|
||||
assert ex['message'] == 'error on analysis'
|
||||
assert 'error in analysis' in ex['message']
|
||||
assert ex['status'] == 500
|
||||
|
||||
mm.analyse.side_effect = Exception('generic exception on analysis')
|
||||
@ -147,10 +156,10 @@ class ExtensionsTest(TestCase):
|
||||
'generic exception on analysis')
|
||||
|
||||
try:
|
||||
self.senpy.analyse(input='nothing', algorithm='MOCK')
|
||||
analyse(self.senpy, input='nothing', algorithm='MOCK')
|
||||
assert False
|
||||
except Error as ex:
|
||||
assert ex['message'] == 'generic exception on analysis'
|
||||
assert 'generic exception on analysis' in ex['message']
|
||||
assert ex['status'] == 500
|
||||
|
||||
def test_filtering(self):
|
||||
@ -180,40 +189,27 @@ class ExtensionsTest(TestCase):
|
||||
'emoml:valence': 0
|
||||
}))
|
||||
response = Results({
|
||||
'analysis': [{'plugin': plugin}],
|
||||
'entries': [Entry({
|
||||
'text': 'much ado about nothing',
|
||||
'nif:iString': 'much ado about nothing',
|
||||
'emotions': [eSet1]
|
||||
})]
|
||||
})
|
||||
params = {'emotionModel': 'emoml:big6',
|
||||
'conversion': 'full'}
|
||||
r1 = deepcopy(response)
|
||||
self.senpy.convert_emotions(r1,
|
||||
[plugin, ],
|
||||
params)
|
||||
r1.parameters = params
|
||||
self.senpy.convert_emotions(r1)
|
||||
assert len(r1.entries[0].emotions) == 2
|
||||
params['conversion'] = 'nested'
|
||||
r2 = deepcopy(response)
|
||||
self.senpy.convert_emotions(r2,
|
||||
[plugin, ],
|
||||
params)
|
||||
r2.parameters = params
|
||||
self.senpy.convert_emotions(r2)
|
||||
assert len(r2.entries[0].emotions) == 1
|
||||
assert r2.entries[0].emotions[0]['prov:wasDerivedFrom'] == eSet1
|
||||
params['conversion'] = 'filtered'
|
||||
r3 = deepcopy(response)
|
||||
self.senpy.convert_emotions(r3,
|
||||
[plugin, ],
|
||||
params)
|
||||
r3.parameters = params
|
||||
self.senpy.convert_emotions(r3)
|
||||
assert len(r3.entries[0].emotions) == 1
|
||||
r3.jsonld()
|
||||
|
||||
# def test_async_plugin(self):
|
||||
# """ We should accept multiprocessing plugins with async=False"""
|
||||
# thread1 = self.senpy.activate_plugin("Async", sync=False)
|
||||
# thread1.join(timeout=1)
|
||||
# assert len(self.senpy.plugins['Async'].value) == 4
|
||||
|
||||
# resp = self.senpy.analyse(input='nothing', algorithm='Async')
|
||||
|
||||
# assert len(resp.entries[0].async_values) == 2
|
||||
# self.senpy.activate_plugin("Async", sync=True)
|
||||
|
@ -185,7 +185,7 @@ class ModelsTest(TestCase):
|
||||
'entries': [{
|
||||
'@id': 'entry1',
|
||||
'@type': 'entry',
|
||||
'text': 'TEST'
|
||||
'nif:isString': 'TEST'
|
||||
}]
|
||||
}
|
||||
recovered = from_dict(results)
|
||||
|
Loading…
Reference in New Issue
Block a user