1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-22 08:12:27 +00:00

WIP simpler pipeline

This commit is contained in:
J. Fernando Sánchez 2017-06-21 19:58:18 +02:00
parent fca0ac00c4
commit a243f68bfc
19 changed files with 369 additions and 227 deletions

View File

@ -1,66 +1,76 @@
from future.utils import iteritems from future.utils import iteritems
from .models import Error from .models import Error, Results, Entry, from_string
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
API_PARAMS = { API_PARAMS = {
"algorithm": { "algorithm": {
"aliases": ["algorithm", "a", "algo"], "aliases": ["algorithms", "a", "algo"],
"required": False, "required": False,
}, "description": ("Algorithms that will be used to process the request."
"outformat": { "It may be a list of comma-separated names."),
"@id": "outformat",
"aliases": ["outformat", "o"],
"default": "json-ld",
"required": True,
"options": ["json-ld", "turtle"],
}, },
"expanded-jsonld": { "expanded-jsonld": {
"@id": "expanded-jsonld", "@id": "expanded-jsonld",
"aliases": ["expanded", "expanded-jsonld"], "aliases": ["expanded"],
"required": True, "required": True,
"default": 0 "default": 0
}, },
"emotionModel": { "with_parameters": {
"@id": "emotionModel", "aliases": ['withparameters',
"aliases": ["emotionModel", "emoModel"], 'with-parameters'],
"required": False "options": "boolean",
"default": False,
"required": True
}, },
"plugin_type": { "plugin_type": {
"@id": "pluginType", "@id": "pluginType",
"description": 'What kind of plugins to list', "description": 'What kind of plugins to list',
"aliases": ["pluginType", "plugin_type"], "aliases": ["pluginType"],
"required": True, "required": True,
"default": "analysisPlugin" "default": "analysisPlugin"
}, },
"outformat": {
"@id": "outformat",
"aliases": ["o"],
"default": "json-ld",
"required": True,
"options": ["json-ld", "turtle"],
},
"help": {
"@id": "help",
"description": "Show additional help to know more about the possible parameters",
"aliases": ["h"],
"required": True,
"options": "boolean",
"default": False
},
"emotionModel": {
"@id": "emotionModel",
"aliases": ["emoModel"],
"required": False
},
"conversion": { "conversion": {
"@id": "conversion", "@id": "conversion",
"description": "How to show the elements that have (not) been converted", "description": "How to show the elements that have (not) been converted",
"required": True, "required": True,
"options": ["filtered", "nested", "full"], "options": ["filtered", "nested", "full"],
"default": "full" "default": "full"
},
"help": {
"@id": "help",
"description": "Show additional help to know more about the possible parameters",
"aliases": ["help", "h"],
"required": True,
"options": ["True", "False"],
"default": "False"
} }
} }
WEB_PARAMS = { WEB_PARAMS = {
"inHeaders": { "inHeaders": {
"aliases": ["inHeaders", "headers"], "aliases": ["headers"],
"required": True, "required": True,
"default": "0" "default": False,
"options": "boolean"
}, },
} }
CLI_PARAMS = { CLI_PARAMS = {
"plugin_folder": { "plugin_folder": {
"aliases": ["plugin_folder", "folder"], "aliases": ["folder"],
"required": True, "required": True,
"default": "." "default": "."
}, },
@ -69,64 +79,71 @@ CLI_PARAMS = {
NIF_PARAMS = { NIF_PARAMS = {
"input": { "input": {
"@id": "input", "@id": "input",
"aliases": ["i", "input"], "aliases": ["i"],
"required": True, "required": True,
"help": "Input text" "help": "Input text"
}, },
"informat": {
"@id": "informat",
"aliases": ["f", "informat"],
"required": False,
"default": "text",
"options": ["turtle", "text", "json-ld"],
},
"intype": { "intype": {
"@id": "intype", "@id": "intype",
"aliases": ["intype", "t"], "aliases": ["t"],
"required": False, "required": False,
"default": "direct", "default": "direct",
"options": ["direct", "url", "file"], "options": ["direct", "url", "file"],
}, },
"informat": {
"@id": "informat",
"aliases": ["f"],
"required": False,
"default": "text",
"options": ["turtle", "text", "json-ld"],
},
"language": { "language": {
"@id": "language", "@id": "language",
"aliases": ["language", "l"], "aliases": ["l"],
"required": False, "required": False,
}, },
"prefix": { "prefix": {
"@id": "prefix", "@id": "prefix",
"aliases": ["prefix", "p"], "aliases": ["p"],
"required": True, "required": True,
"default": "", "default": "",
}, },
"urischeme": { "urischeme": {
"@id": "urischeme", "@id": "urischeme",
"aliases": ["urischeme", "u"], "aliases": ["u"],
"required": False, "required": False,
"default": "RFC5147String", "default": "RFC5147String",
"options": "RFC5147String" "options": "RFC5147String"
}, }
} }
def parse_params(indict, spec=NIF_PARAMS): def parse_params(indict, *specs):
logger.debug("Parsing: {}\n{}".format(indict, spec)) if not specs:
specs = [NIF_PARAMS]
logger.debug("Parsing: {}\n{}".format(indict, specs))
outdict = indict.copy() outdict = indict.copy()
wrong_params = {} wrong_params = {}
for param, options in iteritems(spec): for spec in specs:
if param[0] != "@": # Exclude json-ld properties for param, options in iteritems(spec):
for alias in options.get("aliases", []): if param[0] != "@": # Exclude json-ld properties
if alias in indict: for alias in options.get("aliases", []):
outdict[param] = indict[alias] # Replace each alias with the correct name of the parameter
if param not in outdict: if alias in indict and alias is not param:
if options.get("required", False) and "default" not in options: outdict[param] = indict[alias]
wrong_params[param] = spec[param] del indict[alias]
else: continue
if "default" in options: if param not in outdict:
outdict[param] = options["default"] if options.get("required", False) and "default" not in options:
else: wrong_params[param] = spec[param]
if "options" in spec[param] and \ else:
outdict[param] not in spec[param]["options"]: if "default" in options:
wrong_params[param] = spec[param] outdict[param] = options["default"]
elif "options" in spec[param]:
if spec[param]["options"] == "boolean":
outdict[param] = outdict[param] in [None, True, 'true', '1']
elif outdict[param] not in spec[param]["options"]:
wrong_params[param] = spec[param]
if wrong_params: if wrong_params:
logger.debug("Error parsing: %s", wrong_params) logger.debug("Error parsing: %s", wrong_params)
message = Error( message = Error(
@ -136,4 +153,30 @@ def parse_params(indict, spec=NIF_PARAMS):
errors={param: error errors={param: error
for param, error in iteritems(wrong_params)}) for param, error in iteritems(wrong_params)})
raise message raise message
if 'algorithm' in outdict and isinstance(outdict['algorithm'], str):
outdict['algorithm'] = outdict['algorithm'].split(',')
return outdict return outdict
def get_extra_params(request, plugin=None):
    """Return the request parameters, extended with plugin-specific ones.

    A copy of ``request.parameters`` is used, so the request itself is
    left untouched.  When ``plugin`` is given, its ``extra_params`` spec
    (an empty spec if the plugin declares none) is run through
    ``parse_params`` and the result is merged into the copy.

    :param request: object exposing a dict-like ``parameters`` attribute.
    :param plugin: optional dict-like plugin description.
    :return: a new dict of parameters.
    """
    params = request.parameters.copy()
    if plugin:
        # Resolve aliases/defaults declared by the plugin itself.
        extra_params = parse_params(params, plugin.get('extra_params', {}))
        params.update(extra_params)
    return params
def parse_call(params):
    '''Return a results object based on the parameters used in a call/request.

    The parameters are first normalised with ``parse_params`` against
    ``NIF_PARAMS``.  Plain-text input becomes a ``Results`` holding a single
    ``Entry``; JSON-LD input is deserialised directly into ``Results``.
    The parsed parameters are attached to the returned object.

    Raises ``NotImplementedError`` for any other ``informat`` value.
    '''
    params = parse_params(params, NIF_PARAMS)
    informat = params['informat']
    if informat == 'text':
        results = Results()
        entry = Entry(nif__isString=params['input'])
        results.entries.append(entry)
    elif informat == 'json-ld':
        results = from_string(params['input'], cls=Results)
    else:
        # ``NotImplemented`` is a non-callable constant; the exception
        # class is ``NotImplementedError``.
        raise NotImplementedError(
            'Informat {} is not implemented'.format(informat))
    results.parameters = params
    return results

View File

@ -19,8 +19,8 @@ Blueprints for Senpy
""" """
from flask import (Blueprint, request, current_app, render_template, url_for, from flask import (Blueprint, request, current_app, render_template, url_for,
jsonify) jsonify)
from .models import Error, Response, Plugins, read_schema from .models import Error, Response, Help, Plugins, read_schema
from .api import WEB_PARAMS, API_PARAMS, CLI_PARAMS, NIF_PARAMS, parse_params from . import api
from .version import __version__ from .version import __version__
from functools import wraps from functools import wraps
@ -43,6 +43,7 @@ def get_params(req):
raise Error(message="Invalid data") raise Error(message="Invalid data")
return indict return indict
@demo_blueprint.route('/') @demo_blueprint.route('/')
def index(): def index():
return render_template("index.html", version=__version__) return render_template("index.html", version=__version__)
@ -75,20 +76,16 @@ def basic_api(f):
def decorated_function(*args, **kwargs): def decorated_function(*args, **kwargs):
raw_params = get_params(request) raw_params = get_params(request)
headers = {'X-ORIGINAL-PARAMS': json.dumps(raw_params)} headers = {'X-ORIGINAL-PARAMS': json.dumps(raw_params)}
# Get defaults
web_params = parse_params({}, spec=WEB_PARAMS)
api_params = parse_params({}, spec=API_PARAMS)
outformat = 'json-ld' outformat = 'json-ld'
try: try:
print('Getting request:') print('Getting request:')
print(request) print(request)
web_params = parse_params(raw_params, spec=WEB_PARAMS) params = api.parse_params(raw_params, api.WEB_PARAMS, api.API_PARAMS)
api_params = parse_params(raw_params, spec=API_PARAMS) if hasattr(request, 'parameters'):
if hasattr(request, 'params'): request.parameters.update(params)
request.params.update(api_params)
else: else:
request.params = api_params request.parameters = params
response = f(*args, **kwargs) response = f(*args, **kwargs)
except Error as ex: except Error as ex:
response = ex response = ex
@ -96,14 +93,14 @@ def basic_api(f):
if current_app.debug: if current_app.debug:
raise raise
in_headers = web_params['inHeaders'] != "0" in_headers = params['inHeaders']
expanded = api_params['expanded-jsonld'] expanded = params['expanded-jsonld']
outformat = api_params['outformat'] outformat = params['outformat']
return response.flask( return response.flask(
in_headers=in_headers, in_headers=in_headers,
headers=headers, headers=headers,
prefix=url_for('.api', _external=True), prefix=url_for('.api_root', _external=True),
context_uri=url_for('api.context', context_uri=url_for('api.context',
entity=type(response).__name__, entity=type(response).__name__,
_external=True), _external=True),
@ -115,14 +112,14 @@ def basic_api(f):
@api_blueprint.route('/', methods=['POST', 'GET']) @api_blueprint.route('/', methods=['POST', 'GET'])
@basic_api @basic_api
def api(): def api_root():
phelp = request.params.get('help') if request.parameters['help']:
if phelp == "True": dic = dict(api.API_PARAMS, **api.NIF_PARAMS)
dic = dict(API_PARAMS, **NIF_PARAMS) response = Help(parameters=dic)
response = Response(dic)
return response return response
else: else:
response = current_app.senpy.analyse(**request.params) req = api.parse_call(request.parameters)
response = current_app.senpy.analyse(req)
return response return response
@ -130,7 +127,7 @@ def api():
@basic_api @basic_api
def plugins(): def plugins():
sp = current_app.senpy sp = current_app.senpy
ptype = request.params.get('plugin_type') ptype = request.parameters.get('plugin_type')
plugins = sp.filter_plugins(plugin_type=ptype) plugins = sp.filter_plugins(plugin_type=ptype)
dic = Plugins(plugins=list(plugins.values())) dic = Plugins(plugins=list(plugins.values()))
return dic return dic

View File

@ -1,7 +1,7 @@
import sys import sys
from .models import Error from .models import Error
from .api import parse_params, CLI_PARAMS
from .extensions import Senpy from .extensions import Senpy
from . import api
def argv_to_dict(argv): def argv_to_dict(argv):
@ -13,27 +13,25 @@ def argv_to_dict(argv):
if argv[i][0] == '-': if argv[i][0] == '-':
key = argv[i].strip('-') key = argv[i].strip('-')
value = argv[i + 1] if len(argv) > i + 1 else None value = argv[i + 1] if len(argv) > i + 1 else None
if value and value[0] == '-': if not value or value[0] == '-':
cli_dict[key] = "" cli_dict[key] = True
else: else:
cli_dict[key] = value cli_dict[key] = value
return cli_dict return cli_dict
def parse_cli(argv):
cli_dict = argv_to_dict(argv)
cli_params = parse_params(cli_dict, spec=CLI_PARAMS)
return cli_params, cli_dict
def main_function(argv): def main_function(argv):
'''This is the method for unit testing '''This is the method for unit testing
''' '''
cli_params, cli_dict = parse_cli(argv) params = api.parse_params(argv_to_dict(argv),
plugin_folder = cli_params['plugin_folder'] api.CLI_PARAMS,
api.API_PARAMS,
api.NIF_PARAMS)
plugin_folder = params['plugin_folder']
sp = Senpy(default_plugins=False, plugin_folder=plugin_folder) sp = Senpy(default_plugins=False, plugin_folder=plugin_folder)
sp.activate_all(sync=True) sp.activate_all(sync=True)
res = sp.analyse(**cli_dict) request = api.parse_call(params)
res = sp.analyse(request)
return res return res

View File

@ -5,11 +5,10 @@ It orchestrates plugin (de)activation and analysis.
from future import standard_library from future import standard_library
standard_library.install_aliases() standard_library.install_aliases()
from . import plugins from . import plugins, api
from .plugins import SenpyPlugin from .plugins import SenpyPlugin
from .models import Error, Entry, Results, from_string from .models import Error
from .blueprints import api_blueprint, demo_blueprint, ns_blueprint from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
from .api import API_PARAMS, NIF_PARAMS, parse_params
from threading import Thread from threading import Thread
@ -72,22 +71,20 @@ class Senpy(object):
else: else:
logger.debug("Not a folder: %s", folder) logger.debug("Not a folder: %s", folder)
def _find_plugins(self, params): def _get_plugins(self, request):
if not self.analysis_plugins: if not self.analysis_plugins:
raise Error( raise Error(
status=404, status=404,
message=("No plugins found." message=("No plugins found."
" Please install one.")) " Please install one."))
api_params = parse_params(params, spec=API_PARAMS) algos = request.parameters.get('algorithm', None)
algos = None if not algos:
if "algorithm" in api_params and api_params["algorithm"]: if self.default_plugin:
algos = api_params["algorithm"].split(',') algos = [self.default_plugin.name, ]
elif self.default_plugin: else:
algos = [self.default_plugin.name, ] raise Error(
else: status=404,
raise Error( message="No default plugin found, and None provided")
status=404,
message="No default plugin found, and None provided")
plugins = list() plugins = list()
for algo in algos: for algo in algos:
@ -108,66 +105,46 @@ class Senpy(object):
plugins.append(self.plugins[algo]) plugins.append(self.plugins[algo])
return plugins return plugins
def _get_params(self, params, plugin=None): def _process_entries(self, entries, req, plugins):
nif_params = parse_params(params, spec=NIF_PARAMS)
if plugin:
extra_params = plugin.get('extra_params', {})
specific_params = parse_params(params, spec=extra_params)
nif_params.update(specific_params)
return nif_params
def _get_entries(self, params):
if params['informat'] == 'text':
results = Results()
entry = Entry(text=params['input'])
results.entries.append(entry)
elif params['informat'] == 'json-ld':
results = from_string(params['input'], cls=Results)
else:
raise NotImplemented('Informat {} is not implemented'.format(params['informat']))
return results
def _process_entries(self, entries, plugins, nif_params):
if not plugins: if not plugins:
for i in entries: for i in entries:
yield i yield i
return return
plugin = plugins[0] plugin = plugins[0]
specific_params = self._get_params(nif_params, plugin) specific_params = api.get_extra_params(req, plugin)
req.analysis.append({'plugin': plugin,
'parameters': specific_params})
results = plugin.analyse_entries(entries, specific_params) results = plugin.analyse_entries(entries, specific_params)
for i in self._process_entries(results, plugins[1:], nif_params): for i in self._process_entries(results, req, plugins[1:]):
yield i yield i
def _process_response(self, resp, plugins, nif_params): def analyse(self, request):
entries = resp.entries
resp.entries = []
for plug in plugins:
resp.analysis.append(plug.id)
for i in self._process_entries(entries, plugins, nif_params):
resp.entries.append(i)
return resp
def analyse(self, **api_params):
""" """
Main method that analyses a request, either from CLI or HTTP. Main method that analyses a request, either from CLI or HTTP.
It uses a dictionary of parameters, provided by the user. It takes a processed request, provided by the user, as returned
by api.parse_call().
""" """
logger.debug("analysing with params: {}".format(api_params)) logger.debug("analysing request: {}".format(request))
plugins = self._find_plugins(api_params)
nif_params = self._get_params(api_params)
resp = self._get_entries(nif_params)
if 'with_parameters' in api_params:
resp.parameters = nif_params
try: try:
resp = self._process_response(resp, plugins, nif_params) entries = request.entries
self.convert_emotions(resp, plugins, nif_params) request.entries = []
logger.debug("Returning analysis result: {}".format(resp)) plugins = self._get_plugins(request)
results = request
for i in self._process_entries(entries, results, plugins):
results.entries.append(i)
self.convert_emotions(results)
if 'with_parameters' not in results.parameters:
del results.parameters
logger.debug("Returning analysis result: {}".format(results))
except (Error, Exception) as ex: except (Error, Exception) as ex:
if not isinstance(ex, Error): if not isinstance(ex, Error):
ex = Error(message=str(ex), status=500) msg = "Error during analysis: {} \n\t{}".format(ex,
traceback.format_exc())
ex = Error(message=msg, status=500)
logger.exception('Error returning analysis result') logger.exception('Error returning analysis result')
raise ex raise ex
return resp results.analysis = [i['plugin'].id for i in results.analysis]
return results
def _conversion_candidates(self, fromModel, toModel): def _conversion_candidates(self, fromModel, toModel):
candidates = self.filter_plugins(plugin_type='emotionConversionPlugin') candidates = self.filter_plugins(plugin_type='emotionConversionPlugin')
@ -180,7 +157,7 @@ class Senpy(object):
# logging.debug('Found candidate: {}'.format(candidate)) # logging.debug('Found candidate: {}'.format(candidate))
yield candidate yield candidate
def convert_emotions(self, resp, plugins, params): def convert_emotions(self, resp):
""" """
Conversion of all emotions in a response **in place**. Conversion of all emotions in a response **in place**.
In addition to converting from one model to another, it has In addition to converting from one model to another, it has
@ -188,6 +165,8 @@ class Senpy(object):
Needless to say, this is far from an elegant solution, but it works. Needless to say, this is far from an elegant solution, but it works.
@todo refactor and clean up @todo refactor and clean up
""" """
plugins = [i['plugin'] for i in resp.analysis]
params = resp.parameters
toModel = params.get('emotionModel', None) toModel = params.get('emotionModel', None)
if not toModel: if not toModel:
return return
@ -215,7 +194,8 @@ class Senpy(object):
for j in i.emotions: for j in i.emotions:
plugname = j['prov:wasGeneratedBy'] plugname = j['prov:wasGeneratedBy']
candidate = candidates[plugname] candidate = candidates[plugname]
resp.analysis.append(candidate.id) resp.analysis.append({'plugin': candidate,
'parameters': params})
for k in candidate.convert(j, fromModel, toModel, params): for k in candidate.convert(j, fromModel, toModel, params):
k.prov__wasGeneratedBy = candidate.id k.prov__wasGeneratedBy = candidate.id
if output == 'nested': if output == 'nested':
@ -224,7 +204,6 @@ class Senpy(object):
i.emotions = newemotions i.emotions = newemotions
newentries.append(i) newentries.append(i)
resp.entries = newentries resp.entries = newentries
resp.analysis = list(set(resp.analysis))
@property @property
def default_plugin(self): def default_plugin(self):

View File

@ -181,7 +181,7 @@ class SenpyMixin(object):
obj = self obj = self
if hasattr(obj, "jsonld"): if hasattr(obj, "jsonld"):
obj = obj.jsonld() obj = obj.jsonld()
jsonschema.validate(obj, self.schema) self._validator.validate(obj)
def __str__(self): def __str__(self):
return str(self.serialize()) return str(self.serialize())
@ -246,13 +246,13 @@ class BaseModel(SenpyMixin, dict):
return d return d
_subtypes = {}
def register(rsubclass, rtype=None): def register(rsubclass, rtype=None):
_subtypes[rtype or rsubclass.__name__] = rsubclass _subtypes[rtype or rsubclass.__name__] = rsubclass
_subtypes = {}
def from_dict(indict, cls=None): def from_dict(indict, cls=None):
if not cls: if not cls:
target = indict.get('@type', None) target = indict.get('@type', None)
@ -286,15 +286,31 @@ def from_json(injson):
return from_dict(indict) return from_dict(indict)
def from_schema(name, schema=None, schema_file=None, base_classes=None):
    """Build and register a model class backed by a JSON schema.

    :param name: value stored under ``@type``; also used to derive the class
        name (first letter upper-cased) and the default schema file name.
    :param schema: already-loaded schema dict.  When omitted, the schema is
        read from ``schema_file``.
    :param schema_file: schema file name.  A name without ``/`` is looked up
        in the ``schemas`` directory next to this module.
    :param base_classes: extra base classes; ``BaseModel`` is always appended.
    :return: the newly created (and registered) class.
    """
    # Copy so that the caller's list is not modified.
    bases = list(base_classes or [])
    bases.append(BaseModel)
    schema_file = schema_file or '{}.json'.format(name)
    class_name = '{}{}'.format(name[0].upper(), name[1:])
    # Test the variable, not the literal string 'schema_file'.
    if '/' not in schema_file:
        schema_file = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   'schemas',
                                   schema_file)

    schema_path = 'file://' + schema_file

    if schema is None:
        with open(schema_file) as f:
            schema = json.load(f)

    # The resolver lets relative ``$ref`` entries resolve against the
    # location of the schema file.
    resolver = jsonschema.RefResolver(schema_path, schema)
    dct = {
        '@type': name,
        '_schema_file': schema_file,
        'schema': schema,
        '_validator': jsonschema.Draft4Validator(schema, resolver=resolver),
    }

    newclass = type(class_name, tuple(bases), dct)

    register(newclass, name)
    return newclass
@ -315,6 +331,7 @@ for i in [
'emotionPlugin', 'emotionPlugin',
'emotionSet', 'emotionSet',
'entry', 'entry',
'help',
'plugin', 'plugin',
'plugins', 'plugins',
'response', 'response',
@ -334,6 +351,9 @@ class Error(SenpyMixin, Exception):
self._error = _ErrorModel(message=message, *args, **kwargs) self._error = _ErrorModel(message=message, *args, **kwargs)
self.message = message self.message = message
def validate(self, obj=None):
    """Validate the wrapped error model.

    ``obj`` is ignored: validation always runs on ``self._error``.
    """
    self._error.validate()
def __getitem__(self, key): def __getitem__(self, key):
return self._error[key] return self._error[key]

View File

@ -76,7 +76,7 @@ class AnalysisPlugin(Plugin):
Note that this method may yield an annotated entry or a list of Note that this method may yield an annotated entry or a list of
entries (e.g. in a tokenizer) entries (e.g. in a tokenizer)
""" """
text = entry['text'] text = entry['nif:isString']
params = copy.copy(parameters) params = copy.copy(parameters)
params['input'] = text params['input'] = text
results = self.analyse(**params) results = self.analyse(**params)

View File

@ -0,0 +1,64 @@
from senpy.plugins import AnalysisPlugin
from senpy.models import Entry
from nltk.tokenize.punkt import PunktSentenceTokenizer
from nltk.tokenize.simple import LineTokenizer
import nltk
class SplitPlugin(AnalysisPlugin):
    """Analysis plugin that splits the text of an entry into chunks."""

    def activate(self):
        # Fetch the NLTK 'punkt' data package on activation.
        nltk.download('punkt')

    def analyse_entry(self, entry, params):
        """Yield one new ``Entry`` per chunk of ``entry['nif:isString']``.

        ``params['delimiter']`` selects the tokenizer: ``"sentence"``
        (default) or ``"paragraph"``.  When the source entry has an id, each
        chunk gets that id plus a ``#char=start,end`` suffix built from the
        tokenizer's spans.

        Raises ``ValueError`` for any other delimiter.
        """
        chunker_type = params.get("delimiter", "sentence")
        original_text = entry.get('nif:isString', None)
        if chunker_type == "sentence":
            tokenizer = PunktSentenceTokenizer()
        elif chunker_type == "paragraph":
            tokenizer = LineTokenizer()
        else:
            # Previously ``tokenizer`` was simply left unbound here.
            raise ValueError(
                'Unknown delimiter: {}'.format(chunker_type))
        # span_tokenize may return a lazy iterator; materialise it so the
        # spans can be indexed below.
        chars = list(tokenizer.span_tokenize(original_text))
        for i, chunk in enumerate(tokenizer.tokenize(original_text)):
            e = Entry()
            e['nif:isString'] = chunk
            if entry.id:
                e.id = entry.id + "#char={},{}".format(chars[i][0], chars[i][1])
            yield e
test_cases = [
{
'entry': {
'nif:isString': 'Hello. World.'
},
'params': {
'delimiter': 'sentence',
},
'expected': [
{
'nif:isString': 'Hello.'
},
{
'nif:isString': 'World.'
}
]
},
{
'entry': {
"id": ":test",
'nif:isString': 'Hello. World.'
},
'params': {
'delimiter': 'sentence',
},
'expected': [
{
"@id": ":test#char=0,6",
'nif:isString': 'Hello.'
},
{
"@id": ":test#char=7,13",
'nif:isString': 'World.'
}
]
}
]

View File

@ -0,0 +1,19 @@
---
name: split
module: senpy.plugins.misc.split
description: A sample plugin that chunks input text
author: "@militarpancho"
version: '0.2'
url: "https://github.com/gsi-upm/senpy"
requirements:
- nltk
extra_params:
delimiter:
aliases:
- type
- t
required: false
default: sentence
options:
- sentence
- paragraph

View File

@ -12,7 +12,7 @@ class Sentiment140Plugin(SentimentPlugin):
json.dumps({ json.dumps({
"language": lang, "language": lang,
"data": [{ "data": [{
"text": entry.nif__isString "text": entry['nif:isString']
}] }]
})) }))
p = params.get("prefix", None) p = params.get("prefix", None)

View File

@ -27,6 +27,9 @@
"@id": "onyx:hasEmotionSet", "@id": "onyx:hasEmotionSet",
"@container": "@set" "@container": "@set"
}, },
"onyx:hasEmotion": {
"@container": "@set"
},
"sentiments": { "sentiments": {
"@id": "marl:hasOpinion", "@id": "marl:hasOpinion",
"@container": "@set" "@container": "@set"

View File

@ -6,7 +6,7 @@
"type": "string" "type": "string"
}, },
"nif:isString": { "nif:isString": {
"description": "String contained in this Context", "description": "String contained in this Context. Alternative: nif:isString",
"type": "string" "type": "string"
}, },
"sentiments": { "sentiments": {

17
senpy/schemas/help.json Normal file
View File

@ -0,0 +1,17 @@
{
  "$schema": "http://json-schema.org/draft-04/schema#",
  "allOf": [
    {"$ref": "response.json"},
    {
      "title": "Help",
      "description": "Help containing accepted parameters",
      "type": "object",
      "properties": {
        "parameters": {
          "type": "object"
        }
      },
      "required": ["parameters"]
    }
  ]
}

View File

@ -2,7 +2,12 @@
"$schema": "http://json-schema.org/draft-04/schema#", "$schema": "http://json-schema.org/draft-04/schema#",
"type": "object", "type": "object",
"properties": { "properties": {
"@type": {"type": "string"} "@type": {"type": "string"},
"parameters": {
"type": "object",
"default": {}
}
}, },
"required": ["@type"] "required": ["@type"]

View File

@ -3,7 +3,7 @@ from senpy.plugins import SentimentPlugin
class DummyPlugin(SentimentPlugin): class DummyPlugin(SentimentPlugin):
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
entry.text = entry.text[::-1] entry['nif:iString'] = entry['nif:isString'][::-1]
entry.reversed = entry.get('reversed', 0) + 1 entry.reversed = entry.get('reversed', 0) + 1
yield entry yield entry

View File

@ -11,24 +11,24 @@ class APITest(TestCase):
def test_api_params(self): def test_api_params(self):
"""The API should not define any required parameters without a default""" """The API should not define any required parameters without a default"""
parse_params({}, spec=API_PARAMS) parse_params({}, API_PARAMS)
def test_web_params(self): def test_web_params(self):
"""The WEB should not define any required parameters without a default""" """The WEB should not define any required parameters without a default"""
parse_params({}, spec=WEB_PARAMS) parse_params({}, WEB_PARAMS)
def test_basic(self): def test_basic(self):
a = {} a = {}
try: try:
parse_params(a, spec=NIF_PARAMS) parse_params(a, NIF_PARAMS)
raise AssertionError() raise AssertionError()
except Error: except Error:
pass pass
a = {'input': 'hello'} a = {'input': 'hello'}
p = parse_params(a, spec=NIF_PARAMS) p = parse_params(a, NIF_PARAMS)
assert 'input' in p assert 'input' in p
b = {'i': 'hello'} b = {'i': 'hello'}
p = parse_params(b, spec=NIF_PARAMS) p = parse_params(b, NIF_PARAMS)
assert 'input' in p assert 'input' in p
def test_plugin(self): def test_plugin(self):
@ -40,18 +40,18 @@ class APITest(TestCase):
} }
} }
try: try:
parse_params(query, spec=plug_params) parse_params(query, plug_params)
raise AssertionError() raise AssertionError()
except Error: except Error:
pass pass
query['hello'] = 'world' query['hello'] = 'world'
p = parse_params(query, spec=plug_params) p = parse_params(query, plug_params)
assert 'hello' in p assert 'hello' in p
assert p['hello'] == 'world' assert p['hello'] == 'world'
del query['hello'] del query['hello']
query['hiya'] = 'dlrow' query['hiya'] = 'dlrow'
p = parse_params(query, spec=plug_params) p = parse_params(query, plug_params)
assert 'hello' in p assert 'hello' in p
assert 'hiya' in p assert 'hiya' in p
assert p['hello'] == 'dlrow' assert p['hello'] == 'dlrow'
@ -63,6 +63,6 @@ class APITest(TestCase):
'default': 1 'default': 1
} }
} }
p = parse_params({}, spec=spec) p = parse_params({}, spec)
assert 'hello' in p assert 'hello' in p
assert p['hello'] == 1 assert p['hello'] == 1

View File

@ -38,6 +38,7 @@ class BlueprintsTest(TestCase):
""" """
Calling with no arguments should ask the user for more arguments Calling with no arguments should ask the user for more arguments
""" """
self.app.debug = False
resp = self.client.get("/api/") resp = self.client.get("/api/")
self.assertCode(resp, 400) self.assertCode(resp, 400)
js = parse_resp(resp) js = parse_resp(resp)
@ -54,7 +55,7 @@ class BlueprintsTest(TestCase):
The dummy plugin returns an empty response,\ The dummy plugin returns an empty response,\
it should contain the context it should contain the context
""" """
resp = self.client.get("/api/?i=My aloha mohame") resp = self.client.get("/api/?i=My aloha mohame&with_parameters=True")
self.assertCode(resp, 200) self.assertCode(resp, 200)
js = parse_resp(resp) js = parse_resp(resp)
logging.debug("Got response: %s", js) logging.debug("Got response: %s", js)
@ -77,6 +78,7 @@ class BlueprintsTest(TestCase):
Extra params that have a required argument that does not Extra params that have a required argument that does not
have a default should raise an error. have a default should raise an error.
""" """
self.app.debug = False
resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired") resp = self.client.get("/api/?i=My aloha mohame&algo=DummyRequired")
self.assertCode(resp, 400) self.assertCode(resp, 400)
js = parse_resp(resp) js = parse_resp(resp)
@ -88,6 +90,7 @@ class BlueprintsTest(TestCase):
The dummy plugin returns an empty response,\ The dummy plugin returns an empty response,\
it should contain the context it should contain the context
""" """
self.app.debug = False
resp = self.client.get("/api/?i=My aloha mohame&algo=DOESNOTEXIST") resp = self.client.get("/api/?i=My aloha mohame&algo=DOESNOTEXIST")
self.assertCode(resp, 404) self.assertCode(resp, 404)
js = parse_resp(resp) js = parse_resp(resp)
@ -154,3 +157,10 @@ class BlueprintsTest(TestCase):
self.assertCode(resp, 200) self.assertCode(resp, 200)
js = parse_resp(resp) js = parse_resp(resp)
assert "$schema" in js assert "$schema" in js
def test_help(self):
    """Requesting help should list the accepted parameters."""
    resp = self.client.get("/api/?help=true")
    self.assertCode(resp, 200)
    js = parse_resp(resp)
    assert "parameters" in js
    assert "help" in js["parameters"]

View File

@ -1,11 +1,6 @@
import logging import logging
from functools import partial from functools import partial
try:
from unittest.mock import patch
except ImportError:
from mock import patch
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from unittest import TestCase from unittest import TestCase
@ -17,11 +12,7 @@ class CLITest(TestCase):
def test_basic(self): def test_basic(self):
self.assertRaises(Error, partial(main_function, [])) self.assertRaises(Error, partial(main_function, []))
with patch('senpy.extensions.Senpy.analyse') as patched: res = main_function(['--input', 'test', '--algo', 'rand', '--with-parameters'])
main_function(['--input', 'test']) assert res.parameters['input'] == 'test'
assert 'rand' in res.parameters['algorithm']
patched.assert_called_with(input='test') assert res.parameters['input'] == 'test'
with patch('senpy.extensions.Senpy.analyse') as patched:
main_function(['--input', 'test', '--algo', 'rand'])
patched.assert_called_with(input='test', algo='rand')

View File

@ -12,10 +12,16 @@ from functools import partial
from senpy.extensions import Senpy from senpy.extensions import Senpy
from senpy import plugins from senpy import plugins
from senpy.models import Error, Results, Entry, EmotionSet, Emotion, Plugin from senpy.models import Error, Results, Entry, EmotionSet, Emotion, Plugin
from senpy import api
from flask import Flask from flask import Flask
from unittest import TestCase from unittest import TestCase
def analyse(instance, **kwargs):
request = api.parse_call(kwargs)
return instance.analyse(request)
class ExtensionsTest(TestCase): class ExtensionsTest(TestCase):
def setUp(self): def setUp(self):
self.app = Flask('test_extensions') self.app = Flask('test_extensions')
@ -91,10 +97,11 @@ class ExtensionsTest(TestCase):
def test_noplugin(self): def test_noplugin(self):
""" Don't analyse if there isn't any plugin installed """ """ Don't analyse if there isn't any plugin installed """
self.senpy.deactivate_all(sync=True) self.senpy.deactivate_all(sync=True)
self.assertRaises(Error, partial(self.senpy.analyse, input="tupni")) self.assertRaises(Error, partial(analyse, self.senpy, input="tupni"))
self.assertRaises(Error, self.assertRaises(Error,
partial( partial(
self.senpy.analyse, analyse,
self.senpy,
input="tupni", input="tupni",
algorithm='Dummy')) algorithm='Dummy'))
@ -102,12 +109,11 @@ class ExtensionsTest(TestCase):
""" Using a plugin """ """ Using a plugin """
# I was using mock until plugin started inheriting # I was using mock until plugin started inheriting
# Leaf (defaultdict with __setattr__ and __getattr__. # Leaf (defaultdict with __setattr__ and __getattr__.
r1 = self.senpy.analyse( r1 = analyse(self.senpy, algorithm="Dummy", input="tupni", output="tuptuo")
algorithm="Dummy", input="tupni", output="tuptuo") r2 = analyse(self.senpy, input="tupni", output="tuptuo")
r2 = self.senpy.analyse(input="tupni", output="tuptuo")
assert r1.analysis[0] == "plugins/Dummy_0.1" assert r1.analysis[0] == "plugins/Dummy_0.1"
assert r2.analysis[0] == "plugins/Dummy_0.1" assert r2.analysis[0] == "plugins/Dummy_0.1"
assert r1.entries[0].text == 'input' assert r1.entries[0]['nif:iString'] == 'input'
def test_analyse_jsonld(self): def test_analyse_jsonld(self):
""" Using a plugin with JSON-LD input""" """ Using a plugin with JSON-LD input"""
@ -116,30 +122,33 @@ class ExtensionsTest(TestCase):
"@type": "results", "@type": "results",
"entries": [ "entries": [
{"@id": "entry1", {"@id": "entry1",
"text": "tupni", "nif:isString": "tupni",
"@type": "entry" "@type": "entry"
} }
] ]
}''' }'''
r1 = self.senpy.analyse(algorithm="Dummy", r1 = analyse(self.senpy,
input=js_input, algorithm="Dummy",
informat="json-ld", input=js_input,
output="tuptuo") informat="json-ld",
r2 = self.senpy.analyse(input="tupni", output="tuptuo") output="tuptuo")
r2 = analyse(self.senpy,
input="tupni",
output="tuptuo")
assert r1.analysis[0] == "plugins/Dummy_0.1" assert r1.analysis[0] == "plugins/Dummy_0.1"
assert r2.analysis[0] == "plugins/Dummy_0.1" assert r2.analysis[0] == "plugins/Dummy_0.1"
assert r1.entries[0].text == 'input' assert r1.entries[0]['nif:iString'] == 'input'
def test_analyse_error(self): def test_analyse_error(self):
mm = mock.MagicMock() mm = mock.MagicMock()
mm.id = 'magic_mock' mm.id = 'magic_mock'
mm.analyse_entries.side_effect = Error('error on analysis', status=500) mm.analyse_entries.side_effect = Error('error in analysis', status=500)
self.senpy.plugins['MOCK'] = mm self.senpy.plugins['MOCK'] = mm
try: try:
self.senpy.analyse(input='nothing', algorithm='MOCK') analyse(self.senpy, input='nothing', algorithm='MOCK')
assert False assert False
except Error as ex: except Error as ex:
assert ex['message'] == 'error on analysis' assert 'error in analysis' in ex['message']
assert ex['status'] == 500 assert ex['status'] == 500
mm.analyse.side_effect = Exception('generic exception on analysis') mm.analyse.side_effect = Exception('generic exception on analysis')
@ -147,10 +156,10 @@ class ExtensionsTest(TestCase):
'generic exception on analysis') 'generic exception on analysis')
try: try:
self.senpy.analyse(input='nothing', algorithm='MOCK') analyse(self.senpy, input='nothing', algorithm='MOCK')
assert False assert False
except Error as ex: except Error as ex:
assert ex['message'] == 'generic exception on analysis' assert 'generic exception on analysis' in ex['message']
assert ex['status'] == 500 assert ex['status'] == 500
def test_filtering(self): def test_filtering(self):
@ -180,40 +189,27 @@ class ExtensionsTest(TestCase):
'emoml:valence': 0 'emoml:valence': 0
})) }))
response = Results({ response = Results({
'analysis': [{'plugin': plugin}],
'entries': [Entry({ 'entries': [Entry({
'text': 'much ado about nothing', 'nif:iString': 'much ado about nothing',
'emotions': [eSet1] 'emotions': [eSet1]
})] })]
}) })
params = {'emotionModel': 'emoml:big6', params = {'emotionModel': 'emoml:big6',
'conversion': 'full'} 'conversion': 'full'}
r1 = deepcopy(response) r1 = deepcopy(response)
self.senpy.convert_emotions(r1, r1.parameters = params
[plugin, ], self.senpy.convert_emotions(r1)
params)
assert len(r1.entries[0].emotions) == 2 assert len(r1.entries[0].emotions) == 2
params['conversion'] = 'nested' params['conversion'] = 'nested'
r2 = deepcopy(response) r2 = deepcopy(response)
self.senpy.convert_emotions(r2, r2.parameters = params
[plugin, ], self.senpy.convert_emotions(r2)
params)
assert len(r2.entries[0].emotions) == 1 assert len(r2.entries[0].emotions) == 1
assert r2.entries[0].emotions[0]['prov:wasDerivedFrom'] == eSet1 assert r2.entries[0].emotions[0]['prov:wasDerivedFrom'] == eSet1
params['conversion'] = 'filtered' params['conversion'] = 'filtered'
r3 = deepcopy(response) r3 = deepcopy(response)
self.senpy.convert_emotions(r3, r3.parameters = params
[plugin, ], self.senpy.convert_emotions(r3)
params)
assert len(r3.entries[0].emotions) == 1 assert len(r3.entries[0].emotions) == 1
r3.jsonld() r3.jsonld()
# def test_async_plugin(self):
# """ We should accept multiprocessing plugins with async=False"""
# thread1 = self.senpy.activate_plugin("Async", sync=False)
# thread1.join(timeout=1)
# assert len(self.senpy.plugins['Async'].value) == 4
# resp = self.senpy.analyse(input='nothing', algorithm='Async')
# assert len(resp.entries[0].async_values) == 2
# self.senpy.activate_plugin("Async", sync=True)

View File

@ -185,7 +185,7 @@ class ModelsTest(TestCase):
'entries': [{ 'entries': [{
'@id': 'entry1', '@id': 'entry1',
'@type': 'entry', '@type': 'entry',
'text': 'TEST' 'nif:isString': 'TEST'
}] }]
} }
recovered = from_dict(results) recovered = from_dict(results)