import logging

from future.utils import iteritems

from .models import Error, Results, Entry, from_string

logger = logging.getLogger(__name__)

# Marker used in the ``options`` field of a spec to flag boolean parameters.
boolean = [True, False]

# Named converters that a spec can reference through its ``processor`` field.
processors = {
    'string_to_tuple':
    lambda p: p if isinstance(p, (tuple, list)) else tuple(p.split(','))
}

# Parameters that control how a request is processed and how the response is
# formatted.
API_PARAMS = {
    "algorithm": {
        "aliases": ["algorithms", "a", "algo"],
        "required": True,
        "default": 'default',
        "processor": 'string_to_tuple',
        "description": ("Algorithms that will be used to process the request. "
                        "It may be a list of comma-separated names."),
    },
    "expanded-jsonld": {
        "@id": "expanded-jsonld",
        "description": "use JSON-LD expansion to get full URIs",
        "aliases": ["expanded", "expanded_jsonld"],
        "options": boolean,
        "required": True,
        "default": False
    },
    "with-parameters": {
        "aliases": ['withparameters', 'with_parameters'],
        "description": "include initial parameters in the response",
        "options": boolean,
        "default": False,
        "required": True
    },
    "outformat": {
        "@id": "outformat",
        "aliases": ["o"],
        "default": "json-ld",
        "description": """The data can be semantically formatted (JSON-LD, turtle or n-triples),
given as a list of comma-separated fields (see the fields option) or constructed from a Jinja2 template
(see the template option).""",
        "required": True,
        "options": ["json-ld", "turtle", "ntriples"],
    },
    "template": {
        "@id": "template",
        "required": False,
        "description": """Jinja2 template for the result. The input data for the template will
be the results as a dictionary.
For example:

Consider the results before templating:

```
[{
    "@type": "entry",
    "onyx:hasEmotionSet": [],
    "nif:isString": "testing the template",
    "marl:hasOpinion": [
        {
            "@type": "sentiment",
            "marl:hasPolarity": "marl:Positive"
        }
    ]
}]
```


And the template:

```
{% for entry in entries %}
{{ entry["nif:isString"] | upper }},{{entry.sentiments[0]["marl:hasPolarity"].split(":")[1]}}
{% endfor %}
```

The final result would be:

```
TESTING THE TEMPLATE,Positive
```
"""
    },
    "fields": {
        "@id": "fields",
        "required": False,
        "description": """A jmespath selector, that can be used to extract a new dictionary, array or value
from the results.
jmespath is a powerful query language for json and/or dictionaries.
It allows you to change the structure (and data) of your objects through queries.

e.g., the following expression gets a list of `[emotion label, intensity]` for each entry:
`entries[]."onyx:hasEmotionSet"[]."onyx:hasEmotion"[]["onyx:hasEmotionCategory","onyx:hasEmotionIntensity"]`

For more information, see: https://jmespath.org

"""
    },
    "help": {
        "@id": "help",
        "description": "Show additional help to know more about the possible parameters",
        "aliases": ["h"],
        "required": True,
        "options": boolean,
        "default": False
    },
    "verbose": {
        "@id": "verbose",
        "description": "Show all properties in the result",
        "aliases": ["v"],
        "required": True,
        "options": boolean,
        "default": False
    },
    "aliases": {
        "@id": "aliases",
        "description": "Replace JSON properties with their aliases",
        "aliases": [],
        "required": True,
        "options": boolean,
        "default": False
    },
    "emotion-model": {
        "@id": "emotionModel",
        "description": """Emotion model to use in the response.
Senpy will try to convert the output to this model automatically.

Examples: `wna:liking` and `emoml:big6`.
""",
        "aliases": ["emoModel", "emotionModel"],
        "required": False
    },
    "conversion": {
        "@id": "conversion",
        "description": """How to show the elements that have (not) been converted.

* full: converted and original elements will appear side-by-side
* filtered: only converted elements will be shown
* nested: converted elements will be shown, and they will include a link to the original element
(using `prov:wasGeneratedBy`).
""",
        "required": True,
        "options": ["filtered", "nested", "full"],
        "default": "full"
    }
}

# Parameters accepted when evaluating plugins against datasets.
EVAL_PARAMS = {
    "algorithm": {
        "aliases": ["plug", "p", "plugins", "algorithms", 'algo', 'a', 'plugin'],
        "description": "Plugins to evaluate",
        "required": True,
        "help": "See activated plugins in /plugins",
        "processor": API_PARAMS['algorithm']['processor']
    },
    "dataset": {
        "aliases": ["datasets", "data", "d"],
        "description": "Datasets to be evaluated",
        "required": True,
        "help": "See avalaible datasets in /datasets"
    }
}

# Parameters accepted when listing plugins.
PLUGINS_PARAMS = {
    "plugin-type": {
        "@id": "pluginType",
        "description": 'What kind of plugins to list',
        "aliases": ["pluginType", "plugin_type"],
        "required": True,
        "default": 'analysisPlugin'
    }
}

# Parameters that only make sense for web requests.
WEB_PARAMS = {
    "in-headers": {
        "aliases": ["headers", "inheaders", "inHeaders", "in-headers", "in_headers"],
        "description": "Only include the JSON-LD context in the headers",
        "required": True,
        "default": False,
        "options": boolean
    },
}

# Parameters that only make sense for the command line interface.
CLI_PARAMS = {
    "plugin-folder": {
        "aliases": ["folder", "plugin_folder"],
        "required": True,
        "default": "."
    },
}

# Parameters that describe the input of an analysis.
NIF_PARAMS = {
    "input": {
        "@id": "input",
        "aliases": ["i"],
        "required": True,
        "help": "Input text"
    },
    "intype": {
        "@id": "intype",
        "description": "input type",
        "aliases": ["t"],
        "required": False,
        "default": "direct",
        "options": ["direct", "url", "file"],
    },
    "informat": {
        "@id": "informat",
        "description": "input format",
        "aliases": ["f"],
        "required": False,
        "default": "text",
        "options": ["text", "json-ld"],
    },
    "language": {
        "@id": "language",
        "description": "language of the input",
        "aliases": ["l"],
        "required": False,
    },
    "prefix": {
        "@id": "prefix",
        "description": "prefix to use for new entities",
        "aliases": ["p"],
        "required": True,
        "default": "",
    },
    "urischeme": {
        "@id": "urischeme",
        "description": "scheme for NIF URIs",
        "aliases": ["u"],
        "required": False,
        "default": "RFC5147String",
        "options": ["RFC5147String", ]
    }
}

# Union of every built-in specification. Specs later in the list override
# earlier ones on a name clash (e.g. "algorithm" ends up being the API one).
BUILTIN_PARAMS = {}

for d in [
        NIF_PARAMS, CLI_PARAMS, WEB_PARAMS, PLUGINS_PARAMS, EVAL_PARAMS,
        API_PARAMS
]:
    for k, v in d.items():
        BUILTIN_PARAMS[k] = v


def parse_params(indict, *specs):
    '''
    Normalize and validate the parameters in `indict` against `specs`.

    For every parameter in every spec (NIF_PARAMS when no spec is given):

    * aliases found in `indict` are replaced by the canonical name,
    * a missing parameter takes its default, if the spec has one; the
      default is stored as-is (no processor, no option validation),
    * the optional processor is applied to values that were provided,
    * boolean parameters are coerced to bool ('true', '1' and the empty
      string are truthy), and other values are checked against `options`.

    Keys of `indict` that do not appear in any spec are passed through.

    :param indict: dictionary with the raw parameters. It is not modified.
    :param specs: parameter specifications (dictionaries like NIF_PARAMS).
    :return: a new dictionary with the parsed parameters.
    :raises Error: (status 400) if a required parameter without default is
        missing, or a value is not among the allowed options.
    '''
    if not specs:
        specs = [NIF_PARAMS]
    logger.debug("Parsing: %s\n%s", indict, specs)
    outdict = indict.copy()
    wrong_params = {}
    for spec in specs:
        for param, options in iteritems(spec):
            for alias in options.get("aliases", []):
                # Replace each alias with the correct name of the parameter
                if alias in indict and alias != param:
                    outdict[param] = indict[alias]
                    # The same alias may be declared by several specs or
                    # parameters (e.g. "p"), in which case it has already
                    # been removed and a plain `del` would raise KeyError.
                    outdict.pop(alias, None)
                    break
            if param not in outdict:
                if "default" in options:
                    # We assume the default is correct
                    outdict[param] = options["default"]
                elif options.get("required", False):
                    wrong_params[param] = spec[param]
                continue
            if 'processor' in options:
                outdict[param] = processors[options['processor']](outdict[param])
            if "options" in options:
                if options["options"] == boolean:
                    outdict[param] = str(outdict[param]).lower() in ['true', '1', '']
                elif outdict[param] not in options["options"]:
                    wrong_params[param] = spec[param]
    if wrong_params:
        logger.debug("Error parsing: %s", wrong_params)
        raise Error(
            status=400,
            message='Missing or invalid parameters',
            parameters=outdict,
            errors=wrong_params)
    return outdict


def get_all_params(plugins, *specs):
    '''
    Return a dictionary with the parameters of the given specifications,
    updated with the extra parameters of the given plugins.
    '''
    dic = {}
    for s in specs:
        dic.update(s)
    dic.update(get_extra_params(plugins))
    return dic


def get_extra_params(plugins):
    '''
    Get a dictionary of possible parameters given a list of plugins.

    Parameters defined by a single plugin are copied verbatim. When several
    plugins define a parameter with the same name, each definition is
    exposed as `<plugin name>.<parameter>`, and a merged definition is added
    under the plain name unless the allowed options are incompatible (i.e.
    their intersection is empty).
    '''
    params = {}
    extra_params = {}
    # Group the definitions of every parameter by the plugin that declares it
    for plugin in plugins:
        this_params = plugin.get('extra_params', {})
        for k, v in this_params.items():
            extra_params.setdefault(k, {})[plugin.name] = v
    for k, v in extra_params.items():  # Resolve conflicts
        if len(v) == 1:
            # Add the extra options that do not collide
            params[k] = list(v.values())[0]
            continue
        required = False
        aliases = None
        options = None
        default = None
        nodefault = False  # Set when defaults are not compatible
        for plugin_name, opt in v.items():
            params['{}.{}'.format(plugin_name, k)] = opt
            required = required or opt.get('required', False)
            # Only the aliases shared by every plugin are kept
            newaliases = set(opt.get('aliases', []))
            aliases = newaliases if aliases is None else aliases & newaliases
            if 'options' in opt:
                newoptions = set(opt['options'])
                options = newoptions if options is None else options & newoptions
            if 'default' in opt:
                newdefault = opt['default']
                # Falsy defaults are ignored when merging
                if newdefault:
                    if default is None and not nodefault:
                        default = newdefault
                    elif newdefault != default:
                        nodefault = True
                        default = None
        # Check for incompatibilities
        if options != set():
            merged = {
                'default': default,
                'aliases': list(aliases),
                'required': required,
            }
            # `options` is None when no plugin restricts the values. In that
            # case the key is left out instead of failing on list(None).
            if options is not None:
                merged['options'] = list(options)
            params[k] = merged
    return params


def parse_analyses(params, plugins):
    '''
    Parse the given parameters individually for each plugin, and get a list
    of the parameters that belong to each of the plugins. Each item can then
    be used in the plugin.analyse_entries method.

    Falsy plugins are skipped, but they still count for the positional index
    used to select `<index>.parameter` values.
    '''
    analysis_list = []
    for i, plugin in enumerate(plugins):
        if not plugin:
            continue
        this_params = filter_params(params, plugin, i)
        parsed = parse_params(this_params, plugin.get('extra_params', {}))
        analysis = plugin.activity(parsed)
        analysis_list.append(analysis)
    return analysis_list


def filter_params(params, plugin, ith=-1):
    '''
    Get the values within params that apply to a plugin.
    More specific names override more general names, in this order:

        <index>.parameter > <plugin.name>.parameter > parameter

    The prefix (including the dot) is removed from the resulting keys.
    The index is only taken into account when `ith` is not negative.

    Example:

    >>> filter_params({'0.hello': True, 'hello': False}, Plugin(), 0)
    {'hello': True}

    '''
    index_prefix = '{}.'.format(ith) if ith >= 0 else ''
    # The dot is part of the prefix, so that a plugin called "emotion" does
    # not capture general parameters such as "emotion-model".
    name_prefix = '{}.'.format(plugin.name)

    general = {}
    by_name = {}
    by_index = {}
    for k, v in params.items():
        if index_prefix and k.startswith(index_prefix):
            by_index[k[len(index_prefix):]] = v
        elif k.startswith(name_prefix):
            by_name[k[len(name_prefix):]] = v
        else:
            general[k] = v

    # Merge from the least to the most specific, so that the documented
    # precedence does not depend on the iteration order of `params`.
    thisparams = general
    thisparams.update(by_name)
    thisparams.update(by_index)
    return thisparams


def parse_call(params):
    '''
    Return a results object based on the parameters used in a call/request.

    The parameters are parsed with NIF_PARAMS. A `text` input is wrapped in
    a single entry, whereas a `json-ld` input is deserialized as Results.
    The parsed parameters are stored in the `parameters` attribute of the
    results.
    '''
    params = parse_params(params, NIF_PARAMS)
    if params['informat'] == 'text':
        results = Results()
        entry = Entry(nif__isString=params['input'], id='prefix:')  # Use @base
        results.entries.append(entry)
    elif params['informat'] == 'json-ld':
        results = from_string(params['input'], cls=Results)
    else:  # pragma: no cover
        raise NotImplementedError('Informat {} is not implemented'.format(
            params['informat']))
    results.parameters = params
    return results