diff --git a/CHANGELOG.md b/CHANGELOG.md index 29e2e09..d1fdee4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,17 +5,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed +* Changed the underlying model to use `pydantic` models +* Plugin interface slightly changed. Activation should be performed in the `activate` method +* data directory selection logic is slightly modified, and will choose one of the following (in this order): `data_folder` (argument), `$SENPY_DATA` or `$CWD` +* Plugins cannot be deactivated ### Fixed -* Tests now use strict mode to detect errors on optional plugins +* Tests now detect errors on optional plugins ### Added * The code of many senpy community plugins have been included by default. However, additional files (e.g., licensed data) and/or installing additional dependencies may be necessary for some plugins. Read each plugin's documentation for more information. * `optional` attribute in plugins. Optional plugins may fail to load or activate but the server will be started regardless, unless running in strict mode -* `--strict` flag to set strict mode. 
In this mode, the server will not start when optional plugins fail to load * Option in shelf plugins to ignore pickling errors ### Removed * `--only-install`, `--only-test` and `--only-list` flags were removed in favor of `--no-run` + `--install`/`--test`/`--dependencies` -### Changed -* data directory selection logic is slightly modified, and will choose one of the following (in this order): `data_folder` (argument), `$SENPY_DATA` or `$CWD` + ## [1.0.6] ### Fixed diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..7611407 --- /dev/null +++ b/TODO.md @@ -0,0 +1,9 @@ +* Upload context to www.gsi.upm.es/ontologies/senpy +* Use context from gsi.upm.es +* Add example of morality analysis + +* Simplify plugin loading +* Simplify models + * Remove json schemas and generate pydantic objects + * Remove some of the meta-programming magic +* Migrate to openapi/quart diff --git a/docs/examples/results/example-moral.json b/docs/examples/results/example-moral.json new file mode 100644 index 0000000..5dc1cb9 --- /dev/null +++ b/docs/examples/results/example-moral.json @@ -0,0 +1,42 @@ +{ + "@context": "http://senpy.gsi.upm.es/api/contexts/YXBpP2FsZ289ZW1", + "@id": ":Result1", + "@type": "results", + "activities": [ + { + "@id": ":MoralAnalysis1_Activity", + "@type": "amor:MoralValueAnalysis", + "prov:wasAssociatedWith": ":MoralAnalysis1", + "amor:usedMoralValueModel": "amor-mft:MoralFoundationsTheory", + "amor:analysed": "news1", + "amor:usedMLModel": ":model1", + "prov:generated": ":annotation3" + } + ], + "entries": [ + { + "@id": "http://micro.blog/status1", + "@type": [ + "nif:RFC5147String", + "nif:Context" + ], + "nif:isString": "Dear Microsoft, put your Windows Phone on your newest #open technology program. You'll be awesome. 
#opensource", + "morality": [ + { + "@id": ":annotation3", + "@type": "amor:MoralValueAnnotation", + "nif:beginIndex": 80, + "nif:endIndex": 97, + "amor:hasMoralValueCategory": "mft:Authority", + "amor:confidence": 0.75, + "amor-mft:hasPolarityIntensity": 0.2, + "amor:annotated": "http://micro.blog/status1", + "nif:anchorOf": "You'll be awesome.", + "prov:wasGeneratedBy": ":MoralAnalysis1_Activity" + } + ], + "emotions": [ + ] + } + ] +} diff --git a/example-plugins/async_plugin.py b/example-plugins/async_plugin.py deleted file mode 100644 index 3bc008a..0000000 --- a/example-plugins/async_plugin.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -from senpy import AnalysisPlugin - -import multiprocessing - - -def _train(process_number): - return process_number - - -class Async(AnalysisPlugin): - '''An example of an asynchronous module''' - author = '@balkian' - version = '0.2' - sync = False - - def _do_async(self, num_processes): - pool = multiprocessing.Pool(processes=num_processes) - values = sorted(pool.map(_train, range(num_processes))) - - return values - - def activate(self): - self.value = self._do_async(4) - - def analyse_entry(self, entry, params): - values = self._do_async(2) - entry.async_values = values - yield entry - - test_cases = [ - { - 'input': 'any', - 'expected': { - 'async_values': [0, 1] - } - } - ] diff --git a/example-plugins/basic_analyse_entry_plugin.py b/example-plugins/basic_analyse_entry_plugin.py index 3ec042d..59701b5 100644 --- a/example-plugins/basic_analyse_entry_plugin.py +++ b/example-plugins/basic_analyse_entry_plugin.py @@ -24,8 +24,8 @@ import basic class BasicAnalyseEntry(plugins.SentimentPlugin): '''Equivalent to Basic, implementing the analyse_entry method''' - author = '@balkian' - version = '0.1' + author: str = '@balkian' + version: str = '0.1' mappings = { 'pos': 'marl:Positive', @@ -43,7 +43,7 @@ class BasicAnalyseEntry(plugins.SentimentPlugin): entry.sentiments.append(s) yield entry - test_cases = [{ + test_cases: list[dict] = [{ 'input': 'Hello :)', 'polarity': 'marl:Positive' }, { diff --git a/example-plugins/basic_box_plugin.py b/example-plugins/basic_box_plugin.py index 2bf6c09..ab8c012 100644 --- a/example-plugins/basic_box_plugin.py +++ b/example-plugins/basic_box_plugin.py @@ -24,8 +24,8 @@ import basic class BasicBox(SentimentBox): ''' A modified version of Basic that also does converts annotations manually''' - author = '@balkian' - version = '0.1' + author: str = '@balkian' + version: str = '0.1' def predict_one(self, features, **kwargs): output = basic.get_polarity(features[0]) @@ -35,7 +35,7 @@ class BasicBox(SentimentBox): return [0, 0, 1] 
return [0, 1, 0] - test_cases = [{ + test_cases: list[dict] = [{ 'input': 'Hello :)', 'polarity': 'marl:Positive' }, { diff --git a/example-plugins/basic_plugin.py b/example-plugins/basic_plugin.py index 6debd0a..462149c 100644 --- a/example-plugins/basic_plugin.py +++ b/example-plugins/basic_plugin.py @@ -25,8 +25,8 @@ import basic class Basic(SentimentBox): '''Provides sentiment annotation using a lexicon''' - author = '@balkian' - version = '0.1' + author: str = '@balkian' + version: str = '0.1' def predict_one(self, features, **kwargs): output = basic.get_polarity(features[0]) @@ -36,7 +36,7 @@ class Basic(SentimentBox): return [0, 1, 0] return [0, 0, 1] - test_cases = [{ + test_cases: list[dict] = [{ 'input': u'Hello :)', 'polarity': 'marl:Positive' }, { diff --git a/example-plugins/configurable_plugin.py b/example-plugins/configurable_plugin.py index c61dcef..258a71e 100644 --- a/example-plugins/configurable_plugin.py +++ b/example-plugins/configurable_plugin.py @@ -25,8 +25,8 @@ import basic class Dictionary(plugins.SentimentPlugin): '''Sentiment annotation using a configurable lexicon''' - author = '@balkian' - version = '0.2' + author: str = '@balkian' + version: str = '0.2' dictionaries = [basic.emojis, basic.emoticons] @@ -42,7 +42,7 @@ class Dictionary(plugins.SentimentPlugin): entry.sentiments.append(s) yield entry - test_cases = [{ + test_cases: list[dict] = [{ 'input': 'Hello :)', 'polarity': 'marl:Positive' }, { @@ -61,7 +61,7 @@ class EmojiOnly(Dictionary): '''Sentiment annotation with a basic lexicon of emojis''' dictionaries = [basic.emojis] - test_cases = [{ + test_cases: list[dict] = [{ 'input': 'Hello :)', 'polarity': 'marl:Neutral' }, { @@ -80,7 +80,7 @@ class EmoticonsOnly(Dictionary): '''Sentiment annotation with a basic lexicon of emoticons''' dictionaries = [basic.emoticons] - test_cases = [{ + test_cases: list[dict] = [{ 'input': 'Hello :)', 'polarity': 'marl:Positive' }, { @@ -102,7 +102,7 @@ class Salutes(Dictionary): 'marl:Negative': 
['Good bye', ] }] - test_cases = [{ + test_cases: list[dict] = [{ 'input': 'Hello :)', 'polarity': 'marl:Positive' }, { diff --git a/example-plugins/dummy_plugin.py b/example-plugins/dummy_plugin.py index 640b336..1e3c044 100644 --- a/example-plugins/dummy_plugin.py +++ b/example-plugins/dummy_plugin.py @@ -19,15 +19,15 @@ from senpy import AnalysisPlugin, easy class Dummy(AnalysisPlugin): '''This is a dummy self-contained plugin''' - author = '@balkian' - version = '0.1' + author: str = '@balkian' + version: str = '0.1' def analyse_entry(self, entry, params): - entry['nif:isString'] = entry['nif:isString'][::-1] + entry.text = entry.text[::-1] entry.reversed = entry.get('reversed', 0) + 1 yield entry - test_cases = [{ + test_cases: list[dict] = [{ 'entry': { 'nif:isString': 'Hello', }, diff --git a/example-plugins/dummy_required_plugin.py b/example-plugins/dummy_required_plugin.py index def98a2..efb512b 100644 --- a/example-plugins/dummy_required_plugin.py +++ b/example-plugins/dummy_required_plugin.py @@ -19,9 +19,9 @@ from senpy import AnalysisPlugin, easy class DummyRequired(AnalysisPlugin): '''This is a dummy self-contained plugin''' - author = '@balkian' - version = '0.1' - extra_params = { + author: str = '@balkian' + version: str = '0.1' + extra_params: dict = { 'example': { 'description': 'An example parameter', 'required': True, @@ -30,11 +30,11 @@ class DummyRequired(AnalysisPlugin): } def analyse_entry(self, entry, params): - entry['nif:isString'] = entry['nif:isString'][::-1] + entry.text = entry.text[::-1] entry.reversed = entry.get('reversed', 0) + 1 yield entry - test_cases = [{ + test_cases: list[dict] = [{ 'entry': { 'nif:isString': 'Hello', }, diff --git a/example-plugins/emorand_plugin.py b/example-plugins/emorand_plugin.py index 0ed1e22..670fade 100644 --- a/example-plugins/emorand_plugin.py +++ b/example-plugins/emorand_plugin.py @@ -22,11 +22,11 @@ from senpy.models import EmotionSet, Emotion, Entry class EmoRand(EmotionPlugin): '''A sample 
plugin that returns a random emotion annotation''' - name = 'emotion-random' - author = '@balkian' - version = '0.1' - url = "https://github.com/gsi-upm/senpy-plugins-community" - onyx__usesEmotionModel = "emoml:big6" + name: str = 'emotion-random' + author: str = '@balkian' + version: str = '0.1' + url: str = "https://github.com/gsi-upm/senpy-plugins-community" + usesEmotionModel: str = "emoml:big6" def analyse_entry(self, entry, activity): category = "emoml:big6happiness" diff --git a/example-plugins/moral/example.ttl b/example-plugins/moral/example.ttl new file mode 100644 index 0000000..d7cfe8d --- /dev/null +++ b/example-plugins/moral/example.ttl @@ -0,0 +1,63 @@ +@prefix : . +@prefix amor: . +@prefix amor-bhv: . +@prefix amor-mft: . +@prefix bhv: . +@prefix mft: . +@prefix mls: . +@prefix owl: . +@prefix prov: . +@prefix rdfs: . +@prefix schema: . + + +:news1 a owl:NamedIndividual, schema:NewsArticle ; + schema:articleBody "Director Comey says the probe into last year's US election would assess if crimes were committed."^^xsd:string ; + schema:datePublished "2017-03-20T20:30:54+00:00"^^schema:Date ; + schema:headline "Trump Russia claims: FBI's Comey confirms investigation of election 'interference'"^^xsd:string ; + schema:image , + , + , + , + , + , + , + , + , + , + ; + schema:mainEntityOfPage ; + schema:publisher :bbc ; + schema:url . + +:bbc a schema:Organization ; + schema:logo ; + schema:name "BBC News"^^xsd:string . + + +:robot1 a prov:SoftwareAgent . + +:model1 a mls:Model . + +:logisticRegression a mls:Algorithm ; + rdfs:label "Logistic Regression"@en , + "Regresión Logística"@es . + +:run1 a mls:Run ; + mls:executes :wekaLogistic ; + mls:hasInput :credit-a ; + mls:hasOutput :model1 ; + mls:realizes :logisticRegression . + +:analysis3 a amor:MoralValueAnalysis ; + prov:wasAssociatedWith :robot1 ; + amor:usedMoralValueModel amor-mft:MoralFoundationsTheory ; + amor:analysed :news1 ; + amor:usedMLModel :model1 ; + prov:generated :annotation3 . 
+ +:annotation3 a amor:MoralValueAnnotation ; + amor:hasMoralValueCategory mft:Authority ; + amor:confidence "0.75"^^xsd:float ; + amor-mft:hasPolarityIntensity "0.2"^^xsd:float ; + amor:annotated :news1 . diff --git a/example-plugins/moral/moral_plugin.py b/example-plugins/moral/moral_plugin.py new file mode 100644 index 0000000..4c9b895 --- /dev/null +++ b/example-plugins/moral/moral_plugin.py @@ -0,0 +1,10 @@ +from senpy.ns import amor, amor_bhv, amor_mft, prov +from senpy.plugins import MoralityPlugin, MoralAnnotation + + +class DummyMoralityPlugin(MoralityPlugin): + moralValueModel: str = amor_mft["MoralFoundationsTheory"] + + def annotate(self, entry, activity, **kwargs): + yield MoralAnnotation(amor_bhv['Conservation'], + confidence=1.8) diff --git a/example-plugins/mynoop.py b/example-plugins/mynoop.py index 0f443d0..0e0e462 100644 --- a/example-plugins/mynoop.py +++ b/example-plugins/mynoop.py @@ -21,7 +21,7 @@ from senpy.plugins import SentimentPlugin class NoOp(SentimentPlugin): '''This plugin does nothing.
Literally nothing.''' - version = 0 + version: str = 0 def analyse_entry(self, entry, *args, **kwargs): yield entry diff --git a/example-plugins/parameterized_plugin.py b/example-plugins/parameterized_plugin.py index 87c8176..028b441 100644 --- a/example-plugins/parameterized_plugin.py +++ b/example-plugins/parameterized_plugin.py @@ -24,11 +24,10 @@ import basic class ParameterizedDictionary(plugins.SentimentPlugin): '''This is a basic self-contained plugin''' + author: str = '@balkian' + version: str = '0.2' - author = '@balkian' - version = '0.2' - - extra_params = { + extra_params: dict = { 'positive-words': { 'description': 'Comma-separated list of words that are considered positive', 'aliases': ['positive'], @@ -56,7 +55,7 @@ class ParameterizedDictionary(plugins.SentimentPlugin): entry.sentiments.append(s) yield entry - test_cases = [ + test_cases: list[dict] = [ { 'input': 'Hello :)', 'polarity': 'marl:Positive', diff --git a/example-plugins/rand_plugin.py b/example-plugins/rand_plugin.py index b3d1d87..bbff341 100644 --- a/example-plugins/rand_plugin.py +++ b/example-plugins/rand_plugin.py @@ -20,12 +20,12 @@ from senpy import SentimentPlugin, Sentiment, Entry class RandSent(SentimentPlugin): '''A sample plugin that returns a random sentiment annotation''' - name = 'sentiment-random' - author = "@balkian" - version = '0.1' - url = "https://github.com/gsi-upm/senpy-plugins-community" - marl__maxPolarityValue = '1' - marl__minPolarityValue = "-1" + name: str = 'sentiment-random' + author: str = "@balkian" + version: str = '0.1' + url: str = "https://github.com/gsi-upm/senpy-plugins-community" + maxPolarityValue: float = 1 + minPolarityValue: float = -1 def analyse_entry(self, entry, activity): polarity_value = max(-1, min(1, random.gauss(0.2, 0.2))) diff --git a/example-plugins/sklearn/pipeline_plugin.py b/example-plugins/sklearn/pipeline_plugin.py index 64d7bb9..44cfa7c 100644 --- a/example-plugins/sklearn/pipeline_plugin.py +++ 
b/example-plugins/sklearn/pipeline_plugin.py @@ -22,8 +22,8 @@ from mypipeline import pipeline class PipelineSentiment(SentimentBox): '''This is a pipeline plugin that wraps a classifier defined in another module (mypipeline).''' - author = '@balkian' - version = 0.1 + author: str = '@balkian' + version: str = 0.1 maxPolarityValue = 1 minPolarityValue = -1 @@ -32,7 +32,7 @@ class PipelineSentiment(SentimentBox): return [1, 0, 0] return [0, 0, 1] - test_cases = [ + test_cases: list[dict] = [ { 'input': 'The sentiment for senpy should be positive :)', 'polarity': 'marl:Positive' diff --git a/example-plugins/sleep_plugin.py b/example-plugins/sleep_plugin.py index 38c00fe..baaf61f 100644 --- a/example-plugins/sleep_plugin.py +++ b/example-plugins/sleep_plugin.py @@ -20,10 +20,10 @@ from time import sleep class Sleep(AnalysisPlugin): '''Dummy plugin to test async''' - author = "@balkian" - version = "0.2" + author: str = "@balkian" + version: str = "0.2" timeout = 0.05 - extra_params = { + extra_params: dict = { "timeout": { "@id": "timeout_sleep", "aliases": ["timeout", "to"], @@ -32,7 +32,8 @@ class Sleep(AnalysisPlugin): } } - def activate(self, *args, **kwargs): + def __init__(self, **kwargs): + super().__init__(**kwargs) sleep(self.timeout) def analyse_entry(self, entry, params): diff --git a/requirements.txt b/requirements.txt index b26068b..0795110 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ scipy scikit-learn>=0.20 responses jmespath +deprecation==2.1.0 diff --git a/senpy/__main__.py b/senpy/__main__.py index d063297..e6af7a0 100644 --- a/senpy/__main__.py +++ b/senpy/__main__.py @@ -137,7 +137,7 @@ def main(): '--fail', action='store_true', default=False, - help='Do not exit if some plugins fail to activate') + help='Do not exit if some plugins can not be instantiated') parser.add_argument( '--enable-cors', '--cors', @@ -165,13 +165,14 @@ def main(): install=args.install, strict=args.strict, data_folder=args.data_folder) + folders = 
list(args.plugins_folder) if args.plugins_folder else [] if not folders: folders.append(".") for p in folders: sp.add_folder(p) - plugins = sp.plugins(plugin_type=None, is_activated=False) + plugins = sp.plugins(plugin_type=None) maxname = max(len(x.name) for x in plugins) maxversion = max(len(str(x.version)) for x in plugins) print('Found {} plugins:'.format(len(plugins))) @@ -187,7 +188,7 @@ def main(): print('Listing dependencies') missing = [] installed = [] - for plug in sp.plugins(is_activated=False): + for plug in sp.plugins(): inst, miss, nltkres = list_dependencies(plug) if not any([inst, miss, nltkres]): continue @@ -209,17 +210,11 @@ def main(): sp.install_deps() if args.test: - sp.activate_all(sync=True) - easy_test(sp.plugins(is_activated=True), debug=args.debug) + easy_test(sp.plugins(), debug=args.debug) if args.no_run: return - sp.activate_all(sync=True) - if sp.strict: - inactive = sp.plugins(is_activated=False) - assert not inactive - print('Senpy version {}'.format(senpy.__version__)) print('Server running on port %s:%d. 
Ctrl+C to quit' % (args.host, args.port)) @@ -238,8 +233,6 @@ def main(): debug=app.debug) except KeyboardInterrupt: print('Bye!') - sp.deactivate_all() - if __name__ == '__main__': main() diff --git a/senpy/api.py b/senpy/api.py index b11e162..c6450dd 100644 --- a/senpy/api.py +++ b/senpy/api.py @@ -15,7 +15,8 @@ # from future.utils import iteritems -from .models import Error, Results, Entry, from_string +from .models import Results, Entry, from_string +from .errors import Error, InvalidParams import logging logger = logging.getLogger(__name__) @@ -169,7 +170,7 @@ EVAL_PARAMS = { "aliases": ["plug", "p", "plugins", "algorithms", 'algo', 'a', 'plugin'], "description": "Plugins to evaluate", "required": True, - "help": "See activated plugins in /plugins", + "help": "See plugins at /plugins", "processor": API_PARAMS['algorithm']['processor'] }, "dataset": { @@ -294,12 +295,7 @@ def parse_params(indict, *specs): wrong_params[param] = spec[param] if wrong_params: logger.debug("Error parsing: %s", wrong_params) - message = Error( - status=400, - message='Missing or invalid parameters', - parameters=outdict, - errors=wrong_params) - raise message + raise InvalidParams(wrong_params) return outdict @@ -315,10 +311,9 @@ def get_all_params(plugins, *specs): def get_extra_params(plugins): '''Get a list of possible parameters given a list of plugins''' params = {} - extra_params = {} + extra_params: dict = {} for plugin in plugins: - this_params = plugin.get('extra_params', {}) - for k, v in this_params.items(): + for k, v in plugin.extra_params.items(): if k not in extra_params: extra_params[k] = {} extra_params[k][plugin.name] = v @@ -372,7 +367,7 @@ def parse_analyses(params, plugins): if not plugin: continue this_params = filter_params(params, plugin, i) - parsed = parse_params(this_params, plugin.get('extra_params', {})) + parsed = parse_params(this_params, plugin.extra_params) analysis = plugin.activity(parsed) analysis_list.append(analysis) return analysis_list @@ 
-414,7 +409,7 @@ def parse_call(params): params = parse_params(params, NIF_PARAMS) if params['informat'] == 'text': results = Results() - entry = Entry(nif__isString=params['input'], id='prefix:') # Use @base + entry = Entry(text=params['input'], id='prefix:') # Use @base results.entries.append(entry) elif params['informat'] == 'json-ld': results = from_string(params['input'], cls=Results) diff --git a/senpy/blueprints.py b/senpy/blueprints.py index cff01d8..f81bf85 100644 --- a/senpy/blueprints.py +++ b/senpy/blueprints.py @@ -20,7 +20,8 @@ Blueprints for Senpy """ from flask import (Blueprint, request, current_app, render_template, url_for, jsonify, redirect) -from .models import Error, Response, Help, Plugins, read_schema, dump_schema, Datasets +from .models import BASE_CONTEXT, Help, Plugins, Datasets +from .errors import Error from . import api from .version import __version__ from functools import wraps @@ -102,7 +103,7 @@ def index(): @api_blueprint.route('/contexts/') def context(code=''): - context = Response._context + context = BASE_CONTEXT context['@base'] = url_for('api.decode', code=code, _external=True) context['endpoint'] = url_for('api.api_root', _external=True) return jsonify({"@context": context}) @@ -113,22 +114,58 @@ def decode(code): try: return redirect(decoded_url(code)) except Exception: - return Error('invalid URL').flask() + return to_flask(Error('invalid URL')) @ns_blueprint.route('/') # noqa: F811 def index(): - context = Response._context.copy() + context = BASE_CONTEXT.copy() context['endpoint'] = url_for('api.api_root', _external=True) return jsonify({"@context": context}) -@api_blueprint.route('/schemas/') -def schema(schema="definitions"): - try: - return dump_schema(read_schema(schema)) - except Exception as ex: # Should be FileNotFoundError, but it's missing from py2 - return Error(message="Schema not found: {}".format(ex), status=404).flask() +# from .models import read_schema, dump_schema +#@api_blueprint.route('/schemas/') 
+#def schema(schema="definitions"): +# try: +# return dump_schema(read_schema(schema)) +# except Exception as ex: # Should be FileNotFoundError, but it's missing from py2 +# return Error(message="Schema not found: {}".format(ex), status=404).flask() + +def to_flask(resp, + in_headers=False, + headers=None, + outformat='json-ld', + **kwargs): + """ + Return the values and error to be used in flask. + So far, it returns a fixed context. We should store/generate different + contexts if the plugin adds more aliases. + """ + headers = headers or {} + if isinstance(resp, Error): + status = resp.code + resp = resp.to_response() + else: + status = 200 + + kwargs["with_context"] = not in_headers + content, mimetype = resp.serialize(format=outformat, + with_mime=True, + **kwargs) + + if outformat == 'json-ld' and in_headers: + headers.update({ + "Link": + ('<%s>;' + 'rel="http://www.w3.org/ns/json-ld#context";' + ' type="application/ld+json"' % kwargs.get('context_uri')) + }) + return FlaskResponse( + response=content, + status=status, + headers=headers, + mimetype=mimetype) def basic_api(f): @@ -171,20 +208,20 @@ def basic_api(f): prefix = params.get('prefix') code = encode_url(prefix) - return response.flask( - in_headers=params['in-headers'], - headers=headers, - prefix=prefix or url_for_code(code), - base=prefix, - context_uri=url_for('api.context', - code=code, - _external=True), - outformat=outformat, - expanded=params['expanded-jsonld'], - template=params.get('template'), - verbose=params['verbose'], - aliases=params['aliases'], - fields=params.get('fields')) + return to_flask(response, + in_headers=params['in-headers'], + headers=headers, + prefix=prefix or url_for_code(code), + base=prefix, + context_uri=url_for('api.context', + code=code, + _external=True), + outformat=outformat, + expanded=params['expanded-jsonld'], + template=params.get('template'), + verbose=params['verbose'], + aliases=params['aliases'], + fields=params.get('fields')) except (Exception) as ex: 
if current_app.debug or current_app.config['TESTING']: @@ -195,11 +232,10 @@ def basic_api(f): response = ex response.parameters = raw_params logger.exception(ex) - return response.flask( - outformat=outformat, - expanded=params['expanded-jsonld'], - verbose=params.get('verbose', True), - ) + return to_flask(response, + outformat=outformat, + expanded=params['expanded-jsonld'], + verbose=params.get('verbose', True)) return decorated_function diff --git a/senpy/cli.py b/senpy/cli.py index 7bf203d..6bb2362 100644 --- a/senpy/cli.py +++ b/senpy/cli.py @@ -16,7 +16,7 @@ from __future__ import print_function import sys -from .models import Error +from .errors import Error from .extensions import Senpy from . import api @@ -49,11 +49,6 @@ def main_function(argv): sp = Senpy(default_plugins=default_plugins, plugin_folder=plugin_folder) request = api.parse_call(params) algos = sp.get_plugins(request.parameters.get('algorithm', None)) - if algos: - for algo in algos: - sp.activate_plugin(algo.name) - else: - sp.activate_all() res = sp.analyse(request) return res diff --git a/senpy/errors.py b/senpy/errors.py new file mode 100644 index 0000000..b80257b --- /dev/null +++ b/senpy/errors.py @@ -0,0 +1,23 @@ +from .models import ErrorResponse + +class Error(Exception): + def __init__(self, message='Generic senpy exception', errors=[]): + Exception.__init__(self, message) + self.message = message + self.errors = errors + + def toResponse(self) -> ErrorResponse: + return ErrorResponse(self.message) + + def __str__(self): + if not hasattr(self, 'errors'): + return self.message + return '{}:\n\t{}'.format(self.message, self.errors) + + def __hash__(self): + return Exception.__hash__(self) + + +class InvalidParams(Error): + def __init__(self, wrong_params): + super().__init__(message='Wrong parameters:\n\t{}'.format(wrong_params)) diff --git a/senpy/extensions.py b/senpy/extensions.py index 3f68fcc..57a9625 100644 --- a/senpy/extensions.py +++ b/senpy/extensions.py @@ -15,14 +15,15 
@@ # """ Main class for Senpy. -It orchestrates plugin (de)activation and analysis. +It orchestrates plugin discovery, creation and analysis. """ from future import standard_library standard_library.install_aliases() from . import config from . import plugins, api -from .models import Error, AggregatedEvaluation +from .models import AggregatedEvaluation +from .errors import Error from .plugins import AnalysisPlugin from .blueprints import api_blueprint, demo_blueprint, ns_blueprint @@ -43,7 +44,7 @@ class Senpy(object): def __init__(self, app=None, - plugin_folder=".", + plugin_folders=[".", ], data_folder=None, install=False, strict=None, @@ -62,16 +63,18 @@ class Senpy(object): self._default = None self.strict = strict if strict is not None else config.strict + self.strict = True # TODO: remove this after tests pass self.install = install self._plugins = {} - if plugin_folder: - self.add_folder(plugin_folder) + self.plugin_folders = plugin_folders + for folder in plugin_folders: + self._add_folder(folder) if default_plugins: - self.add_folder('plugins', from_root=True) + self._add_folder('plugins', from_root=True) else: # Add only conversion plugins - self.add_folder(os.path.join('plugins', 'postprocessing'), + self._add_folder(os.path.join('plugins', 'postprocessing'), from_root=True) self.app = app if app is not None: @@ -98,17 +101,18 @@ class Senpy(object): app.register_blueprint(demo_blueprint, url_prefix="/") def add_plugin(self, plugin): - self._plugins[plugin.name.lower()] = plugin + name = plugin.name.lower() + assert name + self._plugins[name] = plugin self._conversion_candidates = {} def delete_plugin(self, plugin): del self._plugins[plugin.name.lower()] - def plugins(self, plugin_type=None, is_activated=True, **kwargs): + def plugins(self, plugin_type=None, **kwargs): """ Return the plugins registered for a given application. 
Filtered by criteria """ return sorted(plugins.pfilter(self._plugins, plugin_type=plugin_type, - is_activated=is_activated, **kwargs), key=lambda x: x.id) @@ -149,7 +153,7 @@ class Senpy(object): candidates = self.plugins(**kwargs) return list(plugins.pfilter(candidates, plugin_type=AnalysisPlugin)) - def add_folder(self, folder, from_root=False): + def _add_folder(self, folder, from_root=False): """ Find plugins in this folder and add them to this instance """ if from_root: folder = os.path.join(os.path.dirname(__file__), folder) @@ -163,25 +167,21 @@ class Senpy(object): else: raise AttributeError("Not a folder or does not exist: %s", folder) - def _process(self, req, pending, done=None): + def _process(self, req, pending): """ Recursively process the entries with the first plugin in the list, and pass the results to the rest of the plugins. """ - done = done or [] if not pending: return req analysis = pending[0] results = analysis.run(req) results.activities.append(analysis) - done += analysis - return self._process(results, pending[1:], done) + return self._process(results, pending[1:]) def install_deps(self): logger.info('Installing dependencies') - # If a plugin is activated, its dependencies should already be installed - # Otherwise, it would've failed to activate. 
plugins.install_deps(*self._plugins.values()) def analyse(self, request, analyses=None): @@ -263,6 +263,7 @@ class Senpy(object): i.emotions = newemotions newentries.append(i) resp.entries = newentries + resp.activities.extend(done) return resp def _conversion_candidate(self, fromModel, toModel): @@ -330,7 +331,7 @@ class Senpy(object): @property def default_plugin(self): - if not self._default or not self._default.is_activated: + if not self._default: candidates = self.analysis_plugins() if len(candidates) > 0: self._default = candidates[0] @@ -342,84 +343,10 @@ class Senpy(object): @default_plugin.setter def default_plugin(self, value): if isinstance(value, plugins.Plugin): - if not value.is_activated: - raise AttributeError('The default plugin has to be activated.') self._default = value else: self._default = self._plugins[value.lower()] - def activate_all(self, sync=True): - ps = [] - for plug in self._plugins.keys(): - try: - self.activate_plugin(plug, sync=sync) - except Exception as ex: - if self.strict: - raise - logger.error('Could not activate {}: {}'.format(plug, ex)) - return ps - - def deactivate_all(self, sync=True): - ps = [] - for plug in self._plugins.keys(): - ps.append(self.deactivate_plugin(plug, sync=sync)) - return ps - - def _activate(self, plugin): - with plugin._lock: - if plugin.is_activated: - return - try: - logger.info("Activating plugin: {}".format(plugin.name)) - - assert plugin._activate() - logger.info(f"Plugin activated: {plugin.name}") - except Exception as ex: - if getattr(plugin, "optional", False) and not self.strict: - logger.info(f"Plugin could NOT be activated: {plugin.name}") - return False - raise - return plugin.is_activated - - def activate_plugin(self, plugin_name, sync=True): - plugin_name = plugin_name.lower() - if plugin_name not in self._plugins: - raise Error( - message="Plugin not found: {}".format(plugin_name), status=404) - plugin = self._plugins[plugin_name] - - logger.info("Activating plugin: 
{}".format(plugin.name)) - - if sync or not getattr(plugin, 'async', True) or getattr( - plugin, 'sync', False): - return self._activate(plugin) - else: - th = Thread(target=partial(self._activate, plugin)) - th.start() - return th - - def _deactivate(self, plugin): - with plugin._lock: - if not plugin.is_activated: - return - plugin._deactivate() - logger.info("Plugin deactivated: {}".format(plugin.name)) - - def deactivate_plugin(self, plugin_name, sync=True): - plugin_name = plugin_name.lower() - if plugin_name not in self._plugins: - raise Error( - message="Plugin not found: {}".format(plugin_name), status=404) - plugin = self._plugins[plugin_name] - - if sync or not getattr(plugin, 'async', True) or not getattr( - plugin, 'sync', False): - plugin._deactivate() - else: - th = Thread(target=plugin.deactivate) - th.start() - return th - def teardown(self, exception): pass diff --git a/senpy/meta.py b/senpy/meta.py deleted file mode 100644 index d1d7aaa..0000000 --- a/senpy/meta.py +++ /dev/null @@ -1,303 +0,0 @@ -# -# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -''' -Meta-programming for the models. -''' -import os -import json -import jsonschema -import inspect -import copy - -from abc import ABCMeta -from collections import namedtuple -from collections.abc import MutableMapping - - -class BaseMeta(ABCMeta): - ''' - Metaclass for models. It extracts the default values for the fields in - the model. 
- - For instance, instances of the following class wouldn't need to mark - their version or description on initialization: - - .. code-block:: python - - class MyPlugin(Plugin): - version=0.3 - description='A dull plugin' - - - Note that these operations could be included in the __init__ of the - class, but it would be very inefficient. - ''' - _subtypes = {} - - def __new__(mcs, name, bases, attrs, **kwargs): - register_afterwards = False - defaults = {} - aliases = {} - - attrs = mcs.expand_with_schema(name, attrs) - if 'schema' in attrs: - register_afterwards = True - for base in bases: - if hasattr(base, '_defaults'): - defaults.update(getattr(base, '_defaults')) - if hasattr(base, '_aliases'): - aliases.update(getattr(base, '_aliases')) - - info, rest = mcs.split_attrs(attrs) - - for i in list(info.keys()): - if isinstance(info[i], _Alias): - aliases[i] = info[i].indict - if info[i].default is not None: - defaults[i] = info[i].default - else: - defaults[i] = info[i] - - rest['_defaults'] = defaults - rest['_aliases'] = aliases - - cls = super(BaseMeta, mcs).__new__(mcs, name, tuple(bases), rest) - - if register_afterwards: - mcs.register(cls, defaults['@type']) - return cls - - @classmethod - def register(mcs, rsubclass, rtype=None): - mcs._subtypes[rtype or rsubclass.__name__] = rsubclass - - @staticmethod - def expand_with_schema(name, attrs): - if 'schema' in attrs: # Schema specified by name - schema_file = '{}.json'.format(attrs['schema']) - elif 'schema_file' in attrs: - schema_file = attrs['schema_file'] - del attrs['schema_file'] - else: - return attrs - - if '/' not in 'schema_file': - thisdir = os.path.dirname(os.path.realpath(__file__)) - schema_file = os.path.join(thisdir, - 'schemas', - schema_file) - - schema_path = 'file://' + schema_file - - with open(schema_file) as f: - schema = json.load(f) - - resolver = jsonschema.RefResolver(schema_path, schema) - if '@type' not in attrs: - attrs['@type'] = name - attrs['_schema_file'] = schema_file - 
attrs['schema'] = schema - attrs['_validator'] = jsonschema.Draft4Validator(schema, resolver=resolver) - - schema_defaults = BaseMeta.get_defaults(attrs['schema']) - attrs.update(schema_defaults) - - return attrs - - @staticmethod - def is_func(v): - return inspect.isroutine(v) or inspect.ismethod(v) or \ - inspect.ismodule(v) or isinstance(v, property) - - @staticmethod - def is_internal(k): - return k[0] == '_' or k == 'schema' or k == 'data' - - @staticmethod - def get_key(key): - if key[0] != '_': - key = key.replace("__", ":", 1) - return key - - @staticmethod - def split_attrs(attrs): - ''' - Extract the attributes of the class. - - This allows adding default values in the class definition. - e.g.: - ''' - isattr = {} - rest = {} - for key, value in attrs.items(): - if not (BaseMeta.is_internal(key)) and (not BaseMeta.is_func(value)): - isattr[key] = value - else: - rest[key] = value - return isattr, rest - - @staticmethod - def get_defaults(schema): - temp = {} - for obj in [ - schema, - ] + schema.get('allOf', []): - for k, v in obj.get('properties', {}).items(): - if 'default' in v and k not in temp: - temp[k] = v['default'] - return temp - - -def make_property(key, default=None): - - def fget(self): - if default: - return self.get(key, copy.copy(default)) - return self[key] - - def fdel(self): - del self[key] - - def fset(self, value): - self[key] = value - - return fget, fset, fdel - - -class CustomDict(MutableMapping, object): - ''' - A dictionary whose elements can also be accessed as attributes. Since some - characters are not valid in the dot-notation, the attribute names also - converted. 
e.g.: - - > d = CustomDict() - > d.key = d['ns:name'] = 1 - > d.key == d['key'] - True - > d.ns__name == d['ns:name'] - ''' - - _defaults = {} - _aliases = {'id': '@id'} - - def __init__(self, *args, **kwargs): - super(CustomDict, self).__init__() - for k, v in self._defaults.items(): - self[k] = copy.copy(v) - for arg in args: - self.update(arg) - for k, v in kwargs.items(): - self[k] = v - return self - - def serializable(self, **kwargs): - def ser_or_down(item): - if hasattr(item, 'serializable'): - return item.serializable(**kwargs) - elif isinstance(item, dict): - temp = dict() - for kp in item: - vp = item[kp] - temp[kp] = ser_or_down(vp) - return temp - elif isinstance(item, list) or isinstance(item, set): - return list(ser_or_down(i) for i in item) - else: - return item - - return ser_or_down(self.as_dict(**kwargs)) - - def __getitem__(self, key): - return self.__dict__[key] - - def __setitem__(self, key, value): - '''Do not insert data directly, there might be a property in that key. 
''' - key = self._key_to_attr(key) - return setattr(self, key, value) - - def __delitem__(self, key): - key = self._key_to_attr(key) - del self.__dict__[key] - - def as_dict(self, verbose=True, aliases=False): - attrs = self.__dict__.keys() - if not verbose and hasattr(self, '_terse_keys'): - attrs = self._terse_keys + ['@type', '@id'] - res = {k: getattr(self, k) for k in attrs - if not self._internal_key(k) and hasattr(self, k)} - if not aliases: - return res - for k, ok in self._aliases.items(): - if ok in res: - res[k] = getattr(res, ok) - del res[ok] - return res - - def __iter__(self): - return (k for k in self.__dict__ if not self._internal_key(k)) - - def __len__(self): - return len(self.__dict__) - - def update(self, other): - for k, v in other.items(): - self[k] = v - - def _attr_to_key(self, key): - key = key.replace("__", ":", 1) - key = self._aliases.get(key, key) - return key - - def _key_to_attr(self, key): - if self._internal_key(key): - return key - - if key in self._aliases: - key = self._aliases[key] - else: - key = key.replace(":", "__", 1) - return key - - def __getattr__(self, key): - nkey = self._attr_to_key(key) - if nkey in self.__dict__: - return self.__dict__[nkey] - elif nkey == key: - raise AttributeError("Key not found: {}".format(key)) - return getattr(self, nkey) - - def __setattr__(self, key, value): - super(CustomDict, self).__setattr__(self._attr_to_key(key), value) - - def __delattr__(self, key): - super(CustomDict, self).__delattr__(self._attr_to_key(key)) - - @staticmethod - def _internal_key(key): - return key[0] == '_' - - def __str__(self): - return json.dumps(self.serializable(), sort_keys=True, indent=4) - - def __repr__(self): - return json.dumps(self.serializable(), sort_keys=True, indent=4) - - -_Alias = namedtuple('Alias', ['indict', 'default']) - - -def alias(key, default=None): - return _Alias(key, default) diff --git a/senpy/models.py b/senpy/models.py index 42a336f..0e4f806 100644 --- a/senpy/models.py +++ 
b/senpy/models.py @@ -15,141 +15,49 @@ # ''' Senpy Models. - -This implementation should mirror the JSON schema definition. -For compatibility with Py3 and for easier debugging, this new version drops -introspection and adds all arguments to the models. ''' -from __future__ import print_function -from future import standard_library -standard_library.install_aliases() -from future.utils import with_metaclass -from past.builtins import basestring -from jinja2 import Environment, BaseLoader +from typing import * -import time -import copy -import json import os -import jsonref -from flask import Response as FlaskResponse -from pyld import jsonld - import logging import jmespath +import json + +from textwrap import dedent + +from pydantic import BaseModel as PydanticModel +from pydantic import computed_field, field_validator, ConfigDict, Field +from pydantic_core import from_json logging.getLogger('rdflib').setLevel(logging.WARN) logger = logging.getLogger(__name__) -from rdflib import Graph - -from .meta import BaseMeta, CustomDict, alias - -DEFINITIONS_FILE = 'definitions.json' CONTEXT_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'schemas', 'context.jsonld') - -def get_schema_path(schema_file, absolute=False): - if absolute: - return os.path.realpath(schema_file) - else: - return os.path.join( - os.path.dirname(os.path.realpath(__file__)), 'schemas', - schema_file) +with open(CONTEXT_PATH) as f: + BASE_CONTEXT = dict(json.loads(f.read())) -def read_schema(schema_file, absolute=False): - schema_path = get_schema_path(schema_file, absolute) - schema_uri = 'file://{}'.format(schema_path) - with open(schema_path) as f: - return jsonref.load(f, base_uri=schema_uri) - - -def dump_schema(schema): - return jsonref.dumps(schema) - - -def load_context(context): - logging.debug('Loading context: {}'.format(context)) - if not context: - return context - elif isinstance(context, list): - contexts = [] - for c in context: - contexts.append(load_context(c)) - 
return contexts - elif isinstance(context, dict): - return dict(context) - elif isinstance(context, basestring): - try: - with open(context) as f: - return dict(json.loads(f.read())) - except IOError: - return context - else: - raise AttributeError('Please, provide a valid context') - - -base_context = load_context(CONTEXT_PATH) - - -def register(rsubclass, rtype=None): - BaseMeta.register(rsubclass, rtype) - - -class BaseModel(with_metaclass(BaseMeta, CustomDict)): +class BaseModel(PydanticModel): ''' Entities of the base model are a special kind of dictionary that emulates - a JSON-LD object. The structure of the dictionary is checked via JSON-schema. - For convenience, the values can also be accessed as attributes - (a la Javascript). e.g.: + a JSON-LD object. + The basic attributes should be set in the class, but additional values can + also be provided/set via attributes (à la Javascript). e.g.: >>> myobject.key == myobject['key'] True - >>> myobject.ns__name == myobject['ns:name'] + >>> myobject['ns:name'] = 'Peter' True - - Additionally, subclasses of this class can specify default values for their - instances. These defaults are inherited by subclasses. e.g.: - - >>> class NewModel(BaseModel): - ... mydefault = 5 - >>> n1 = NewModel() - >>> n1['mydefault'] == 5 - True - >>> n1.mydefault = 3 - >>> n1['mydefault'] = 3 - True - >>> n2 = NewModel() - >>> n2 == 5 - True - >>> class SubModel(NewModel): - pass - >>> subn = SubModel() - >>> subn.mydefault == 5 - True - - Lastly, every subclass that also specifies a schema will get registered, so it - is possible to deserialize JSON and get the right type. - i.e. to recover an instance of the original class from a plain JSON. 
- ''' - - # schema_file = DEFINITIONS_FILE - _context = base_context["@context"] - - def __init__(self, *args, **kwargs): - auto_id = kwargs.pop('_auto_id', False) - - super(BaseModel, self).__init__(*args, **kwargs) - - if auto_id: - self.id + model_config = ConfigDict(extra='ignore', ignored_types=(object, )) + context: Optional[Union[str, Dict]] = Field(default=None, serialization_alias="@context") + wasGeneratedBy: Optional[str] = Field(default=None, serialization_alias="prov:wasGeneratedBy") @property def id(self): @@ -161,35 +69,6 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)): def id(self, value): self['@id'] = value - def flask(self, - in_headers=False, - headers=None, - outformat='json-ld', - **kwargs): - """ - Return the values and error to be used in flask. - So far, it returns a fixed context. We should store/generate different - contexts if the plugin adds more aliases. - """ - headers = headers or {} - kwargs["with_context"] = not in_headers - content, mimetype = self.serialize(format=outformat, - with_mime=True, - **kwargs) - - if outformat == 'json-ld' and in_headers: - headers.update({ - "Link": - ('<%s>;' - 'rel="http://www.w3.org/ns/json-ld#context";' - ' type="application/ld+json"' % kwargs.get('context_uri')) - }) - return FlaskResponse( - response=content, - status=self.get('status', 200), - headers=headers, - mimetype=mimetype) - def serialize(self, format='json-ld', with_mime=False, template=None, prefix=None, fields=None, **kwargs): js = self.jsonld(prefix=prefix, **kwargs) @@ -240,7 +119,7 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)): result, options={ 'expandContext': [ - self._context, + self.context, { 'prefix': prefix, 'endpoint': prefix @@ -271,261 +150,177 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)): self['prov:wasGeneratedBy'] = another.id -def subtypes(): - return BaseMeta._subtypes - - -def from_dict(indict, cls=None, warn=True): - if not cls: - target = indict.get('@type', None) - cls = 
BaseModel - try: - cls = subtypes()[target] - except KeyError: - pass - - if cls == BaseModel and warn: - logger.warning('Created an instance of an unknown model') - - outdict = dict() - for k, v in indict.items(): - if k == '@context': - pass - elif isinstance(v, dict): - v = from_dict(indict[k]) - elif isinstance(v, list): - v = v[:] - for ix, v2 in enumerate(v): - if isinstance(v2, dict): - v[ix] = from_dict(v2) - outdict[k] = copy.copy(v) - return cls(**outdict) - - -def from_string(string, **kwargs): - return from_dict(json.loads(string), **kwargs) - - -def from_json(injson, **kwargs): - indict = json.loads(injson) - return from_dict(indict, **kwargs) - - class Entry(BaseModel): - schema = 'entry' + text: str = Field(serialization_alias="nif:isString") - text = alias('nif:isString') - sentiments = alias('marl:hasOpinion', []) - emotions = alias('onyx:hasEmotionSet', []) - - -class Sentiment(BaseModel): - schema = 'sentiment' - - polarity = alias('marl:hasPolarity') - polarityValue = alias('marl:polarityValue') - - -class Error(BaseModel, Exception): - schema = 'error' - - def __init__(self, message='Generic senpy exception', *args, **kwargs): - Exception.__init__(self, message) - super(Error, self).__init__(*args, **kwargs) - self.message = message - - def __str__(self): - if not hasattr(self, 'errors'): - return self.message - return '{}:\n\t{}'.format(self.message, self.errors) - - def __hash__(self): - return Exception.__hash__(self) - - -class AggregatedEvaluation(BaseModel): - schema = 'aggregatedEvaluation' - - evaluations = alias('senpy:evaluations', []) - - -class Dataset(BaseModel): - schema = 'dataset' - - -class Datasets(BaseModel): - schema = 'datasets' - - datasets = [] - - -class Emotion(BaseModel): - schema = 'emotion' - - -class EmotionConversion(BaseModel): - schema = 'emotionConversion' - - -class EmotionConversionPlugin(BaseModel): - schema = 'emotionConversionPlugin' - - -class EmotionAnalysis(BaseModel): - schema = 'emotionAnalysis' - - 
-class EmotionModel(BaseModel): - schema = 'emotionModel' - onyx__hasEmotionCategory = [] - - -class EmotionPlugin(BaseModel): - schema = 'emotionPlugin' - - -class EmotionSet(BaseModel): - schema = 'emotionSet' - - onyx__hasEmotion = [] - - -class Evaluation(BaseModel): - schema = 'evaluation' - - metrics = alias('senpy:metrics', []) - - -class Entity(BaseModel): - schema = 'entity' - - -class Help(BaseModel): - schema = 'help' - - -class Metric(BaseModel): - schema = 'metric' - - -class Parameter(BaseModel): - schema = 'parameter' - - -class Plugins(BaseModel): - schema = 'plugins' - - plugins = [] - - -class Response(BaseModel): - schema = 'response' +class Activity(BaseModel): + plugin: str + params: Dict[str, Any] + analyzed: List[str] = [] class Results(BaseModel): - schema = 'results' - - _terse_keys = ['entries', ] - - activities = [] - entries = [] - - def activity(self, id): - for i in self.activities: - if i.id == id: - return i - return None - - -class SentimentPlugin(BaseModel): - schema = 'sentimentPlugin' - - -class Suggestion(BaseModel): - schema = 'suggestion' - - -class Topic(BaseModel): - schema = 'topic' + activities: List[Activity] = [] + entries: List[Entry] = [] + parameters: Dict[str, str] = {} class Analysis(BaseModel): - ''' - A prov:Activity that results of executing a Plugin on an entry with a set of - parameters. 
- ''' - schema = 'analysis' - - parameters = alias('prov:used', []) - algorithm = alias('prov:wasAssociatedWith', []) - - @property - def params(self): - outdict = {} - outdict['algorithm'] = self.algorithm - for param in self.parameters: - outdict[param['name']] = param['value'] - return outdict - - @params.setter - def params(self, value): - for k, v in value.items(): - for param in self.parameters: - if param.name == k: - param.value = v - break - else: - self.parameters.append(Parameter(name=k, value=v)) # noqa: F821 - - def param(self, key, default=None): - for param in self.parameters: - if param['name'] == key: - return param['value'] - return default - - @property - def plugin(self): - return self._plugin - - @plugin.setter - def plugin(self, value): - self._plugin = value - self['prov:wasAssociatedWith'] = value.id + plugin: 'Plugin' + params: dict[str, object] = {} def run(self, request): return self.plugin.process(request, self) +class MoralityAnalysis(Analysis): + usedMoralValueModel: str + usedMLModel: Optional[str] = None + + +class Annotation(BaseModel): + annotated: Optional[str] = None + wasGeneratedBy: Optional[str] = None + + def add_provenance(self, entry, activity): + self.annotated = entry.id + self.wasGeneratedBy = activity.id + self[prov['wasGeneratedBy']] = activity.id + activity.analysed.append(entry.id) + + +class MoralAnnotation(Annotation): + def __init__(self, category, confidence=None, intensity=None, + entry=None, activity=None): + super().__init__() + if confidence is not None: + self[amor['confidence']] = confidence + if intensity is not None: + self[amor_mft['hasPolarityIntensity']] = intensity + if entry and activity: + self.add_provenance(entry, activity) + elif entry or activity: + raise Exception() + + @classmethod + def from_label(cls, label): + return cls(label.category, label.confidence, label.intensity) + + class Plugin(BaseModel): - schema = 'plugin' - extra_params = {} + name: str = Field(default='', 
validate_default=True) + version: Optional[str] = 'dev' + author: Optional[str] = None + extra_params: Dict[str, Any] = {} + description: str = Field(default="", json_schema_extra=dict(hidden=True)) - def activity(self, parameters=None): + @field_validator("description", mode="before") + @classmethod + def dedentdoc(cls, doc: str) -> str: + doc = doc or cls.get('__doc__', None) + return dedent(doc) + + @field_validator("name", mode="before") + @classmethod + def clsname(cls, name: str) -> str: + name = name or cls.__name__.capitalize() + return name + + @computed_field + @property + def id(self) -> str: + return 'endpoint:plugins/{}_{}'.format(self.name, self.version) + + def activity(self, params, *, cls=Analysis): '''Generate an Analysis (prov:Activity) from this plugin and the given parameters''' - a = Analysis() - a.plugin = self - if parameters: - a.params = parameters + a = cls(plugin=self, params=params) return a + + +class Plugins(BaseModel): + plugins: List[Plugin] = [] + +class Emotion: + hasEmotionIntensity: str = Field(serialization_alias="onyx:hasEmotionIntensity") + hasEmotionCategory: str = Field(serialization_alias="onyx:hasEmotionCategory") + +class Sentiment: + pass + +class SentimentPlugin(Plugin): + pass + +class Suggestion: + pass + +class Topic: + pass -# More classes could be added programmatically +class Context: + pass -def _class_from_schema(name, schema=None, schema_file=None, base_classes=None): - base_classes = base_classes or [] - base_classes.append(BaseModel) - attrs = {} - if schema: - attrs['schema'] = schema - elif schema_file: - attrs['schema_file'] = schema_file - else: - attrs['schema'] = name - name = "".join((name[0].upper(), name[1:])) - return BaseMeta(name, base_classes, attrs) +class Dataset: + pass + +class Datasets: + pass + +class Definitions: + pass + +class Dimensions: + pass + +class EmotionAnalysis: + pass + +class EmotionConversion: + pass + +class EmotionConversionPlugin(Plugin): + pass + +class Emotion: + pass + 
+class EmotionModel: + pass + +class EmotionPlugin(Plugin): + pass + +class EmotionSet: + pass + +class Entity: + pass + +class ErrorResponse(BaseModel): + code: Optional[int] = 500 + message: str -def _add_class_from_schema(*args, **kwargs): - generatedClass = _class_from_schema(*args, **kwargs) - globals()[generatedClass.__name__] = generatedClass - del generatedClass +class Metric: + pass + +class Evaluation: + pass + +class AggregatedEvaluation: + pass + +class Help: + pass + +class Parameter: + pass + + + +def from_dict(c, cls): + return cls.model_validate(d) + + +def from_string(s, cls): + return from_dict(from_json(s, allow_partial=True)) diff --git a/senpy/ns.py b/senpy/ns.py new file mode 100644 index 0000000..168d55f --- /dev/null +++ b/senpy/ns.py @@ -0,0 +1,7 @@ +from rdflib import Namespace + +amor = Namespace('http://www.gsi.upm.es/ontologies/amor/ns#') +amor_bhv = Namespace('http://www.gsi.upm.es/ontologies/amor-bhv/ns#') +amor_mft = Namespace('http://www.gsi.upm.es/ontologies/amor-mft/ns#') +prov = Namespace('http://www.w3.org/ns/prov#') +emoml = Namespace('http://www.gsi.upm.es/ontologies/onyx/vocabularies/emotionml/ns#') diff --git a/senpy/plugins/__init__.py b/senpy/plugins/__init__.py index de3c290..77d5a5a 100644 --- a/senpy/plugins/__init__.py +++ b/senpy/plugins/__init__.py @@ -16,10 +16,7 @@ # limitations under the License. # -from future import standard_library -standard_library.install_aliases() - -from future.utils import with_metaclass +from typing import * from functools import partial import os.path @@ -29,6 +26,8 @@ import pickle import logging import pprint +import deprecation + import inspect import sys import subprocess @@ -38,59 +37,21 @@ import threading import multiprocessing import pkg_resources from nltk import download -from textwrap import dedent from sklearn.base import TransformerMixin, BaseEstimator from itertools import product from .. import models, utils +from ..errors import Error from .. import api from .. 
import gsitk_compat from .. import testing from .. import config +from .. import ns logger = logging.getLogger(__name__) -class PluginMeta(models.BaseMeta): - _classes = {} - - def __new__(mcs, name, bases, attrs, **kwargs): - plugin_type = set() - for base in bases: - if hasattr(base, '_plugin_type'): - plugin_type |= base._plugin_type - plugin_type.add(name) - alias = attrs.get('name', name).lower() - attrs['_plugin_type'] = plugin_type - logger.debug('Adding new plugin class: %s %s %s %s', name, bases, attrs, plugin_type) - attrs['name'] = alias - if 'description' not in attrs: - doc = attrs.get('__doc__', None) - if doc: - attrs['description'] = dedent(doc) - else: - logger.warning( - ('Plugin {} does not have a description. ' - 'Please, add a short summary to help other developers' - ).format(name)) - cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs) - - if alias in mcs._classes: - if config.testing: - raise Exception( - ('The type of plugin {} already exists. ' - 'Please, choose a different name').format(name)) - else: - logger.warning('Overloading plugin class: {}'.format(alias)) - mcs._classes[alias] = cls - return cls - - @classmethod - def for_type(cls, ptype): - return cls._classes[ptype] - - -class Plugin(with_metaclass(PluginMeta, models.Plugin)): +class Plugin(models.Plugin): ''' Base class for all plugins in senpy. A plugin must provide at least these attributes: @@ -103,23 +64,16 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): ''' - _terse_keys = ['name', '@id', '@type', 'author', 'description', - 'extra_params', 'is_activated', 'url', 'version'] + data_folder: str = "." + test_cases: List[Dict] = [] - def __init__(self, info=None, data_folder=None, **kwargs): + + def __init__(self, *, data_folder=None, **data): """ Provides a canonical name for plugins and serves as base for other kinds of plugins. 
""" - logger.debug("Initialising %s", info) - super(Plugin, self).__init__(**kwargs) - if info: - self.update(info) - self.validate() - self.id = 'endpoint:plugins/{}_{}'.format(self['name'], - self['version']) - self.is_activated = False - self._lock = threading.Lock() + super().__init__(**data) self._directory = os.path.abspath( os.path.dirname(inspect.getfile(self.__class__))) @@ -128,7 +82,6 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): if not data_folder: data_folder = os.getcwd() - data_folder = os.path.abspath(data_folder) subdir = os.path.join(data_folder, self.name) @@ -149,35 +102,9 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): def log(self): return self._log - def validate(self): - missing = [] - for x in ['name', 'description', 'version']: - if x not in self: - missing.append(x) - if missing: - raise models.Error( - 'Missing configuration parameters: {}'.format(missing)) - def get_folder(self): return os.path.dirname(inspect.getfile(self.__class__)) - def _activate(self): - if self.is_activated: - return - self.activate() - self.is_activated = True - return self.is_activated - - def _deactivate(self): - self.is_activated = False - self.deactivate() - - def activate(self): - pass - - def deactivate(self): - pass - def process(self, request, activity, **kwargs): """ An implemented plugin should override this method. 
@@ -195,7 +122,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): if inspect.isgenerator(results): for result in results: yield result - else: + elif results: yield results def process_entry(self, entry, activity): @@ -210,8 +137,6 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): ) def test(self, test_cases=None): - if not self.is_activated: - self._activate() if not test_cases: if not hasattr(self, 'test_cases'): raise AttributeError( @@ -235,7 +160,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): def test_case(self, case, mock=testing.MOCK_REQUESTS): if 'entry' not in case and 'input' in case: entry = models.Entry(_auto_id=False) - entry.nif__isString = case['input'] + entry.text = case['input'] case['entry'] = entry entry = models.Entry(case['entry']) given_parameters = case.get('params', case.get('parameters', {})) @@ -265,7 +190,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): expected = [expected] utils.check_template(res.entries, expected) res.validate() - except models.Error: + except Error: if should_fail: return raise @@ -302,38 +227,16 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): SenpyPlugin = Plugin -class FailedPlugin(Plugin): - """A plugin that has failed to initialize.""" - version = 0 - - def __init__(self, info, function): - super().__init__(info) - a = info.get('name', info.get('module', self.name)) - self['name'] == a - self._function = function - self.is_activated = False - - def retry(self): - return self._function() - - def test(self): - ''' - A module that failed to load cannot be tested. But non-optional - plugins should not fail to load in strict mode. - ''' - assert self.optional and not config.strict - - class Analyser(Plugin): ''' A subclass of Plugin that analyses text and provides an annotation. 
''' - # Deprecated + @deprecation.deprecated(details="Use process instead") def analyse(self, request, activity): return super(Analyser, self).process(request, activity) - # Deprecated + @deprecation.deprecated(details="Use process_entries instead") def analyse_entries(self, entries, activity): for i in super(Analyser, self).process_entries(entries, activity): yield i @@ -402,14 +305,12 @@ class Evaluable(Plugin): return evaluate([self], *args, **kwargs) -class SentimentPlugin(Analyser, Evaluable, models.SentimentPlugin): +class SentimentPlugin(Analyser, Evaluable): ''' Sentiment plugins provide sentiment annotation (using Marl) ''' - minPolarityValue = 0 - maxPolarityValue = 1 - - _terse_keys = Analyser._terse_keys + ['minPolarityValue', 'maxPolarityValue'] + minPolarityValue: float = 0 + maxPolarityValue: float = 1 def test_case(self, case): if 'polarity' in case: @@ -441,7 +342,7 @@ class SentimentPlugin(Analyser, Evaluable, models.SentimentPlugin): for feat in X: if isinstance(feat, list): feat = ' '.join(feat) - entries.append(models.Entry(nif__isString=feat)) + entries.append(models.Entry(text=feat)) labels = [] for e in self.process_entries(entries, activity): sent = e.sentiments[0].polarity @@ -458,10 +359,27 @@ class EmotionPlugin(Analyser, models.EmotionPlugin): ''' Emotion plugins provide emotion annotation (using Onyx) ''' - minEmotionValue = 0 - maxEmotionValue = 1 + minEmotionValue: float = 0 + maxEmotionValue: float = 1 - _terse_keys = Analyser._terse_keys + ['minEmotionValue', 'maxEmotionValue'] + +class AnnotationPlugin(AnalysisPlugin): + def process_entry(self, entry, activity, **kwargs): + for annotation in self.annotate(entry): + annotation.add_provenance(entry, activity) + yield entry + + def annotate(self, entry, **kwargs): + raise NotImplemented("this should be implemented in subclasses") + + +class MoralityPlugin(AnnotationPlugin): + moralValueModel: str = ns.amor_mft['MoralFoundationTHeory'] + + def activity(self, parameters=None): + return 
models.MoralityAnalysis(plugin=self, + usedMLModel=None, + usedMoralValueModel=self.model) class EmotionConversion(Conversion): @@ -545,7 +463,7 @@ class TextBox(Box): '''A black box plugin that takes only text as input''' def to_features(self, entry, activity): - return [entry['nif:isString']] + return [entry.text] class SentimentBox(TextBox, SentimentPlugin): @@ -553,14 +471,14 @@ class SentimentBox(TextBox, SentimentPlugin): A box plugin where the output is only a polarity label or a tuple (polarity, polarityValue) ''' - classes = ['marl:Positive', 'marl:Neutral', 'marl:Negative'] - binary = True + classes: List[str] = ['marl:Positive', 'marl:Neutral', 'marl:Negative'] + binary: bool = True def to_entry(self, features, entry, activity, **kwargs): if len(features) != len(self.classes): - raise models.Error('The number of features ({}) does not match the classes ' - '(plugin.classes ({})'.format(len(features), len(self.classes))) + raise Error('The number of features ({}) does not match the classes ' + '(plugin.classes ({})'.format(len(features), len(self.classes))) minValue = activity.param('marl:minPolarityValue', 0) maxValue = activity.param('marl:minPolarityValue', 1) @@ -590,8 +508,8 @@ class EmotionBox(TextBox, EmotionPlugin): A box plugin where the output is only an a tuple of emotion labels ''' - EMOTIONS = [] - with_intensity = True + EMOTIONS: List[str] = [] + with_intensity: bool = True def to_entry(self, features, entry, activity, **kwargs): s = models.EmotionSet() @@ -697,7 +615,7 @@ def pfilter(plugins, plugin_type=Analyser, **kwargs): logger.debug('Class: {}'.format(pclass)) candidates = filter(lambda x: isinstance(x, pclass), plugins) except KeyError: - raise models.Error('{} is not a valid type'.format(plugin_type)) + raise Error('{} is not a valid type'.format(plugin_type)) else: candidates = plugins @@ -775,7 +693,7 @@ def install_deps(*plugins): exitcode = process.wait() installed = True if exitcode != 0: - raise models.Error( + raise Error( 
"Dependencies not properly installed: {}".format(pip_args)) installed_nltk = download(list(nltk_resources)) return installed or installed_nltk @@ -855,7 +773,7 @@ def from_module(module, **kwargs): def one_from_module(module, root, info, **kwargs): if '@type' in info: cls = PluginMeta.from_type(info['@type']) - return cls(info=info, **kwargs) + return cls(**info, **kwargs) instance = next( from_module(module=module, root=root, info=info, **kwargs), None) if not instance: @@ -888,7 +806,7 @@ def _from_module_name(module, root, info=None, **kwargs): def _from_loaded_module(module, info=None, **kwargs): for cls in _classes_in_module(module): - yield cls(info=info, **kwargs) + yield cls(**(info or {}), **kwargs) for instance in _instances_in_module(module): yield instance @@ -899,7 +817,7 @@ cached_evs = {} def evaluate(plugins, datasets, **kwargs): for plug in plugins: if not hasattr(plug, 'as_pipe'): - raise models.Error('Plugin {} cannot be evaluated'.format(plug.name)) + raise Error('Plugin {} cannot be evaluated'.format(plug.name)) if not isinstance(datasets, dict): datasets = gsitk_compat.prepare(datasets, download=True) @@ -961,12 +879,11 @@ def evaluations_to_JSONLD(results, flatten=False): class ScikitWrapper(BaseEstimator, TransformerMixin): - def __init__(self, plugin=None): + def __init__(self, plugin=None, **data): + super().__init__(**data) self.plugin = plugin def fit(self, X=None, y=None): - if self.plugin is not None and not self.plugin.is_activated: - self.plugin.activate() return self def transform(self, X): diff --git a/senpy/plugins/emotion/anew/emotion_anew_plugin.py b/senpy/plugins/emotion/anew/emotion_anew_plugin.py index fea6878..095acc7 100644 --- a/senpy/plugins/emotion/anew/emotion_anew_plugin.py +++ b/senpy/plugins/emotion/anew/emotion_anew_plugin.py @@ -70,11 +70,11 @@ pattern.text._read = _read class ANEW(EmotionPlugin): description = "This plugin consists on an emotion classifier using ANEW lexicon dictionary. 
It averages the VAD (valence-arousal-dominance) value of each word in the text that is also in the ANEW dictionary. To obtain a categorical value (e.g., happy) use the emotion conversion API (e.g., `emotion-model=emoml:big6`)." - author = "@icorcuera" - version = "0.5.2" + author: str = "@icorcuera" + version: str = "0.5.2" name = "emotion-anew" - extra_params = { + extra_params: dict = { "language": { "description": "language of the input", "aliases": ["language", "l"], @@ -89,7 +89,8 @@ class ANEW(EmotionPlugin): onyx__usesEmotionModel = MODEL nltk_resources = ['stopwords'] - def activate(self, *args, **kwargs): + def __init__(self, **kwargs): + super().__init__(**kwargs) self._stopwords = stopwords.words('english') dictionary={} dictionary['es'] = {} @@ -204,7 +205,7 @@ class ANEW(EmotionPlugin): yield entry - test_cases = [ + test_cases: list[dict] = [ { 'name': 'anger with VAD=(2.12, 6.95, 5.05)', 'input': 'I hate you', diff --git a/senpy/plugins/emotion/depechemood_plugin.py b/senpy/plugins/emotion/depechemood_plugin.py index 3f26ab1..3ea758b 100644 --- a/senpy/plugins/emotion/depechemood_plugin.py +++ b/senpy/plugins/emotion/depechemood_plugin.py @@ -31,14 +31,15 @@ class DepecheMood(EmotionBox): DepecheMood is an emotion lexicon automatically generated from news articles where users expressed their associated emotions. It contains two languages (English and Italian), as well as three types of word representations (token, lemma and lemma#PoS). For English, the lexicon contains 165k tokens, while the Italian version contains 116k. Unsupervised techniques can be applied to generate simple but effective baselines. 
To learn more, please visit https://github.com/marcoguerini/DepecheMood and http://www.depechemood.eu/ ''' - author = 'Oscar Araque' - name = 'emotion-depechemood' - version = '0.1' + author: str = 'Oscar Araque' + name: str = 'emotion-depechemood' + version: str = '0.1' requirements = ['pandas'] optional = True + usesEmotionModel: str = 'wna:WNAModel' + nltk_resources = ["stopwords"] - onyx__usesEmotionModel = 'wna:WNAModel' EMOTIONS = ['wna:negative-fear', 'wna:amusement', @@ -50,16 +51,12 @@ class DepecheMood(EmotionBox): 'wna:sadness'] DM_EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',] + LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv" - def __init__(self, *args, **kwargs): - super(DepecheMood, self).__init__(*args, **kwargs) - self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv" + def __init__(self, **data): + super().__init__(**data) self._denoise = ignore(set(string.punctuation)|set('«»')) self._stop_words = [] - self._lex_vocab = None - self._lex = None - - def activate(self): self._lex = self.download_lex() self._lex_vocab = set(list(self._lex.keys())) self._stop_words = stopwords.words('english') + [''] @@ -127,7 +124,7 @@ class DepecheMood(EmotionBox): estimation = self.estimate_all_emotions(tokens) return estimation - test_cases = [ + test_cases: list[dict] = [ { 'entry': { 'nif:isString': 'My cat is very happy', diff --git a/senpy/plugins/emotion/wnaffect/emotion_wnaffect_plugin.py b/senpy/plugins/emotion/wnaffect/emotion_wnaffect_plugin.py index 7c152e2..6c34d46 100644 --- a/senpy/plugins/emotion/wnaffect/emotion_wnaffect_plugin.py +++ b/senpy/plugins/emotion/wnaffect/emotion_wnaffect_plugin.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import division +from typing import * import re import nltk import os @@ -19,10 +20,11 @@ class 
WNAffect(EmotionPlugin, ShelfMixin): of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness or neutral. The only available language is English (en) ''' - name = 'emotion-wnaffect' - author = ["@icorcuera", "@balkian"] - version = '0.2' - extra_params = { + name: str = 'emotion-wnaffect' + author: List[str] = ["@icorcuera", "@balkian"] + version: str = '0.2' + + extra_params: Dict[str, Dict] = { 'language': { "@id": 'lang_wnaffect', 'description': 'language of the input', @@ -31,6 +33,7 @@ class WNAffect(EmotionPlugin, ShelfMixin): 'options': ['en',] } } + optional = True requirements = [ "nltk>=3.0.5", @@ -42,42 +45,8 @@ class WNAffect(EmotionPlugin, ShelfMixin): onyx__usesEmotionModel = "emoml:big6" nltk_resources = ['stopwords', 'averaged_perceptron_tagger_eng', 'wordnet'] - def _load_synsets(self, synsets_path): - """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" - tree = ET.parse(synsets_path) - root = tree.getroot() - pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"} - - synsets = {} - for pos in ["noun", "adj", "verb", "adv"]: - tag = pos_map[pos] - synsets[tag] = {} - for elem in root.findall( - ".//{0}-syn-list//{0}-syn".format(pos, pos)): - offset = int(elem.get("id")[2:]) - if not offset: continue - if elem.get("categ"): - synsets[tag][offset] = Emo.emotions[elem.get( - "categ")] if elem.get( - "categ") in Emo.emotions else None - elif elem.get("noun-id"): - synsets[tag][offset] = synsets[pos_map["noun"]][int( - elem.get("noun-id")[2:])] - return synsets - - def _load_emotions(self, hierarchy_path): - """Loads the hierarchy of emotions from the WordNet-Affect xml.""" - - tree = ET.parse(hierarchy_path) - root = tree.getroot() - for elem in root.findall("categ"): - name = elem.get("name") - if name == "root": - Emo.emotions["root"] = Emo("root") - else: - Emo.emotions[name] = Emo(name, elem.get("isa")) - - def activate(self, *args, **kwargs): + def __init__(self, **kwargs): 
+ super().__init__(**kwargs) self._stopwords = stopwords.words('english') self._wnlemma = wordnet.WordNetLemmatizer() @@ -119,10 +88,42 @@ class WNAffect(EmotionPlugin, ShelfMixin): self._wn16_path = self.wn16_path self._wn16 = WordNetCorpusReader(self.find_file(self._wn16_path), nltk.data.find(self.find_file(self._wn16_path))) + self.save() + def _load_synsets(self, synsets_path): + """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" + tree = ET.parse(synsets_path) + root = tree.getroot() + pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"} - def deactivate(self, *args, **kwargs): - self.save(ignore_errors=True) + synsets = {} + for pos in ["noun", "adj", "verb", "adv"]: + tag = pos_map[pos] + synsets[tag] = {} + for elem in root.findall( + ".//{0}-syn-list//{0}-syn".format(pos, pos)): + offset = int(elem.get("id")[2:]) + if not offset: continue + if elem.get("categ"): + synsets[tag][offset] = Emo.emotions[elem.get( + "categ")] if elem.get( + "categ") in Emo.emotions else None + elif elem.get("noun-id"): + synsets[tag][offset] = synsets[pos_map["noun"]][int( + elem.get("noun-id")[2:])] + return synsets + + def _load_emotions(self, hierarchy_path): + """Loads the hierarchy of emotions from the WordNet-Affect xml.""" + + tree = ET.parse(hierarchy_path) + root = tree.getroot() + for elem in root.findall("categ"): + name = elem.get("name") + if name == "root": + Emo.emotions["root"] = Emo("root") + else: + Emo.emotions[name] = Emo(name, elem.get("isa")) def _my_preprocessor(self, text): @@ -268,7 +269,6 @@ class WNAffect(EmotionPlugin, ShelfMixin): 'language': 'en', 'algorithm': 'emotion-wnaffect'} - self.activate() texts = {'I hate you': 'anger', 'i am sad': 'sadness', 'i am happy with my marks': 'joy', diff --git a/senpy/plugins/misc/split_plugin.py b/senpy/plugins/misc/split_plugin.py index faabb2a..ebbb34d 100644 --- a/senpy/plugins/misc/split_plugin.py +++ b/senpy/plugins/misc/split_plugin.py @@ -29,12 +29,12 @@ class 
Split(Transformation): (or paragraphs) is required. ''' - author = ["@militarpancho", '@balkian'] - version = '0.3' + author: str = ["@militarpancho", '@balkian'] + version: str = '0.3' url = "https://github.com/gsi-upm/senpy" nltk_resources = ['punkt'] - extra_params = { + extra_params: dict = { 'delimiter': { 'description': 'Split text into paragraphs or sentences.', 'aliases': ['type', 't'], @@ -47,7 +47,7 @@ class Split(Transformation): def analyse_entry(self, entry, activity): yield entry chunker_type = activity.params["delimiter"] - original_text = entry['nif:isString'] + original_text = entry.text if chunker_type == "sentence": tokenizer = PunktSentenceTokenizer() if chunker_type == "paragraph": @@ -59,12 +59,12 @@ class Split(Transformation): for i, chunk in enumerate(chars): start, end = chunk e = Entry() - e['nif:isString'] = original_text[start:end] + e.text = original_text[start:end] if entry.id: e.id = entry.id + "#char={},{}".format(start, end) yield e - test_cases = [ + test_cases: list[dict] = [ { 'entry': { 'nif:isString': 'Hello. World.' diff --git a/senpy/plugins/postprocessing/emotion/centroids.py b/senpy/plugins/postprocessing/emotion/centroids.py index 14ccfa7..3e9b05e 100644 --- a/senpy/plugins/postprocessing/emotion/centroids.py +++ b/senpy/plugins/postprocessing/emotion/centroids.py @@ -14,8 +14,11 @@ # limitations under the License. # +from typing import * + from senpy.plugins import EmotionConversionPlugin -from senpy.models import EmotionSet, Emotion, Error +from senpy.models import EmotionSet, Emotion, EmotionConversion +from senpy.errors import Error import logging logger = logging.getLogger(__name__) @@ -27,16 +30,16 @@ class CentroidConversion(EmotionConversionPlugin): categorical one, and vice versa. The centroids used in the conversion are configurable and appear in the semantic description of the plugin. 
''' - def __init__(self, info, *args, **kwargs): - if 'centroids' not in info: - raise Error('Centroid conversion plugins should provide ' - 'the centroids in their senpy file') - if 'onyx:doesConversion' not in info: - if 'centroids_direction' not in info: - raise Error('Please, provide centroids direction') - cf, ct = info['centroids_direction'] - info['onyx:doesConversion'] = [{ + centroids: Dict + centroids_direction: tuple[str, str] + aliases: Dict[str, str] = {} + + def __init__(self, centroids, centroids_direction, doesConversion=None, aliases={}, *args, **kwargs): + super().__init__(*args, **kwargs) + cf, ct = centroids_direction + if doesConversion is None: + doesConversion = [{ 'onyx:conversionFrom': cf, 'onyx:conversionTo': ct }, { @@ -44,17 +47,16 @@ class CentroidConversion(EmotionConversionPlugin): 'onyx:conversionTo': cf }] - if 'aliases' in info: - aliases = info['aliases'] + + if aliases: ncentroids = {} - for k1, v1 in info['centroids'].items(): + for k1, v1 in centroids.items(): nv1 = {} for k2, v2 in v1.items(): nv1[aliases.get(k2, k2)] = v2 ncentroids[aliases.get(k1, k1)] = nv1 - info['centroids'] = ncentroids - - super(CentroidConversion, self).__init__(info, *args, **kwargs) + centroids = ncentroids + self.centroids = centroids self.dimensions = set() for c in self.centroids.values(): @@ -152,7 +154,7 @@ class CentroidConversion(EmotionConversionPlugin): "centroids_direction": ["emoml:big6", "emoml:fsre-dimensions"] } - c = CentroidConversion.parse_obj(info) + c = CentroidConversion.parse_obj(info) es1 = EmotionSet() e1 = Emotion() diff --git a/senpy/plugins/postprocessing/emotion/ekman2fsre.senpy b/senpy/plugins/postprocessing/emotion/ekman2fsre.senpy deleted file mode 100644 index aaa66b9..0000000 --- a/senpy/plugins/postprocessing/emotion/ekman2fsre.senpy +++ /dev/null @@ -1,52 +0,0 @@ ---- -name: Ekman2FSRE -module: senpy.plugins.postprocessing.emotion.centroids -description: Plugin to convert emotion sets from Ekman to VAD -version: 0.2 -# No need
to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction -neutralValue: 5.0 -centroids: - anger: - A: 6.95 - D: 5.1 - V: 2.7 - S: 5.0 - disgust: - A: 5.3 - D: 8.05 - V: 2.7 - S: 5.0 - fear: - A: 6.5 - D: 3.6 - V: 3.2 - S: 5.0 - happiness: - A: 7.22 - D: 6.28 - V: 8.6 - S: 5.0 - sadness: - A: 5.21 - D: 2.82 - V: 2.21 - S: 5.0 - surprise: - A: 5.0 - D: 5.0 - V: 5.0 - S: 10.0 -centroids_direction: - - emoml:big6 - - emoml:fsre-dimensions -aliases: # These are aliases for any key in the centroid, to avoid repeating a long name several times - A: emoml:fsre-dimensions_arousal - V: emoml:fsre-dimensions_valence - D: emoml:fsre-dimensions_potency - S: emoml:fsre-dimensions_unpredictability - anger: emoml:big6anger - disgust: emoml:big6disgust - fear: emoml:big6fear - happiness: emoml:big6happiness - sadness: emoml:big6sadness - surprise: emoml:big6surprise diff --git a/senpy/plugins/postprocessing/emotion/ekman2fsre_plugin.py b/senpy/plugins/postprocessing/emotion/ekman2fsre_plugin.py new file mode 100644 index 0000000..3151493 --- /dev/null +++ b/senpy/plugins/postprocessing/emotion/ekman2fsre_plugin.py @@ -0,0 +1,61 @@ +from typing import * +from senpy.ns import emoml +from senpy.plugins.postprocessing.emotion.centroids import CentroidConversion + + +class Ekman2FSRE(CentroidConversion): + ''' Plugin to convert emotion sets from Ekman to VAD ''' + version: str = '0.2' + # No need to specify doesConversion because centroids.py adds it automatically from centroids_direction + neutralValue: float = 5.0 + centroids: Dict = { + 'anger': { + 'A': 6.95, + 'D': 5.1, + 'V': 2.7, + 'S': 5.0, + }, + 'disgust': { + 'A': 5.3, + 'D': 8.05, + 'V': 2.7, + 'S': 5.0, + }, + 'fear': { + 'A': 6.5, + 'D': 3.6, + 'V': 3.2, + 'S': 5.0, + }, + 'happiness':{ + 'A': 7.22, + 'D': 6.28, + 'V': 8.6, + 'S': 5.0, + }, + 'sadness': { + 'A': 5.21, + 'D': 2.82, + 'V': 2.21, + 'S': 5.0 , + }, + 'surprise': { + 'A': 5.0, + 'D': 5.0, + 'V': 5.0, + 'S': 10.0, + } + 
} + centroids_direction: tuple[str, str] = ('emoml:big6', 'emoml:fsre-dimensions') + aliases: Dict[str, str] = { # These are aliases for any key in the centroid, to avoid repeating a long name several times + 'A': emoml['emoml:fsre-dimensions_arousal'], + 'V': emoml['emoml:fsre-dimensions_valence'], + 'D': emoml['emoml:fsre-dimensions_potency'], + 'S': emoml['emoml:fsre-dimensions_unpredictability'], + 'anger': emoml['emoml:big6anger'], + 'disgust': emoml['emoml:big6disgust'], + 'fear': emoml['emoml:big6fear'], + 'happiness': emoml['emoml:big6happiness'], + 'sadness': emoml['emoml:big6sadness'], + 'surprise': emoml['emoml:big6surprise'], + } diff --git a/senpy/plugins/postprocessing/emotion/ekman2vad.senpy b/senpy/plugins/postprocessing/emotion/ekman2vad.senpy deleted file mode 100644 index a08938d..0000000 --- a/senpy/plugins/postprocessing/emotion/ekman2vad.senpy +++ /dev/null @@ -1,40 +0,0 @@ ---- -name: Ekman2PAD -module: senpy.plugins.postprocessing.emotion.centroids -description: Plugin to convert emotion sets from Ekman to VAD -version: 0.2 -# No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction -neutralValue: 5.0 -centroids: - anger: - A: 6.95 - D: 5.1 - P: 2.7 - disgust: - A: 5.3 - D: 8.05 - P: 2.7 - fear: - A: 6.5 - D: 3.6 - P: 3.2 - happiness: - A: 7.22 - D: 6.28 - P: 8.6 - sadness: - A: 5.21 - D: 2.82 - P: 2.21 -centroids_direction: - - emoml:big6 - - emoml:pad-dimensions -aliases: # These are aliases for any key in the centroid, to avoid repeating a long name several times - P: emoml:pad-dimensions_pleasure - A: emoml:pad-dimensions_arousal - D: emoml:pad-dimensions_dominance - anger: emoml:big6anger - disgust: emoml:big6disgust - fear: emoml:big6fear - happiness: emoml:big6happiness - sadness: emoml:big6sadness diff --git a/senpy/plugins/postprocessing/emotion/ekman2vad_plugin.py b/senpy/plugins/postprocessing/emotion/ekman2vad_plugin.py new file mode 100644 index 0000000..d66ba0a --- /dev/null +++ 
b/senpy/plugins/postprocessing/emotion/ekman2vad_plugin.py @@ -0,0 +1,54 @@ +from typing import * +from senpy.ns import emoml +from senpy.plugins.postprocessing.emotion.centroids import CentroidConversion + + +class Ekman2PAD(CentroidConversion): + '''Plugin to convert emotion sets from Ekman to VAD''' + version: str = '0.2' + # No need to specify doesConversion because centroids.py adds it automatically from centroids_direction + neutralValue: float = 5.0 + centroids: Dict = { + 'anger': { + 'A': 6.95, + 'D': 5.1, + 'V': 2.7, + }, + 'disgust': { + 'A': 5.3, + 'D': 8.05, + 'V': 2.7, + }, + 'fear': { + 'A': 6.5, + 'D': 3.6, + 'V': 3.2, + }, + 'happiness':{ + 'A': 7.22, + 'D': 6.28, + 'V': 8.6, + }, + 'sadness': { + 'A': 5.21, + 'D': 2.82, + 'V': 2.21, + }, + 'surprise': { + 'A': 5.0, + 'D': 5.0, + 'V': 5.0, + } + } + centroids_direction: tuple[str, str] = (emoml['big6'], emoml['pad-dimensions']) + aliases: Dict[str, str] = { # These are aliases for any key in the centroid, to avoid repeating a long name several times + 'A': emoml['pad-dimensions_arousal'], + 'V': emoml['pad-dimensions_potency'], + 'D': emoml['pad-dimensions_dominance'], + 'anger': emoml['emoml:big6anger'], + 'disgust': emoml['emoml:big6disgust'], + 'fear': emoml['emoml:big6fear'], + 'happiness': emoml['emoml:big6happiness'], + 'sadness': emoml['emoml:big6sadness'], + 'surprise': emoml['emoml:big6surprise'], + } diff --git a/senpy/plugins/postprocessing/emotion/maxEmotion_plugin.py b/senpy/plugins/postprocessing/emotion/maxEmotion_plugin.py index 80d74f0..25298b9 100644 --- a/senpy/plugins/postprocessing/emotion/maxEmotion_plugin.py +++ b/senpy/plugins/postprocessing/emotion/maxEmotion_plugin.py @@ -19,15 +19,15 @@ from senpy import PostProcessing, easy_test class MaxEmotion(PostProcessing): '''Plugin to extract the emotion with highest value from an EmotionSet''' - author = '@dsuarezsouto' - version = '0.1' + author: str = '@dsuarezsouto' + version: str = '0.1' def process_entry(self, entry, 
activity): if len(entry.emotions) < 1: yield entry return - set_emotions = entry.emotions[0]['onyx:hasEmotion'] + set_emotions = entry.emotions[0].hasEmotion # If there is only one emotion, do not modify it if len(set_emotions) < 2: @@ -38,17 +38,16 @@ class MaxEmotion(PostProcessing): # Extract max emotion from the set emotions (emotion with highest intensity) for tmp_emotion in set_emotions: - if tmp_emotion['onyx:hasEmotionIntensity'] > max_emotion[ - 'onyx:hasEmotionIntensity']: + if tmp_emotion.hasEmotionIntensity > max_emotion.hasEmotionIntensity: max_emotion = tmp_emotion - if max_emotion['onyx:hasEmotionIntensity'] == 0: - max_emotion['onyx:hasEmotionCategory'] = "neutral" - max_emotion['onyx:hasEmotionIntensity'] = 1.0 + if max_emotion.hasEmotionIntensity == 0: + max_emotion.hasEmotionCategory = "neutral" + max_emotion.hasEmotionIntensity = 1.0 - entry.emotions[0]['onyx:hasEmotion'] = [max_emotion] + entry.emotions[0].hasEmotion = [max_emotion] - entry.emotions[0]['prov:wasGeneratedBy'] = activity.id + entry.emotions[0].wasGeneratedBy = activity.id yield entry def check(self, request, plugins): @@ -57,7 +56,7 @@ class MaxEmotion(PostProcessing): # Test Cases: # 1 Normal Situation. # 2 Case to return a Neutral Emotion. 
- test_cases = [ + test_cases: list[dict] = [ { "name": "If there are several emotions within an emotion set, reduce it to one.", "entry": { diff --git a/senpy/plugins/sentiment/basic/sentiment_basic_plugin.py b/senpy/plugins/sentiment/basic/sentiment_basic_plugin.py index d76d70b..b036e7f 100644 --- a/senpy/plugins/sentiment/basic/sentiment_basic_plugin.py +++ b/senpy/plugins/sentiment/basic/sentiment_basic_plugin.py @@ -13,7 +13,8 @@ from scipy.interpolate import interp1d from os import path from senpy.plugins import SentimentBox, SenpyPlugin -from senpy.models import Results, Entry, Sentiment, Error +from senpy.models import Results, Entry, Sentiment +from senpy.errors import Error if sys.version_info[0] >= 3: unicode = str @@ -24,9 +25,9 @@ class SentimentBasic(SentimentBox): Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es. 
''' name = "sentiment-basic" - author = "github.com/nachtkatze" - version = "0.1.1" - extra_params = { + author: str = "github.com/nachtkatze" + version: str = "0.1.1" + extra_params: dict = { "language": { "description": "language of the text", "aliases": ["language", "l"], @@ -36,29 +37,30 @@ class SentimentBasic(SentimentBox): } } sentiword_path = "SentiWordNet_3.0.txt" - pos_path = "unigram_spanish.pickle" maxPolarityValue = 1 minPolarityValue = -1 nltk_resources = ['punkt_tab','wordnet', 'omw', 'omw-1.4'] optional = True requirements = ['nltk>=3.0.5', 'scipy>=0.14.0', 'textblob==0.17'] with_polarity = False + _pos_path = "unigram_spanish.pickle" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._swn = self._load_swn() + self._pos_tagger = self._load_pos_tagger() def _load_swn(self): - self.swn_path = self.find_file(self.sentiword_path) - swn = SentiWordNet(self.swn_path) + self._swn_path = self.find_file(self.sentiword_path) + swn = SentiWordNet(self._swn_path) return swn def _load_pos_tagger(self): - self.pos_path = self.find_file(self.pos_path) - with open(self.pos_path, 'rb') as f: + self._pos_path = self.find_file(self._pos_path) + with open(self._pos_path, 'rb') as f: tagger = pickle.load(f) return tagger - def activate(self, *args, **kwargs): - self._swn = self._load_swn() - self._pos_tagger = self._load_pos_tagger() - def _remove_punctuation(self, tokens): return [t for t in tokens if t not in string.punctuation] @@ -156,7 +158,7 @@ class SentimentBasic(SentimentBox): return [0, 1, 0] - test_cases = [ + test_cases: list[dict] = [ { 'input': 'Odio ir al cine', 'params': {'language': 'es'}, diff --git a/senpy/plugins/sentiment/meaningcloud_plugin.py b/senpy/plugins/sentiment/meaningcloud_plugin.py index ae5a0c9..56a6728 100644 --- a/senpy/plugins/sentiment/meaningcloud_plugin.py +++ b/senpy/plugins/sentiment/meaningcloud_plugin.py @@ -34,7 +34,8 @@ import os from os import path import time from senpy.plugins import SentimentPlugin 
-from senpy.models import Results, Entry, Entity, Topic, Sentiment, Error +from senpy.models import Results, Entry, Entity, Topic, Sentiment +from senpy.errors import Error from senpy.utils import check_template @@ -49,13 +50,13 @@ class MeaningCloudPlugin(SentimentPlugin): http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=YOUR_API_KEY&input=I%20love%20Madrid. ''' - name = 'sentiment-meaningcloud' - author = 'GSI UPM' - version = "1.1" - maxPolarityValue = 1 - minPolarityValue = -1 + name: str = 'sentiment-meaningcloud' + author: str = 'GSI UPM' + version: str = "1.1" + maxPolarityValue: float = 1 + minPolarityValue: float = -1 - extra_params = { + extra_params: dict = { "language": { "description": "language of the input", "aliases": ["language", "l"], @@ -86,7 +87,7 @@ class MeaningCloudPlugin(SentimentPlugin): def analyse_entry(self, entry, activity): params = activity.params - txt = entry['nif:isString'] + txt = entry.text api = 'http://api.meaningcloud.com/' lang = params.get("language") model = "general" @@ -175,7 +176,7 @@ class MeaningCloudPlugin(SentimentPlugin): entry['senpy:hasTopic'].append(concept) yield entry - test_cases = [ + test_cases: list[dict] = [ { 'params': { 'algo': 'sentiment-meaningCloud', diff --git a/senpy/plugins/sentiment/sentiment140_plugin.py b/senpy/plugins/sentiment/sentiment140_plugin.py index a0a391b..20c1887 100644 --- a/senpy/plugins/sentiment/sentiment140_plugin.py +++ b/senpy/plugins/sentiment/sentiment140_plugin.py @@ -25,10 +25,10 @@ ENDPOINT = 'http://www.sentiment140.com/api/bulkClassifyJson' class Sentiment140(SentimentBox): '''Connects to the sentiment140 free API: http://sentiment140.com''' - author = "@balkian" - version = '0.2' + author: str = "@balkian" + version: str = '0.2' url = "https://github.com/gsi-upm/senpy-plugins-community" - extra_params = { + extra_params: dict = { 'language': { "@id": 'lang_sentiment140', 'description': 'language of the text', @@ -66,7 +66,7 @@ class 
Sentiment140(SentimentBox): continue yield [0, 1, 0] - test_cases = [ + test_cases: list[dict] = [ { 'entry': { 'nif:isString': 'I love Titanic' diff --git a/senpy/plugins/sentiment/vader/vader_plugin.py b/senpy/plugins/sentiment/vader/vader_plugin.py index 3b06f0c..e06a25c 100644 --- a/senpy/plugins/sentiment/vader/vader_plugin.py +++ b/senpy/plugins/sentiment/vader/vader_plugin.py @@ -12,9 +12,9 @@ class VaderSentimentPlugin(SentimentBox): ''' name = "sentiment-vader" module = "sentiment-vader" - author = "@icorcuera" - version = "0.1.1" - extra_params = { + author: str = "@icorcuera" + version: str = "0.1.1" + extra_params: dict = { "language": { "description": "language of the input", "@id": "lang_rand", @@ -51,9 +51,9 @@ class VaderSentimentPlugin(SentimentBox): return sentiments - test_cases = [] + test_cases: list[dict] = [] - test_cases = [ + test_cases: list[dict] = [ { 'input': 'I am tired :(', 'polarity': 'marl:Negative' diff --git a/senpy/schemas/context.jsonld b/senpy/schemas/context.jsonld index 6f274bd..e85f03f 100644 --- a/senpy/schemas/context.jsonld +++ b/senpy/schemas/context.jsonld @@ -1,16 +1,26 @@ { "@context": { - "@vocab": "http://www.gsi.upm.es/onto/senpy/ns#", + "@vocab": "http://www.gsi.upm.es/ontologies/senpy/ns#", + "amor": "http://www.gsi.upm.es/ontologies/amor/ns#", + "amor-bhv": "http://www.gsi.upm.es/ontologies/amor/models/bhv/ns#", + "amor-mft": "http://www.gsi.upm.es/ontologies/amor/models/mft/ns#", + "bhv": "http://www.gsi.upm.es/ontologies/bhv#", "dc": "http://dublincore.org/2012/06/14/dcelements#", - "senpy": "http://www.gsi.upm.es/onto/senpy/ns#", - "prov": "http://www.w3.org/ns/prov#", - "nif": "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#", - "marl": "http://www.gsi.upm.es/ontologies/marl/ns#", - "onyx": "http://www.gsi.upm.es/ontologies/onyx/ns#", - "wna": "http://www.gsi.upm.es/ontologies/wnaffect/ns#", "emoml": "http://www.gsi.upm.es/ontologies/onyx/vocabularies/emotionml/ns#", - "xsd": 
"http://www.w3.org/2001/XMLSchema#", "fam": "http://vocab.fusepool.info/fam#", + "marl": "http://www.gsi.upm.es/ontologies/marl/ns#", + "mft": "http://www.gsi.upm.es/ontologies/mft/ns#", + "mls": "http://www.w3.org/ns/mls#", + "nif": "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#", + "onyx": "http://www.gsi.upm.es/ontologies/onyx/ns#", + "owl": "http://www.w3.org/2002/07/owl#", + "prov": "http://www.w3.org/ns/prov#", + "prov": "http://www.w3.org/ns/prov#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "http://schema.org/", + "senpy": "http://www.gsi.upm.es/onto/senpy/ns#", + "wna": "http://www.gsi.upm.es/ontologies/wnaffect/ns#", + "xsd": "http://www.w3.org/2001/XMLSchema#", "topics": { "@id": "nif:topic", "@container": "@set" @@ -71,7 +81,6 @@ }, "errors": { "@type": "ParameterError" - }, - "prefix": "http://senpy.invalid/" + } } } diff --git a/senpy/utils.py b/senpy/utils.py index 59e90a5..50e0e7c 100644 --- a/senpy/utils.py +++ b/senpy/utils.py @@ -83,7 +83,6 @@ def easy_load(app=None, plugin_list=None, plugin_folder=None, **kwargs): for plugin in plugin_list: sp.add_plugin(plugin) sp.install_deps() - sp.activate_all() return sp, app diff --git a/tests/test_api.py b/tests/test_api.py index aabf226..3a664e3 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,7 +21,8 @@ logger = logging.getLogger(__name__) from unittest import TestCase from senpy.api import (boolean, parse_params, get_extra_params, parse_analyses, API_PARAMS, NIF_PARAMS, WEB_PARAMS) -from senpy.models import Error, Plugin +from senpy.errors import Error +from senpy.models import Plugin class APITest(TestCase): @@ -72,7 +73,7 @@ class APITest(TestCase): in2 = { 'apikey': 25 } - extra_params = { + extra_params: dict = { "apikey": { "aliases": [ "apikey", @@ -110,7 +111,7 @@ class APITest(TestCase): def test_parse_analyses(self): '''The API should parse user parameters and return them in a format that plugins can use''' plugins = [ - Plugin({ + 
Plugin.parse_obj({ 'name': 'plugin1', 'extra_params': { # Incompatible parameter @@ -133,7 +134,7 @@ class APITest(TestCase): 'options': ['value2_1', 'value2_2', 'value3_3'] } } - }), Plugin({ + }), Plugin.parse_obj({ 'name': 'plugin2', 'extra_params': { 'param0': { @@ -186,7 +187,7 @@ class APITest(TestCase): def test_get_extra_params(self): '''The API should return the list of valid parameters for a set of plugins''' plugins = [ - Plugin({ + Plugin.parse_obj({ 'name': 'plugin1', 'extra_params': { # Incompatible parameter @@ -208,7 +209,7 @@ class APITest(TestCase): 'options': ['value2_1', 'value2_2', 'value3_3'] } } - }), Plugin({ + }), Plugin.parse_obj({ 'name': 'plugin2', 'extra_params': { 'param0': { @@ -234,14 +235,14 @@ class APITest(TestCase): expected = { # Overlapping parameters - 'plugin1.param0': plugins[0]['extra_params']['param0'], - 'plugin1.param1': plugins[0]['extra_params']['param1'], - 'plugin2.param0': plugins[1]['extra_params']['param0'], - 'plugin2.param1': plugins[1]['extra_params']['param1'], + 'plugin1.param0': plugins[0].extra_params['param0'], + 'plugin1.param1': plugins[0].extra_params['param1'], + 'plugin2.param0': plugins[1].extra_params['param0'], + 'plugin2.param1': plugins[1].extra_params['param1'], # Non-overlapping parameters - 'param2': plugins[0]['extra_params']['param2'], - 'param3': plugins[1]['extra_params']['param3'], + 'param2': plugins[0].extra_params['param2'], + 'param3': plugins[1].extra_params['param3'], # Intersection of overlapping parameters 'param1': { diff --git a/tests/test_blueprints.py b/tests/test_blueprints.py index d2d8008..3d72053 100644 --- a/tests/test_blueprints.py +++ b/tests/test_blueprints.py @@ -38,11 +38,8 @@ class BlueprintsTest(TestCase): """Set up only once, and re-use in every individual test""" cls.app = Flask("test_extensions") cls.client = cls.app.test_client() - cls.senpy = Senpy(default_plugins=True, strict=False) # Ignore any optional plugins - cls.senpy.init_app(cls.app) cls.dir = 
os.path.join(os.path.dirname(__file__), "..") - cls.senpy.add_folder(cls.dir) - cls.senpy.activate_all() + cls.senpy = Senpy(default_plugins=True, app=cls.app, plugin_folders=[cls.dir, "."], strict=False) # Ignore any optional plugins cls.senpy.default_plugin = 'Dummy' def setUp(self): diff --git a/tests/test_cli.py b/tests/test_cli.py index fadf137..8850a8a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) from unittest import TestCase from senpy.cli import main_function -from senpy.models import Error +from senpy.errors import Error class CLITest(TestCase): diff --git a/tests/test_client.py b/tests/test_client.py index 7ac74fe..af9cd79 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -18,7 +18,8 @@ from unittest import TestCase from senpy.testing import patch_requests from senpy.client import Client -from senpy.models import Results, Plugins, Error +from senpy.models import Results, Plugins +from senpy.errors import Error from senpy.plugins import AnalysisPlugin diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 6d3aeba..a70e233 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -23,7 +23,8 @@ import logging from functools import partial from senpy.extensions import Senpy from senpy import plugins, config, api -from senpy.models import Error, Results, Entry, EmotionSet, Emotion, Plugin +from senpy.models import Results, Entry, EmotionSet, Emotion, Plugin +from senpy.errors import Error from flask import Flask from unittest import TestCase @@ -41,8 +42,7 @@ class ExtensionsTest(TestCase): self.senpy = Senpy(plugin_folder=self.examples_dir, app=self.app, default_plugins=False) - self.senpy.deactivate_all() - self.senpy.activate_plugin("Dummy", sync=True) + self.senpy.default_plugin = "Dummy" self.app.config['TESTING'] = True # Tell Flask not to catch Exceptions def test_init(self): @@ -62,9 +62,9 @@ class ExtensionsTest(TestCase): '''Should be able to 
add and delete new plugins. ''' new = plugins.Analyser(name='new', description='new', version=0) self.senpy.add_plugin(new) - assert new in self.senpy.plugins(is_activated=False) + assert new in self.senpy.plugins() self.senpy.delete_plugin(new) - assert new not in self.senpy.plugins(is_activated=False) + assert new not in self.senpy.plugins() def test_adding_folder(self): """ It should be possible for senpy to look for plugins in more folders. """ @@ -74,7 +74,7 @@ class ExtensionsTest(TestCase): default_plugins=False) assert not senpy.analysis_plugins() senpy.add_folder(self.examples_dir) - assert senpy.plugins(plugin_type=plugins.Analyser, is_activated=False) + assert senpy.plugins(plugin_type=plugins.Analyser) self.assertRaises(AttributeError, senpy.add_folder, 'DOES NOT EXIST') def test_installing(self): @@ -94,9 +94,8 @@ class ExtensionsTest(TestCase): def test_enabling(self): """ Enabling a plugin """ - self.senpy.activate_all(sync=True) assert len(self.senpy.plugins()) >= 3 - assert self.senpy.get_plugin("Sleep").is_activated + assert self.senpy.get_plugin("Sleep") def test_installing_nonexistent(self): """ Fail if the dependencies cannot be met """ @@ -110,23 +109,14 @@ class ExtensionsTest(TestCase): with self.assertRaises(Error): plugins.install_deps(info) - def test_disabling(self): - """ Disabling a plugin """ - self.senpy.deactivate_all(sync=True) - assert not self.senpy.get_plugin("dummy").is_activated - assert not self.senpy.get_plugin("sleep").is_activated - def test_default(self): """ Default plugin should be set """ assert self.senpy.default_plugin assert self.senpy.default_plugin.name == "dummy" - self.senpy.deactivate_all(sync=True) - logging.debug("Default: {}".format(self.senpy.default_plugin)) - assert self.senpy.default_plugin is None def test_noplugin(self): """ Don't analyse if there isn't any plugin installed """ - self.senpy.deactivate_all(sync=True) + nosenpy = Senpy(default_plugins=False, plugin_folders=[]) self.assertRaises(Error, 
partial(analyse, self.senpy, input="tupni")) def test_analyse(self): @@ -177,7 +167,7 @@ class ExtensionsTest(TestCase): def test_analyse_error(self): class ErrorPlugin(plugins.Analyser): - author = 'nobody' + author: str = 'nobody' version = 0 ex = Error() @@ -205,17 +195,12 @@ class ExtensionsTest(TestCase): """ Filtering plugins """ assert len(self.senpy.plugins(name="Dummy")) > 0 assert not len(self.senpy.plugins(name="NotDummy")) - assert self.senpy.plugins(name="Dummy", is_activated=True) - self.senpy.deactivate_plugin("Dummy", sync=True) - assert not len(self.senpy.plugins(name="Dummy", - is_activated=True)) def test_load_default_plugins(self): senpy = Senpy(plugin_folder=self.examples_dir, default_plugins=True) - assert len(senpy.plugins(is_activated=False)) > 1 + assert len(senpy.plugins()) > 1 def test_convert_emotions(self): - self.senpy.activate_all(sync=True) plugin = Plugin({ 'id': 'imaginary', 'onyx:usesEmotionModel': 'emoml:fsre-dimensions' diff --git a/tests/test_models.py b/tests/test_models.py index 5583cf7..4f22633 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -26,14 +26,14 @@ from senpy.models import (Analysis, EmotionAnalysis, EmotionSet, Entry, - Error, + ErrorResponse, Results, Sentiment, SentimentPlugin, Plugins, from_string, - from_dict, - subtypes) + from_dict) +from senpy.errors import Error from senpy import plugins from pprint import pprint @@ -117,7 +117,7 @@ class ModelsTest(TestCase): def test_plugins(self): self.assertRaises(Error, plugins.Plugin) - p = plugins.SentimentPlugin({"name": "dummy", + p = plugins.SentimentPlugin.parse_obj({"name": "dummy", "description": "I do nothing", "version": 0, "extra_params": { @@ -152,11 +152,6 @@ class ModelsTest(TestCase): s = str(r) assert "_testing" not in s - def test_serialize(self): - for k, v in subtypes().items(): - e = v() - e.serialize() - def test_turtle(self): """Any model should be serializable as a turtle file""" ana = EmotionAnalysis() diff --git 
a/tests/test_plugins.py b/tests/test_plugins.py index 44f7a5e..5ae163f 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -35,24 +35,23 @@ ROOT = os.path.join(os.path.dirname(__file__), '..') class ShelfDummyPlugin(plugins.SentimentPlugin, plugins.ShelfMixin): '''Dummy plugin for tests.''' - name = 'Shelf' - version = 0 - author = 'the senpy community' + name: str = 'Shelf' + version: str = '0' + author: str = 'the senpy community' - def activate(self, *args, **kwargs): + def __init__(self, **kwargs): + super().__init__(**kwargs) if 'counter' not in self.sh: self.sh['counter'] = 0 self.save() - def deactivate(self, *args, **kwargs): - self.save() - def analyse(self, *args, **kwargs): self.sh['counter'] = self.sh['counter'] + 1 e = Entry() e.nif__isString = self.sh['counter'] r = Results() r.entries.append(e) + self.save() return r @@ -82,7 +81,6 @@ class PluginsTest(TestCase): info={'name': 'default_shelve_file', 'description': 'Dummy plugin for tests', 'version': 'test'}) - a.activate() assert os.path.isfile(a.shelf_file) os.remove(a.shelf_file) @@ -114,8 +112,6 @@ class PluginsTest(TestCase): 'version': 'test', 'shelf_file': newfile }) - assert a.sh == {} - a.activate() assert a.sh == {'counter': 0} assert a.shelf_file == newfile @@ -137,12 +133,10 @@ class PluginsTest(TestCase): 'shelf_file': self.shelf_file, 'version': 'test' }) - a.activate() assert a.shelf_file == self.shelf_file res1 = a.analyse(input=1) assert res1.entries[0].nif__isString == 100 - a.deactivate() del a with open(self.shelf_file, 'rb') as f: @@ -190,7 +184,6 @@ class PluginsTest(TestCase): 'version': 'test', 'shelf_file': self.shelf_file }) - a.activate() print('Shelf file: %s' % a.shelf_file) a.sh['a'] = 'fromA' a.save() @@ -201,7 +194,6 @@ class PluginsTest(TestCase): 'version': 'test', 'shelf_file': self.shelf_file }) - b.activate() assert b.sh['a'] == 'fromA' b.sh['a'] = 'fromB' assert b.sh['a'] == 'fromB' @@ -228,8 +220,8 @@ class PluginsTest(TestCase): class 
MyBox(plugins.Box): ''' Vague description''' - author = 'me' - version = 0 + author: str = 'me' + version: str = 0 def to_features(self, entry, **kwargs): return entry.text.split() @@ -243,7 +235,7 @@ class PluginsTest(TestCase): entry.myAnnotation = 'DETECTED' return entry - test_cases = [ + test_cases: list[dict] = [ { 'input': "nothing here", 'expected': {'myAnnotation': 'DETECTED'}, @@ -260,8 +252,8 @@ class PluginsTest(TestCase): class SentimentBox(plugins.SentimentBox): ''' Vague description''' - author = 'me' - version = 0 + author: str = 'me' + version: str = 0 def predict_one(self, features, **kwargs): text = ' '.join(features) @@ -269,7 +261,7 @@ class PluginsTest(TestCase): return [1, 0, 0] return [0, 0, 1] - test_cases = [ + test_cases: list[dict] = [ { 'input': 'a happy face :)', 'polarity': 'marl:Positive' @@ -355,7 +347,7 @@ class PluginsTest(TestCase): class DummyPlugin(plugins.SentimentBox): description = 'Plugin to test evaluation' - version = 0 + version: str = 0 classes = ['marl:Positive', 'marl:Negative'] @@ -365,7 +357,7 @@ class PluginsTest(TestCase): class SmartPlugin(plugins.SentimentBox): description = 'Plugin to test evaluation' - version = 0 + version: str = 0 classes = ['marl:Positive', 'marl:Negative'] diff --git a/tests/test_schemas.py b/tests/test_schemas.py deleted file mode 100644 index 6a3a340..0000000 --- a/tests/test_schemas.py +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -from __future__ import print_function - -import json -import unittest -import os -from os import path -from fnmatch import fnmatch - -from jsonschema import RefResolver, Draft4Validator, ValidationError - -from senpy.models import read_schema - -root_path = path.join(path.dirname(path.realpath(__file__)), '..') -schema_folder = path.join(root_path, 'senpy', 'schemas') -examples_path = path.join(root_path, 'docs', 'examples') -bad_examples_path = path.join(root_path, 'docs', 'bad-examples') - - -class JSONSchemaTests(unittest.TestCase): - def test_definitions(self): - read_schema('definitions.json') - - -def do_create_(jsfile, success): - def do_expected(self): - with open(jsfile) as f: - js = json.load(f) - try: - assert '@type' in js - schema_name = js['@type'] - with open(os.path.join(schema_folder, schema_name + - ".json")) as file_object: - schema = json.load(file_object) - resolver = RefResolver('file://' + schema_folder + '/', schema) - validator = Draft4Validator(schema, resolver=resolver) - validator.validate(js) - except (AssertionError, ValidationError, KeyError) as ex: - if success: - raise - return - assert success - return do_expected - - -def add_examples(dirname, success): - for dirpath, dirnames, filenames in os.walk(dirname): - for i in filenames: - if fnmatch(i, '*.json'): - filename = path.join(dirpath, i) - test_method = do_create_(filename, success) - test_method.__name__ = 'test_file_%s_success_%s' % (filename, - success) - test_method.__doc__ = '%s should %svalidate' % (filename, '' - if success else - 'not') - setattr(JSONSchemaTests, test_method.__name__, test_method) - del test_method - - -add_examples(examples_path, True) -add_examples(bad_examples_path, False) - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_semantics.py b/tests/test_semantics.py index 9b685a7..2634959 100644 --- a/tests/test_semantics.py 
+++ b/tests/test_semantics.py @@ -42,12 +42,10 @@ class SemanticsTest(TestCase): """Set up only once, and re-use in every individual test""" cls.app = Flask("test_extensions") cls.client = cls.app.test_client() - cls.senpy = Senpy(default_plugins=True) + cls.senpy = Senpy(plugin_folder=None, default_plugins=True) cls.senpy.init_app(cls.app) - cls.dir = os.path.join(os.path.dirname(__file__), "..") - cls.senpy.add_folder(cls.dir) - cls.senpy.activate_all() - cls.senpy.default_plugin = 'Dummy' + #cls.dir = os.path.join(os.path.dirname(__file__), "..") + #cls.senpy.add_folder(cls.dir) def setUp(self): self.app.config['TESTING'] = True # Tell Flask not to catch Exceptions @@ -57,10 +55,10 @@ class SemanticsTest(TestCase): def test_sentiment(self): """ - A sentiment analysis call in JSON-LD + a sentiment analysis call in json-ld """ - # We use expanded JSON-LD and ignore the context, because in general - # the context is a URIS to the service and that URI is not + # we use expanded json-ld and ignore the context, because in general + # the context is a uris to the service and that uri is not # available outside of self.client params = { 'input': 'hello', @@ -69,28 +67,28 @@ class SemanticsTest(TestCase): 'expanded': True, 'prefix': 'http://default.example/#' } - resp = self.client.get("/api/basic?{}".format(urlencode(params))) + resp = self.client.get("/api/sentiment-basic?{}".format(urlencode(params))) self.assertCode(resp, 200) g = parse_resp(resp, fmt='json-ld') + print('Got this graph: ', g.serialize(format='ttl')) assert g qres = g.query(""" - PREFIX prov: - PREFIX marl: - PREFIX nif: - PREFIX onyx: - PREFIX senpy: + prefix prov: + prefix marl: + prefix nif: + prefix onyx: + prefix senpy: - SELECT DISTINCT ?entry ?text ?sentiment - WHERE { - ?entry a senpy:Entry . - ?entry marl:hasOpinion ?o . - ?entry nif:isString ?text . - ?o marl:hasPolarity ?sentiment . - }""") - assert len(qres) == 1 + SELECT distinct ?entry ?text ?sentiment + WHERE { + ?entry a senpy:Entry . 
+ ?entry marl:hasOpinion ?o . + ?entry nif:isString ?text . + ?o marl:hasPolarity ?sentiment . + }""") + assert len(qres) == 1, "There should only be one result" entry, text, sentiment = list(qres)[0] - assert entry - assert str(text) == 'hello' + assert str(text) == 'hello', "The returned text does not match the input text." assert str(sentiment) in ['marl:Positive', 'marl:Neutral', 'marl:Negative'] def test_sentiment_turtle(self): @@ -104,25 +102,75 @@ class SemanticsTest(TestCase): 'expanded': True, 'prefix': 'http://default.example/#' } - resp = self.client.get("/api/basic?{}".format(urlencode(params))) + resp = self.client.get("/api/sentiment-basic?{}".format(urlencode(params))) self.assertCode(resp, 200) g = parse_resp(resp, 'ttl') + print('Got this graph: ', g.serialize(format='ttl')) qres = g.query(""" PREFIX prov: PREFIX marl: PREFIX nif: PREFIX onyx: - PREFIX senpy: + PREFIX senpy: SELECT DISTINCT ?entry ?text ?sentiment WHERE { - ?entry a senpy:Entry . - ?entry marl:hasOpinion ?o . - ?entry nif:isString ?text . - ?o marl:hasPolarity ?sentiment . + ?entry a senpy:Entry ; + nif:isString ?text ; + marl:hasOpinion [ + marl:hasPolarity ?sentiment + ] . 
}""") + assert len(qres) == 1, "There should only be one row in the result" + entry, text, sentiment = list(qres)[0] + assert str(text) == 'hello', "Returned text does not match input text" + assert str(sentiment) in ['marl:Positive', 'marl:Neutral', 'marl:Negative'] + + def test_moral(self): + """ + An example of a moral analysis, adapted from the examples for the AMOR project: + http://www.gsi.upm.es/ontologies/amor/examples + """ + # we use expanded json-ld and ignore the context, because in general + # the context is a uris to the service and that uri is not + # available outside of self.client + params = { + 'input': 'hello', + 'in-headers': True, + 'outformat': 'json-ld', + 'expanded': True, + 'prefix': 'http://default.example/#' + } + resp = self.client.get("/api/sentiment-basic?{}".format(urlencode(params))) + self.assertCode(resp, 200) + g = parse_resp(resp, fmt='json-ld') + print('Got this graph: ', g.serialize(format='ttl')) + assert g + qres = g.query(""" + prefix : + prefix amor: + prefix amor-bhv: + prefix amor-mft: + prefix bhv: + prefix mft: + prefix mls: + prefix owl: + prefix prov: + prefix rdfs: + prefix schema: + + SELECT ?analysis ?agent ?model ?annotation ?origin ?category + WHERE { + ?analysis a amor:MoralValueAnalysis ; + prov:wasAssociatedWith ?agent ; + amor:usedMoralValueModel ?model ; + amor:analysed ?origin ; + prov:generated ?annotation . + ?annotation a amor:MoralValueAnnotation ; + amor:hasMoralValueCategory ?category . + }""") assert len(qres) == 1 entry, text, sentiment = list(qres)[0] assert entry assert str(text) == 'hello' - assert str(sentiment) in ['marl:Positive', 'marl:Neutral', 'marl:Negative'] + assert str(sentiment) in ['marl:positive', 'marl:neutral', 'marl:negative']