diff --git a/CHANGELOG.md b/CHANGELOG.md index 72f2e63..1e733ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] +### Added +* The code of many senpy community plugins has been included by default. However, additional files (e.g., licensed data) and/or installing additional dependencies may be necessary for some plugins. Read each plugin's documentation for more information. + ## [1.0.6] ### Fixed * Plugins now get activated for testing diff --git a/Dockerfile.template b/Dockerfile.template index d4cf77b..b656cb1 100644 --- a/Dockerfile.template +++ b/Dockerfile.template @@ -19,7 +19,7 @@ COPY . /usr/src/app/ RUN pip install --no-cache-dir --no-index --no-deps --editable . ONBUILD COPY . /senpy-plugins/ -ONBUILD RUN python -m senpy --only-install -f /senpy-plugins +ONBUILD RUN python -m senpy -i --no-run -f /senpy-plugins ONBUILD WORKDIR /senpy-plugins/ ENTRYPOINT ["python", "-m", "senpy", "-f", "/senpy-plugins/", "--host", "0.0.0.0"] diff --git a/docs/server-cli.rst b/docs/server-cli.rst index 11135f2..be2dcdc 100644 --- a/docs/server-cli.rst +++ b/docs/server-cli.rst @@ -10,8 +10,8 @@ The senpy server is launched via the `senpy` command: usage: senpy [-h] [--level logging_level] [--log-format log_format] [--debug] [--no-default-plugins] [--host HOST] [--port PORT] - [--plugins-folder PLUGINS_FOLDER] [--only-install] [--only-test] - [--test] [--only-list] [--data-folder DATA_FOLDER] + [--plugins-folder PLUGINS_FOLDER] [--install] + [--test] [--no-run] [--data-folder DATA_FOLDER] [--no-threaded] [--no-deps] [--version] [--allow-fail] Run a Senpy server @@ -28,10 +28,9 @@ The senpy server is launched via the `senpy` command: --port PORT, -p PORT Port to listen on. 
--plugins-folder PLUGINS_FOLDER, -f PLUGINS_FOLDER Where to look for plugins. - --only-install, -i Do not run a server, only install plugin dependencies - --only-test Do not run a server, just test all plugins + --install, -i Install plugin dependencies before launching the server. --test, -t Test all plugins before launching the server - --only-list, --list Do not run a server, only list plugins found + --no-run Do not launch the server --data-folder DATA_FOLDER, --data DATA_FOLDER Where to look for data. It be set with the SENPY_DATA environment variable as well. diff --git a/example-plugins/sklearn/mypipeline.py b/example-plugins/sklearn/mypipeline.py index 45f6eab..9d9e26a 100644 --- a/example-plugins/sklearn/mypipeline.py +++ b/example-plugins/sklearn/mypipeline.py @@ -31,7 +31,7 @@ pipeline = Pipeline([('cv', count_vec), ('clf', clf3)]) pipeline.fit(X_train, y_train) -print('Feature names: {}'.format(count_vec.get_feature_names())) +print('Feature names: {}'.format(count_vec.get_feature_names_out())) print('Class count: {}'.format(clf3.class_count_)) diff --git a/senpy/__main__.py b/senpy/__main__.py index c08c03f..d1d1951 100644 --- a/senpy/__main__.py +++ b/senpy/__main__.py @@ -22,6 +22,7 @@ the server. 
from flask import Flask from senpy.extensions import Senpy from senpy.utils import easy_test +from senpy.plugins import list_dependencies import logging import os @@ -81,16 +82,21 @@ def main(): action='append', help='Where to look for plugins.') parser.add_argument( - '--only-install', + '--install', '-i', action='store_true', default=False, - help='Do not run a server, only install plugin dependencies') + help='Install plugin dependencies before running.') parser.add_argument( - '--only-test', + '--dependencies', action='store_true', default=False, - help='Do not run a server, just test all plugins') + help='List plugin dependencies') + parser.add_argument( + '--strict', + action='store_true', + default=False, + help='Fail if optional plugins cannot be loaded.') parser.add_argument( '--test', '-t', @@ -98,11 +104,10 @@ def main(): default=False, help='Test all plugins before launching the server') parser.add_argument( - '--only-list', - '--list', + '--no-run', action='store_true', default=False, - help='Do not run a server, only list plugins found') + help='Do not launch the server.') parser.add_argument( '--data-folder', '--data', @@ -156,6 +161,8 @@ def main(): sp = Senpy(app, plugin_folder=None, default_plugins=not args.no_default_plugins, + install=args.install, + strict=args.strict, data_folder=args.data_folder) folders = list(args.plugins_folder) if args.plugins_folder else [] if not folders: @@ -175,17 +182,43 @@ def main(): fpath, maxname=maxname, maxversion=maxversion)) - if args.only_list: - return - if not args.no_deps: + if args.dependencies: + print('Listing dependencies') + missing = [] + installed = [] + for plug in sp.plugins(is_activated=False).values(): + inst, miss, nltkres = list_dependencies(plug) + if not any([inst, miss, nltkres]): + continue + print(f'Plugin: {plug.id}') + for m in miss: + missing.append(f'{m} # {plug.id}') + for i in inst: + installed.append(f'{i} # {plug.id}') + if installed: + print('Installed packages:') + for i in 
installed: + print(f'\t{i}') + if missing: + print('Missing packages:') + for m in missing: + print(f'\t{m}') + + if args.install: sp.install_deps() - if args.only_install: + + if args.test: + sp.activate_all(sync=True) + easy_test(sp.plugins(is_activated=True), debug=args.debug) + + if args.no_run: return - sp.activate_all(allow_fail=args.allow_fail) - if args.test or args.only_test: - easy_test(sp.plugins(), debug=args.debug) - if args.only_test: - return + + sp.activate_all(sync=True) + if sp.strict: + inactive = sp.plugins(is_activated=False) + assert not inactive + print('Senpy version {}'.format(senpy.__version__)) print('Server running on port %s:%d. Ctrl+C to quit' % (args.host, args.port)) diff --git a/senpy/extensions.py b/senpy/extensions.py index 04c42d6..9e62f0c 100644 --- a/senpy/extensions.py +++ b/senpy/extensions.py @@ -44,6 +44,8 @@ class Senpy(object): app=None, plugin_folder=".", data_folder=None, + install=False, + strict=True, default_plugins=False): default_data = os.path.join(os.getcwd(), 'senpy_data') @@ -57,6 +59,8 @@ class Senpy(object): raise self._default = None + self.strict = strict + self.install = install self._plugins = {} if plugin_folder: self.add_folder(plugin_folder) @@ -148,7 +152,8 @@ class Senpy(object): logger.debug("Adding folder: %s", folder) if os.path.isdir(folder): new_plugins = plugins.from_folder([folder], - data_folder=self.data_folder) + data_folder=self.data_folder, + strict=self.strict) for plugin in new_plugins: self.add_plugin(plugin) else: @@ -173,7 +178,7 @@ class Senpy(object): logger.info('Installing dependencies') # If a plugin is activated, its dependencies should already be installed # Otherwise, it would've failed to activate. 
- plugins.install_deps(*self.plugins(is_activated=False)) + plugins.install_deps(*self._plugins.values()) def analyse(self, request, analyses=None): """ @@ -340,13 +345,13 @@ class Senpy(object): else: self._default = self._plugins[value.lower()] - def activate_all(self, sync=True, allow_fail=False): + def activate_all(self, sync=True): ps = [] for plug in self._plugins.keys(): try: self.activate_plugin(plug, sync=sync) except Exception as ex: - if not allow_fail: + if self.strict: raise logger.error('Could not activate {}: {}'.format(plug, ex)) return ps @@ -358,15 +363,20 @@ class Senpy(object): return ps def _activate(self, plugin): - success = False with plugin._lock: if plugin.is_activated: + logger.info(f"Plugin is already activated: {plugin.name}") return - plugin._activate() - msg = "Plugin activated: {}".format(plugin.name) - logger.info(msg) - success = plugin.is_activated - return success + try: + if not plugin._activate(): + raise RuntimeError(f"Plugin failed to activate: {plugin.name}") + logger.info(f"Plugin activated: {plugin.name}") + except Exception as ex: + if getattr(plugin, "optional", False) and not self.strict: + logger.info(f"Plugin could NOT be activated: {plugin.name}") + return False + raise + return plugin.is_activated def activate_plugin(self, plugin_name, sync=True): plugin_name = plugin_name.lower() diff --git a/senpy/plugins/__init__.py b/senpy/plugins/__init__.py index 3b10919..f09dd0f 100644 --- a/senpy/plugins/__init__.py +++ b/senpy/plugins/__init__.py @@ -155,8 +155,11 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): return os.path.dirname(inspect.getfile(self.__class__)) def _activate(self): + if self.is_activated: + return True self.activate() self.is_activated = True + return self.is_activated def _deactivate(self): self.is_activated = False @@ -262,11 +265,13 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): assert not should_fail def find_file(self, fname): + tried = [] for p in self._data_paths: - alternative = 
os.path.abspath(os.path.join(p, fname)) if os.path.exists(alternative): return alternative - raise IOError('File does not exist: {}'.format(fname)) + tried.append(alternative) + raise IOError(f'File does not exist: {fname}. Tried: {tried}') def path(self, fpath): if not os.path.isabs(fpath): @@ -290,6 +295,20 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): SenpyPlugin = Plugin +class FailedPlugin(Plugin): + """A plugin that has failed to initialize.""" + version = 0 + + def __init__(self, info, function): + super().__init__(info) + a = info.get('name', info.get('module', self.name)) + self['name'] = a + self._function = function + + def retry(self): + return self._function() + + class Analyser(Plugin): ''' A subclass of Plugin that analyses text and provides an annotation. @@ -699,23 +718,31 @@ def missing_requirements(reqs): res = pool.apply_async(pkg_resources.get_distribution, (req,)) queue.append((req, res)) missing = [] + installed = [] for req, job in queue: try: - job.get(1) + installed.append(job.get(1)) except Exception: missing.append(req) - return missing + return installed, missing - -def install_deps(*plugins): - installed = False +def list_dependencies(*plugins): + '''List all dependencies (python and nltk) for the given list of plugins''' nltk_resources = set() - requirements = [] + missing = [] + installed = [] for info in plugins: - requirements = info.get('requirements', []) - if requirements: - requirements += missing_requirements(requirements) + reqs = info.get('requirements', []) + if reqs: + inst, miss = missing_requirements(reqs) + installed += inst + missing += miss nltk_resources |= set(info.get('nltk_resources', [])) + return installed, missing, nltk_resources + +def install_deps(*plugins): + _, requirements, nltk_resources = list_dependencies(*plugins) + installed = False if requirements: logger.info('Installing requirements: ' + str(requirements)) pip_args = [sys.executable, '-m', 'pip', 'install'] @@ -729,8 +756,7 @@ def 
install_deps(*plugins): if exitcode != 0: raise models.Error( "Dependencies not properly installed: {}".format(pip_args)) - installed |= download(list(nltk_resources)) - return installed + return download(list(nltk_resources)) or installed is_plugin_file = re.compile(r'.*\.senpy$|senpy_[a-zA-Z0-9_]+\.py$|' @@ -747,7 +773,7 @@ def find_plugins(folders): yield fpath -def from_path(fpath, install_on_fail=False, **kwargs): +def from_path(fpath, **kwargs): logger.debug("Loading plugin from {}".format(fpath)) if fpath.endswith('.py'): # We asume root is the dir of the file, and module is the name of the file @@ -757,18 +783,18 @@ def from_path(fpath, install_on_fail=False, **kwargs): yield instance else: info = parse_plugin_info(fpath) - yield from_info(info, install_on_fail=install_on_fail, **kwargs) - + yield from_info(info, **kwargs) def from_folder(folders, loader=from_path, **kwargs): plugins = [] for fpath in find_plugins(folders): for plugin in loader(fpath, **kwargs): - plugins.append(plugin) + if plugin: + plugins.append(plugin) return plugins -def from_info(info, root=None, install_on_fail=True, **kwargs): +def from_info(info, root=None, strict=False, **kwargs): if any(x not in info for x in ('module', )): raise ValueError('Plugin info is not valid: {}'.format(info)) module = info["module"] @@ -780,8 +806,10 @@ def from_info(info, root=None, install_on_fail=True, **kwargs): try: return fun() except (ImportError, LookupError): - install_deps(info) - return fun() + if strict or str(info.get("optional", "false")).lower() not in ("true", "t"): + raise + print(f"Could not import plugin: { info }") + return FailedPlugin(info, fun) def parse_plugin_info(fpath): diff --git a/senpy/plugins/emotion/anew/emotion-anew.py b/senpy/plugins/emotion/anew/emotion-anew.py index d40c83b..fea6878 100644 --- a/senpy/plugins/emotion/anew/emotion-anew.py +++ b/senpy/plugins/emotion/anew/emotion-anew.py @@ -23,6 +23,51 @@ from senpy.plugins import EmotionPlugin, SenpyPlugin 
from senpy.models import Results, EmotionSet, Entry, Emotion +### BEGIN WORKAROUND FOR PATTERN +# See: https://github.com/clips/pattern/issues/308 + +import os.path + +import pattern.text + +from pattern.helpers import decode_string +from codecs import BOM_UTF8 + +BOM_UTF8 = BOM_UTF8.decode("utf-8") +decode_utf8 = decode_string + +MODEL = "emoml:pad-dimensions" +VALENCE = f"{MODEL}_valence" +AROUSAL = f"{MODEL}_arousal" +DOMINANCE = f"{MODEL}_dominance" + +def _read(path, encoding="utf-8", comment=";;;"): + """Returns an iterator over the lines in the file at the given path, + stripping comments and decoding each line to Unicode. + """ + if path: + if isinstance(path, str) and os.path.exists(path): + # From file path. + f = open(path, "r", encoding="utf-8") + elif isinstance(path, str): + # From string. + f = path.splitlines() + else: + # From file or buffer. + f = path + for i, line in enumerate(f): + line = line.strip(BOM_UTF8) if i == 0 and isinstance(line, str) else line + line = line.strip() + line = decode_utf8(line, encoding) + if not line or (comment and line.startswith(comment)): + continue + yield line + + +pattern.text._read = _read +## END WORKAROUND + + class ANEW(EmotionPlugin): description = "This plugin consists on an emotion classifier using ANEW lexicon dictionary. It averages the VAD (valence-arousal-dominance) value of each word in the text that is also in the ANEW dictionary. To obtain a categorical value (e.g., happy) use the emotion conversion API (e.g., `emotion-model=emoml:big6`)." 
author = "@icorcuera" @@ -41,7 +86,7 @@ class ANEW(EmotionPlugin): anew_path_es = "Dictionary/Redondo(2007).csv" anew_path_en = "Dictionary/ANEW2010All.txt" - onyx__usesEmotionModel = "emoml:pad-dimensions" + onyx__usesEmotionModel = MODEL nltk_resources = ['stopwords'] def activate(self, *args, **kwargs): @@ -147,9 +192,9 @@ class ANEW(EmotionPlugin): emotions.id = "Emotions0" emotion1 = Emotion(id="Emotion0") - emotion1["emoml:pad-dimensions_pleasure"] = feature_set['V'] - emotion1["emoml:pad-dimensions_arousal"] = feature_set['A'] - emotion1["emoml:pad-dimensions_dominance"] = feature_set['D'] + emotion1[VALENCE] = feature_set['V'] + emotion1[AROUSAL] = feature_set['A'] + emotion1[DOMINANCE] = feature_set['D'] emotion1.prov(activity) emotions.prov(activity) @@ -159,7 +204,6 @@ class ANEW(EmotionPlugin): yield entry - ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#" test_cases = [ { 'name': 'anger with VAD=(2.12, 6.95, 5.05)', @@ -167,9 +211,9 @@ class ANEW(EmotionPlugin): 'expected': { 'onyx:hasEmotionSet': [{ 'onyx:hasEmotion': [{ - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.95, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 5.05, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 2.12, + AROUSAL: 6.95, + DOMINANCE: 5.05, + VALENCE: 2.12, }] }] } @@ -178,9 +222,7 @@ class ANEW(EmotionPlugin): 'expected': { 'onyx:hasEmotionSet': [{ 'onyx:hasEmotion': [{ - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 4.13, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 3.45, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 1.61, + f"{MODEL}_arousal": 4.13, }] }] @@ -191,9 +233,9 @@ class ANEW(EmotionPlugin): 'expected': { 'onyx:hasEmotionSet': [{ 'onyx:hasEmotion': [{ - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.49, - 
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 6.63, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 8.21, + AROUSAL: 6.49, + DOMINANCE: 6.63, + VALENCE: 8.21, }] }] } @@ -203,9 +245,9 @@ class ANEW(EmotionPlugin): 'expected': { 'onyx:hasEmotionSet': [{ 'onyx:hasEmotion': [{ - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.8100000000000005, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.33, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.050000000000001, + AROUSAL: 5.8100000000000005, + DOMINANCE: 4.33, + VALENCE: 5.050000000000001, }] }] @@ -216,9 +258,9 @@ class ANEW(EmotionPlugin): 'expected': { 'onyx:hasEmotionSet': [{ 'onyx:hasEmotion': [{ - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.09, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.4, - "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.109999999999999, + AROUSAL: 5.09, + DOMINANCE: 4.4, + VALENCE: 5.109999999999999, }] }] diff --git a/senpy/plugins/emotion/anew/emotion-anew.senpy b/senpy/plugins/emotion/anew/emotion-anew.senpy index 4718fc6..b9d72fa 100644 --- a/senpy/plugins/emotion/anew/emotion-anew.senpy +++ b/senpy/plugins/emotion/anew/emotion-anew.senpy @@ -1,5 +1,6 @@ --- module: emotion-anew +optional: true requirements: - numpy - pandas diff --git a/senpy/plugins/emotion/depechemood_plugin.py b/senpy/plugins/emotion/depechemood_plugin.py index d80e748..aa3948b 100644 --- a/senpy/plugins/emotion/depechemood_plugin.py +++ b/senpy/plugins/emotion/depechemood_plugin.py @@ -33,6 +33,7 @@ class DepecheMood(EmotionBox): name = 'emotion-depechemood' version = '0.1' requirements = ['pandas'] + optional = True nltk_resources = ["stopwords"] onyx__usesEmotionModel = 'wna:WNAModel' diff --git a/senpy/plugins/emotion/wnaffect/emotion-wnaffect.senpy 
b/senpy/plugins/emotion/wnaffect/emotion-wnaffect.senpy index 54eea36..2ecdf63 100644 --- a/senpy/plugins/emotion/wnaffect/emotion-wnaffect.senpy +++ b/senpy/plugins/emotion/wnaffect/emotion-wnaffect.senpy @@ -1,5 +1,6 @@ --- module: emotion-wnaffect +optional: true requirements: - nltk>=3.0.5 - lxml>=3.4.2 diff --git a/senpy/plugins/sentiment/basic/sentiment-basic.senpy b/senpy/plugins/sentiment/basic/sentiment-basic.senpy index fee2f7c..c038a8f 100644 --- a/senpy/plugins/sentiment/basic/sentiment-basic.senpy +++ b/senpy/plugins/sentiment/basic/sentiment-basic.senpy @@ -1,5 +1,6 @@ --- module: sentiment-basic +optional: true requirements: - nltk>=3.0.5 - scipy>=0.14.0 diff --git a/senpy/utils.py b/senpy/utils.py index b814119..59e90a5 100644 --- a/senpy/utils.py +++ b/senpy/utils.py @@ -44,6 +44,10 @@ def check_template(indict, template): raise models.Error(('Element not found.' '\nExpected: {}\nIn: {}').format(pprint.pformat(e), pprint.pformat(indict))) + elif isinstance(template, float) and isinstance(indict, float): + diff = abs(indict - template) + if diff > 0.05 * abs(indict + template): + raise models.Error('Differences greater than 10% found.\n') else: if indict != template: raise models.Error(('Differences found.\n' diff --git a/tests/test_blueprints.py b/tests/test_blueprints.py index 783a466..1aa79ee 100644 --- a/tests/test_blueprints.py +++ b/tests/test_blueprints.py @@ -209,8 +209,8 @@ class BlueprintsTest(TestCase): """ # First, we split by sentence twice. Each call should generate 3 additional entries # (one per sentence in the original). - resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.' - '\nA new paragraph&delimiter=sentence&verbose') + resp = self.client.get('/api/split/split?i=The first sentence. 
The second sentence.%0A' + 'A new paragraph&delimiter=sentence&verbose') js = parse_resp(resp) assert len(js['activities']) == 2 assert len(js['entries']) == 7 @@ -218,9 +218,8 @@ class BlueprintsTest(TestCase): # Now, we split by sentence. This produces 3 additional entries. # Then, we split by paragraph. This should create 2 additional entries (One per paragraph # in the original text) - resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.' - '\nA new paragraph&0.delimiter=sentence' - '&1.delimiter=paragraph&verbose') + resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.%0AA new paragraph' + '&0.delimiter=sentence&1.delimiter=paragraph&verbose') # Calling dummy twice, should return the same string self.assertCode(resp, 200) js = parse_resp(resp) diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 838352e..001fc80 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -255,3 +255,9 @@ class ExtensionsTest(TestCase): self.senpy.analyse(r3) assert len(r3.entries[0].emotions) == 1 r3.jsonld() + + def testDefaultPlugins(self): + '''The default set of plugins should all load''' + self.app = Flask('test_extensions') + self.examples_dir = os.path.join(os.path.dirname(__file__), '..', 'example-plugins') + self.senpy = Senpy(app=self.app, default_plugins=False, strict=True)