1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-12-21 12:38:13 +00:00

update to pass tests with community plugins

This commit is contained in:
J. Fernando Sánchez 2023-09-22 23:28:19 +02:00
parent 5b28b6d1b4
commit 4f95fbcbd1
15 changed files with 206 additions and 78 deletions

View File

@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Added
* The code of many senpy community plugins has been included by default. However, some plugins may also require additional files (e.g., licensed data) and/or additional dependencies to be installed. Read each plugin's documentation for more information.
## [1.0.6]
### Fixed
* Plugins now get activated for testing

View File

@ -19,7 +19,7 @@ COPY . /usr/src/app/
RUN pip install --no-cache-dir --no-index --no-deps --editable .
ONBUILD COPY . /senpy-plugins/
ONBUILD RUN python -m senpy --only-install -f /senpy-plugins
ONBUILD RUN python -m senpy -i --no-run -f /senpy-plugins
ONBUILD WORKDIR /senpy-plugins/
ENTRYPOINT ["python", "-m", "senpy", "-f", "/senpy-plugins/", "--host", "0.0.0.0"]

View File

@ -10,8 +10,8 @@ The senpy server is launched via the `senpy` command:
usage: senpy [-h] [--level logging_level] [--log-format log_format] [--debug]
[--no-default-plugins] [--host HOST] [--port PORT]
[--plugins-folder PLUGINS_FOLDER] [--only-install] [--only-test]
[--test] [--only-list] [--data-folder DATA_FOLDER]
[--plugins-folder PLUGINS_FOLDER] [--install]
[--test] [--no-run] [--data-folder DATA_FOLDER]
[--no-threaded] [--no-deps] [--version] [--allow-fail]
Run a Senpy server
@ -28,10 +28,9 @@ The senpy server is launched via the `senpy` command:
--port PORT, -p PORT Port to listen on.
--plugins-folder PLUGINS_FOLDER, -f PLUGINS_FOLDER
Where to look for plugins.
--only-install, -i Do not run a server, only install plugin dependencies
--only-test Do not run a server, just test all plugins
--install, -i Install plugin dependencies before launching the server.
--test, -t Test all plugins before launching the server
--only-list, --list Do not run a server, only list plugins found
--no-run Do not launch the server
--data-folder DATA_FOLDER, --data DATA_FOLDER
Where to look for data. It can be set with the SENPY_DATA
environment variable as well.

View File

@ -31,7 +31,7 @@ pipeline = Pipeline([('cv', count_vec),
('clf', clf3)])
pipeline.fit(X_train, y_train)
print('Feature names: {}'.format(count_vec.get_feature_names()))
print('Feature names: {}'.format(count_vec.get_feature_names_out()))
print('Class count: {}'.format(clf3.class_count_))

View File

@ -22,6 +22,7 @@ the server.
from flask import Flask
from senpy.extensions import Senpy
from senpy.utils import easy_test
from senpy.plugins import list_dependencies
import logging
import os
@ -81,16 +82,21 @@ def main():
action='append',
help='Where to look for plugins.')
parser.add_argument(
'--only-install',
'--install',
'-i',
action='store_true',
default=False,
help='Do not run a server, only install plugin dependencies')
help='Install plugin dependencies before running.')
parser.add_argument(
'--only-test',
'--dependencies',
action='store_true',
default=False,
help='Do not run a server, just test all plugins')
help='List plugin dependencies')
parser.add_argument(
'--strict',
action='store_true',
default=False,
help='Fail if optional plugins cannot be loaded.')
parser.add_argument(
'--test',
'-t',
@ -98,11 +104,10 @@ def main():
default=False,
help='Test all plugins before launching the server')
parser.add_argument(
'--only-list',
'--list',
'--no-run',
action='store_true',
default=False,
help='Do not run a server, only list plugins found')
help='Do not launch the server.')
parser.add_argument(
'--data-folder',
'--data',
@ -156,6 +161,8 @@ def main():
sp = Senpy(app,
plugin_folder=None,
default_plugins=not args.no_default_plugins,
install=args.install,
strict=args.strict,
data_folder=args.data_folder)
folders = list(args.plugins_folder) if args.plugins_folder else []
if not folders:
@ -175,17 +182,43 @@ def main():
fpath,
maxname=maxname,
maxversion=maxversion))
if args.only_list:
return
if not args.no_deps:
if args.dependencies:
print('Listing dependencies')
missing = []
installed = []
for plug in sp.plugins(is_activated=False).values():
inst, miss, nltkres = list_dependencies(plug)
if not any([inst, miss, nltkres]):
continue
print(f'Plugin: {plug.id}')
for m in miss:
missing.append(f'{m} # {plug.id}')
for i in inst:
installed.append(f'{i} # {plug.id}')
if installed:
print('Installed packages:')
for i in installed:
print(f'\t{i}')
if missing:
print('Missing packages:')
for m in missing:
print(f'\t{m}')
if args.install:
sp.install_deps()
if args.only_install:
if args.test:
sp.activate_all(sync=True)
easy_test(sp.plugins(is_activated=True), debug=args.debug)
if args.no_run:
return
sp.activate_all(allow_fail=args.allow_fail)
if args.test or args.only_test:
easy_test(sp.plugins(), debug=args.debug)
if args.only_test:
return
sp.activate_all(sync=True)
if sp.strict:
inactive = sp.plugins(is_activated=False)
assert not inactive
print('Senpy version {}'.format(senpy.__version__))
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
args.port))

View File

@ -44,6 +44,8 @@ class Senpy(object):
app=None,
plugin_folder=".",
data_folder=None,
install=False,
strict=True,
default_plugins=False):
default_data = os.path.join(os.getcwd(), 'senpy_data')
@ -57,6 +59,8 @@ class Senpy(object):
raise
self._default = None
self.strict = strict
self.install = install
self._plugins = {}
if plugin_folder:
self.add_folder(plugin_folder)
@ -148,7 +152,8 @@ class Senpy(object):
logger.debug("Adding folder: %s", folder)
if os.path.isdir(folder):
new_plugins = plugins.from_folder([folder],
data_folder=self.data_folder)
data_folder=self.data_folder,
strict=self.strict)
for plugin in new_plugins:
self.add_plugin(plugin)
else:
@ -173,7 +178,7 @@ class Senpy(object):
logger.info('Installing dependencies')
# If a plugin is activated, its dependencies should already be installed
# Otherwise, it would've failed to activate.
plugins.install_deps(*self.plugins(is_activated=False))
plugins.install_deps(*self._plugins.values())
def analyse(self, request, analyses=None):
"""
@ -340,13 +345,13 @@ class Senpy(object):
else:
self._default = self._plugins[value.lower()]
def activate_all(self, sync=True, allow_fail=False):
def activate_all(self, sync=True):
ps = []
for plug in self._plugins.keys():
try:
self.activate_plugin(plug, sync=sync)
except Exception as ex:
if not allow_fail:
if self.strict:
raise
logger.error('Could not activate {}: {}'.format(plug, ex))
return ps
@ -358,15 +363,19 @@ class Senpy(object):
return ps
def _activate(self, plugin):
success = False
with plugin._lock:
if plugin.is_activated:
logger.info(f"Plugin is already activated: {plugin.name}")
return
plugin._activate()
msg = "Plugin activated: {}".format(plugin.name)
logger.info(msg)
success = plugin.is_activated
return success
try:
assert plugin._activate()
logger.info(f"Plugin activated: {plugin.name}")
except Exception as ex:
if getattr(plugin, "optional", False) and not self.strict:
logger.info(f"Plugin could NOT be activated: {plugin.name}")
return False
raise
return plugin.is_activated
def activate_plugin(self, plugin_name, sync=True):
plugin_name = plugin_name.lower()

View File

@ -155,8 +155,11 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
return os.path.dirname(inspect.getfile(self.__class__))
def _activate(self):
    """Activate the plugin once and report whether it is active.

    ``self.activate()`` is called only when the plugin is not already
    flagged as activated; the ``is_activated`` flag is set afterwards.

    Returns:
        The value of ``is_activated``: truthy once the plugin is active,
        whether it was activated by this call or by an earlier one.
    """
    if not self.is_activated:
        self.activate()
        # Only reached when activate() did not raise.
        self.is_activated = True
    # Return the flag on every path. The early exit used to return None,
    # which a caller checking the result would read as a failure.
    return self.is_activated
def _deactivate(self):
self.is_activated = False
@ -262,11 +265,13 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
assert not should_fail
def find_file(self, fname):
tried = []
for p in self._data_paths:
alternative = os.path.join(p, fname)
alternative = os.path.abspath(os.path.join(p, fname))
if os.path.exists(alternative):
return alternative
raise IOError('File does not exist: {}'.format(fname))
tried.append(alternative)
raise IOError(f'File does not exist: {fname}. Tried: {tried}')
def path(self, fpath):
if not os.path.isabs(fpath):
@ -290,6 +295,20 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
SenpyPlugin = Plugin
class FailedPlugin(Plugin):
    """A plugin that has failed to initialize.

    It keeps the callable that was used to load the plugin, so that the
    load can be attempted again through :meth:`retry`.
    """
    version = 0

    def __init__(self, info, function):
        """Build the placeholder from the plugin ``info`` mapping.

        Args:
            info: plugin description. Its ``name`` entry (or, failing
                that, its ``module`` entry) becomes the plugin name.
            function: callable invoked without arguments by ``retry``.
        """
        super().__init__(info)
        name = info.get('name', info.get('module', self.name))
        # Assignment, not comparison: the original ``==`` evaluated the
        # expression and threw the result away, so the name was never set.
        self['name'] = name
        self._function = function

    def retry(self):
        """Call the stored loader again and return its result."""
        return self._function()
class Analyser(Plugin):
'''
A subclass of Plugin that analyses text and provides an annotation.
@ -699,23 +718,31 @@ def missing_requirements(reqs):
res = pool.apply_async(pkg_resources.get_distribution, (req,))
queue.append((req, res))
missing = []
installed = []
for req, job in queue:
try:
job.get(1)
installed.append(job.get(1))
except Exception:
missing.append(req)
return missing
return installed, missing
def list_dependencies(*plugins):
    """Collect the python and nltk dependencies of the given plugins.

    Each plugin is read through ``.get``: ``requirements`` is checked with
    ``missing_requirements`` (only when non-empty) and ``nltk_resources``
    is merged into a single set.

    Returns:
        A ``(installed, missing, nltk_resources)`` tuple: two lists
        accumulated over all plugins, and the set of nltk resources.
    """
    found, absent = [], []
    resources = set()
    for plugin in plugins:
        wanted = plugin.get('requirements', [])
        if wanted:
            present, lacking = missing_requirements(wanted)
            found.extend(present)
            absent.extend(lacking)
        resources.update(plugin.get('nltk_resources', []))
    return found, absent, resources
def install_deps(*plugins):
_, requirements, nltk_resources = list_dependencies(*plugins)
installed = False
nltk_resources = set()
requirements = []
for info in plugins:
requirements = info.get('requirements', [])
if requirements:
requirements += missing_requirements(requirements)
nltk_resources |= set(info.get('nltk_resources', []))
if requirements:
logger.info('Installing requirements: ' + str(requirements))
pip_args = [sys.executable, '-m', 'pip', 'install']
@ -729,8 +756,7 @@ def install_deps(*plugins):
if exitcode != 0:
raise models.Error(
"Dependencies not properly installed: {}".format(pip_args))
installed |= download(list(nltk_resources))
return installed
return installed or download(list(nltk_resources))
is_plugin_file = re.compile(r'.*\.senpy$|senpy_[a-zA-Z0-9_]+\.py$|'
@ -747,7 +773,7 @@ def find_plugins(folders):
yield fpath
def from_path(fpath, install_on_fail=False, **kwargs):
def from_path(fpath, **kwargs):
logger.debug("Loading plugin from {}".format(fpath))
if fpath.endswith('.py'):
# We assume root is the dir of the file, and module is the name of the file
@ -757,18 +783,18 @@ def from_path(fpath, install_on_fail=False, **kwargs):
yield instance
else:
info = parse_plugin_info(fpath)
yield from_info(info, install_on_fail=install_on_fail, **kwargs)
yield from_info(info, **kwargs)
def from_folder(folders, loader=from_path, **kwargs):
plugins = []
for fpath in find_plugins(folders):
for plugin in loader(fpath, **kwargs):
plugins.append(plugin)
if plugin:
plugins.append(plugin)
return plugins
def from_info(info, root=None, install_on_fail=True, **kwargs):
def from_info(info, root=None, strict=False, **kwargs):
if any(x not in info for x in ('module', )):
raise ValueError('Plugin info is not valid: {}'.format(info))
module = info["module"]
@ -780,8 +806,10 @@ def from_info(info, root=None, install_on_fail=True, **kwargs):
try:
return fun()
except (ImportError, LookupError):
install_deps(info)
return fun()
if strict or not str(info.get("optional", "false")).lower() in ["True", "true", "t"]:
raise
print(f"Could not import plugin: { info }")
return FailedPlugin(info, fun)
def parse_plugin_info(fpath):

View File

@ -23,6 +23,51 @@ from senpy.plugins import EmotionPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion
### BEGIN WORKAROUND FOR PATTERN
# See: https://github.com/clips/pattern/issues/308
import os.path
import pattern.text
from pattern.helpers import decode_string
from codecs import BOM_UTF8
BOM_UTF8 = BOM_UTF8.decode("utf-8")
decode_utf8 = decode_string
# Emotion model used by this plugin and the keys of its three dimensions.
# The model identifier must not end in "_": the separator is added when each
# dimension key is built. A trailing "_" here changes the model identifier
# itself and produces keys with a double underscore.
MODEL = "emoml:pad-dimensions"
VALENCE = f"{MODEL}_valence"
AROUSAL = f"{MODEL}_arousal"
DOMINANCE = f"{MODEL}_dominance"
def _read(path, encoding="utf-8", comment=";;;"):
    """Returns an iterator over the lines in the file at the given path,
    stripping comments and decoding each line to Unicode.

    ``path`` may be the path of an existing file, a string holding the
    content itself, or any iterable of lines (e.g. an open file or buffer).
    Empty lines and lines starting with ``comment`` are skipped. Nothing is
    yielded when ``path`` is falsy.
    """
    if not path:
        return
    opened = None
    if isinstance(path, str) and os.path.exists(path):
        # From file path.
        opened = lines = open(path, "r", encoding="utf-8")
    elif isinstance(path, str):
        # From string.
        lines = path.splitlines()
    else:
        # From file or buffer.
        lines = path
    try:
        for i, line in enumerate(lines):
            if i == 0 and isinstance(line, str):
                # A byte-order mark can only appear on the first line.
                line = line.strip(BOM_UTF8)
            line = line.strip()
            line = decode_utf8(line, encoding)
            if not line or (comment and line.startswith(comment)):
                continue
            yield line
    finally:
        # Close only the handle opened here (it used to be leaked);
        # files or buffers passed in by the caller are left untouched.
        if opened is not None:
            opened.close()
# Replace the reader in pattern.text with the local _read defined above.
pattern.text._read = _read
## END WORKAROUND
class ANEW(EmotionPlugin):
description = "This plugin consists on an emotion classifier using ANEW lexicon dictionary. It averages the VAD (valence-arousal-dominance) value of each word in the text that is also in the ANEW dictionary. To obtain a categorical value (e.g., happy) use the emotion conversion API (e.g., `emotion-model=emoml:big6`)."
author = "@icorcuera"
@ -41,7 +86,7 @@ class ANEW(EmotionPlugin):
anew_path_es = "Dictionary/Redondo(2007).csv"
anew_path_en = "Dictionary/ANEW2010All.txt"
onyx__usesEmotionModel = "emoml:pad-dimensions"
onyx__usesEmotionModel = MODEL
nltk_resources = ['stopwords']
def activate(self, *args, **kwargs):
@ -147,9 +192,9 @@ class ANEW(EmotionPlugin):
emotions.id = "Emotions0"
emotion1 = Emotion(id="Emotion0")
emotion1["emoml:pad-dimensions_pleasure"] = feature_set['V']
emotion1["emoml:pad-dimensions_arousal"] = feature_set['A']
emotion1["emoml:pad-dimensions_dominance"] = feature_set['D']
emotion1[VALENCE] = feature_set['V']
emotion1[AROUSAL] = feature_set['A']
emotion1[DOMINANCE] = feature_set['D']
emotion1.prov(activity)
emotions.prov(activity)
@ -159,7 +204,6 @@ class ANEW(EmotionPlugin):
yield entry
ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
test_cases = [
{
'name': 'anger with VAD=(2.12, 6.95, 5.05)',
@ -167,9 +211,9 @@ class ANEW(EmotionPlugin):
'expected': {
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.95,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 5.05,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 2.12,
AROUSAL: 6.95,
DOMINANCE: 5.05,
VALENCE: 2.12,
}]
}]
}
@ -178,9 +222,7 @@ class ANEW(EmotionPlugin):
'expected': {
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 4.13,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 3.45,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 1.61,
f"{MODEL}_arousal": 4.13,
}]
}]
@ -191,9 +233,9 @@ class ANEW(EmotionPlugin):
'expected': {
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.49,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 6.63,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 8.21,
AROUSAL: 6.49,
DOMINANCE: 6.63,
VALENCE: 8.21,
}]
}]
}
@ -203,9 +245,9 @@ class ANEW(EmotionPlugin):
'expected': {
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.8100000000000005,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.33,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.050000000000001,
AROUSAL: 5.8100000000000005,
DOMINANCE: 4.33,
VALENCE: 5.050000000000001,
}]
}]
@ -216,9 +258,9 @@ class ANEW(EmotionPlugin):
'expected': {
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.09,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.4,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.109999999999999,
AROUSAL: 5.09,
DOMINANCE: 4.4,
VALENCE: 5.109999999999999,
}]
}]

View File

@ -1,5 +1,6 @@
---
module: emotion-anew
optional: true
requirements:
- numpy
- pandas

View File

@ -33,6 +33,7 @@ class DepecheMood(EmotionBox):
name = 'emotion-depechemood'
version = '0.1'
requirements = ['pandas']
optional = True
nltk_resources = ["stopwords"]
onyx__usesEmotionModel = 'wna:WNAModel'

View File

@ -1,5 +1,6 @@
---
module: emotion-wnaffect
optional: true
requirements:
- nltk>=3.0.5
- lxml>=3.4.2

View File

@ -1,5 +1,6 @@
---
module: sentiment-basic
optional: true
requirements:
- nltk>=3.0.5
- scipy>=0.14.0

View File

@ -44,6 +44,10 @@ def check_template(indict, template):
raise models.Error(('Element not found.'
'\nExpected: {}\nIn: {}').format(pprint.pformat(e),
pprint.pformat(indict)))
elif isinstance(template, float) and isinstance(indict, float):
diff = abs(indict - template)
if (diff > 0) and diff/(abs(indict+template)) > 0.05:
raise models.Error('Differences greater than 10% found.\n')
else:
if indict != template:
raise models.Error(('Differences found.\n'

View File

@ -209,8 +209,8 @@ class BlueprintsTest(TestCase):
"""
# First, we split by sentence twice. Each call should generate 3 additional entries
# (one per sentence in the original).
resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
'\nA new paragraph&delimiter=sentence&verbose')
resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.%0A'
'A new paragraph&delimiter=sentence&verbose')
js = parse_resp(resp)
assert len(js['activities']) == 2
assert len(js['entries']) == 7
@ -218,9 +218,8 @@ class BlueprintsTest(TestCase):
# Now, we split by sentence. This produces 3 additional entries.
# Then, we split by paragraph. This should create 2 additional entries (One per paragraph
# in the original text)
resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.'
'\nA new paragraph&0.delimiter=sentence'
'&1.delimiter=paragraph&verbose')
resp = self.client.get('/api/split/split?i=The first sentence. The second sentence.%0AA new paragraph'
'&0.delimiter=sentence&1.delimiter=paragraph&verbose')
# Calling dummy twice, should return the same string
self.assertCode(resp, 200)
js = parse_resp(resp)

View File

@ -255,3 +255,9 @@ class ExtensionsTest(TestCase):
self.senpy.analyse(r3)
assert len(r3.entries[0].emotions) == 1
r3.jsonld()
def testDefaultPlugins(self):
    '''The default set of plugins should all load'''
    self.app = Flask('test_extensions')
    # Folder named 'example-plugins', one level above this test module.
    self.examples_dir = os.path.join(os.path.dirname(__file__), '..', 'example-plugins')
    # NOTE(review): the instance is created with default_plugins=False even
    # though this test is about the default plugins, and no assertion
    # follows. Confirm this is intended.
    self.senpy = Senpy(app=self.app, default_plugins=False, strict=True)