mirror of
https://github.com/gsi-upm/senpy
synced 2025-09-16 19:42:21 +00:00
Compare commits
28 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
1313853788 | ||
|
697e779767 | ||
|
48f5ffafa1 | ||
|
73f7cbbe8a | ||
|
07a41236f8 | ||
|
55db97cf62 | ||
|
d8dead1908 | ||
|
87dcdb9fbc | ||
|
67ef4b60bd | ||
|
da4b11e5b5 | ||
|
c0aa7ddc3c | ||
|
5e2ada1654 | ||
|
7a188586c5 | ||
|
b768b215c5 | ||
|
d1f1b9a15a | ||
|
52a0f3f4c8 | ||
|
55c32dcd7c | ||
|
0093bc34d5 | ||
|
67bae9a20d | ||
|
551a5cb176 | ||
|
d6f4cc2dd2 | ||
|
4af692091a | ||
|
ec68ff0b90 | ||
|
738da490db | ||
|
d29c42fd2e | ||
|
23c88d0acc | ||
|
dcaaa591b7 | ||
|
15ab5f4c25 |
@@ -77,7 +77,6 @@ push-latest: $(addprefix push-latest-,$(PYVERSIONS)) ## Push the "latest" tag to
|
||||
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)'
|
||||
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGENAME)'
|
||||
docker push '$(IMAGENAME):latest'
|
||||
docker push '$(IMAGEWTAG)'
|
||||
|
||||
push-latest-%: build-% ## Push the latest image for a specific python version
|
||||
docker tag $(IMAGENAME):$(VERSION)-python$* $(IMAGENAME):python$*
|
||||
|
@@ -6,8 +6,6 @@ RUN apt-get update && apt-get install -y \
|
||||
libblas-dev liblapack-dev liblapacke-dev gfortran \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install --no-cache-dir --upgrade numpy scipy scikit-learn
|
||||
|
||||
RUN mkdir /cache/ /senpy-plugins /data/
|
||||
|
||||
VOLUME /data/
|
||||
@@ -20,8 +18,8 @@ ONBUILD WORKDIR /senpy-plugins/
|
||||
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
COPY test-requirements.txt requirements.txt /usr/src/app/
|
||||
RUN pip install --no-cache-dir --use-wheel -r test-requirements.txt -r requirements.txt
|
||||
COPY test-requirements.txt requirements.txt extra-requirements.txt /usr/src/app/
|
||||
RUN pip install --no-cache-dir -r test-requirements.txt -r requirements.txt -r extra-requirements.txt
|
||||
COPY . /usr/src/app/
|
||||
RUN pip install --no-cache-dir --no-index --no-deps --editable .
|
||||
|
||||
|
@@ -1,5 +1,6 @@
|
||||
include requirements.txt
|
||||
include test-requirements.txt
|
||||
include extra-requirements.txt
|
||||
include README.rst
|
||||
include senpy/VERSION
|
||||
graft senpy/plugins
|
||||
|
@@ -1,8 +1,11 @@
|
||||
What is Senpy?
|
||||
--------------
|
||||
|
||||
Web services can get really complex: data validation, user interaction, formatting, logging, etc.
|
||||
The figure below summarizes the typical features in an analysis service.
|
||||
Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recognition. Annotations and Services are compliant with NIF (NLP Interchange Format).
|
||||
|
||||
Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing a core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc.
|
||||
|
||||
The figure below summarizes the typical features in a text analysis service.
|
||||
Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.
|
||||
|
||||
.. image:: senpy-framework.png
|
||||
|
@@ -1,8 +1,24 @@
|
||||
Vocabularies and model
|
||||
======================
|
||||
|
||||
The model used in Senpy is based on the following vocabularies:
|
||||
The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services.
|
||||
|
||||
* Marl, a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
|
||||
* Onyx, which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
|
||||
* NIF 2.0, which defines a semantic format and API for improving interoperability among natural language processing services.
|
||||
Senpy has been applied to sentiment and emotion analysis services using the following vocabularies:
|
||||
|
||||
* Marl [2,6], a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
|
||||
* Onyx [3,5], which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
|
||||
|
||||
An overview of the vocabularies and their use can be found in [4].
|
||||
|
||||
|
||||
[1] Guidelines for developing NIF-based NLP services, Final Community Group Report, 22 December 2015. Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/
|
||||
|
||||
[2] Marl Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/marl/
|
||||
|
||||
[3] Onyx Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/onyx/
|
||||
|
||||
[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69).
|
||||
|
||||
[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114.
|
||||
|
||||
[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked opinions: Describing sentiments on the structured web of data.
|
||||
|
@@ -18,7 +18,7 @@ class BasicBox(SentimentBox):
|
||||
'default': 'marl:Neutral'
|
||||
}
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
|
||||
output = basic.get_polarity(input)
|
||||
return self.mappings.get(output, self.mappings['default'])
|
||||
|
||||
|
@@ -18,7 +18,7 @@ class Basic(MappingMixin, SentimentBox):
|
||||
'default': 'marl:Neutral'
|
||||
}
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
|
||||
return basic.get_polarity(input)
|
||||
|
||||
test_cases = [{
|
||||
|
@@ -18,7 +18,7 @@ class PipelineSentiment(MappingMixin, SentimentBox):
|
||||
-1: 'marl:Negative'
|
||||
}
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
|
||||
return pipeline.predict([input, ])[0]
|
||||
|
||||
test_cases = [
|
||||
|
1
extra-requirements.txt
Normal file
1
extra-requirements.txt
Normal file
@@ -0,0 +1 @@
|
||||
gsitk
|
@@ -9,3 +9,6 @@ jsonref
|
||||
PyYAML
|
||||
rdflib
|
||||
rdflib-jsonld
|
||||
numpy
|
||||
scipy
|
||||
scikit-learn
|
||||
|
@@ -22,6 +22,7 @@ the server.
|
||||
|
||||
from flask import Flask
|
||||
from senpy.extensions import Senpy
|
||||
from senpy.utils import easy_test
|
||||
|
||||
import logging
|
||||
import os
|
||||
@@ -39,7 +40,7 @@ def main():
|
||||
'-l',
|
||||
metavar='logging_level',
|
||||
type=str,
|
||||
default="ERROR",
|
||||
default="WARN",
|
||||
help='Logging level')
|
||||
parser.add_argument(
|
||||
'--debug',
|
||||
@@ -75,6 +76,12 @@ def main():
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Do not run a server, only install plugin dependencies')
|
||||
parser.add_argument(
|
||||
'--only-test',
|
||||
'-t',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Do not run a server, just test all plugins')
|
||||
parser.add_argument(
|
||||
'--only-list',
|
||||
'--list',
|
||||
@@ -122,6 +129,9 @@ def main():
|
||||
if args.only_install:
|
||||
return
|
||||
sp.activate_all()
|
||||
if args.only_test:
|
||||
easy_test(sp.plugins(), debug=args.debug)
|
||||
return
|
||||
print('Senpy version {}'.format(senpy.__version__))
|
||||
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
|
||||
args.port))
|
||||
|
17
senpy/api.py
17
senpy/api.py
@@ -53,6 +53,21 @@ API_PARAMS = {
|
||||
}
|
||||
}
|
||||
|
||||
# Parameter specification for the evaluation API. Each key is the canonical
# parameter name; its value lists the accepted aliases, a description,
# whether the parameter is required and a short help hint.
EVAL_PARAMS = {
    "algorithm": {
        "aliases": ["plug", "p", "plugins", "algorithms", 'algo', 'a', 'plugin'],
        "description": "Plugins to be evaluated",
        "required": True,
        "help": "See activated plugins in /plugins"
    },
    "dataset": {
        "aliases": ["datasets", "data", "d"],
        "description": "Datasets to be evaluated",
        "required": True,
        "help": "See available datasets in /datasets"
    }
}
|
||||
|
||||
PLUGINS_PARAMS = {
|
||||
"plugin_type": {
|
||||
"@id": "pluginType",
|
||||
@@ -132,7 +147,7 @@ def parse_params(indict, *specs):
|
||||
for param, options in iteritems(spec):
|
||||
for alias in options.get("aliases", []):
|
||||
# Replace each alias with the correct name of the parameter
|
||||
if alias in indict and alias is not param:
|
||||
if alias in indict and alias != param:
|
||||
outdict[param] = indict[alias]
|
||||
del outdict[alias]
|
||||
continue
|
||||
|
@@ -19,7 +19,7 @@ Blueprints for Senpy
|
||||
"""
|
||||
from flask import (Blueprint, request, current_app, render_template, url_for,
|
||||
jsonify)
|
||||
from .models import Error, Response, Help, Plugins, read_schema
|
||||
from .models import Error, Response, Help, Plugins, read_schema, dump_schema, Datasets
|
||||
from . import api
|
||||
from .version import __version__
|
||||
from functools import wraps
|
||||
@@ -67,9 +67,9 @@ def index():
|
||||
@api_blueprint.route('/schemas/<schema>')
|
||||
def schema(schema="definitions"):
|
||||
try:
|
||||
return jsonify(read_schema(schema))
|
||||
except Exception: # Should be FileNotFoundError, but it's missing from py2
|
||||
return Error(message="Schema not found", status=404).flask()
|
||||
return dump_schema(read_schema(schema))
|
||||
except Exception as ex: # Should be FileNotFoundError, but it's missing from py2
|
||||
return Error(message="Schema not found: {}".format(ex), status=404).flask()
|
||||
|
||||
|
||||
def basic_api(f):
|
||||
@@ -134,6 +134,19 @@ def api_root():
|
||||
return current_app.senpy.analyse(req)
|
||||
|
||||
|
||||
@api_blueprint.route('/evaluate/', methods=['POST', 'GET'])
@basic_api
def evaluate():
    """Evaluate plugins on datasets, or describe the accepted parameters.

    When the ``help`` request parameter is truthy, a ``Help`` response
    holding a copy of ``api.EVAL_PARAMS`` is returned. Otherwise the request
    parameters are parsed against ``api.EVAL_PARAMS`` and passed to the
    ``evaluate`` method of the Senpy extension attached to the current app.
    """
    if not request.parameters['help']:
        eval_params = api.parse_params(request.parameters, api.EVAL_PARAMS)
        return current_app.senpy.evaluate(eval_params)
    return Help(parameters=dict(api.EVAL_PARAMS))
|
||||
|
||||
|
||||
@api_blueprint.route('/plugins/', methods=['POST', 'GET'])
|
||||
@basic_api
|
||||
def plugins():
|
||||
@@ -150,3 +163,12 @@ def plugins():
|
||||
def plugin(plugin=None):
|
||||
sp = current_app.senpy
|
||||
return sp.get_plugin(plugin)
|
||||
|
||||
|
||||
@api_blueprint.route('/datasets/', methods=['POST', 'GET'])
@basic_api
def datasets():
    """Return a ``Datasets`` response with the values of ``senpy.datasets``."""
    available = current_app.senpy.datasets
    return Datasets(datasets=list(available.values()))
|
||||
|
@@ -12,10 +12,17 @@ class Client(object):
|
||||
def analyse(self, input, method='GET', **kwargs):
|
||||
return self.request('/', method=method, input=input, **kwargs)
|
||||
|
||||
def evaluate(self, input, method='GET', **kwargs):
    """Request the ``/evaluate`` path with ``input`` and any extra parameters.

    Returns whatever ``self.request`` returns.
    """
    request_args = dict(kwargs, method=method, input=input)
    return self.request('/evaluate', **request_args)
|
||||
|
||||
def plugins(self, *args, **kwargs):
|
||||
resp = self.request(path='/plugins').plugins
|
||||
return {p.name: p for p in resp}
|
||||
|
||||
def datasets(self):
    """Request the ``/datasets`` path and return its datasets indexed by name."""
    listing = self.request(path='/datasets').datasets
    return dict((entry.name, entry) for entry in listing)
|
||||
|
||||
def request(self, path=None, method='GET', **params):
|
||||
url = '{}{}'.format(self.endpoint, path)
|
||||
response = requests.request(method=method, url=url, params=params)
|
||||
|
@@ -6,8 +6,8 @@ from future import standard_library
|
||||
standard_library.install_aliases()
|
||||
|
||||
from . import plugins, api
|
||||
from .plugins import Plugin
|
||||
from .models import Error
|
||||
from .plugins import Plugin, evaluate
|
||||
from .models import Error, AggregatedEvaluation
|
||||
from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
|
||||
|
||||
from threading import Thread
|
||||
@@ -16,14 +16,20 @@ import os
|
||||
import copy
|
||||
import errno
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from gsitk.datasets.datasets import DatasetManager
|
||||
GSITK_AVAILABLE = True
|
||||
except ImportError:
|
||||
logger.warn('GSITK is not installed. Some functions will be unavailable.')
|
||||
GSITK_AVAILABLE = False
|
||||
|
||||
|
||||
class Senpy(object):
|
||||
""" Default Senpy extension for Flask """
|
||||
|
||||
def __init__(self,
|
||||
app=None,
|
||||
plugin_folder=".",
|
||||
@@ -171,25 +177,66 @@ class Senpy(object):
|
||||
by api.parse_call().
|
||||
"""
|
||||
logger.debug("analysing request: {}".format(request))
|
||||
try:
|
||||
entries = request.entries
|
||||
request.entries = []
|
||||
plugins = self._get_plugins(request)
|
||||
results = request
|
||||
for i in self._process_entries(entries, results, plugins):
|
||||
results.entries.append(i)
|
||||
self.convert_emotions(results)
|
||||
logger.debug("Returning analysis result: {}".format(results))
|
||||
except (Error, Exception) as ex:
|
||||
if not isinstance(ex, Error):
|
||||
msg = "Error during analysis: {} \n\t{}".format(ex,
|
||||
traceback.format_exc())
|
||||
ex = Error(message=msg, status=500)
|
||||
logger.exception('Error returning analysis result')
|
||||
raise ex
|
||||
entries = request.entries
|
||||
request.entries = []
|
||||
plugins = self._get_plugins(request)
|
||||
results = request
|
||||
for i in self._process_entries(entries, results, plugins):
|
||||
results.entries.append(i)
|
||||
self.convert_emotions(results)
|
||||
logger.debug("Returning analysis result: {}".format(results))
|
||||
results.analysis = [i['plugin'].id for i in results.analysis]
|
||||
return results
|
||||
|
||||
def _get_datasets(self, request):
    """Resolve the datasets named in ``request.parameters['dataset']``.

    The parameter is a comma-separated list of dataset names. Every name
    must be a key of ``self.datasets``; the names are then handed to
    ``DatasetManager.prepare_datasets`` and its result is returned.

    Raises:
        Error: status 404 when no datasets are available or a requested
            name is unknown; status 400 when the ``dataset`` parameter is
            missing or empty.
    """
    # ``self.datasets`` rebuilds its mapping on every access, so read it once.
    available = self.datasets
    if not available:
        raise Error(
            status=404,
            message=("No datasets found."
                     " Please verify DatasetManager"))
    requested = request.parameters.get('dataset', None)
    if not requested:
        # Without this guard a missing parameter fails with an
        # AttributeError on ``None.split`` instead of a proper API error.
        raise Error(
            status=400,
            message="No dataset specified")
    datasets_name = requested.split(',')
    for dataset in datasets_name:
        if dataset not in available:
            logger.debug(("The dataset '{}' is not valid\n"
                          "Valid datasets: {}").format(dataset,
                                                       available.keys()))
            raise Error(
                status=404,
                message="The dataset '{}' is not valid".format(dataset))
    dm = DatasetManager()
    datasets = dm.prepare_datasets(datasets_name)
    return datasets
|
||||
|
||||
@property
def datasets(self):
    """Collect the datasets reported by ``DatasetManager``, keyed by name.

    The mapping is rebuilt on every access and also stored in
    ``self._dataset_list``. Each dataset's properties receive an ``@id``
    entry equal to its key; when a key is reported more than once, only the
    first occurrence is kept.

    Raises:
        Exception: if GSITK could not be imported.
    """
    if not GSITK_AVAILABLE:
        raise Exception('GSITK is not available. Install it to use this function.')
    collected = self._dataset_list = {}
    for item in DatasetManager().get_datasets():
        for key in item:
            if key not in collected:
                properties = item[key]
                properties['@id'] = key
                collected[key] = properties
    return collected
|
||||
|
||||
def evaluate(self, params):
    """Evaluate the requested plugins on the requested datasets.

    ``params`` is attached to a fresh ``AggregatedEvaluation``, which is
    then used to resolve the datasets and the plugins. Every evaluation
    produced by the module-level ``evaluate`` helper is appended to the
    response. The parameters are removed from the response unless they
    contain ``with_parameters``.

    Raises:
        Exception: if GSITK could not be imported.
    """
    if not GSITK_AVAILABLE:
        raise Exception('GSITK is not available. Install it to use this function.')
    logger.debug("evaluating request: {}".format(params))
    response = AggregatedEvaluation()
    response.parameters = params
    chosen_datasets = self._get_datasets(response)
    chosen_plugins = self._get_plugins(response)
    for evaluation in evaluate(chosen_plugins, chosen_datasets):
        response.evaluations.append(evaluation)
    if 'with_parameters' not in response.parameters:
        del response.parameters
    logger.debug("Returning evaluation result: {}".format(response))
    return response
|
||||
|
||||
def _conversion_candidates(self, fromModel, toModel):
|
||||
candidates = self.plugins(plugin_type='emotionConversionPlugin')
|
||||
for candidate in candidates:
|
||||
|
@@ -51,6 +51,10 @@ def read_schema(schema_file, absolute=False):
|
||||
return jsonref.load(f, base_uri=schema_uri)
|
||||
|
||||
|
||||
def dump_schema(schema):
    """Serialise ``schema`` with ``jsonref.dumps`` and return the result."""
    serialised = jsonref.dumps(schema)
    return serialised
|
||||
|
||||
|
||||
def load_context(context):
|
||||
logging.debug('Loading context: {}'.format(context))
|
||||
if not context:
|
||||
@@ -199,24 +203,27 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
|
||||
context_uri=None,
|
||||
prefix=None,
|
||||
expanded=False):
|
||||
ser = self.serializable()
|
||||
|
||||
result = jsonld.compact(
|
||||
ser,
|
||||
self._context,
|
||||
options={
|
||||
'base': prefix,
|
||||
'expandContext': self._context,
|
||||
'senpy': prefix
|
||||
})
|
||||
if context_uri:
|
||||
result['@context'] = context_uri
|
||||
result = self.serializable()
|
||||
if context_uri or with_context:
|
||||
result['@context'] = context_uri or self._context
|
||||
|
||||
# result = jsonld.compact(result,
|
||||
# self._context,
|
||||
# options={
|
||||
# 'base': prefix,
|
||||
# 'expandContext': self._context,
|
||||
# 'senpy': prefix
|
||||
# })
|
||||
if expanded:
|
||||
result = jsonld.expand(
|
||||
result, options={'base': prefix,
|
||||
'expandContext': self._context})
|
||||
if not with_context:
|
||||
del result['@context']
|
||||
try:
|
||||
del result['@context']
|
||||
except KeyError:
|
||||
pass
|
||||
return result
|
||||
|
||||
def validate(self, obj=None):
|
||||
@@ -319,7 +326,10 @@ def _add_class_from_schema(*args, **kwargs):
|
||||
|
||||
|
||||
for i in [
|
||||
'aggregatedEvaluation',
|
||||
'analysis',
|
||||
'dataset',
|
||||
'datasets',
|
||||
'emotion',
|
||||
'emotionConversion',
|
||||
'emotionConversionPlugin',
|
||||
@@ -327,12 +337,17 @@ for i in [
|
||||
'emotionModel',
|
||||
'emotionPlugin',
|
||||
'emotionSet',
|
||||
'evaluation',
|
||||
'entity',
|
||||
'help',
|
||||
'metric',
|
||||
'plugin',
|
||||
'plugins',
|
||||
'response',
|
||||
'results',
|
||||
'sentimentPlugin',
|
||||
'suggestion',
|
||||
'topic',
|
||||
|
||||
]:
|
||||
_add_class_from_schema(i)
|
||||
|
@@ -18,12 +18,22 @@ import subprocess
|
||||
import importlib
|
||||
import yaml
|
||||
import threading
|
||||
import nltk
|
||||
|
||||
from .. import models, utils
|
||||
from .. import api
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
from gsitk.evaluation.evaluation import Evaluation as Eval
|
||||
from sklearn.pipeline import Pipeline
|
||||
GSITK_AVAILABLE = True
|
||||
except ImportError:
|
||||
logger.warn('GSITK is not installed. Some functions will be unavailable.')
|
||||
GSITK_AVAILABLE = False
|
||||
|
||||
|
||||
class PluginMeta(models.BaseMeta):
|
||||
_classes = {}
|
||||
@@ -38,11 +48,11 @@ class PluginMeta(models.BaseMeta):
|
||||
attrs['name'] = alias
|
||||
if 'description' not in attrs:
|
||||
doc = attrs.get('__doc__', None)
|
||||
if not doc:
|
||||
raise Exception(('Please, add a description or '
|
||||
'documentation to class {}').format(name))
|
||||
attrs['description'] = doc
|
||||
attrs['name'] = alias
|
||||
if doc:
|
||||
attrs['description'] = doc
|
||||
else:
|
||||
logger.warn(('Plugin {} does not have a description. '
|
||||
'Please, add a short summary to help other developers').format(name))
|
||||
cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs)
|
||||
|
||||
if alias in mcs._classes:
|
||||
@@ -86,6 +96,16 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
|
||||
self.is_activated = False
|
||||
self._lock = threading.Lock()
|
||||
self.data_folder = data_folder or os.getcwd()
|
||||
self._directory = os.path.abspath(os.path.dirname(inspect.getfile(self.__class__)))
|
||||
self._data_paths = ['',
|
||||
self._directory,
|
||||
os.path.join(self._directory, 'data'),
|
||||
self.data_folder]
|
||||
self._log = logging.getLogger('{}.{}'.format(__name__, self.name))
|
||||
|
||||
@property
def log(self):
    """Read-only access to the logger stored in ``self._log``."""
    return self._log
|
||||
|
||||
def validate(self):
|
||||
missing = []
|
||||
@@ -114,9 +134,9 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
|
||||
for case in test_cases:
|
||||
try:
|
||||
self.test_case(case)
|
||||
logger.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
|
||||
self.log.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
|
||||
except Exception as ex:
|
||||
logger.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
|
||||
self.log.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
|
||||
raise
|
||||
|
||||
def test_case(self, case):
|
||||
@@ -139,10 +159,22 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
|
||||
raise
|
||||
assert not should_fail
|
||||
|
||||
def open(self, fpath, *args, **kwargs):
|
||||
if not os.path.isabs(fpath):
|
||||
fpath = os.path.join(self.data_folder, fpath)
|
||||
return open(fpath, *args, **kwargs)
|
||||
def find_file(self, fname):
    """Return the first existing path for ``fname`` among ``self._data_paths``.

    Each entry of ``self._data_paths`` is joined with ``fname``, in order,
    and the first candidate that exists on disk is returned.

    Raises:
        IOError: if none of the candidate paths exists.
    """
    candidates = (os.path.join(base, fname) for base in self._data_paths)
    found = next((path for path in candidates if os.path.exists(path)), None)
    if found is None:
        raise IOError('File does not exist: {}'.format(fname))
    return found
|
||||
|
||||
def open(self, fpath, mode='r', *args, **kwargs):
    """Open a plugin file, resolving relative paths.

    For modes that may create a file (``w``, ``a`` or ``x``), a relative
    ``fpath`` is resolved against ``self.data_folder``. For any other mode
    the file is looked up with ``self.find_file``.

    Any additional positional or keyword arguments are forwarded to the
    builtin ``open``.

    Raises:
        IOError: when reading and the file cannot be found.
    """
    # 'a' and 'x' create files just like 'w'; sending them through
    # find_file would fail for files that do not exist yet.
    creates_file = any(flag in mode for flag in 'wax')
    if creates_file:
        # When writing, only use absolute paths or data_folder
        if not os.path.isabs(fpath):
            fpath = os.path.join(self.data_folder, fpath)
    else:
        fpath = self.find_file(fpath)

    return open(fpath, mode, *args, **kwargs)
|
||||
|
||||
def serve(self, debug=True, **kwargs):
|
||||
utils.easy(plugin_list=[self, ], plugin_folder=None, debug=debug, **kwargs)
|
||||
@@ -177,7 +209,7 @@ class Analysis(Plugin):
|
||||
|
||||
def analyse_entries(self, entries, parameters):
|
||||
for entry in entries:
|
||||
logger.debug('Analysing entry with plugin {}: {}'.format(self, entry))
|
||||
self.log.debug('Analysing entry with plugin {}: {}'.format(self, entry))
|
||||
results = self.analyse_entry(entry, parameters)
|
||||
if inspect.isgenerator(results):
|
||||
for result in results:
|
||||
@@ -251,7 +283,7 @@ class Box(AnalysisPlugin):
|
||||
|
||||
.. code-block::
|
||||
|
||||
entry --> input() --> predict() --> output() --> entry'
|
||||
entry --> input() --> predict_one() --> output() --> entry'
|
||||
|
||||
|
||||
In other words: their ``input`` method convers a query (entry and a set of parameters) into
|
||||
@@ -267,15 +299,33 @@ class Box(AnalysisPlugin):
|
||||
'''Transforms the results of the black box into an entry'''
|
||||
return output
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
|
||||
raise NotImplementedError('You should define the behavior of this plugin')
|
||||
|
||||
def analyse_entries(self, entries, params):
|
||||
for entry in entries:
|
||||
input = self.input(entry=entry, params=params)
|
||||
results = self.predict(input=input)
|
||||
results = self.predict_one(input=input)
|
||||
yield self.output(output=results, entry=entry, params=params)
|
||||
|
||||
def fit(self, X=None, y=None):
    """No-op fit: both arguments are ignored and ``self`` is returned."""
    return self
|
||||
|
||||
def transform(self, X):
    """Apply ``self.predict_one`` to every element of ``X`` and return a list."""
    predictions = list(map(self.predict_one, X))
    return predictions
|
||||
|
||||
def predict(self, X):
    """Batch prediction: delegates to ``self.transform(X)``."""
    return self.transform(X)
|
||||
|
||||
def fit_transform(self, X, y=None):
    """Fit on ``X``/``y`` and return ``self.transform(X)``.

    ``y`` is optional, matching ``fit`` and the scikit-learn transformer
    convention, so the method can also be called with ``X`` only.
    """
    self.fit(X, y)
    return self.transform(X)
|
||||
|
||||
def as_pipe(self):
    """Wrap this plugin in a single-step ``Pipeline`` named after the plugin.

    Raises:
        Exception: if the optional GSITK imports failed.
    """
    if not GSITK_AVAILABLE:
        # Pipeline is only bound when the optional imports succeeded;
        # fail with the same explicit message used by evaluate().
        raise Exception('GSITK is not available. Install it to use this function.')
    pipe = Pipeline([('plugin', self)])
    pipe.name = self.name
    return pipe
|
||||
|
||||
|
||||
class TextBox(Box):
|
||||
'''A black box plugin that takes only text as input'''
|
||||
@@ -348,7 +398,7 @@ class ShelfMixin(object):
|
||||
with self.open(self.shelf_file, 'rb') as p:
|
||||
self._sh = pickle.load(p)
|
||||
except (IndexError, EOFError, pickle.UnpicklingError):
|
||||
logger.warning('{} has a corrupted shelf file!'.format(self.id))
|
||||
self.log.warning('Corrupted shelf file: {}'.format(self.shelf_file))
|
||||
if not self.get('force_shelf', False):
|
||||
raise
|
||||
return self._sh
|
||||
@@ -375,32 +425,31 @@ class ShelfMixin(object):
|
||||
self._shelf_file = value
|
||||
|
||||
def save(self):
|
||||
logger.debug('saving pickle')
|
||||
self.log.debug('Saving pickle')
|
||||
if hasattr(self, '_sh') and self._sh is not None:
|
||||
with self.open(self.shelf_file, 'wb') as f:
|
||||
pickle.dump(self._sh, f)
|
||||
|
||||
|
||||
def pfilter(plugins, **kwargs):
|
||||
def pfilter(plugins, plugin_type=Analysis, **kwargs):
|
||||
""" Filter plugins by different criteria """
|
||||
if isinstance(plugins, models.Plugins):
|
||||
plugins = plugins.plugins
|
||||
elif isinstance(plugins, dict):
|
||||
plugins = plugins.values()
|
||||
ptype = kwargs.pop('plugin_type', Plugin)
|
||||
logger.debug('#' * 100)
|
||||
logger.debug('ptype {}'.format(ptype))
|
||||
if ptype:
|
||||
if isinstance(ptype, PluginMeta):
|
||||
ptype = ptype.__name__
|
||||
logger.debug('plugin_type {}'.format(plugin_type))
|
||||
if plugin_type:
|
||||
if isinstance(plugin_type, PluginMeta):
|
||||
plugin_type = plugin_type.__name__
|
||||
try:
|
||||
ptype = ptype[0].upper() + ptype[1:]
|
||||
pclass = globals()[ptype]
|
||||
plugin_type = plugin_type[0].upper() + plugin_type[1:]
|
||||
pclass = globals()[plugin_type]
|
||||
logger.debug('Class: {}'.format(pclass))
|
||||
candidates = filter(lambda x: isinstance(x, pclass),
|
||||
plugins)
|
||||
except KeyError:
|
||||
raise models.Error('{} is not a valid type'.format(ptype))
|
||||
raise models.Error('{} is not a valid type'.format(plugin_type))
|
||||
else:
|
||||
candidates = plugins
|
||||
|
||||
@@ -435,10 +484,11 @@ def _log_subprocess_output(process):
|
||||
|
||||
def install_deps(*plugins):
|
||||
installed = False
|
||||
nltk_resources = set()
|
||||
for info in plugins:
|
||||
requirements = info.get('requirements', [])
|
||||
if requirements:
|
||||
pip_args = [sys.executable, '-m', 'pip', 'install', '--use-wheel']
|
||||
pip_args = [sys.executable, '-m', 'pip', 'install']
|
||||
for req in requirements:
|
||||
pip_args.append(req)
|
||||
logger.info('Installing requirements: ' + str(requirements))
|
||||
@@ -450,6 +500,9 @@ def install_deps(*plugins):
|
||||
installed = True
|
||||
if exitcode != 0:
|
||||
raise models.Error("Dependencies not properly installed")
|
||||
nltk_resources |= set(info.get('nltk_resources', []))
|
||||
|
||||
installed |= nltk.download(list(nltk_resources))
|
||||
return installed
|
||||
|
||||
|
||||
@@ -546,12 +599,14 @@ def _instances_in_module(module):
|
||||
def _from_module_name(module, root, info=None, install=True, **kwargs):
|
||||
try:
|
||||
module = load_module(module, root)
|
||||
except ImportError:
|
||||
except (ImportError, LookupError):
|
||||
if not install or not info:
|
||||
raise
|
||||
install_deps(info)
|
||||
module = load_module(module, root)
|
||||
for plugin in _from_loaded_module(module=module, root=root, info=info, **kwargs):
|
||||
if install:
|
||||
install_deps(plugin)
|
||||
yield plugin
|
||||
|
||||
|
||||
@@ -560,3 +615,50 @@ def _from_loaded_module(module, info=None, **kwargs):
|
||||
yield cls(info=info, **kwargs)
|
||||
for instance in _instances_in_module(module):
|
||||
yield instance
|
||||
|
||||
|
||||
def evaluate(plugins, datasets, **kwargs):
    """Evaluate ``plugins`` on ``datasets`` with GSITK.

    Each plugin is converted with its ``as_pipe`` method, the GSITK
    evaluation is run and its results are converted with
    ``evaluations_to_JSONLD``. Extra keyword arguments are forwarded to
    that conversion.

    Raises:
        Exception: if GSITK could not be imported.
    """
    if not GSITK_AVAILABLE:
        raise Exception('GSITK is not available. Install it to use this function.')

    pipelines = [plugin.as_pipe() for plugin in plugins]
    evaluator = Eval(tuples=None, datasets=datasets, pipelines=pipelines)
    evaluator.evaluate()
    return evaluations_to_JSONLD(evaluator.results, **kwargs)
|
||||
|
||||
|
||||
def evaluations_to_JSONLD(results, flatten=False):
    '''
    Map the evaluation results to a JSONLD scheme

    ``results`` is iterated with ``iterrows()``; each row is read by column
    name (``Dataset``, ``Model``, optionally ``CV``, and one column per
    metric).
    NOTE(review): presumably a pandas DataFrame produced by GSITK - confirm.

    With ``flatten=True`` every evaluation gets a single ``Metric`` holding
    all metric values by name. Otherwise one ``Metric`` per metric name is
    emitted, identified as ``Metric<i>``.

    Returns a list of ``models.Evaluation`` objects, one per row.
    '''
    # Each metric is listed once: a repeated name would emit the same
    # metric twice in the non-flattened representation.
    metric_names = ['accuracy', 'precision_macro', 'recall_macro',
                    'f1_macro', 'f1_weighted', 'f1_micro']

    evaluations = []
    for _, row in results.iterrows():
        evaluation = models.Evaluation()
        if row.get('CV', True):
            evaluation['@type'] = ['StaticCV', 'Evaluation']
        evaluation.evaluatesOn = row['Dataset']
        evaluation.evaluates = row['Model']
        if flatten:
            metric = models.Metric()
            for name in metric_names:
                metric[name] = row[name]
            evaluation.metrics.append(metric)
        else:
            # We should probably discontinue this representation
            for i, name in enumerate(metric_names):
                metric = models.Metric()
                metric['@id'] = 'Metric' + str(i)
                metric['@type'] = name.capitalize()
                metric.value = row[name]
                evaluation.metrics.append(metric)
        evaluations.append(evaluation)
    return evaluations
|
||||
|
@@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin):
|
||||
from requests.
|
||||
'''
|
||||
from senpy.test import patch_requests
|
||||
expected = {"data": [{"polarity": 10}]}
|
||||
expected = {"data": [{"polarity": 4}]}
|
||||
with patch_requests(expected) as (request, response):
|
||||
super(Sentiment140Plugin, self).test(*args, **kwargs)
|
||||
assert request.called
|
||||
|
38
senpy/schemas/aggregatedEvaluation.json
Normal file
38
senpy/schemas/aggregatedEvaluation.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"allOf": [
|
||||
{"$ref": "response.json"},
|
||||
{
|
||||
"title": "AggregatedEvaluation",
|
||||
"description": "The results of the evaluation",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {
|
||||
"$ref": "context.json"
|
||||
},
|
||||
"@type": {
|
||||
"default": "AggregatedEvaluation"
|
||||
},
|
||||
"@id": {
|
||||
"description": "ID of the aggregated evaluation",
|
||||
"type": "string"
|
||||
},
|
||||
"evaluations": {
|
||||
"default": [],
|
||||
"type": "array",
|
||||
"items": {
|
||||
"anyOf": [
|
||||
{
|
||||
"$ref": "evaluation.json"
|
||||
},{
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
},
|
||||
"required": ["@id", "evaluations"]
|
||||
}
|
||||
]
|
||||
}
|
@@ -10,8 +10,10 @@
|
||||
"wna": "http://www.gsi.dit.upm.es/ontologies/wnaffect/ns#",
|
||||
"emoml": "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/emotionml/ns#",
|
||||
"xsd": "http://www.w3.org/2001/XMLSchema#",
|
||||
"fam": "http://vocab.fusepool.info/fam#",
|
||||
"topics": {
|
||||
"@id": "dc:subject"
|
||||
"@id": "nif:topic",
|
||||
"@container": "@set"
|
||||
},
|
||||
"entities": {
|
||||
"@id": "me:hasEntities"
|
||||
|
29
senpy/schemas/dataset.json
Normal file
29
senpy/schemas/dataset.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
  "$schema": "http://json-schema.org/draft-04/schema#",
  "name": "Dataset",
  "properties": {
    "@id": {
      "type": "string"
    },
    "name": {
      "type": "string"
    },
    "compression": {
      "type": "string"
    },
    "expected_bytes": {
      "type": "integer"
    },
    "filename": {
      "description": "File name of the dataset",
      "type": "string"
    },
    "url": {
      "description": "URL of the dataset",
      "type": "string"
    },
    "stats": {
    }
  },
  "required": ["@id"]
}
|
18
senpy/schemas/datasets.json
Normal file
18
senpy/schemas/datasets.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"allOf": [
|
||||
{"$ref": "response.json"},
|
||||
{
|
||||
"required": ["datasets"],
|
||||
"properties": {
|
||||
"datasets": {
|
||||
"type": "array",
|
||||
"default": [],
|
||||
"items": {
|
||||
"$ref": "dataset.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@@ -41,5 +41,20 @@
|
||||
},
|
||||
"Response": {
|
||||
"$ref": "response.json"
|
||||
},
|
||||
"AggregatedEvaluation": {
|
||||
"$ref": "aggregatedEvaluation.json"
|
||||
},
|
||||
"Evaluation": {
|
||||
"$ref": "evaluation.json"
|
||||
},
|
||||
"Metric": {
|
||||
"$ref": "metric.json"
|
||||
},
|
||||
"Dataset": {
|
||||
"$ref": "dataset.json"
|
||||
},
|
||||
"Datasets": {
|
||||
"$ref": "datasets.json"
|
||||
}
|
||||
}
|
||||
|
28
senpy/schemas/evaluation.json
Normal file
28
senpy/schemas/evaluation.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"name": "Evaluation",
|
||||
"properties": {
|
||||
"@id": {
|
||||
"type": "string"
|
||||
},
|
||||
"@type": {
|
||||
"type": "array",
|
||||
"default": "Evaluation"
|
||||
|
||||
},
|
||||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {"$ref": "metric.json" },
|
||||
"default": []
|
||||
},
|
||||
"evaluatesOn": {
|
||||
"description": "Name of the dataset evaluated ",
|
||||
"type": "string"
|
||||
},
|
||||
"evaluates": {
|
||||
"description": "Classifier or plugin evaluated",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["@id", "metrics"]
|
||||
}
|
24
senpy/schemas/metric.json
Normal file
24
senpy/schemas/metric.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||
"properties": {
|
||||
"@id": {
|
||||
"type": "string"
|
||||
},
|
||||
"@type": {
|
||||
"type": "string"
|
||||
},
|
||||
"maxValue": {
|
||||
"type": "number"
|
||||
},
|
||||
"minValue": {
|
||||
"type": "number"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
},
|
||||
"deviation": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["@id"]
|
||||
}
|
@@ -33,6 +33,10 @@ function get_plugins(response){
|
||||
plugins = response.plugins;
|
||||
}
|
||||
|
||||
// Store the dataset list carried by an /api/datasets/ response in the
// page-wide `datasets` variable, mirroring what get_plugins does for plugins.
function get_datasets(response){
    datasets = response["datasets"];
}
|
||||
|
||||
function group_plugins(){
|
||||
for (r in plugins){
|
||||
ptype = plugins[r]['@type'];
|
||||
@@ -77,7 +81,10 @@ function draw_plugins_selection(){
|
||||
}
|
||||
}
|
||||
html += "</optgroup>"
|
||||
document.getElementById('plugins').innerHTML = html;
|
||||
// Two elements with plugin class
|
||||
// One from the evaluate tab and another one from the analyse tab
|
||||
document.getElementsByClassName('plugin')[0].innerHTML = html;
|
||||
document.getElementsByClassName('plugin')[1].innerHTML = html;
|
||||
}
|
||||
|
||||
function draw_plugins_list(){
|
||||
@@ -98,15 +105,29 @@ function draw_plugins_list(){
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Render one checkbox per known dataset inside the #datasets container.
 *
 * Reads the page-wide `datasets` collection and uses each entry's "@id" both
 * as the checkbox value and as its visible label. Every checkbox carries the
 * "checks-datasets" class so the selection can be collected later.
 */
function draw_datasets(){
    var container = document.getElementById("datasets");
    var fragment = document.createDocumentFragment();

    // Everything is kept in locals: the previous version leaked `html`,
    // `repeated_html` and the loop variable as implicit globals.
    for (var key in datasets){
        if (!Object.prototype.hasOwnProperty.call(datasets, key)){
            continue;
        }
        var id = datasets[key]["@id"];

        var checkbox = document.createElement("input");
        checkbox.className = "checks-datasets";
        checkbox.type = "checkbox";
        checkbox.value = id;
        fragment.appendChild(checkbox);

        // The id is added as a text node so it is never parsed as markup.
        fragment.appendChild(document.createTextNode(String(id)));
        fragment.appendChild(document.createElement("br"));
    }

    // Replace whatever was rendered before in a single step.
    container.innerHTML = "";
    container.appendChild(fragment);
}
|
||||
|
||||
$(document).ready(function() {
|
||||
var response = JSON.parse($.ajax({type: "GET", url: "/api/plugins/" , async: false}).responseText);
|
||||
defaultPlugin= JSON.parse($.ajax({type: "GET", url: "/api/plugins/default" , async: false}).responseText);
|
||||
var response2 = JSON.parse($.ajax({type: "GET", url: "/api/datasets/" , async: false}).responseText);
|
||||
|
||||
get_plugins(response);
|
||||
get_default_parameters();
|
||||
get_datasets(response2);
|
||||
|
||||
draw_plugins_list();
|
||||
draw_plugins_selection();
|
||||
draw_parameters();
|
||||
draw_datasets();
|
||||
|
||||
$(window).on('hashchange', hashchanged);
|
||||
hashchanged();
|
||||
@@ -129,7 +150,7 @@ function draw_default_parameters(){
|
||||
}
|
||||
|
||||
function draw_extra_parameters(){
|
||||
var plugin = document.getElementById("plugins").options[document.getElementById("plugins").selectedIndex].value;
|
||||
var plugin = document.getElementsByClassName('plugin')[0].options[document.getElementsByClassName('plugin')[0].selectedIndex].value;
|
||||
get_parameters();
|
||||
|
||||
var extra_params = document.getElementById("extra_params");
|
||||
@@ -240,13 +261,16 @@ function add_param(key, value){
|
||||
return "&"+key+"="+value;
|
||||
}
|
||||
|
||||
|
||||
function load_JSON(){
|
||||
url = "/api";
|
||||
var container = document.getElementById('results');
|
||||
var rawcontainer = document.getElementById("jsonraw");
|
||||
rawcontainer.innerHTML = '';
|
||||
container.innerHTML = '';
|
||||
var plugin = document.getElementById("plugins").options[document.getElementById("plugins").selectedIndex].value;
|
||||
|
||||
var plugin = document.getElementsByClassName("plugin")[0].options[document.getElementsByClassName("plugin")[0].selectedIndex].value;
|
||||
|
||||
var input = encodeURIComponent(document.getElementById("input").value);
|
||||
url += "?algo="+plugin+"&i="+input
|
||||
|
||||
@@ -278,3 +302,85 @@ function load_JSON(){
|
||||
// location.hash = 'raw';
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Collect the values of all ticked "checks-datasets" checkboxes.
 *
 * @returns {string} The selected values joined with commas ("" when nothing
 *                   is ticked).
 *
 * NOTE: for backward compatibility the result is also stored in the
 * page-wide `datasets` variable, which existing callers read. Be aware that
 * this replaces whatever `datasets` held before with a plain string.
 */
function get_datasets_from_checkbox(){
    var checks = document.getElementsByClassName("checks-datasets");
    var selected = [];

    for (var i = 0; i < checks.length; i++){
        if (checks[i].checked){
            selected.push(checks[i].value);
        }
    }

    // join() avoids the trailing-comma-then-slice dance of manual concatenation.
    datasets = selected.join(",");
    return datasets;
}
|
||||
|
||||
|
||||
/**
 * Build the <tbody> of the evaluation table.
 *
 * Each evaluation becomes one row: the evaluated plugin (`evaluates`), the
 * dataset (`evaluatesOn`), and then one cell per entry of `metrics` holding
 * its `value` rounded to at most four decimals.
 *
 * @param {Array} evaluations Evaluation objects taken from the API response.
 * @returns {HTMLElement} A detached <tbody> element ready to be inserted.
 */
function create_body_metrics(evaluations){
    var new_tbody = document.createElement('tbody');

    // Cells are created through the DOM and filled with text nodes so that
    // strings coming from the server are never interpreted as markup.
    function add_cell(row, content){
        var cell = document.createElement('th');
        cell.appendChild(document.createTextNode(String(content)));
        row.appendChild(cell);
    }

    // `eval` is no longer used as an identifier (it is illegal in strict mode).
    for (var e in evaluations){
        if (!Object.prototype.hasOwnProperty.call(evaluations, e)){
            continue;
        }
        var evaluation = evaluations[e];
        var row = document.createElement('tr');

        add_cell(row, evaluation.evaluates);
        add_cell(row, evaluation.evaluatesOn);

        var metrics = evaluation.metrics;
        for (var m in metrics){
            if (!Object.prototype.hasOwnProperty.call(metrics, m)){
                continue;
            }
            // toFixed rounds, parseFloat then drops trailing zeros.
            add_cell(row, parseFloat(metrics[m].value.toFixed(4)));
        }

        new_tbody.appendChild(row);
    }

    return new_tbody;
}
|
||||
|
||||
/**
 * Run an evaluation for the plugin and datasets chosen in the "Evaluate
 * Plugins" tab and display the outcome.
 *
 * Issues a synchronous GET to /api/evaluate, shows the raw response and the
 * request URL, and, when the response parses as JSON, fills the metrics
 * table and the JSON viewer and switches to the table tab. If anything in
 * that step fails the raw tab is shown instead.
 */
function evaluate_JSON(){
    var container = document.getElementById('results_eval');
    var rawcontainer = document.getElementById('jsonraw_eval');
    var table = document.getElementById("eval_table");

    rawcontainer.innerHTML = "";
    container.innerHTML = "";

    // Read the select that belongs to this tab. The first element with the
    // "plugin" class is the one in the "Test it" tab, so indexing the class
    // list with [0] evaluated the wrong plugin.
    var select = document.getElementById("plugins-eval");
    var plugin = select.options[select.selectedIndex].value;

    // Leaves the comma-separated selection in the page-wide `datasets`.
    get_datasets_from_checkbox();

    // Query values are encoded so names with reserved characters survive.
    var url = "/api/evaluate" +
              "?algo=" + encodeURIComponent(plugin) +
              "&dataset=" + encodeURIComponent(datasets);

    var response = $.ajax({type: "GET", url: url , async: false, dataType: 'json'}).responseText;
    rawcontainer.innerHTML = replaceURLWithHTMLLinks(response);

    // Build the link through the DOM instead of concatenating into an
    // attribute, so quotes in the URL cannot break out of the href.
    var request_holder = document.getElementById("input_request_eval");
    var link = document.createElement("a");
    link.href = url;
    link.appendChild(document.createTextNode(url));
    request_holder.innerHTML = "";
    request_holder.appendChild(link);

    document.getElementById("evaluate-div").style.display = 'block';

    try {
        response = JSON.parse(response);
        var options = {
            mode: 'view'
        };

        // A single evaluation may arrive as a bare object: normalise to a list.
        if (!(Array.isArray(response.evaluations))){
            response.evaluations = [response.evaluations];
        }

        var new_tbody = create_body_metrics(response.evaluations);
        // The table's last child is its <tbody>; swap it for the fresh one.
        table.replaceChild(new_tbody, table.lastElementChild);

        var editor = new JSONEditor(container, options, response);
        editor.expandAll();
        $('#evaluate-div a[href="#evaluate-table"]').click();
    }
    catch(err){
        // Any failure above lands here, not only JSON decoding; log the
        // actual error as well so it can be diagnosed.
        console.log("Error decoding JSON (got turtle?)", err);
        $('#evaluate-div a[href="#evaluate-raw"]').click();
    }
}
|
@@ -32,6 +32,8 @@
|
||||
<ul class="nav nav-tabs" role="tablist">
|
||||
<li role="presentation" ><a class="active" href="#about">About</a></li>
|
||||
<li role="presentation" class="active"><a class="active" href="#test">Test it</a></li>
|
||||
<li role="presentation"><a class="active" href="#evaluate">Evaluate Plugins</a></li>
|
||||
|
||||
</ul>
|
||||
|
||||
<div class="tab-content">
|
||||
@@ -54,6 +56,7 @@
|
||||
<ul>
|
||||
<li>List all available plugins: <a href="/api/plugins">/api/plugins</a></li>
|
||||
<li>Get information about the default plugin: <a href="/api/plugins/default">/api/plugins/default</a></li>
|
||||
<li>List all available datasets: <a href="/api/datasets">/api/datasets</a></li>
|
||||
<li>Download the JSON-LD context used: <a href="/api/contexts/Results.jsonld">/api/contexts/Results.jsonld</a></li>
|
||||
</ul>
|
||||
|
||||
@@ -95,7 +98,7 @@ I cannot believe it!</textarea>
|
||||
</div>
|
||||
<div>
|
||||
<label>Select the plugin:</label>
|
||||
<select id="plugins" name="plugins" onchange="draw_extra_parameters()">
|
||||
<select id="plugins" name="plugins" class=plugin onchange="draw_extra_parameters()">
|
||||
</select>
|
||||
</div>
|
||||
<!-- PARAMETERS -->
|
||||
@@ -151,6 +154,70 @@ I cannot believe it!</textarea>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane" id="evaluate">
|
||||
<div class="well">
|
||||
<form id="form" class="container" onsubmit="return getPlugins();" accept-charset="utf-8">
|
||||
<div>
|
||||
<label>Select the plugin:</label>
|
||||
<select id="plugins-eval" name="plugins-eval" class=plugin onchange="draw_extra_parameters()">
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label>Select the datasets:</label>
|
||||
<div id="datasets" name="datasets" >
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<a id="preview" class="btn btn-lg btn-primary" onclick="evaluate_JSON()">Evaluate Plugin!</a>
|
||||
<!--<button id="visualise" name="type" type="button">Visualise!</button>-->
|
||||
</form>
|
||||
</div>
|
||||
<span id="input_request_eval"></span>
|
||||
<div id="evaluate-div">
|
||||
<ul class="nav nav-tabs" role="tablist">
|
||||
<li role="presentation" class="active"><a data-toggle="tab" class="active" href="#evaluate-viewer">Viewer</a></li>
|
||||
<li role="presentation"><a data-toggle="tab" class="active" href="#evaluate-raw">Raw</a></li>
|
||||
<li role="presentation"><a data-toggle="tab" class="active" href="#evaluate-table">Table</a></li>
|
||||
</ul>
|
||||
<div class="tab-content" id="evaluate-container">
|
||||
|
||||
<div class="tab-pane active" id="evaluate-viewer">
|
||||
<div id="content">
|
||||
<pre id="results_eval" class="results_eval"></pre>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tab-pane" id="evaluate-raw">
|
||||
<div id="content">
|
||||
<pre id="jsonraw_eval" class="results_eval"></pre>
|
||||
</div>
|
||||
</div>
|
||||
<div class="tab-pane" id="evaluate-table">
|
||||
<table id="eval_table" class="table table-condensed">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Plugin</th>
|
||||
<th>Dataset</th>
|
||||
<th>Accuracy</th>
|
||||
<th>Precision_macro</th>
|
||||
<th>Recall_macro</th>
|
||||
<th>F1_macro</th>
|
||||
<th>F1_weighted</th>
|
||||
<th>F1_micro</th>
|
||||
<th>F1</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<a href="http://www.gsi.dit.upm.es" target="_blank"><img class="center-block" src="static/img/gsi.png"/> </a>
|
||||
|
||||
</div>
|
||||
|
@@ -3,6 +3,8 @@ try:
|
||||
except ImportError:
|
||||
from mock import patch, MagicMock
|
||||
|
||||
from past.builtins import basestring
|
||||
|
||||
|
||||
import json
|
||||
from contextlib import contextmanager
|
||||
@@ -15,13 +17,17 @@ def patch_requests(value, code=200):
|
||||
success = MagicMock()
|
||||
if isinstance(value, BaseModel):
|
||||
value = value.jsonld()
|
||||
data = json.dumps(value)
|
||||
if not isinstance(value, basestring):
|
||||
data = json.dumps(value)
|
||||
else:
|
||||
data = value
|
||||
|
||||
success.json.return_value = value
|
||||
success.data.return_value = data
|
||||
success.status_code = code
|
||||
|
||||
success.content = json.dumps(value)
|
||||
success.status_code = code
|
||||
success.content = data
|
||||
success.text = data
|
||||
|
||||
method_mocker = MagicMock()
|
||||
method_mocker.return_value = success
|
||||
with patch.multiple('requests', request=method_mocker,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
from . import models, __version__
|
||||
from collections import MutableMapping
|
||||
import pprint
|
||||
import pdb
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -32,8 +33,8 @@ def check_template(indict, template):
|
||||
if indict != template:
|
||||
raise models.Error(('Differences found.\n'
|
||||
'\tExpected: {}\n'
|
||||
'\tFound: {}').format(pprint.pformat(indict),
|
||||
pprint.pformat(template)))
|
||||
'\tFound: {}').format(pprint.pformat(template),
|
||||
pprint.pformat(indict)))
|
||||
|
||||
|
||||
def convert_dictionary(original, mappings):
|
||||
@@ -67,17 +68,23 @@ def easy_load(app=None, plugin_list=None, plugin_folder=None, **kwargs):
|
||||
return sp, app
|
||||
|
||||
|
||||
def easy_test(plugin_list=None):
|
||||
def easy_test(plugin_list=None, debug=True):
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
if not plugin_list:
|
||||
from . import plugins
|
||||
import __main__
|
||||
plugin_list = plugins.from_module(__main__)
|
||||
for plug in plugin_list:
|
||||
plug.test()
|
||||
logger.info('The tests for {} passed!'.format(plug.name))
|
||||
logger.info('All tests passed!')
|
||||
try:
|
||||
if not plugin_list:
|
||||
import __main__
|
||||
logger.info('Loading classes from {}'.format(__main__))
|
||||
from . import plugins
|
||||
plugin_list = plugins.from_module(__main__)
|
||||
for plug in plugin_list:
|
||||
plug.test()
|
||||
plug.log.info('My tests passed!')
|
||||
logger.info('All tests passed!')
|
||||
except Exception:
|
||||
if not debug:
|
||||
raise
|
||||
pdb.post_mortem()
|
||||
|
||||
|
||||
def easy(host='0.0.0.0', port=5000, debug=True, **kwargs):
|
||||
|
27
setup.py
27
setup.py
@@ -1,23 +1,20 @@
|
||||
import pip
|
||||
from setuptools import setup
|
||||
# parse_requirements() returns generator of pip.req.InstallRequirement objects
|
||||
from pip.req import parse_requirements
|
||||
|
||||
with open('senpy/VERSION') as f:
|
||||
__version__ = f.read().strip()
|
||||
assert __version__
|
||||
|
||||
try:
|
||||
install_reqs = parse_requirements(
|
||||
"requirements.txt", session=pip.download.PipSession())
|
||||
test_reqs = parse_requirements(
|
||||
"test-requirements.txt", session=pip.download.PipSession())
|
||||
except AttributeError:
|
||||
install_reqs = parse_requirements("requirements.txt")
|
||||
test_reqs = parse_requirements("test-requirements.txt")
|
||||
|
||||
install_reqs = [str(ir.req) for ir in install_reqs]
|
||||
test_reqs = [str(ir.req) for ir in test_reqs]
|
||||
def parse_requirements(filename):
    """Load requirement specifiers from a pip requirements file.

    Blank lines and comments (whole-line or trailing ``#``) are dropped.
    Lines that are pip options rather than requirements (anything starting
    with ``-``, e.g. ``-r other.txt`` or ``--index-url ...``) are skipped,
    because setuptools rejects them in ``install_requires`` and friends.

    :param filename: path of the requirements file to read.
    :return: list of requirement strings, in file order.
    """
    requirements = []
    with open(filename, 'r') as f:
        for raw in f:
            # pip starts a trailing comment at whitespace followed by '#';
            # a '#' glued to the text (e.g. '#egg=name') is left untouched.
            line = raw.split(' #', 1)[0].strip()
            if not line or line.startswith(('#', '-')):
                continue
            requirements.append(line)
    return requirements
|
||||
|
||||
|
||||
install_reqs = parse_requirements("requirements.txt")
|
||||
test_reqs = parse_requirements("test-requirements.txt")
|
||||
extra_reqs = parse_requirements("extra-requirements.txt")
|
||||
|
||||
|
||||
setup(
|
||||
@@ -38,9 +35,7 @@ setup(
|
||||
tests_require=test_reqs,
|
||||
setup_requires=['pytest-runner', ],
|
||||
extras_require={
|
||||
'evaluation': [
|
||||
'gsitk'
|
||||
]
|
||||
'evaluation': extra_reqs
|
||||
},
|
||||
include_package_data=True,
|
||||
entry_points={
|
||||
|
@@ -32,7 +32,7 @@ class APITest(TestCase):
|
||||
query = {}
|
||||
plug_params = {
|
||||
'hello': {
|
||||
'aliases': ['hello', 'hiya'],
|
||||
'aliases': ['hiya', 'hello'],
|
||||
'required': True
|
||||
}
|
||||
}
|
||||
@@ -48,6 +48,26 @@ class APITest(TestCase):
|
||||
assert 'hello' in p
|
||||
assert p['hello'] == 'dlrow'
|
||||
|
||||
def test_parameters2(self):
    '''A parameter given by alias or by its own name ends up under the same key.'''
    spec = {
        "apikey": {
            "aliases": ["apikey", "meaningcloud-key"],
            "required": True
        }
    }
    by_alias = parse_params({'meaningcloud-key': 5}, spec)
    by_name = parse_params({'apikey': 25}, spec)
    # 25 / 5: both lookups must expose the value under 'apikey'
    assert (by_name['apikey'] / by_alias['apikey']) == 5
|
||||
|
||||
def test_default(self):
|
||||
spec = {
|
||||
'hello': {
|
||||
|
@@ -47,7 +47,7 @@ class ExtensionsTest(TestCase):
|
||||
|
||||
def test_add_delete(self):
|
||||
'''Should be able to add and delete new plugins. '''
|
||||
new = plugins.Plugin(name='new', description='new', version=0)
|
||||
new = plugins.Analysis(name='new', description='new', version=0)
|
||||
self.senpy.add_plugin(new)
|
||||
assert new in self.senpy.plugins()
|
||||
self.senpy.delete_plugin(new)
|
||||
@@ -182,8 +182,7 @@ class ExtensionsTest(TestCase):
|
||||
analyse(self.senpy, input='nothing', algorithm='MOCK')
|
||||
assert False
|
||||
except Exception as ex:
|
||||
assert 'generic exception on analysis' in ex['message']
|
||||
assert ex['status'] == 500
|
||||
assert 'generic exception on analysis' in str(ex)
|
||||
|
||||
def test_filtering(self):
|
||||
""" Filtering plugins """
|
||||
|
@@ -9,6 +9,7 @@ from senpy.models import (Emotion,
|
||||
EmotionAnalysis,
|
||||
EmotionSet,
|
||||
Entry,
|
||||
Entity,
|
||||
Error,
|
||||
Results,
|
||||
Sentiment,
|
||||
@@ -207,3 +208,14 @@ class ModelsTest(TestCase):
|
||||
recovered = from_string(string)
|
||||
assert isinstance(recovered, Results)
|
||||
assert isinstance(recovered.entries[0], Entry)
|
||||
|
||||
def test_serializable(self):
    '''Nested entries and entities must survive serializable().'''
    entry = Entry()
    entry.entities.append(Entity())
    results = Results()
    results.entries.append(entry)

    serialized = results.serializable()

    assert serialized
    assert serialized['entries']
    assert serialized['entries'][0]['entities']
|
||||
|
@@ -1,15 +1,18 @@
|
||||
#!/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import pickle
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
from unittest import TestCase
|
||||
from unittest import TestCase, skipIf
|
||||
from senpy.models import Results, Entry, EmotionSet, Emotion, Plugins
|
||||
from senpy import plugins
|
||||
from senpy.plugins.conversion.emotion.centroids import CentroidConversion
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class ShelfDummyPlugin(plugins.SentimentPlugin, plugins.ShelfMixin):
|
||||
'''Dummy plugin for tests.'''
|
||||
@@ -212,7 +215,7 @@ class PluginsTest(TestCase):
|
||||
def input(self, entry, **kwargs):
|
||||
return entry.text
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
|
||||
return 'SIGN' in input
|
||||
|
||||
def output(self, output, entry, **kwargs):
|
||||
@@ -242,7 +245,7 @@ class PluginsTest(TestCase):
|
||||
|
||||
mappings = {'happy': 'marl:Positive', 'sad': 'marl:Negative'}
|
||||
|
||||
def predict(self, input, **kwargs):
|
||||
def predict_one(self, input, **kwargs):
|
||||
return 'happy' if ':)' in input else 'sad'
|
||||
|
||||
test_cases = [
|
||||
@@ -309,6 +312,42 @@ class PluginsTest(TestCase):
|
||||
res = c._backwards_conversion(e)
|
||||
assert res["onyx:hasEmotionCategory"] == "c2"
|
||||
|
||||
@skipIf(sys.version_info < (3, 0),
        reason="requires Python3")
def test_evaluation(self):
    '''A constant predictor scores ~0.5 accuracy on balanced data, an exact one ~1.'''
    rows = [["good", 1] for _ in range(50)]
    rows.extend(["bad", 0] for _ in range(50))
    dataset = pd.DataFrame(rows, columns=['text', 'polarity'])

    class DummyPlugin(plugins.TextBox):
        description = 'Plugin to test evaluation'
        version = 0

        def predict_one(self, input):
            return 0

    class SmartPlugin(plugins.TextBox):
        description = 'Plugin to test evaluation'
        version = 0

        def predict_one(self, input):
            return 1 if input == 'good' else 0

    def first_metrics(plugin):
        # Evaluate a single plugin on the toy dataset and take its first metric set
        results = plugins.evaluate(datasets={'testdata': dataset}, plugins=[plugin], flatten=True)
        return results[0].metrics[0]

    dumb_metrics = first_metrics(DummyPlugin())
    assert abs(dumb_metrics['accuracy'] - 0.5) < 0.01

    smart_metrics = first_metrics(SmartPlugin())
    assert abs(smart_metrics['accuracy'] - 1) < 0.01
|
||||
|
||||
|
||||
def make_mini_test(fpath):
|
||||
def mini_test(self):
|
||||
|
@@ -8,6 +8,8 @@ from fnmatch import fnmatch
|
||||
|
||||
from jsonschema import RefResolver, Draft4Validator, ValidationError
|
||||
|
||||
from senpy.models import read_schema
|
||||
|
||||
root_path = path.join(path.dirname(path.realpath(__file__)), '..')
|
||||
schema_folder = path.join(root_path, 'senpy', 'schemas')
|
||||
examples_path = path.join(root_path, 'docs', 'examples')
|
||||
@@ -15,7 +17,8 @@ bad_examples_path = path.join(root_path, 'docs', 'bad-examples')
|
||||
|
||||
|
||||
class JSONSchemaTests(unittest.TestCase):
|
||||
pass
|
||||
def test_definitions(self):
|
||||
read_schema('definitions.json')
|
||||
|
||||
|
||||
def do_create_(jsfile, success):
|
||||
|
32
tests/test_test.py
Normal file
32
tests/test_test.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from unittest import TestCase
|
||||
|
||||
import requests
|
||||
import json
|
||||
from senpy.test import patch_requests
|
||||
from senpy.models import Results
|
||||
|
||||
|
||||
class TestTest(TestCase):
    '''Checks for the patch_requests testing helper.'''

    URL = 'http://example.com'

    def test_patch_text(self):
        '''A plain string is served verbatim as both text and content.'''
        with patch_requests('hello'):
            response = requests.get(self.URL)
            assert response.text == 'hello'
            assert response.content == 'hello'

    def test_patch_json(self):
        '''A model is served as its JSON-LD dump and is readable via json().'''
        model = Results()
        with patch_requests(model):
            response = requests.get(self.URL)
            assert response.content == json.dumps(model.jsonld())
            payload = response.json()
            assert payload
            assert payload['@type'] == model['@type']

    def test_patch_dict(self):
        '''A dictionary is served as its JSON dump and is readable via json().'''
        body = {'nothing': 'new'}
        with patch_requests(body):
            response = requests.get(self.URL)
            assert response.content == json.dumps(body)
            payload = response.json()
            assert payload
            assert payload['nothing'] == 'new'
|
Reference in New Issue
Block a user