mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-21 15:52:28 +00:00
Add evaluation tests
This commit is contained in:
parent
5e2ada1654
commit
c0aa7ddc3c
@ -18,7 +18,7 @@ class BasicBox(SentimentBox):
|
||||
'default': 'marl:Neutral'
|
||||
}
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
    '''Get the polarity of ``input`` and translate it through ``self.mappings``.

    The value returned by ``basic.get_polarity`` is looked up in
    ``self.mappings``; when it is not a key there, the ``'default'`` entry
    is returned instead.
    '''
    polarity = basic.get_polarity(input)
    fallback = self.mappings['default']
    return self.mappings.get(polarity, fallback)
|
||||
|
||||
|
@ -18,7 +18,7 @@ class Basic(MappingMixin, SentimentBox):
|
||||
'default': 'marl:Neutral'
|
||||
}
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
    '''Return whatever ``basic.get_polarity`` reports for ``input``, unmodified.'''
    polarity = basic.get_polarity(input)
    return polarity
|
||||
|
||||
test_cases = [{
|
||||
|
@ -18,7 +18,7 @@ class PipelineSentiment(MappingMixin, SentimentBox):
|
||||
-1: 'marl:Negative'
|
||||
}
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
    '''Predict for a single input using ``pipeline``.

    ``pipeline.predict`` is called with a one-element list, so the only
    prediction it returns is unwrapped before being handed back.
    '''
    batch = [input]
    predictions = pipeline.predict(batch)
    return predictions[0]
|
||||
|
||||
test_cases = [
|
||||
|
@ -6,7 +6,7 @@ from future import standard_library
|
||||
standard_library.install_aliases()
|
||||
|
||||
from . import plugins, api
|
||||
from .plugins import Plugin
|
||||
from .plugins import Plugin, evaluate
|
||||
from .models import Error, AggregatedEvaluation
|
||||
from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
|
||||
|
||||
@ -17,7 +17,6 @@ import copy
|
||||
import errno
|
||||
import logging
|
||||
|
||||
#Correct this import for managing the datasets
|
||||
from gsitk.datasets.datasets import DatasetManager
|
||||
|
||||
|
||||
@ -197,13 +196,13 @@ class Senpy(object):
|
||||
if dataset not in self.datasets:
|
||||
logger.debug(("The dataset '{}' is not valid\n"
|
||||
"Valid datasets: {}").format(dataset,
|
||||
self.datasets.keys()))
|
||||
self.datasets.keys()))
|
||||
raise Error(
|
||||
status=404,
|
||||
message="The dataset '{}' is not valid".format(dataset))
|
||||
datasets = self._dm.prepare_datasets(datasets_name)
|
||||
return datasets
|
||||
|
||||
|
||||
@property
|
||||
def datasets(self):
|
||||
self._dataset_list = {}
|
||||
@ -219,29 +218,17 @@ class Senpy(object):
|
||||
def evaluate(self, params):
    '''Evaluate the requested plugins on the requested datasets.

    An ``AggregatedEvaluation`` is created with ``params`` attached; the
    datasets and plugins are resolved from it through ``_get_datasets`` and
    ``_get_plugins``, and every evaluation produced by the module-level
    ``evaluate`` helper is appended to ``results.evaluations``.

    The ``parameters`` attribute is removed from the result unless the
    request contains the ``with_parameters`` key.

    :param params: parameters of the evaluation request.
    :return: the populated ``AggregatedEvaluation``.
    '''
    logger.debug("evaluating request: {}".format(params))
    results = AggregatedEvaluation()
    results.parameters = params
    datasets = self._get_datasets(results)
    plugins = self._get_plugins(results)
    # ``evaluation`` rather than ``eval``: avoid shadowing the builtin.
    for evaluation in evaluate(plugins, datasets):
        results.evaluations.append(evaluation)
    if 'with_parameters' not in results.parameters:
        del results.parameters
    logger.debug("Returning evaluation result: {}".format(results))
    return results
|
||||
|
||||
|
||||
def _conversion_candidates(self, fromModel, toModel):
|
||||
candidates = self.plugins(plugin_type='emotionConversionPlugin')
|
||||
for candidate in candidates:
|
||||
|
@ -25,6 +25,8 @@ from .. import api
|
||||
from gsitk.evaluation.evaluation import Evaluation as Eval
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -254,7 +256,7 @@ class Box(AnalysisPlugin):
|
||||
|
||||
.. code-block::
|
||||
|
||||
entry --> input() --> predict() --> output() --> entry'
|
||||
entry --> input() --> predict_one() --> output() --> entry'
|
||||
|
||||
|
||||
In other words: their ``input`` method converts a query (entry and a set of parameters) into
|
||||
@ -270,15 +272,33 @@ class Box(AnalysisPlugin):
|
||||
'''Transforms the results of the black box into an entry'''
|
||||
return output
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
    '''Produce the prediction for a single input.

    This base implementation always raises ``NotImplementedError``.
    '''
    message = 'You should define the behavior of this plugin'
    raise NotImplementedError(message)
|
||||
|
||||
def analyse_entries(self, entries, params):
    '''Lazily analyse ``entries``, yielding one processed entry per input entry.

    Each entry goes through ``input()`` --> ``predict_one()`` --> ``output()``.

    :param entries: iterable of entries to analyse.
    :param params: parameters passed on to ``input`` and ``output``.
    '''
    for entry in entries:
        features = self.input(entry=entry, params=params)
        # Single inputs go through ``predict_one``; ``predict`` is the batch
        # method that takes ``X`` and must not be called with ``input=``.
        prediction = self.predict_one(input=features)
        yield self.output(output=prediction, entry=entry, params=params)
|
||||
|
||||
def fit(self, X=None, y=None):
    '''Do nothing and return the plugin itself; ``X`` and ``y`` are ignored.'''
    fitted = self
    return fitted
|
||||
|
||||
def transform(self, X):
    '''Apply ``predict_one`` to each element of ``X``.

    :return: the predictions, in input order, as a NumPy array.
    '''
    predictions = []
    for sample in X:
        predictions.append(self.predict_one(sample))
    return np.array(predictions)
|
||||
|
||||
def predict(self, X):
    '''Predict for the batch ``X`` by delegating to ``transform``.'''
    predictions = self.transform(X)
    return predictions
|
||||
|
||||
def fit_transform(self, X, y=None):
    '''Call ``fit(X, y)`` and then return ``transform(X)``.

    :param X: the inputs to fit on and transform.
    :param y: optional targets, forwarded to ``fit``. Defaults to ``None``
        so the signature agrees with ``fit``, which also makes ``y``
        optional.
    :return: the result of ``self.transform(X)``.
    '''
    self.fit(X, y)
    return self.transform(X)
|
||||
|
||||
def as_pipe(self):
    '''Wrap this plugin in a ``Pipeline`` whose only step is the plugin itself.

    The step is registered as ``'plugin'`` and the pipeline's ``name``
    attribute is set to the plugin's ``name``.
    '''
    steps = [('plugin', self)]
    wrapped = Pipeline(steps)
    wrapped.name = self.name
    return wrapped
|
||||
|
||||
|
||||
class TextBox(Box):
|
||||
'''A black box plugin that takes only text as input'''
|
||||
@ -323,48 +343,6 @@ class EmotionBox(TextBox, EmotionPlugin):
|
||||
return entry
|
||||
|
||||
|
||||
class EvaluationBox(object):
    '''
    A box plugin that implements evaluation. The pipeline to evaluate is
    read from ``self._pipeline``.
    '''

    def score(self, datasets):
        '''Evaluate ``self._pipeline`` on ``datasets`` and return the
        results converted by ``_evaluations_toJSONLD``.'''
        ev = Eval(tuples=None,
                  datasets=datasets,
                  pipelines=[self._pipeline])
        ev.evaluate()
        return self._evaluations_toJSONLD(ev.results)

    def _evaluations_toJSONLD(self, results):
        '''
        Map the evaluation results to a JSONLD scheme

        ``results`` is iterated with ``iterrows()``; every row is read for
        'CV', 'Dataset', 'Model' and each of the metric names below. One
        ``models.Evaluation`` is produced per row, holding one
        ``models.Metric`` per metric name.
        '''
        # Every metric is listed exactly once, so no duplicated entry is
        # emitted for 'f1_macro'.
        metric_names = ['accuracy', 'precision_macro', 'recall_macro',
                        'f1_macro', 'f1_weighted', 'f1_micro']

        evaluations = []
        for _, row in results.iterrows():
            evaluation = models.Evaluation()
            # Explicit comparison kept on purpose: only a value equal to
            # False (not None or other falsy values) triggers the tag.
            if row['CV'] == False:  # noqa: E712
                evaluation['@type'] = ['StaticCV', 'Evaluation']
            evaluation.evaluatesOn = row['Dataset']
            evaluation.evaluates = row['Model']
            for position, name in enumerate(metric_names):
                metric = models.Metric()
                metric['@id'] = 'Metric' + str(position)
                metric['@type'] = name.capitalize()
                metric.value = row[name]
                evaluation.metrics.append(metric)
            evaluations.append(evaluation)
        return evaluations
|
||||
|
||||
class MappingMixin(object):
|
||||
|
||||
@property
|
||||
@ -605,3 +583,47 @@ def _from_loaded_module(module, info=None, **kwargs):
|
||||
yield cls(info=info, **kwargs)
|
||||
for instance in _instances_in_module(module):
|
||||
yield instance
|
||||
|
||||
|
||||
def evaluate(plugins, datasets, **kwargs):
    '''Evaluate every plugin on ``datasets`` and return the converted results.

    Each plugin is turned into a pipeline with ``as_pipe()`` and handed to
    ``Eval`` together with ``datasets``. Once the evaluation has run, its
    ``results`` are converted with ``evaluations_to_JSONLD``, which also
    receives any extra keyword arguments.
    '''
    pipes = []
    for plugin in plugins:
        pipes.append(plugin.as_pipe())
    evaluator = Eval(tuples=None, datasets=datasets, pipelines=pipes)
    evaluator.evaluate()
    return evaluations_to_JSONLD(evaluator.results, **kwargs)
|
||||
|
||||
|
||||
def evaluations_to_JSONLD(results, flatten=False):
    '''
    Map the evaluation results to a JSONLD scheme

    :param results: object iterated with ``iterrows()``; every row is read
        for 'Dataset', 'Model', each metric name and, when present, 'CV'.
    :param flatten: when true, each evaluation gets a single
        ``models.Metric`` holding all metric values keyed by metric name;
        otherwise it gets one ``models.Metric`` per metric name, with
        ``@id``, ``@type`` and ``value`` set.
    :return: list with one ``models.Evaluation`` per row.
    '''
    # Every metric is listed exactly once, so the non-flattened output does
    # not carry a duplicated 'f1_macro' entry.
    metric_names = ['accuracy', 'precision_macro', 'recall_macro',
                    'f1_macro', 'f1_weighted', 'f1_micro']

    evaluations = []
    for _, row in results.iterrows():
        evaluation = models.Evaluation()
        # NOTE(review): the StaticCV tag is applied when 'CV' is truthy or
        # missing -- confirm this polarity is the intended one.
        if row.get('CV', True):
            evaluation['@type'] = ['StaticCV', 'Evaluation']
        evaluation.evaluatesOn = row['Dataset']
        evaluation.evaluates = row['Model']
        if flatten:
            metric = models.Metric()
            for name in metric_names:
                metric[name] = row[name]
            evaluation.metrics.append(metric)
        else:
            # We should probably discontinue this representation
            for position, name in enumerate(metric_names):
                metric = models.Metric()
                metric['@id'] = 'Metric' + str(position)
                metric['@type'] = name.capitalize()
                metric.value = row[name]
                evaluation.metrics.append(metric)
        evaluations.append(evaluation)
    return evaluations
|
||||
|
@ -43,7 +43,7 @@
|
||||
"$ref": "response.json"
|
||||
},
|
||||
"AggregatedEvaluation": {
|
||||
"$ref": "aggregatedevaluation.json"
|
||||
"$ref": "aggregatedEvaluation.json"
|
||||
},
|
||||
"Evaluation": {
|
||||
"$ref": "evaluation.json"
|
||||
|
@ -10,6 +10,8 @@ from senpy.models import Results, Entry, EmotionSet, Emotion, Plugins
|
||||
from senpy import plugins
|
||||
from senpy.plugins.conversion.emotion.centroids import CentroidConversion
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class ShelfDummyPlugin(plugins.SentimentPlugin, plugins.ShelfMixin):
|
||||
'''Dummy plugin for tests.'''
|
||||
@ -212,7 +214,7 @@ class PluginsTest(TestCase):
|
||||
def input(self, entry, **kwargs):
|
||||
return entry.text
|
||||
|
||||
def predict(self, input):
|
||||
def predict_one(self, input):
    '''Tell whether ``'SIGN'`` occurs in ``input``.'''
    has_sign = 'SIGN' in input
    return has_sign
|
||||
|
||||
def output(self, output, entry, **kwargs):
|
||||
@ -242,7 +244,7 @@ class PluginsTest(TestCase):
|
||||
|
||||
mappings = {'happy': 'marl:Positive', 'sad': 'marl:Negative'}
|
||||
|
||||
def predict(self, input, **kwargs):
|
||||
def predict_one(self, input, **kwargs):
    '''Return ``'happy'`` when ``input`` contains ``':)'``, ``'sad'`` otherwise.

    Extra keyword arguments are accepted and ignored.
    '''
    if ':)' in input:
        return 'happy'
    return 'sad'
|
||||
|
||||
test_cases = [
|
||||
@ -309,6 +311,40 @@ class PluginsTest(TestCase):
|
||||
res = c._backwards_conversion(e)
|
||||
assert res["onyx:hasEmotionCategory"] == "c2"
|
||||
|
||||
def test_evaluation(self):
    '''Check the accuracy reported by ``plugins.evaluate`` for two plugins.

    The dataset has 50 "good" rows labelled 1 and 50 "bad" rows labelled 0.
    A plugin that always answers 0 must score about 0.5, and a plugin that
    answers 1 only for "good" must score about 1.
    '''
    rows = [["good", 1]] * 50 + [["bad", 0]] * 50
    dataset = pd.DataFrame(rows, columns=['text', 'polarity'])

    class DummyPlugin(plugins.TextBox):
        description = 'Plugin to test evaluation'
        version = 0

        def predict_one(self, input):
            return 0

    class SmartPlugin(plugins.TextBox):
        description = 'Plugin to test evaluation'
        version = 0

        def predict_one(self, input):
            return 1 if input == 'good' else 0

    def accuracy_of(plugin):
        # Flattened output: one Metric per evaluation, keyed by metric name.
        evaluations = plugins.evaluate(datasets={'testdata': dataset},
                                       plugins=[plugin],
                                       flatten=True)
        return evaluations[0].metrics[0]['accuracy']

    assert abs(accuracy_of(DummyPlugin()) - 0.5) < 0.01
    assert abs(accuracy_of(SmartPlugin()) - 1) < 0.01
|
||||
|
||||
|
||||
def make_mini_test(fpath):
|
||||
def mini_test(self):
|
||||
|
Loading…
Reference in New Issue
Block a user