Mirror of https://github.com/gsi-upm/senpy (synced 2024-11-22 00:02:28 +00:00)
Commit c0aa7ddc3c: Add evaluation tests (parent 5e2ada1654)
@@ -18,7 +18,7 @@ class BasicBox(SentimentBox):
         'default': 'marl:Neutral'
     }
 
-    def predict(self, input):
+    def predict_one(self, input):
         output = basic.get_polarity(input)
         return self.mappings.get(output, self.mappings['default'])
 
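The rename from predict to predict_one in this and the following hunks frees the predict name for the batch, scikit-learn style API that this commit adds to Box further down. A minimal sketch of the new single-item contract, with a toy plugin invented for illustration (not part of this commit):

# Hedged sketch: predict_one receives one pre-processed input and returns one
# raw prediction; the Box machinery handles per-entry iteration and mapping.
from senpy.plugins import SentimentBox


class UppercaseBox(SentimentBox):
    '''Toy heuristic: text written in all caps counts as positive.'''
    description = 'Illustrates the predict_one contract'
    author = 'example'
    version = 0

    mappings = {True: 'marl:Positive', 'default': 'marl:Neutral'}

    def predict_one(self, input):
        output = input.isupper()  # one raw prediction per input
        return self.mappings.get(output, self.mappings['default'])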
@@ -18,7 +18,7 @@ class Basic(MappingMixin, SentimentBox):
         'default': 'marl:Neutral'
     }
 
-    def predict(self, input):
+    def predict_one(self, input):
         return basic.get_polarity(input)
 
     test_cases = [{
@@ -18,7 +18,7 @@ class PipelineSentiment(MappingMixin, SentimentBox):
         -1: 'marl:Negative'
     }
 
-    def predict(self, input):
+    def predict_one(self, input):
         return pipeline.predict([input, ])[0]
 
     test_cases = [
@@ -6,7 +6,7 @@ from future import standard_library
 standard_library.install_aliases()
 
 from . import plugins, api
-from .plugins import Plugin
+from .plugins import Plugin, evaluate
 from .models import Error, AggregatedEvaluation
 from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
 
@@ -17,7 +17,6 @@ import copy
 import errno
 import logging
 
-#Correct this import for managing the datasets
 from gsitk.datasets.datasets import DatasetManager
 
 
@@ -197,13 +196,13 @@ class Senpy(object):
         if dataset not in self.datasets:
             logger.debug(("The dataset '{}' is not valid\n"
                           "Valid datasets: {}").format(dataset,
                                                        self.datasets.keys()))
             raise Error(
                 status=404,
                 message="The dataset '{}' is not valid".format(dataset))
         datasets = self._dm.prepare_datasets(datasets_name)
         return datasets
 
     @property
     def datasets(self):
         self._dataset_list = {}
@@ -219,29 +218,17 @@ class Senpy(object):
     def evaluate(self, params):
 
         logger.debug("evaluating request: {}".format(params))
-        try:
-            results = AggregatedEvaluation()
-            results.parameters = params
-            datasets = self._get_datasets(results)
-            plugins = self._get_plugins(results)
-            collector = list()
-            for plugin in plugins:
-                for eval in plugin.score(datasets):
-                    results.evaluations.append(eval)
-            if 'with_parameters' not in results.parameters:
-                del results.parameters
-            logger.debug("Returning evaluation result: {}".format(results))
-        except (Error,Exception) as ex:
-            if not isinstance(ex, Error):
-                msg = "Error during evaluation: {} \n\t{}".format(ex,
-                                                                  traceback.format_exc())
-                ex = Error(message=msg, status=500)
-            logger.exception('Error returning evaluation result')
-            raise ex
-        #results.evaluations = collector
+        results = AggregatedEvaluation()
+        results.parameters = params
+        datasets = self._get_datasets(results)
+        plugins = self._get_plugins(results)
+        for eval in evaluate(plugins, datasets):
+            results.evaluations.append(eval)
+        if 'with_parameters' not in results.parameters:
+            del results.parameters
+        logger.debug("Returning evaluation result: {}".format(results))
         return results
 
 
     def _conversion_candidates(self, fromModel, toModel):
         candidates = self.plugins(plugin_type='emotionConversionPlugin')
         for candidate in candidates:
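Senpy.evaluate no longer owns the scoring loop or the try/except plumbing: it resolves plugins and datasets and delegates to the module-level evaluate() introduced in the plugins hunk below. Roughly the equivalent call, sketched under the assumption of an initialized Senpy instance; the helper name evaluate_request is invented for illustration:

# Hedged sketch, not in the diff: the delegation the new Senpy.evaluate
# performs, minus AggregatedEvaluation bookkeeping and parameter handling.
from senpy.plugins import evaluate


def evaluate_request(app, plugin_names, dataset_names):
    plugins = [app.get_plugin(name) for name in plugin_names]  # Senpy accessor
    datasets = app._dm.prepare_datasets(dataset_names)         # gsitk DatasetManager
    return evaluate(plugins, datasets)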
@@ -25,6 +25,8 @@ from .. import api
 from gsitk.evaluation.evaluation import Evaluation as Eval
 from sklearn.pipeline import Pipeline
 
+import numpy as np
+
 logger = logging.getLogger(__name__)
 
 
@@ -254,7 +256,7 @@ class Box(AnalysisPlugin):
 
     .. code-block::
 
-                 entry --> input() --> predict() --> output() --> entry'
+                 entry --> input() --> predict_one() --> output() --> entry'
 
 
     In other words: their ``input`` method convers a query (entry and a set of parameters) into
@@ -270,15 +272,33 @@ class Box(AnalysisPlugin):
         '''Transforms the results of the black box into an entry'''
         return output
 
-    def predict(self, input):
+    def predict_one(self, input):
         raise NotImplementedError('You should define the behavior of this plugin')
 
     def analyse_entries(self, entries, params):
         for entry in entries:
             input = self.input(entry=entry, params=params)
-            results = self.predict(input=input)
+            results = self.predict_one(input=input)
             yield self.output(output=results, entry=entry, params=params)
 
+    def fit(self, X=None, y=None):
+        return self
+
+    def transform(self, X):
+        return np.array([self.predict_one(x) for x in X])
+
+    def predict(self, X):
+        return self.transform(X)
+
+    def fit_transform(self, X, y):
+        self.fit(X, y)
+        return self.transform(X)
+
+    def as_pipe(self):
+        pipe = Pipeline([('plugin', self)])
+        pipe.name = self.name
+        return pipe
+
 
 class TextBox(Box):
     '''A black box plugin that takes only text as input'''
|
|||||||
return entry
|
return entry
|
||||||
|
|
||||||
|
|
||||||
class EvaluationBox():
|
|
||||||
'''
|
|
||||||
A box plugin where it is implemented the evaluation. It is necessary to have a pipeline.
|
|
||||||
'''
|
|
||||||
|
|
||||||
def score(self, datasets):
|
|
||||||
pipelines = [self._pipeline]
|
|
||||||
|
|
||||||
ev = Eval(tuples = None,
|
|
||||||
datasets = datasets,
|
|
||||||
pipelines = pipelines)
|
|
||||||
ev.evaluate()
|
|
||||||
results = ev.results
|
|
||||||
evaluations = self._evaluations_toJSONLD(results)
|
|
||||||
return evaluations
|
|
||||||
|
|
||||||
def _evaluations_toJSONLD(self, results):
|
|
||||||
'''
|
|
||||||
Map the evaluation results to a JSONLD scheme
|
|
||||||
'''
|
|
||||||
|
|
||||||
evaluations = list()
|
|
||||||
metric_names = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro', 'f1_weighted', 'f1_micro', 'f1_macro']
|
|
||||||
|
|
||||||
for index, row in results.iterrows():
|
|
||||||
|
|
||||||
evaluation = models.Evaluation()
|
|
||||||
if row['CV'] == False:
|
|
||||||
evaluation['@type'] = ['StaticCV', 'Evaluation']
|
|
||||||
evaluation.evaluatesOn = row['Dataset']
|
|
||||||
evaluation.evaluates = row['Model']
|
|
||||||
i = 0
|
|
||||||
for name in metric_names:
|
|
||||||
metric = models.Metric()
|
|
||||||
metric['@id'] = 'Metric' + str(i)
|
|
||||||
metric['@type'] = name.capitalize()
|
|
||||||
metric.value = row[name]
|
|
||||||
evaluation.metrics.append(metric)
|
|
||||||
i+=1
|
|
||||||
evaluations.append(evaluation)
|
|
||||||
return evaluations
|
|
||||||
|
|
||||||
class MappingMixin(object):
|
class MappingMixin(object):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -605,3 +583,47 @@ def _from_loaded_module(module, info=None, **kwargs):
         yield cls(info=info, **kwargs)
     for instance in _instances_in_module(module):
         yield instance
+
+
+def evaluate(plugins, datasets, **kwargs):
+    ev = Eval(tuples=None,
+              datasets=datasets,
+              pipelines=[plugin.as_pipe() for plugin in plugins])
+    ev.evaluate()
+    results = ev.results
+    evaluations = evaluations_to_JSONLD(results, **kwargs)
+    return evaluations
+
+
+def evaluations_to_JSONLD(results, flatten=False):
+    '''
+    Map the evaluation results to a JSONLD scheme
+    '''
+
+    evaluations = list()
+    metric_names = ['accuracy', 'precision_macro', 'recall_macro',
+                    'f1_macro', 'f1_weighted', 'f1_micro', 'f1_macro']
+
+    for index, row in results.iterrows():
+        evaluation = models.Evaluation()
+        if row.get('CV', True):
+            evaluation['@type'] = ['StaticCV', 'Evaluation']
+        evaluation.evaluatesOn = row['Dataset']
+        evaluation.evaluates = row['Model']
+        i = 0
+        if flatten:
+            metric = models.Metric()
+            for name in metric_names:
+                metric[name] = row[name]
+            evaluation.metrics.append(metric)
+        else:
+            # We should probably discontinue this representation
+            for name in metric_names:
+                metric = models.Metric()
+                metric['@id'] = 'Metric' + str(i)
+                metric['@type'] = name.capitalize()
+                metric.value = row[name]
+                evaluation.metrics.append(metric)
+                i += 1
+        evaluations.append(evaluation)
+    return evaluations
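evaluations_to_JSONLD now supports two layouts: flatten=True packs every score into a single Metric object keyed by metric name, while the legacy branch (which the new comment flags for discontinuation) emits one typed Metric per score. A sketch of the two shapes, with plain dicts standing in for senpy.models.Metric:

# Hedged sketch of the two metric layouts, mocked with plain dicts.
row = {'accuracy': 0.5, 'precision_macro': 0.5, 'recall_macro': 0.5,
       'f1_macro': 0.5, 'f1_weighted': 0.5, 'f1_micro': 0.5}

# flatten=True: one metric object holding every score
flat = [dict(row)]

# flatten=False (legacy): one typed metric per score
legacy = [{'@id': 'Metric%d' % i, '@type': name.capitalize(), 'value': row[name]}
          for i, name in enumerate(row)]

print(flat[0]['accuracy'])  # 0.5
print(legacy[0]['@type'])   # 'Accuracy'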
@@ -43,7 +43,7 @@
     "$ref": "response.json"
   },
   "AggregatedEvaluation": {
-    "$ref": "aggregatedevaluation.json"
+    "$ref": "aggregatedEvaluation.json"
   },
   "Evaluation": {
     "$ref": "evaluation.json"
@@ -10,6 +10,8 @@ from senpy.models import Results, Entry, EmotionSet, Emotion, Plugins
 from senpy import plugins
 from senpy.plugins.conversion.emotion.centroids import CentroidConversion
 
+import pandas as pd
+
 
 class ShelfDummyPlugin(plugins.SentimentPlugin, plugins.ShelfMixin):
     '''Dummy plugin for tests.'''
@@ -212,7 +214,7 @@ class PluginsTest(TestCase):
             def input(self, entry, **kwargs):
                 return entry.text
 
-            def predict(self, input):
+            def predict_one(self, input):
                 return 'SIGN' in input
 
             def output(self, output, entry, **kwargs):
@@ -242,7 +244,7 @@ class PluginsTest(TestCase):
 
             mappings = {'happy': 'marl:Positive', 'sad': 'marl:Negative'}
 
-            def predict(self, input, **kwargs):
+            def predict_one(self, input, **kwargs):
                 return 'happy' if ':)' in input else 'sad'
 
         test_cases = [
@@ -309,6 +311,40 @@ class PluginsTest(TestCase):
         res = c._backwards_conversion(e)
         assert res["onyx:hasEmotionCategory"] == "c2"
 
+    def test_evaluation(self):
+        testdata = []
+        for i in range(50):
+            testdata.append(["good", 1])
+        for i in range(50):
+            testdata.append(["bad", 0])
+        dataset = pd.DataFrame(testdata, columns=['text', 'polarity'])
+
+        class DummyPlugin(plugins.TextBox):
+            description = 'Plugin to test evaluation'
+            version = 0
+
+            def predict_one(self, input):
+                return 0
+
+        class SmartPlugin(plugins.TextBox):
+            description = 'Plugin to test evaluation'
+            version = 0
+
+            def predict_one(self, input):
+                if input == 'good':
+                    return 1
+                return 0
+
+        dpipe = DummyPlugin()
+        results = plugins.evaluate(datasets={'testdata': dataset}, plugins=[dpipe], flatten=True)
+        dumb_metrics = results[0].metrics[0]
+        assert abs(dumb_metrics['accuracy'] - 0.5) < 0.01
+
+        spipe = SmartPlugin()
+        results = plugins.evaluate(datasets={'testdata': dataset}, plugins=[spipe], flatten=True)
+        smart_metrics = results[0].metrics[0]
+        assert abs(smart_metrics['accuracy'] - 1) < 0.01
+
 
 def make_mini_test(fpath):
     def mini_test(self):
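The asserted accuracies follow directly from the 50/50 class balance of the toy dataset: DummyPlugin, which always answers 0, is right on exactly the 50 'bad' rows, while SmartPlugin matches every row. A quick arithmetic check, independent of gsitk:

# Hedged arithmetic check, not part of the commit.
labels = [1] * 50 + [0] * 50  # 50 "good", 50 "bad"
dummy = [0] * 100             # DummyPlugin: always predicts 0
smart = [1] * 50 + [0] * 50   # SmartPlugin: keys off the text

def acc(pred):
    return sum(p == y for p, y in zip(pred, labels)) / len(labels)

print(acc(dummy))  # 0.5
print(acc(smart))  # 1.0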