|
|
|
@ -25,6 +25,8 @@ from .. import api
|
|
|
|
|
from gsitk.evaluation.evaluation import Evaluation as Eval
|
|
|
|
|
from sklearn.pipeline import Pipeline
|
|
|
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -254,7 +256,7 @@ class Box(AnalysisPlugin):
|
|
|
|
|
|
|
|
|
|
.. code-block::
|
|
|
|
|
|
|
|
|
|
entry --> input() --> predict() --> output() --> entry'
|
|
|
|
|
entry --> input() --> predict_one() --> output() --> entry'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
In other words: their ``input`` method converts a query (entry and a set of parameters) into
|
|
|
|
@ -270,15 +272,33 @@ class Box(AnalysisPlugin):
|
|
|
|
|
'''Transforms the results of the black box into an entry'''
|
|
|
|
|
return output
|
|
|
|
|
|
|
|
|
|
def predict(self, input):
|
|
|
|
|
def predict_one(self, input):
    '''Produce the black box's prediction for a single query.

    Subclasses must override this method.
    '''
    raise NotImplementedError('You should define the behavior of this plugin')
|
|
|
|
|
|
|
|
|
|
def analyse_entries(self, entries, params):
    '''Analyse every entry by converting it into a query, running the
    black box on it, and converting the prediction back into an entry.
    Yields one result entry per input entry.
    '''
    for entry in entries:
        query = self.input(entry=entry, params=params)
        prediction = self.predict_one(input=query)
        yield self.output(output=prediction, entry=entry, params=params)
|
|
|
|
|
|
|
|
|
|
def fit(self, X=None, y=None):
    '''No-op training step: black-box plugins are assumed pre-trained.

    Returns self, mimicking the scikit-learn estimator API.
    '''
    return self
|
|
|
|
|
|
|
|
|
|
def transform(self, X):
    '''Run predict_one on every element of X and collect the results
    into a numpy array (scikit-learn transformer API).'''
    predictions = [self.predict_one(x) for x in X]
    return np.array(predictions)
|
|
|
|
|
|
|
|
|
|
def predict(self, X):
    '''Alias for transform, for scikit-learn predictor compatibility.'''
    return self.transform(X)
|
|
|
|
|
|
|
|
|
|
def fit_transform(self, X, y=None):
    '''Fit on (X, y), then return the transformed X.

    ``y`` now defaults to None so the signature matches the standard
    scikit-learn ``fit_transform(X, y=None)`` convention; the original
    required ``y`` explicitly, which broke sklearn utilities that call
    ``fit_transform(X)``. Existing callers are unaffected.
    '''
    self.fit(X, y)
    return self.transform(X)
|
|
|
|
|
|
|
|
|
|
def as_pipe(self):
    '''Wrap this plugin as a single-step scikit-learn Pipeline.

    The pipeline is tagged with the plugin's name so evaluation results
    can be traced back to the plugin.
    '''
    pipe = Pipeline([('plugin', self)])
    pipe.name = self.name
    return pipe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TextBox(Box):
|
|
|
|
|
'''A black box plugin that takes only text as input'''
|
|
|
|
@ -323,48 +343,6 @@ class EmotionBox(TextBox, EmotionPlugin):
|
|
|
|
|
return entry
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EvaluationBox():
    '''
    A box plugin that implements evaluation over datasets.
    It requires a pipeline to be set (``self._pipeline``).
    '''

    def score(self, datasets):
        '''Evaluate this plugin's pipeline on the given datasets and
        return the results as a list of JSON-LD evaluation models.'''
        pipelines = [self._pipeline]

        ev = Eval(tuples=None,
                  datasets=datasets,
                  pipelines=pipelines)
        ev.evaluate()
        results = ev.results
        evaluations = self._evaluations_toJSONLD(results)
        return evaluations

    def _evaluations_toJSONLD(self, results):
        '''
        Map the evaluation results to a JSONLD scheme.

        ``results`` is a DataFrame-like object with one row per run
        (columns: 'CV', 'Dataset', 'Model', and one column per metric).
        '''
        evaluations = list()
        # NOTE(review): the original list repeated 'f1_macro', producing a
        # duplicate metric per evaluation; deduplicated here.
        metric_names = ['accuracy', 'precision_macro', 'recall_macro',
                        'f1_macro', 'f1_weighted', 'f1_micro']

        for _, row in results.iterrows():
            evaluation = models.Evaluation()
            # Only non-cross-validated runs get tagged here -- presumably CV
            # runs are typed elsewhere; TODO confirm.
            if not row['CV']:
                evaluation['@type'] = ['StaticCV', 'Evaluation']
            evaluation.evaluatesOn = row['Dataset']
            evaluation.evaluates = row['Model']
            for i, name in enumerate(metric_names):
                metric = models.Metric()
                metric['@id'] = 'Metric' + str(i)
                metric['@type'] = name.capitalize()
                metric.value = row[name]
                evaluation.metrics.append(metric)
            evaluations.append(evaluation)
        return evaluations
|
|
|
|
|
|
|
|
|
|
class MappingMixin(object):
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
@ -605,3 +583,47 @@ def _from_loaded_module(module, info=None, **kwargs):
|
|
|
|
|
yield cls(info=info, **kwargs)
|
|
|
|
|
for instance in _instances_in_module(module):
|
|
|
|
|
yield instance
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def evaluate(plugins, datasets, **kwargs):
    '''Run every plugin (wrapped as a pipeline) against the given
    datasets and return the results mapped to JSON-LD evaluations.

    Extra keyword arguments are forwarded to evaluations_to_JSONLD.
    '''
    pipes = [plugin.as_pipe() for plugin in plugins]
    ev = Eval(tuples=None, datasets=datasets, pipelines=pipes)
    ev.evaluate()
    return evaluations_to_JSONLD(ev.results, **kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def evaluations_to_JSONLD(results, flatten=False):
    '''
    Map the evaluation results to a JSONLD scheme.

    results: DataFrame-like object with one row per evaluation run
        (columns: 'CV', 'Dataset', 'Model', and one column per metric).
    flatten: if True, emit a single Metric object carrying every metric
        as a key; otherwise one Metric object per metric name.
    '''
    evaluations = list()
    # NOTE(review): the original list repeated 'f1_macro', producing a
    # duplicate metric per evaluation; deduplicated here.
    metric_names = ['accuracy', 'precision_macro', 'recall_macro',
                    'f1_macro', 'f1_weighted', 'f1_micro']

    for _, row in results.iterrows():
        evaluation = models.Evaluation()
        # NOTE(review): tags the run as StaticCV when 'CV' is truthy (or the
        # column is missing), which inverts the older `row['CV'] == False`
        # check elsewhere in this module -- confirm which polarity is intended.
        if row.get('CV', True):
            evaluation['@type'] = ['StaticCV', 'Evaluation']
        evaluation.evaluatesOn = row['Dataset']
        evaluation.evaluates = row['Model']
        if flatten:
            metric = models.Metric()
            for name in metric_names:
                metric[name] = row[name]
            evaluation.metrics.append(metric)
        else:
            # We should probably discontinue this representation
            for i, name in enumerate(metric_names):
                metric = models.Metric()
                metric['@id'] = 'Metric' + str(i)
                metric['@type'] = name.capitalize()
                metric.value = row[name]
                evaluation.metrics.append(metric)
        evaluations.append(evaluation)
    return evaluations
|
|
|
|
|