senpy/senpy/plugins/__init__.py

#!/usr/local/bin/python
# -*- coding: utf-8 -*-
#
#    Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
#    Licensed under the Apache License, Version 2.0 (the "License");
#    you may not use this file except in compliance with the License.
#    You may obtain a copy of the License at
#
#        http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS,
#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#    See the License for the specific language governing permissions and
#    limitations under the License.
#

from future import standard_library
standard_library.install_aliases()

from future.utils import with_metaclass
from functools import partial

import os.path
import os
import re
import pickle
import logging
import pprint

import inspect
import sys
import subprocess
import importlib
import yaml
import threading
import multiprocessing
import pkg_resources
from nltk import download
from textwrap import dedent
from sklearn.base import TransformerMixin, BaseEstimator
from itertools import product

from .. import models, utils
from .. import api
from .. import gsitk_compat
from .. import testing

logger = logging.getLogger(__name__)


class PluginMeta(models.BaseMeta):
    _classes = {}

    def __new__(mcs, name, bases, attrs, **kwargs):
        plugin_type = set()
        for base in bases:
            if hasattr(base, '_plugin_type'):
                plugin_type |= base._plugin_type
        plugin_type.add(name)
        alias = attrs.get('name', name).lower()
        attrs['_plugin_type'] = plugin_type
        logger.debug('Adding new plugin class: %s %s %s %s', name, bases, attrs, plugin_type)
        attrs['name'] = alias
        if 'description' not in attrs:
            doc = attrs.get('__doc__', None)
            if doc:
                attrs['description'] = dedent(doc)
            else:
                logger.warning(
                    ('Plugin {} does not have a description. '
                     'Please, add a short summary to help other developers'
                     ).format(name))
        cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs)

        if alias in mcs._classes:
            if os.environ.get('SENPY_TESTING', ""):
                raise Exception(
                    ('The type of plugin {} already exists. '
                     'Please, choose a different name').format(name))
            else:
                logger.warning('Overloading plugin class: {}'.format(alias))
        mcs._classes[alias] = cls
        return cls

    @classmethod
    def for_type(cls, ptype):
        return cls._classes[ptype]


class Plugin(with_metaclass(PluginMeta, models.Plugin)):
    '''
    Base class for all plugins in senpy.
    A plugin must provide at least these attributes:

        - version
        - description (or docstring)
        - author

    Additionally, they may provide a URL (url) of a repository or website.

    '''

    _terse_keys = ['name', '@id', '@type', 'author', 'description',
                   'extra_params', 'is_activated', 'url', 'version']

    def __init__(self, info=None, data_folder=None, **kwargs):
        """
        Provides a canonical name for plugins and serves as base for other
        kinds of plugins.
        """
        logger.debug("Initialising %s", info)
        super(Plugin, self).__init__(**kwargs)
        if info:
            self.update(info)
        self.validate()
        self.id = 'endpoint:plugins/{}_{}'.format(self['name'],
                                                  self['version'])
        self.is_activated = False
        self._lock = threading.Lock()
        self._directory = os.path.abspath(
            os.path.dirname(inspect.getfile(self.__class__)))

        data_folder = data_folder or os.getcwd()
        subdir = os.path.join(data_folder, self.name)

        self._data_paths = [
            data_folder,
            subdir,
            self._directory,
            os.path.join(self._directory, 'data'),
        ]

        if os.path.exists(subdir):
            data_folder = subdir
        self.data_folder = data_folder

        self._log = logging.getLogger('{}.{}'.format(__name__, self.name))

    @property
    def log(self):
        return self._log

    def validate(self):
        missing = []
        for x in ['name', 'description', 'version']:
            if x not in self:
                missing.append(x)
        if missing:
            raise models.Error(
                'Missing configuration parameters: {}'.format(missing))

    def get_folder(self):
        return os.path.dirname(inspect.getfile(self.__class__))

    def _activate(self):
        self.activate()
        self.is_activated = True

    def _deactivate(self):
        self.is_activated = False
        self.deactivate()

    def activate(self):
        pass

    def deactivate(self):
        pass

    def process(self, request, activity, **kwargs):
        """
        An implemented plugin should override this method.
        Here, we assume that a process_entries method exists.
        """
        newentries = list(
            self.process_entries(request.entries, activity))
        request.entries = newentries
        return request

    def process_entries(self, entries, activity):
        for entry in entries:
            self.log.debug('Processing entry with plugin %s: %s', self, entry)
            results = self.process_entry(entry, activity)
            if inspect.isgenerator(results):
                for result in results:
                    yield result
            else:
                yield results

    def process_entry(self, entry, activity):
        """
        This base method is here to adapt plugins which only
        implement the *process* function.
        Note that this method may yield an annotated entry or a list of
        entries (e.g. in a tokenizer)
        """
        raise NotImplementedError(
            'You need to implement process, process_entries or process_entry in your plugin'
        )

    def test(self, test_cases=None):
        if not test_cases:
            if not hasattr(self, 'test_cases'):
                raise AttributeError(
                    ('Plugin {} [{}] does not have any defined '
                     'test cases').format(self.id,
                                          inspect.getfile(self.__class__)))
            test_cases = self.test_cases
        for case in test_cases:
            try:
                fmt = 'case: {}'.format(case.get('name', case))
                if 'name' in case:
                    self.log.info('Test case: {}'.format(case['name']))
                self.log.debug('Test case:\n\t{}'.format(
                    pprint.pformat(fmt)))
                self.test_case(case)
            except Exception as ex:
                self.log.warning('Test case failed:\n{}'.format(
                    pprint.pformat(case)))
                raise

    def test_case(self, case, mock=testing.MOCK_REQUESTS):
        if 'entry' not in case and 'input' in case:
            entry = models.Entry(_auto_id=False)
            entry.nif__isString = case['input']
            case['entry'] = entry
        entry = models.Entry(case['entry'])
        given_parameters = case.get('params', case.get('parameters', {}))
        expected = case.get('expected', None)
        should_fail = case.get('should_fail', False)
        responses = case.get('responses', [])

        try:
            request = models.Response()
            parameters = api.parse_params(given_parameters,
                                          self.extra_params)
            request.entries = [
                entry,
            ]

            activity = self.activity(parameters)

            method = partial(self.process, request, activity)

            if mock:
                res = method()
            else:
                with testing.patch_all_requests(responses):
                    res = method()

            if not isinstance(expected, list):
                expected = [expected]
            utils.check_template(res.entries, expected)
            res.validate()
        except models.Error:
            if should_fail:
                return
            raise
        assert not should_fail

    def find_file(self, fname):
        for p in self._data_paths:
            alternative = os.path.join(p, fname)
            if os.path.exists(alternative):
                return alternative
        raise IOError('File does not exist: {}'.format(fname))

    def path(self, fpath):
        if not os.path.isabs(fpath):
            fpath = os.path.join(self.data_folder, fpath)
        return fpath

    def open(self, fpath, mode='r'):
        if 'w' in mode:
            # When writing, only use absolute paths or data_folder
            fpath = self.path(fpath)
        else:
            fpath = self.find_file(fpath)

        return open(fpath, mode=mode)

    def serve(self, debug=True, **kwargs):
        utils.easy(plugin_list=[self, ], plugin_folder=None, debug=debug, **kwargs)


# For backwards compatibility
SenpyPlugin = Plugin


class Analyser(Plugin):
    '''
    A subclass of Plugin that analyses text and provides an annotation.
    '''

    # Deprecated
    def analyse(self, request, activity):
        return super(Analyser, self).process(request, activity)

    # Deprecated
    def analyse_entries(self, entries, activity):
        for i in super(Analyser, self).process_entries(entries, activity):
            yield i

    def process(self, request, activity, **kwargs):
        return self.analyse(request, activity)

    def process_entries(self, entries, activity):
        for i in self.analyse_entries(entries, activity):
            yield i

    def process_entry(self, entry, activity, **kwargs):
        if hasattr(self, 'analyse_entry'):
            for i in self.analyse_entry(entry, activity):
                yield i
        else:
            super(Analyser, self).process_entry(entry, activity, **kwargs)


AnalysisPlugin = Analyser


class Transformation(AnalysisPlugin):
    '''Empty'''
    pass


class Conversion(Plugin):
    '''
    A subclass of Plugins that convert between different annotation models.
    e.g. a conversion of emotion models, or normalization of sentiment values.
    '''

    def process(self, response, parameters, plugins=None, **kwargs):
        plugins = plugins or []
        newentries = []
        for entry in response.entries:
            newentries.append(
                self.convert_entry(entry, parameters, plugins))
        response.entries = newentries
        return response

    def convert_entry(self, entry, parameters, conversions_applied):
        raise NotImplementedError(
            'You should implement a way to convert each entry, or a custom process method'
        )


ConversionPlugin = Conversion


class Evaluable(Plugin):
    '''
    Common class for plugins that can be evaluated with GSITK.

    They should implement the methods below.
    '''

    def as_pipe(self):
        raise Exception('Implement the as_pipe function')

    def evaluate_func(self, X, activity=None):
        raise Exception('Implement the evaluate_func function')

    def evaluate(self, *args, **kwargs):
        return evaluate([self], *args, **kwargs)


class SentimentPlugin(Analyser, Evaluable, models.SentimentPlugin):
    '''
    Sentiment plugins provide sentiment annotation (using Marl)
    '''
    minPolarityValue = 0
    maxPolarityValue = 1

    _terse_keys = Analyser._terse_keys + ['minPolarityValue', 'maxPolarityValue']

    def test_case(self, case):
        if 'polarity' in case:
            expected = case.get('expected', {})
            s = models.Sentiment(_auto_id=False)
            s.marl__hasPolarity = case['polarity']
            if 'marl:hasOpinion' not in expected:
                expected['marl:hasOpinion'] = []
            expected['marl:hasOpinion'].append(s)
            case['expected'] = expected
        super(SentimentPlugin, self).test_case(case)

    def normalize(self, value, minValue, maxValue):
        nv = minValue + (value - self.minPolarityValue) * (
            self.maxPolarityValue - self.minPolarityValue) / (maxValue - minValue)
        return nv

    def as_pipe(self):
        pipe = gsitk_compat.Pipeline([('senpy-plugin', ScikitWrapper(self))])
        pipe.name = self.id
        return pipe

    def evaluate_func(self, X, activity=None):
        if activity is None:
            parameters = api.parse_params({},
                                          self.extra_params)
            activity = self.activity(parameters)
        entries = []
        for feat in X:
            if isinstance(feat, list):
                feat = ' '.join(feat)
            entries.append(models.Entry(nif__isString=feat))
        labels = []
        for e in self.process_entries(entries, activity):
            sent = e.sentiments[0].polarity
            label = -1
            if sent == 'marl:Positive':
                label = 1
            elif sent == 'marl:Negative':
                label = -1
            labels.append(label)
        return labels


class EmotionPlugin(Analyser, models.EmotionPlugin):
    '''
    Emotion plugins provide emotion annotation (using Onyx)
    '''
    minEmotionValue = 0
    maxEmotionValue = 1

    _terse_keys = Analyser._terse_keys + ['minEmotionValue', 'maxEmotionValue']


class EmotionConversion(Conversion):
    '''
    A subclass of Conversion that converts emotion annotations using different models
    '''

    def can_convert(self, fromModel, toModel):
        '''
        Whether this plugin can convert from fromModel to toModel.
        If fromModel is None, it is interpreted as "any Model"
        '''
        for pair in self.onyx__doesConversion:
            if (pair['onyx:conversionTo'] == toModel) and \
               ((fromModel is None) or (pair['onyx:conversionFrom'] == fromModel)):
                return True
        return False


EmotionConversionPlugin = EmotionConversion


class PostProcessing(Plugin):
    '''
    A plugin that converts the output of other plugins (post-processing).
    '''
    def check(self, request, plugins):
        '''Should this plugin be run for this request?'''
        return False


class Box(Analyser):
    '''
    Black box plugins delegate analysis to a function.
    The flow is like this:

    .. code-block::

                   entries --> to_features() --> predict_many() --> to_entry() --> entries'


    In other words: their ``to_features`` method converts a query (entry and a set of parameters)
    into the input to the `predict_one` method, which only uses an array of features.
    The ``to_entry`` method converts the results given by the box into an entry that senpy can
    handle.
    '''

    def to_features(self, entry, activity=None):
        '''Transforms a query (entry+param) into an input for the black box'''
        return entry

    def to_entry(self, features, entry=None, activity=None):
        '''Transforms the results of the black box into an entry'''
        return entry

    def predict_one(self, features, activity=None):
        raise NotImplementedError(
            'You should define the behavior of this plugin')

    def predict_many(self, features, activity=None):
        results = []
        for feat in features:
            results.append(self.predict_one(features=feat, activity=activity))
        return results

    def process_entry(self, entry, activity):
        for i in self.process_entries([entry], activity):
            yield i

    def process_entries(self, entries, activity):
        features = []
        for entry in entries:
            features.append(self.to_features(entry=entry, activity=activity))
        results = self.predict_many(features=features, activity=activity)

        for (result, entry) in zip(results, entries):
            yield self.to_entry(features=result, entry=entry, activity=activity)


class TextBox(Box):
    '''A black box plugin that takes only text as input'''

    def to_features(self, entry, activity):
        return [entry['nif:isString']]


class SentimentBox(TextBox, SentimentPlugin):
    '''
    A box plugin where the output is only a polarity label or a tuple (polarity, polarityValue)
    '''

    classes = ['marl:Positive', 'marl:Neutral', 'marl:Negative']
    binary = True

    def to_entry(self, features, entry, activity, **kwargs):

        if len(features) != len(self.classes):
            raise models.Error('The number of features ({}) does not match the classes '
                               '(plugin.classes ({})'.format(len(features), len(self.classes)))

        minValue = activity.param('marl:minPolarityValue', 0)
        maxValue = activity.param('marl:minPolarityValue', 1)
        activity['marl:minPolarityValue'] = minValue
        activity['marl:maxPolarityValue'] = maxValue

        for k, v in zip(self.classes, features):
            s = models.Sentiment()
            if self.binary:
                if not v:  # Carry on if the value is 0
                    continue
                s['marl:hasPolarity'] = k
            else:
                if v is not None:
                    s['marl:hasPolarity'] = k
                    nv = self.normalize(v, minValue, maxValue)
                    s['marl:polarityValue'] = nv
            s.prov(activity)

            entry.sentiments.append(s)

        return entry


class EmotionBox(TextBox, EmotionPlugin):
    '''
    A box plugin where the output is only an a tuple of emotion labels
    '''

    EMOTIONS = []
    with_intensity = True

    def to_entry(self, features, entry, activity, **kwargs):
        s = models.EmotionSet()

        if len(features) != len(self.EMOTIONS):
            raise Exception(('The number of classes in the plugin and the number of features '
                             'do not match'))

        for label, intensity in zip(self.EMOTIONS, features):
            e = models.Emotion(onyx__hasEmotionCategory=label)
            if self.with_intensity:
                e.onyx__hasEmotionIntensity = intensity
            s.onyx__hasEmotion.append(e)
        s.prov(activity)
        entry.emotions.append(s)
        return entry


class MappingMixin(object):
    @property
    def mappings(self):
        return self._mappings

    @mappings.setter
    def mappings(self, value):
        self._mappings = value

    def to_entry(self, features, entry, activity):
        features = list(features)
        for i, feat in enumerate(features):
            features[i] = self.mappings.get(feat,
                                            self.mappings.get('default',
                                                              feat))
        return super(MappingMixin, self).to_entry(features=features,
                                                  entry=entry,
                                                  activity=activity)


class ShelfMixin(object):
    @property
    def sh(self):
        if not hasattr(self, '_sh') or self._sh is None:
            self._sh = {}
            if os.path.isfile(self.shelf_file):
                try:
                    with self.open(self.shelf_file, 'rb') as p:
                        self._sh = pickle.load(p)
                except (IndexError, EOFError, pickle.UnpicklingError):
                    self.log.warning('Corrupted shelf file: {}'.format(
                        self.shelf_file))
                    if not self.get('force_shelf', False):
                        raise
        return self._sh

    @sh.deleter
    def sh(self):
        if os.path.isfile(self.shelf_file):
            os.remove(self.shelf_file)
            del self._sh
        self.save()

    @sh.setter
    def sh(self, value):
        self._sh = value

    @property
    def shelf_file(self):
        if not hasattr(self, '_shelf_file') or not self._shelf_file:
            self._shelf_file = os.path.join(self.data_folder, self.name + '.p')
        return self._shelf_file

    @shelf_file.setter
    def shelf_file(self, value):
        self._shelf_file = value

    def save(self):
        self.log.debug('Saving pickle')
        if hasattr(self, '_sh') and self._sh is not None:
            with self.open(self.shelf_file, 'wb') as f:
                pickle.dump(self._sh, f)


def pfilter(plugins, plugin_type=Analyser, **kwargs):
    """ Filter plugins by different criteria """
    if isinstance(plugins, models.Plugins):
        plugins = plugins.plugins
    elif isinstance(plugins, dict):
        plugins = plugins.values()
    logger.debug('#' * 100)
    logger.debug('plugin_type {}'.format(plugin_type))
    if plugin_type:
        if isinstance(plugin_type, PluginMeta):
            plugin_type = plugin_type.__name__
        try:
            plugin_type = plugin_type[0].upper() + plugin_type[1:]
            pclass = globals()[plugin_type]
            logger.debug('Class: {}'.format(pclass))
            candidates = filter(lambda x: isinstance(x, pclass), plugins)
        except KeyError:
            raise models.Error('{} is not a valid type'.format(plugin_type))
    else:
        candidates = plugins

    if 'name' in kwargs:
        kwargs['name'] = kwargs['name'].lower()

    logger.debug(candidates)

    def matches(plug):
        res = all(getattr(plug, k, None) == v for (k, v) in kwargs.items())
        logger.debug("matching {} with {}: {}".format(plug.name, kwargs, res))
        return res

    if kwargs:
        candidates = filter(matches, candidates)
    return candidates


def load_module(name, root=None):
    if root:
        sys.path.append(root)
    tmp = importlib.import_module(name)
    if root:
        sys.path.remove(root)
    return tmp


def _log_subprocess_output(process):
    for line in iter(process.stdout.readline, b''):
        logger.info('%s', line.decode())
    for line in iter(process.stderr.readline, b''):
        logger.error('%s', line.decode())


def missing_requirements(reqs):
    queue = []
    pool = multiprocessing.Pool(4)
    for req in reqs:
        res = pool.apply_async(pkg_resources.get_distribution, (req,))
        queue.append((req, res))
    missing = []
    for req, job in queue:
        try:
            job.get(1)
        except Exception:
            missing.append(req)
    return missing


def install_deps(*plugins):
    installed = False
    nltk_resources = set()
    requirements = []
    for info in plugins:
        requirements = info.get('requirements', [])
        if requirements:
            requirements += missing_requirements(requirements)
        nltk_resources |= set(info.get('nltk_resources', []))
    if requirements:
        logger.info('Installing requirements: ' + str(requirements))
        pip_args = [sys.executable, '-m', 'pip', 'install']
        for req in requirements:
            pip_args.append(req)
        process = subprocess.Popen(
            pip_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _log_subprocess_output(process)
        exitcode = process.wait()
        installed = True
        if exitcode != 0:
            raise models.Error(
                "Dependencies not properly installed: {}".format(pip_args))
    installed |= download(list(nltk_resources))
    return installed


is_plugin_file = re.compile(r'.*\.senpy$|senpy_[a-zA-Z0-9_]+\.py$|'
                            '^(?!test_)[a-zA-Z0-9_]+_plugin.py$')


def find_plugins(folders):
    for search_folder in folders:
        for root, dirnames, filenames in os.walk(search_folder):
            # Do not look for plugins in hidden or special folders
            dirnames[:] = [d for d in dirnames if d[0] not in ['.', '_']]
            for filename in filter(is_plugin_file.match, filenames):
                fpath = os.path.join(root, filename)
                yield fpath


def from_path(fpath, install_on_fail=False, **kwargs):
    logger.debug("Loading plugin from {}".format(fpath))
    if fpath.endswith('.py'):
        # We asume root is the dir of the file, and module is the name of the file
        root = os.path.dirname(fpath)
        module = os.path.basename(fpath)[:-3]
        for instance in _from_module_name(module=module, root=root, **kwargs):
            yield instance
    else:
        info = parse_plugin_info(fpath)
        yield from_info(info, install_on_fail=install_on_fail, **kwargs)


def from_folder(folders, loader=from_path, **kwargs):
    plugins = []
    for fpath in find_plugins(folders):
        for plugin in loader(fpath, **kwargs):
            plugins.append(plugin)
    return plugins


def from_info(info, root=None, install_on_fail=True, **kwargs):
    if any(x not in info for x in ('module', )):
        raise ValueError('Plugin info is not valid: {}'.format(info))
    module = info["module"]

    if not root and '_path' in info:
        root = os.path.dirname(info['_path'])

    fun = partial(one_from_module, module, root=root, info=info, **kwargs)
    try:
        return fun()
    except (ImportError, LookupError):
        install_deps(info)
        return fun()


def parse_plugin_info(fpath):
    logger.debug("Parsing plugin info: {}".format(fpath))
    with open(fpath, 'r') as f:
        info = yaml.load(f, Loader=yaml.FullLoader)
    info['_path'] = fpath
    return info


def from_module(module, **kwargs):

    if inspect.ismodule(module):
        res = _from_loaded_module(module, **kwargs)
    else:
        res = _from_module_name(module, **kwargs)
    for p in res:
        yield p


def one_from_module(module, root, info, **kwargs):
    if '@type' in info:
        cls = PluginMeta.from_type(info['@type'])
        return cls(info=info, **kwargs)
    instance = next(
        from_module(module=module, root=root, info=info, **kwargs), None)
    if not instance:
        raise Exception("No valid plugin for: {}".format(module))
    return instance


def _classes_in_module(module):
    for _, obj in inspect.getmembers(module):
        if inspect.isclass(obj) and inspect.getmodule(obj) == module:
            logger.debug(("Found plugin class:"
                          " {}@{}").format(obj, inspect.getmodule(obj)))
            yield obj


def _instances_in_module(module):
    for _, obj in inspect.getmembers(module):
        if isinstance(obj, Plugin) and inspect.getmodule(obj) == module:
            logger.debug(("Found plugin instance:"
                          " {}@{}").format(obj, inspect.getmodule(obj)))
            yield obj


def _from_module_name(module, root, info=None, **kwargs):
    module = load_module(module, root)
    for plugin in _from_loaded_module(
            module=module, root=root, info=info, **kwargs):
        yield plugin


def _from_loaded_module(module, info=None, **kwargs):
    for cls in _classes_in_module(module):
        yield cls(info=info, **kwargs)
    for instance in _instances_in_module(module):
        yield instance


cached_evs = {}


def evaluate(plugins, datasets, **kwargs):
    for plug in plugins:
        if not hasattr(plug, 'as_pipe'):
            raise models.Error('Plugin {} cannot be evaluated'.format(plug.name))

    if not isinstance(datasets, dict):
        datasets = gsitk_compat.prepare(datasets, download=True)

    tuples = list(product(plugins, datasets))
    missing = []
    for (p, d) in tuples:
        if (p.id, d) not in cached_evs:
            pipe = p.as_pipe()
            missing.append(gsitk_compat.EvalPipeline(pipe, d))
    if missing:
        ev = gsitk_compat.Eval(tuples=missing, datasets=datasets)
        ev.evaluate()
        results = ev.results
        new_ev = evaluations_to_JSONLD(results, **kwargs)
        for ev in new_ev:
            dataset = ev.evaluatesOn
            model = ev.evaluates
            cached_evs[(model, dataset)] = ev
    evaluations = []
    logger.debug('%s. Cached evs: %s', tuples, cached_evs)
    for (p, d) in tuples:
        logger.debug('Adding %s, %s', d, p)
        evaluations.append(cached_evs[(p.id, d)])
    return evaluations


def evaluations_to_JSONLD(results, flatten=False):
    '''
    Map the evaluation results to a JSONLD scheme
    '''

    evaluations = list()
    metric_names = ['accuracy', 'precision_macro', 'recall_macro',
                    'f1_macro', 'f1_weighted', 'f1_micro', 'f1_macro']

    for index, row in results.fillna('Not Available').iterrows():
        evaluation = models.Evaluation()
        if row.get('CV', True):
            evaluation['@type'] = ['StaticCV', 'Evaluation']
        evaluation.evaluatesOn = row['Dataset']
        evaluation.evaluates = row['Model'].rstrip('__' + row['Dataset'])
        i = 0
        if flatten:
            metric = models.Metric()
            for name in metric_names:
                metric[name] = row[name]
            evaluation.metrics.append(metric)
        else:
            # We should probably discontinue this representation
            for name in metric_names:
                metric = models.Metric()
                metric['@type'] = name.capitalize()
                metric.value = row[name]
                evaluation.metrics.append(metric)
                i += 1
        evaluations.append(evaluation)
    return evaluations


class ScikitWrapper(BaseEstimator, TransformerMixin):
    def __init__(self, plugin=None):
        self.plugin = plugin

    def fit(self, X=None, y=None):
        if self.plugin is not None and not self.plugin.is_activated:
            self.plugin.activate()
        return self

    def transform(self, X):
        return self.plugin.evaluate_func(X, None)

    def predict(self, X):
        return self.transform(X)

    def fit_transform(self, X, y):
        self.fit(X, y)
        return self.transform(X)