Add sklearn

* Add sklearn example * Fix test_case * Add SenpyClientUse docs a.k.a. The wise men edition
2026-03-02 08:48:17 +00:00 · 2018-01-07 23:01:07 +01:00
parent 3e2b8baeb2
commit 1087692de2
7 changed files with 220 additions and 3 deletions
--- a/docs/SenpyClientUse.rst
+++ b/docs/SenpyClientUse.rst
@@ -0,0 +1,106 @@
 Client
 ======
 Demo Endpoint
 -------------
 Import Client and send a request
 .. code:: python
    from senpy.client import Client
    c = Client('http://latest.senpy.cluster.gsi.dit.upm.es/api')
    r = c.analyse('I like Pizza', algorithm='sentiment140')
 Print response
 .. code:: python
    for entry in r.entries:
          print('{} -> {}'.format(entry['text'], entry['sentiments'][0]['marl:hasPolarity']))
 .. parsed-literal::
    I like Pizza -> marl:Positive
 Obtain a list of available plugins
 .. code:: python
    for plugin in c.request('/plugins')['plugins']:
        print(plugin['name'])
 .. parsed-literal::
    emoRand
    rand
    sentiment140
 Local Endpoint
 --------------
 Run a Docker container with the Senpy image and the default plugins
 .. code::
    docker run -ti --name 'SenpyEndpoint' -d -p 5000:5000 gsiupm/senpy:0.8.6 --host 0.0.0.0 --default-plugins
 .. parsed-literal::
    a0157cd98057072388bfebeed78a830da7cf0a796f4f1a3fd9188f9f2e5fe562
 Create a client for the local endpoint and send a request to localhost
 .. code:: python
    c_local = Client('http://127.0.0.1:5000/api')
    r = c_local.analyse('Hello world', algorithm='sentiment140')
 Print response
 .. code:: python
    for entry in r.entries:
          print('{} -> {}'.format(entry['text'], entry['sentiments'][0]['marl:hasPolarity']))
 .. parsed-literal::
    Hello world -> marl:Neutral
 Obtain a list of available plugins deployed locally
 .. code:: python
    c_local.plugins().keys()
 .. parsed-literal::
    rand
    sentiment140
    emoRand
 Stop the docker container
 .. code::
    docker stop SenpyEndpoint
    docker rm SenpyEndpoint
 .. parsed-literal::
    SenpyEndpoint
    SenpyEndpoint
--- a/example-plugins/dummy_required_plugin.py
+++ b/example-plugins/dummy_required_plugin.py
@@ -22,7 +22,7 @@ class DummyRequired(AnalysisPlugin):
        'entry': {
            'nif:isString': 'Hello',
        },
-        'expected': None
+        'should_fail': True
    }, {
        'entry': {
            'nif:isString': 'Hello',
--- a/example-plugins/sklearn/mydata.py
+++ b/example-plugins/sklearn/mydata.py
@@ -0,0 +1,33 @@
 '''
 Build a small synthetic sentiment dataset.

 Every message is three random vocabulary words followed by an emoticon.
 Messages with a happy emoticon are labelled positive (1)
 Messages with a sad emoticon are labelled negative (-1)
 '''
 import random

 vocabulary = ['hello', 'world', 'senpy', 'cool', 'goodbye', 'random', 'text']

 # Label -> emoticons that mark a message with that label.
 emojimap = {
    1: [':)', ],
    -1: [':(', ]
 }

 # Rows of [message, label]; 1000 messages are generated for every label.
 dataset = []
 for tag in emojimap:
    emoticons = emojimap[tag]
    for _ in range(1000):
        # Draw the three words first and the emoticon last.
        words = [random.choice(vocabulary) for _ in range(3)]
        emoticon = random.choice(emoticons)
        dataset.append(['{} {}'.format(' '.join(words), emoticon), tag])

 # Parallel lists with the messages and their labels.
 text = [row[0] for row in dataset]
 labels = [row[1] for row in dataset]
--- a/example-plugins/sklearn/mypipeline.py
+++ b/example-plugins/sklearn/mypipeline.py
@@ -0,0 +1,30 @@
 '''
 Train a toy sentiment classifier on the dummy dataset from ``mydata``.

 The ``pipeline`` object (token counts fed into a multinomial naive Bayes
 classifier) is fitted when this module is imported, so other modules can
 import it ready to use. Running the module as a script also prints the
 predictions for two sample sentences.
 '''
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.pipeline import Pipeline

 from mydata import text, labels

 # Hold out 12% of the samples; the fixed seed makes the split reproducible.
 X_train, X_test, y_train, y_test = train_test_split(
    text, labels, test_size=0.12, random_state=42)

 # Tokenize on whitespace only so that emoticons such as ':)' are kept as
 # tokens (the default token pattern only matches word characters).
 count_vec = CountVectorizer(tokenizer=lambda x: x.split())
 clf3 = MultinomialNB()
 pipeline = Pipeline([('cv', count_vec),
                     ('clf', clf3)])
 pipeline.fit(X_train, y_train)

 # get_feature_names() was removed in scikit-learn 1.2. Use its replacement
 # when it exists and fall back to the old method on earlier releases.
 if hasattr(count_vec, 'get_feature_names_out'):
    feature_names = count_vec.get_feature_names_out().tolist()
 else:
    feature_names = count_vec.get_feature_names()
 print('Feature names: {}'.format(feature_names))
 print('Class count: {}'.format(clf3.class_count_))

 if __name__ == '__main__':
    print('--Results--')
    # Each sample ends in an emoticon that appears in the training data;
    # tokens the vectorizer has never seen do not contribute to the result.
    tests = [
        (['The sentiment for senpy should be positive :)', ], 1),
        (['The sentiment for anything else should be negative :(', ], -1)
    ]
    for features, expected in tests:
        result = pipeline.predict(features)
        print('Input: {}\nExpected: {}\nGot: {}'.format(features[0], expected, result))
--- a/example-plugins/sklearn/pipeline_plugin.py
+++ b/example-plugins/sklearn/pipeline_plugin.py
@@ -0,0 +1,37 @@
 from senpy import SentimentBox, MappingMixin, easy_test
 from mypipeline import pipeline
 class PipelineSentiment(MappingMixin, SentimentBox):
    '''
    This is a pipeline plugin that wraps a classifier defined in another module
    (mypipeline).
    '''
    author = '@balkian'
    version = 0.1

    # Lower and upper bound of the polarity values.
    minPolarityValue = -1
    maxPolarityValue = 1

    # Label predicted by the pipeline -> polarity.
    mappings = {1: 'marl:Positive', -1: 'marl:Negative'}

    def box(self, input, *args, **kwargs):
        '''Return the label that the pipeline predicts for ``input``.'''
        # The pipeline works on a batch, so wrap the single text in a list
        # and unwrap the single prediction afterwards.
        predictions = pipeline.predict([input])
        return predictions[0]

    # Each case pairs an input text with the polarity expected for it.
    test_cases = [
        {'input': 'The sentiment for senpy should be positive :)',
         'polarity': 'marl:Positive'},
        {'input': 'The sentiment for senpy should be negative :(',
         'polarity': 'marl:Negative'},
    ]


 if __name__ == '__main__':
    easy_test()
--- a/senpy/plugins/init.py
+++ b/senpy/plugins/init.py
@@ -114,6 +114,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
        for case in test_cases:
            try:
                self.test_case(case)
                logger.debug('Test case passed:\n{}'.format(pprint.pformat(case)))
            except Exception as ex:
                logger.warn('Test case failed:\n{}'.format(pprint.pformat(case)))
                raise
@@ -121,7 +122,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
    def test_case(self, case):
        entry = models.Entry(case['entry'])
        given_parameters = case.get('params', case.get('parameters', {}))
-        expected = case['expected']
+        expected = case.get('expected', None)
        should_fail = case.get('should_fail', False)
        try:
            params = api.parse_params(given_parameters, self.extra_params)
@@ -135,6 +136,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
        except models.Error:
            if should_fail:
                return
            raise
        assert not should_fail
    def open(self, fpath, *args, **kwargs):
@@ -213,8 +215,8 @@ class SentimentPlugin(Analysis, models.SentimentPlugin):
    maxPolarityValue = 1
    def test_case(self, case):
        expected = case.get('expected', {})
        if 'polarity' in case:
            expected = case.get('expected', {})
            s = models.Sentiment(_auto_id=False)
            s.marl__hasPolarity = case['polarity']
            if 'sentiments' not in expected:
@@ -320,6 +322,14 @@ class EmotionBox(TextBox, EmotionPlugin):
 class MappingMixin(object):
    @property
    def mappings(self):
        '''Return the mapping table stored in ``self._mappings``.'''
        return self._mappings
    @mappings.setter
    def mappings(self, value):
        '''Store ``value`` as the mapping table in ``self._mappings``.'''
        self._mappings = value
    def output(self, output, entry, params):
        output = self.mappings.get(output,
                                   self.mappings.get('default', output))
--- a/senpy/utils.py
+++ b/senpy/utils.py
@@ -76,6 +76,7 @@ def easy_test(plugin_list=None):
        plugin_list = plugins.from_module(__main__)
    for plug in plugin_list:
        plug.test()
        logger.info('The tests for {} passed!'.format(plug.name))
    logger.info('All tests passed!')