From 1087692de2951da9debbe163523e00c8dd5c223c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Sun, 7 Jan 2018 23:01:07 +0100 Subject: [PATCH] Add sklearn * Add sklearn example * Fix test_case * Add SenpyClientUse docs a.k.a. The wise men edition --- docs/SenpyClientUse.rst | 106 +++++++++++++++++++++ example-plugins/dummy_required_plugin.py | 2 +- example-plugins/sklearn/mydata.py | 33 +++++++ example-plugins/sklearn/mypipeline.py | 30 ++++++ example-plugins/sklearn/pipeline_plugin.py | 37 +++++++ senpy/plugins/__init__.py | 14 ++- senpy/utils.py | 1 + 7 files changed, 220 insertions(+), 3 deletions(-) create mode 100644 docs/SenpyClientUse.rst create mode 100644 example-plugins/sklearn/mydata.py create mode 100644 example-plugins/sklearn/mypipeline.py create mode 100644 example-plugins/sklearn/pipeline_plugin.py diff --git a/docs/SenpyClientUse.rst b/docs/SenpyClientUse.rst new file mode 100644 index 0000000..892eb58 --- /dev/null +++ b/docs/SenpyClientUse.rst @@ -0,0 +1,106 @@ + +Client +====== + +Demo Endpoint +------------- + +Import Client and send a request + +.. code:: python + + from senpy.client import Client + + c = Client('http://latest.senpy.cluster.gsi.dit.upm.es/api') + r = c.analyse('I like Pizza', algorithm='sentiment140') + +Print response + +.. code:: python + + for entry in r.entries: + print('{} -> {}'.format(entry['text'], entry['sentiments'][0]['marl:hasPolarity'])) + + +.. parsed-literal:: + + I like Pizza -> marl:Positive + + +Obtain a list of available plugins + +.. code:: python + + for plugin in c.request('/plugins')['plugins']: + print(plugin['name']) + + +.. parsed-literal:: + + emoRand + rand + sentiment140 + + +Local Endpoint +-------------- + +Run a docker container with Senpy image and default plugins + +.. code:: + + docker run -ti --name 'SenpyEndpoint' -d -p 5000:5000 gsiupm/senpy:0.8.6 --host 0.0.0.0 --default-plugins + + +.. 
parsed-literal:: + + a0157cd98057072388bfebeed78a830da7cf0a796f4f1a3fd9188f9f2e5fe562 + + +Import client and send a request to localhost + +.. code:: python + + c_local = Client('http://127.0.0.1:5000/api') + r = c_local.analyse('Hello world', algorithm='sentiment140') + +Print response + +.. code:: python + + for entry in r.entries: + print('{} -> {}'.format(entry['text'], entry['sentiments'][0]['marl:hasPolarity'])) + + +.. parsed-literal:: + + Hello world -> marl:Neutral + + +Obtain a list of available plugins deployed locally + +.. code:: python + + c_local.plugins().keys() + + +.. parsed-literal:: + + rand + sentiment140 + emoRand + + +Stop the docker container + +.. code:: python + + !docker stop SenpyEndpoint + !docker rm SenpyEndpoint + + +.. parsed-literal:: + + SenpyEndpoint + SenpyEndpoint + diff --git a/example-plugins/dummy_required_plugin.py b/example-plugins/dummy_required_plugin.py index bc61d38..237d7ba 100644 --- a/example-plugins/dummy_required_plugin.py +++ b/example-plugins/dummy_required_plugin.py @@ -22,7 +22,7 @@ class DummyRequired(AnalysisPlugin): 'entry': { 'nif:isString': 'Hello', }, - 'expected': None + 'should_fail': True }, { 'entry': { 'nif:isString': 'Hello', diff --git a/example-plugins/sklearn/mydata.py b/example-plugins/sklearn/mydata.py new file mode 100644 index 0000000..7368842 --- /dev/null +++ b/example-plugins/sklearn/mydata.py @@ -0,0 +1,33 @@ +''' +Create a dummy dataset. 
+Messages with a happy emoticon are labelled positive +Messages with a sad emoticon are labelled negative +''' +import random + +dataset = [] + +vocabulary = ['hello', 'world', 'senpy', 'cool', 'goodbye', 'random', 'text'] + +emojimap = { + 1: [':)', ], + -1: [':(', ] +} + + +for tag, values in emojimap.items(): + for i in range(1000): + msg = '' + for j in range(3): + msg += random.choice(vocabulary) + msg += " " + msg += random.choice(values) + dataset.append([msg, tag]) + + +text = [] +labels = [] + +for i in dataset: + text.append(i[0]) + labels.append(i[1]) diff --git a/example-plugins/sklearn/mypipeline.py b/example-plugins/sklearn/mypipeline.py new file mode 100644 index 0000000..0394e1e --- /dev/null +++ b/example-plugins/sklearn/mypipeline.py @@ -0,0 +1,30 @@ +from sklearn.pipeline import Pipeline +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.model_selection import train_test_split + +from mydata import text, labels + +X_train, X_test, y_train, y_test = train_test_split(text, labels, test_size=0.12, random_state=42) + +from sklearn.naive_bayes import MultinomialNB + + +count_vec = CountVectorizer(tokenizer=lambda x: x.split()) +clf3 = MultinomialNB() +pipeline = Pipeline([('cv', count_vec), + ('clf', clf3)]) + +pipeline.fit(X_train, y_train) +print('Feature names: {}'.format(count_vec.get_feature_names())) +print('Class count: {}'.format(clf3.class_count_)) + + +if __name__ == '__main__': + print('--Results--') + tests = [ + (['The sentiment for senpy should be positive :)', ], 1), + (['The sentiment for anything else should be negative :(', ], -1) + ] + for features, expected in tests: + result = pipeline.predict(features) + print('Input: {}\nExpected: {}\nGot: {}'.format(features[0], expected, result)) diff --git a/example-plugins/sklearn/pipeline_plugin.py b/example-plugins/sklearn/pipeline_plugin.py new file mode 100644 index 0000000..1ac1f78 --- /dev/null +++ b/example-plugins/sklearn/pipeline_plugin.py @@ -0,0 +1,37 @@ 
+from senpy import SentimentBox, MappingMixin, easy_test + +from mypipeline import pipeline + + +class PipelineSentiment(MappingMixin, SentimentBox): + ''' + This is a pipeline plugin that wraps a classifier defined in another module + (mypipeline). + ''' + author = '@balkian' + version = 0.1 + maxPolarityValue = 1 + minPolarityValue = -1 + + mappings = { + 1: 'marl:Positive', + -1: 'marl:Negative' + } + + def box(self, input, *args, **kwargs): + return pipeline.predict([input, ])[0] + + test_cases = [ + { + 'input': 'The sentiment for senpy should be positive :)', + 'polarity': 'marl:Positive' + }, + { + 'input': 'The sentiment for senpy should be negative :(', + 'polarity': 'marl:Negative' + } + ] + + +if __name__ == '__main__': + easy_test() diff --git a/senpy/plugins/__init__.py b/senpy/plugins/__init__.py index 07b50e1..34d4289 100644 --- a/senpy/plugins/__init__.py +++ b/senpy/plugins/__init__.py @@ -114,6 +114,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): for case in test_cases: try: self.test_case(case) + logger.debug('Test case passed:\n{}'.format(pprint.pformat(case))) except Exception as ex: logger.warn('Test case failed:\n{}'.format(pprint.pformat(case))) raise @@ -121,7 +122,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): def test_case(self, case): entry = models.Entry(case['entry']) given_parameters = case.get('params', case.get('parameters', {})) - expected = case['expected'] + expected = case.get('expected', None) should_fail = case.get('should_fail', False) try: params = api.parse_params(given_parameters, self.extra_params) @@ -135,6 +136,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): except models.Error: if should_fail: return + raise assert not should_fail def open(self, fpath, *args, **kwargs): @@ -213,8 +215,8 @@ class SentimentPlugin(Analysis, models.SentimentPlugin): maxPolarityValue = 1 def test_case(self, case): - expected = case.get('expected', {}) if 'polarity' in case: + expected = 
case.get('expected', {}) s = models.Sentiment(_auto_id=False) s.marl__hasPolarity = case['polarity'] if 'sentiments' not in expected: @@ -320,6 +322,14 @@ class EmotionBox(TextBox, EmotionPlugin): class MappingMixin(object): + @property + def mappings(self): + return self._mappings + + @mappings.setter + def mappings(self, value): + self._mappings = value + def output(self, output, entry, params): output = self.mappings.get(output, self.mappings.get('default', output)) diff --git a/senpy/utils.py b/senpy/utils.py index 1e8c82a..85fb5c3 100644 --- a/senpy/utils.py +++ b/senpy/utils.py @@ -76,6 +76,7 @@ def easy_test(plugin_list=None): plugin_list = plugins.from_module(__main__) for plug in plugin_list: plug.test() + logger.info('The tests for {} passed!'.format(plug.name)) logger.info('All tests passed!')