Add sklearn

* Add sklearn example * Fix test_case * Add SenpyClientUse docs a.k.a. The wise men edition
2026-03-02 08:48:17 +00:00 · 2018-01-07 23:01:07 +01:00
parent 3e2b8baeb2
commit 1087692de2
7 changed files with 220 additions and 3 deletions
--- a/example-plugins/dummy_required_plugin.py
+++ b/example-plugins/dummy_required_plugin.py
@@ -22,7 +22,7 @@ class DummyRequired(AnalysisPlugin):
        'entry': {
            'nif:isString': 'Hello',
        },
-        'expected': None
+        'should_fail': True
    }, {
        'entry': {
            'nif:isString': 'Hello',
--- a/example-plugins/sklearn/mydata.py
+++ b/example-plugins/sklearn/mydata.py
@@ -0,0 +1,33 @@
+'''
+Build a small synthetic sentiment dataset.
+
+Every message is three random words from ``vocabulary`` followed by one
+emoticon, separated by single spaces.  The emoticon alone decides the label
+(see ``emojimap``): 1 for a happy emoticon, -1 for a sad one.
+No random seed is set, so the messages differ between runs.
+'''
+import random
+
+vocabulary = ['hello', 'world', 'senpy', 'cool', 'goodbye', 'random', 'text']
+
+# label -> emoticons that mark a message with that label
+emojimap = {
+    1: [':)', ],
+    -1: [':(', ]
+}
+
+# [message, label] pairs, 1000 per label
+dataset = []
+for tag, values in emojimap.items():
+    for _ in range(1000):
+        # words are drawn first, the emoticon last
+        words = [random.choice(vocabulary) for _ in range(3)]
+        words.append(random.choice(values))
+        dataset.append([' '.join(words), tag])
+
+# Parallel lists: text[k] is the message whose label is labels[k].
+text = []
+labels = []
+for msg, tag in dataset:
+    text.append(msg)
+    labels.append(tag)
--- a/example-plugins/sklearn/mypipeline.py
+++ b/example-plugins/sklearn/mypipeline.py
@@ -0,0 +1,30 @@
+'''Train a bag-of-words Naive Bayes sentiment pipeline on the dummy dataset.'''
+from sklearn.pipeline import Pipeline
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import MultinomialNB
+
+from mydata import text, labels
+
+# Hold out 12% of the samples; random_state makes the split itself repeatable.
+X_train, X_test, y_train, y_test = train_test_split(
+    text, labels, test_size=0.12, random_state=42)
+
+# Whitespace tokenizer: emoticons such as ':)' stay whole tokens.
+count_vec = CountVectorizer(tokenizer=lambda x: x.split())
+clf3 = MultinomialNB()
+pipeline = Pipeline([('cv', count_vec), ('clf', clf3)])
+
+# Fitting happens at import time, so importers get a trained pipeline.
+pipeline.fit(X_train, y_train)
+# get_feature_names() was removed in scikit-learn 1.2; prefer its successor.
+get_names = (getattr(count_vec, 'get_feature_names_out', None)
+             or count_vec.get_feature_names)
+print('Feature names: {}'.format(list(get_names())))
+print('Class count: {}'.format(clf3.class_count_))
+
+if __name__ == '__main__':
+    print('--Results--')
+    for msg, expected in [('The sentiment for senpy should be positive :)', 1),
+                          ('The sentiment for anything else should be negative :(', -1)]:
+        print('Input: {}\nExpected: {}\nGot: {}'.format(msg, expected, pipeline.predict([msg])))
--- a/example-plugins/sklearn/pipeline_plugin.py
+++ b/example-plugins/sklearn/pipeline_plugin.py
@@ -0,0 +1,37 @@
+from senpy import SentimentBox, MappingMixin, easy_test
+
+from mypipeline import pipeline
+
+
+class PipelineSentiment(MappingMixin, SentimentBox):
+    '''
+    This is a pipeline plugin that wraps a classifier defined in another module
+    (mypipeline).
+    '''
+    author = '@balkian'
+    version = 0.1
+    maxPolarityValue = 1  # polarity range; same values as the keys of `mappings`
+    minPolarityValue = -1
+    # Label returned by box() -> polarity term; presumably applied by MappingMixin (confirm).
+    mappings = {
+        1: 'marl:Positive',
+        -1: 'marl:Negative'
+    }
+
+    def box(self, input, *args, **kwargs):  # extra args are accepted but unused
+        return pipeline.predict([input, ])[0]  # one-item batch in, its single prediction out
+
+    test_cases = [  # each case: an input text and the polarity expected for it
+        {
+            'input': 'The sentiment for senpy should be positive :)',
+            'polarity': 'marl:Positive'
+        },
+        {
+            'input': 'The sentiment for senpy should be negative :(',
+            'polarity': 'marl:Negative'
+        }
+    ]
+
+
+if __name__ == '__main__':
+    easy_test()  # senpy helper; presumably runs the plugin's test_cases (confirm)