Merged into monorepo

2026-03-07 11:08:16 +00:00 · 2018-06-14 19:38:08 +02:00
parent e51b659030
commit c52a894017
29 changed files with 406 additions and 493 deletions
--- a/sentiment-basic/.gitmodules
+++ b/sentiment-basic/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "data"]
-	path = data
-	url = ../data/sentiment-basic
--- a/sentiment-basic/sentiment-basic.py
+++ b/sentiment-basic/sentiment-basic.py
@@ -1,5 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
 import os
-import logging
 import string
 import nltk
 import pickle
@@ -13,24 +14,40 @@ from os import path
 from senpy.plugins import SentimentPlugin, SenpyPlugin
 from senpy.models import Results, Entry, Sentiment

-logger = logging.getLogger(__name__)

-
-class SentiTextPlugin(SentimentPlugin):
+class SentimentBasic(SentimentPlugin):
+    '''
+    Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.
+    '''
+    name = "sentiment-basic"
+    author = "github.com/nachtkatze"
+    version = "0.1.1"
+    extra_params = {
+        "language": {
+            "aliases": ["language", "l"],
+            "required": True,
+            "options": ["en","es", "it", "fr", "auto"],
+            "default": "auto"
+        }
+    }
+    sentiword_path = "SentiWordNet_3.0.txt"
+    pos_path = "unigram_spanish.pickle"
+    maxPolarityValue = 1
+    minPolarityValue = -1
+    nltk_resources = ['punkt','wordnet']

    def _load_swn(self):
-        self.swn_path = path.join(path.abspath(path.dirname(__file__)), self.sentiword_path)
+        self.swn_path = self.find_file(self.sentiword_path)
        swn = SentiWordNet(self.swn_path)
        return swn

    def _load_pos_tagger(self):
-        self.pos_path = path.join(path.abspath(path.dirname(__file__)), self.pos_path)
+        self.pos_path = self.find_file(self.pos_path)
        with open(self.pos_path, 'r') as f:
            tagger = pickle.load(f)
        return tagger

    def activate(self, *args, **kwargs):
-        nltk.download(['punkt','wordnet'])
        self._swn = self._load_swn()
        self._pos_tagger = self._load_pos_tagger()

@@ -54,11 +71,6 @@ class SentiTextPlugin(SentimentPlugin):
            tokens[i]['tokens'] = self._pos_tagger.tag(tokens[i]['tokens'])
        return tokens

-    # def _stopwords(sentences, lang='english'):
-    #     for i in sentences:
-    #         sentences[i]['tokens'] = [t for t in sentences[i]['tokens'] if t not in nltk.corpus.stopwords.words(lang)]
-    #     return sentences
-
    def _compare_synsets(self, synsets, tokens, i):
        for synset in synsets:
            for word in tokens[i]['lemmas']:
@@ -71,7 +83,7 @@ class SentiTextPlugin(SentimentPlugin):

    def analyse_entry(self, entry, params):
        language = params.get("language")
-        text = entry.get("text", None)
+        text = entry.text
        tokens = self._tokenize(text)
        tokens = self._pos(tokens)
        sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
@@ -130,19 +142,41 @@ class SentiTextPlugin(SentimentPlugin):
            except:
                if n_pos == 0 and n_neg == 0:
                    g_score = 0.5
-            polarity = 'marl:Neutral'
-            polarity_value = 0
-            if g_score > 0.5:
+            if g_score >= 0.5:
                polarity = 'marl:Positive'
                polarity_value = 1
            elif g_score < 0.5:
                polarity = 'marl:Negative'
                polarity_value = -1
+            else:
+                polarity = 'marl:Neutral'
+                polarity_value = 0
            opinion = Sentiment(id="Opinion0"+'_'+str(i),
                          marl__hasPolarity=polarity,
                          marl__polarityValue=polarity_value)

-
+            opinion.prov(self)
            entry.sentiments.append(opinion)

        yield entry
+
+    test_cases = [
+        {
+            'input': u'Odio ir al cine',
+            'params': {'language': 'es'},
+            'polarity': 'marl:Negative'
+
+        },
+        {
+            'input': u'El cielo está nublado',
+            'params': {'language': 'es'},
+            'polarity': 'marl:Positive'
+
+        },
+        {
+            'input': u'Esta tarta está muy buena',
+            'params': {'language': 'es'},
+            'polarity': 'marl:Negative'
+
+        }
+    ]
--- a/sentiment-basic/sentiment-basic.senpy
+++ b/sentiment-basic/sentiment-basic.senpy
@@ -1,24 +1,7 @@
-{
-    "name": "sentiment-basic",
-    "module": "sentiment-basic",
-    "description": "Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.",
-    "author": "github.com/nachtkatze",
-    "version": "0.1",
-    "requirements": [
-        "nltk>=3.0.5",
-        "scipy>=0.14.0",
-        "textblob"
-    ],
-    "extra_params": {
-        "language": {
-            "aliases": ["language", "l"],
-            "required": true,
-            "options": ["en","es", "it", "fr", "auto"],
-            "default": "auto"
-        },
-    },
-    "sentiword_path": "data/SentiWordNet_3.0.txt",
-    "pos_path": "data/unigram_spanish.pickle",
-    "maxPolarityValue": "1",
-    "minPolarityValue": "-1"
-}
+---
+module: sentiment-basic
+requirements:
+- nltk>=3.0.5
+- scipy>=0.14.0
+- textblob
+
--- a/sentiment-basic/sentiwn.py
+++ b/sentiment-basic/sentiwn.py
@@ -46,7 +46,7 @@ class SentiWordNet(object):
                pos,syn_set_id,pos_score,neg_score,syn_set_score,\
                gloss = fields
            except:
-                print "Found data without all details"
+                print("Found data without all details")
                pass

            if pos and syn_set_score:
@@ -67,4 +67,4 @@ class SentiWordNet(object):
                senti_scores.append({"pos":pos_val,"neg":neg_val,\
                "obj": 1.0 - (pos_val - neg_val),'synset':synset})

-        return senti_scores
+        return senti_scores
--- a/sentiment-basic/test.py
+++ b/sentiment-basic/test.py
@@ -1,42 +0,0 @@
-import os
-import logging
-logging.basicConfig()
-try:
-    import unittest.mock as mock
-except ImportError:
-    import mock
-from senpy.extensions import Senpy
-from flask import Flask
-import unittest
-
-class SentiTextTest(unittest.TestCase):
-
-    def setUp(self):
-        self.app = Flask("test_plugin")
-        self.dir = os.path.join(os.path.dirname(__file__))
-        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
-        self.senpy.init_app(self.app)
-
-    def tearDown(self):
-        self.senpy.deactivate_plugin("SentiText", sync=True)
-
-    def test_analyse(self):
-        plugin = self.senpy.plugins["SentiText"]
-        plugin.activate()
-
-        texts = {'Odio ir al cine' :  'marl:Neutral',
-                 'El cielo esta nublado' : 'marl:Positive',
-                 'Esta tarta esta muy buena' : 'marl:Neutral'}
-
-        for text in texts:
-            response = plugin.analyse(input=text)
-            sentimentSet = response.entries[0].sentiments[0]
-            print sentimentSet
-            expected = texts[text]
-            
-            assert sentimentSet['marl:hasPolarity'] == expected
-        
-        plugin.deactivate()
-
-if __name__ == '__main__':
-    unittest.main()