Regarding #4: English-language bug in sentiment-basic.

2025-07-01 10:32:21 +00:00 · 2017-03-07 13:58:46 +01:00 · 2017-03-07 13:58:46 +01:00 · 5427b02a1a
commit 5427b02a1a
parent df2dc17ac0
2 changed files with 17 additions and 6 deletions
--- a/sentiment-basic/sentiment-basic.py
+++ b/sentiment-basic/sentiment-basic.py
@ -13,6 +13,8 @@ from os import path
 from senpy.plugins import SentimentPlugin, SenpyPlugin
 from senpy.models import Results, Entry, Sentiment

+logger = logging.getLogger(__name__)
+

 class SentiTextPlugin(SentimentPlugin):

@ -68,20 +70,23 @@ class SentiTextPlugin(SentimentPlugin):

    def analyse_entry(self, entry, params):

+        language = params.get("language","eng")
        text = entry.get("text", None)
        tokens = self._tokenize(text)
        tokens = self._pos(tokens)
        
-        
        for i in tokens:
            tokens[i]['lemmas'] = {}
            for w in tokens[i]['tokens']:
-                lemmas = wn.lemmas(w[0], lang='spa')
+                lemmas = wn.lemmas(w[0], lang=language)
                if len(lemmas) == 0:
                    continue
                tokens[i]['lemmas'][w[0]] = lemmas
-        
-        trans = TextBlob(unicode(text)).translate(from_lang='es',to='en')
+
+        if language == "eng":
+            trans = TextBlob(unicode(text))
+        else:
+            trans = TextBlob(unicode(text)).translate(from_lang=TextBlob(unicode(text)).detect_language(),to='en')
        useful_synsets = {}
        for s_i, t_s in enumerate(trans.sentences):
            useful_synsets[s_i] = {}
@ -112,8 +117,6 @@ class SentiTextPlugin(SentimentPlugin):
                            scores[i][word] = score
                            break

-
-        lang = params.get("language", "en")
        p = params.get("prefix", None)

        for i in scores:
--- a/sentiment-basic/sentiment-basic.senpy
+++ b/sentiment-basic/sentiment-basic.senpy
@ -9,6 +9,14 @@
        "scipy>=0.14.0",
        "textblob"
    ],
+    "extra_params": {
+        "language": {
+            "aliases": ["language", "l"],
+            "required": true,
+            "options": ["eng","spa", "ita", "fra", "auto"],
+            "default": "auto"
+        },
+    },
    "sentiword_path": "SentiWordNet_3.0.txt",
    "pos_path": "unigram_spanish.pickle"
 }