1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-10-22 11:18:23 +00:00

Regarding #4: fix the English-language bug in the basic sentiment plugin.

This commit is contained in:
militarpancho
2017-03-07 13:58:46 +01:00
parent df2dc17ac0
commit 5427b02a1a
2 changed files with 17 additions and 6 deletions

View File

@@ -13,6 +13,8 @@ from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment from senpy.models import Results, Entry, Sentiment
logger = logging.getLogger(__name__)
class SentiTextPlugin(SentimentPlugin): class SentiTextPlugin(SentimentPlugin):
@@ -68,20 +70,23 @@ class SentiTextPlugin(SentimentPlugin):
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
language = params.get("language","eng")
text = entry.get("text", None) text = entry.get("text", None)
tokens = self._tokenize(text) tokens = self._tokenize(text)
tokens = self._pos(tokens) tokens = self._pos(tokens)
for i in tokens: for i in tokens:
tokens[i]['lemmas'] = {} tokens[i]['lemmas'] = {}
for w in tokens[i]['tokens']: for w in tokens[i]['tokens']:
lemmas = wn.lemmas(w[0], lang='spa') lemmas = wn.lemmas(w[0], lang=language)
if len(lemmas) == 0: if len(lemmas) == 0:
continue continue
tokens[i]['lemmas'][w[0]] = lemmas tokens[i]['lemmas'][w[0]] = lemmas
trans = TextBlob(unicode(text)).translate(from_lang='es',to='en') if language == "eng":
trans = TextBlob(unicode(text))
else:
trans = TextBlob(unicode(text)).translate(from_lang=TextBlob(unicode(text)).detect_language(),to='en')
useful_synsets = {} useful_synsets = {}
for s_i, t_s in enumerate(trans.sentences): for s_i, t_s in enumerate(trans.sentences):
useful_synsets[s_i] = {} useful_synsets[s_i] = {}
@@ -112,8 +117,6 @@ class SentiTextPlugin(SentimentPlugin):
scores[i][word] = score scores[i][word] = score
break break
lang = params.get("language", "en")
p = params.get("prefix", None) p = params.get("prefix", None)
for i in scores: for i in scores:

View File

@@ -9,6 +9,14 @@
"scipy>=0.14.0", "scipy>=0.14.0",
"textblob" "textblob"
], ],
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["eng","spa", "ita", "fra", "auto"],
"default": "auto"
},
},
"sentiword_path": "SentiWordNet_3.0.txt", "sentiword_path": "SentiWordNet_3.0.txt",
"pos_path": "unigram_spanish.pickle" "pos_path": "unigram_spanish.pickle"
} }