mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 16:12:29 +00:00
Regarding #4: English-language bug in the basic sentiment plugin.
This commit is contained in:
parent
df2dc17ac0
commit
5427b02a1a
@ -13,6 +13,8 @@ from os import path
|
|||||||
from senpy.plugins import SentimentPlugin, SenpyPlugin
|
from senpy.plugins import SentimentPlugin, SenpyPlugin
|
||||||
from senpy.models import Results, Entry, Sentiment
|
from senpy.models import Results, Entry, Sentiment
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class SentiTextPlugin(SentimentPlugin):
|
class SentiTextPlugin(SentimentPlugin):
|
||||||
|
|
||||||
@ -68,20 +70,23 @@ class SentiTextPlugin(SentimentPlugin):
|
|||||||
|
|
||||||
def analyse_entry(self, entry, params):
|
def analyse_entry(self, entry, params):
|
||||||
|
|
||||||
|
language = params.get("language","eng")
|
||||||
text = entry.get("text", None)
|
text = entry.get("text", None)
|
||||||
tokens = self._tokenize(text)
|
tokens = self._tokenize(text)
|
||||||
tokens = self._pos(tokens)
|
tokens = self._pos(tokens)
|
||||||
|
|
||||||
|
|
||||||
for i in tokens:
|
for i in tokens:
|
||||||
tokens[i]['lemmas'] = {}
|
tokens[i]['lemmas'] = {}
|
||||||
for w in tokens[i]['tokens']:
|
for w in tokens[i]['tokens']:
|
||||||
lemmas = wn.lemmas(w[0], lang='spa')
|
lemmas = wn.lemmas(w[0], lang=language)
|
||||||
if len(lemmas) == 0:
|
if len(lemmas) == 0:
|
||||||
continue
|
continue
|
||||||
tokens[i]['lemmas'][w[0]] = lemmas
|
tokens[i]['lemmas'][w[0]] = lemmas
|
||||||
|
|
||||||
trans = TextBlob(unicode(text)).translate(from_lang='es',to='en')
|
if language == "eng":
|
||||||
|
trans = TextBlob(unicode(text))
|
||||||
|
else:
|
||||||
|
trans = TextBlob(unicode(text)).translate(from_lang=TextBlob(unicode(text)).detect_language(),to='en')
|
||||||
useful_synsets = {}
|
useful_synsets = {}
|
||||||
for s_i, t_s in enumerate(trans.sentences):
|
for s_i, t_s in enumerate(trans.sentences):
|
||||||
useful_synsets[s_i] = {}
|
useful_synsets[s_i] = {}
|
||||||
@ -112,8 +117,6 @@ class SentiTextPlugin(SentimentPlugin):
|
|||||||
scores[i][word] = score
|
scores[i][word] = score
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
lang = params.get("language", "en")
|
|
||||||
p = params.get("prefix", None)
|
p = params.get("prefix", None)
|
||||||
|
|
||||||
for i in scores:
|
for i in scores:
|
||||||
|
@ -9,6 +9,14 @@
|
|||||||
"scipy>=0.14.0",
|
"scipy>=0.14.0",
|
||||||
"textblob"
|
"textblob"
|
||||||
],
|
],
|
||||||
|
"extra_params": {
|
||||||
|
"language": {
|
||||||
|
"aliases": ["language", "l"],
|
||||||
|
"required": true,
|
||||||
|
"options": ["eng","spa", "ita", "fra", "auto"],
|
||||||
|
"default": "auto"
|
||||||
|
},
|
||||||
|
},
|
||||||
"sentiword_path": "SentiWordNet_3.0.txt",
|
"sentiword_path": "SentiWordNet_3.0.txt",
|
||||||
"pos_path": "unigram_spanish.pickle"
|
"pos_path": "unigram_spanish.pickle"
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user