From 241f478a68174808b1bdf155c6396757ae370172 Mon Sep 17 00:00:00 2001 From: militarpancho Date: Wed, 8 Mar 2017 11:37:14 +0100 Subject: [PATCH] Added suffixes dictionary for wordnet lemmatizer. This closes #4 --- sentiment-basic/sentiment-basic.py | 10 +++++----- sentiment-basic/sentiment-basic.senpy | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sentiment-basic/sentiment-basic.py b/sentiment-basic/sentiment-basic.py index 6eb9892..eea2d69 100644 --- a/sentiment-basic/sentiment-basic.py +++ b/sentiment-basic/sentiment-basic.py @@ -70,23 +70,23 @@ class SentiTextPlugin(SentimentPlugin): def analyse_entry(self, entry, params): - language = params.get("language","eng") + language = params.get("language") text = entry.get("text", None) tokens = self._tokenize(text) tokens = self._pos(tokens) - + sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'} for i in tokens: tokens[i]['lemmas'] = {} for w in tokens[i]['tokens']: - lemmas = wn.lemmas(w[0], lang=language) + lemmas = wn.lemmas(w[0], lang=sufixes[language]) if len(lemmas) == 0: continue tokens[i]['lemmas'][w[0]] = lemmas - if language == "eng": + if language == "en": trans = TextBlob(unicode(text)) else: - trans = TextBlob(unicode(text)).translate(from_lang=TextBlob(unicode(text)).detect_language(),to='en') + trans = TextBlob(unicode(text)).translate(from_lang=language,to='en') useful_synsets = {} for s_i, t_s in enumerate(trans.sentences): useful_synsets[s_i] = {} diff --git a/sentiment-basic/sentiment-basic.senpy b/sentiment-basic/sentiment-basic.senpy index c152739..f6d8a7d 100644 --- a/sentiment-basic/sentiment-basic.senpy +++ b/sentiment-basic/sentiment-basic.senpy @@ -13,7 +13,7 @@ "language": { "aliases": ["language", "l"], "required": true, - "options": ["eng","spa", "ita", "fra", "auto"], + "options": ["en","es", "it", "fr", "auto"], "default": "auto" }, },