From b671ff51f9815861c28e6a90a1fa0ada9a46d91e Mon Sep 17 00:00:00 2001 From: militarpancho Date: Fri, 14 Jul 2017 11:13:59 +0200 Subject: [PATCH] Add support for py3 in emotion-wnaffect Normalize polarity values in sentiment-basic and sentiment-140 --- emotion-wnaffect/emotion-wnaffect.py | 174 +++++++++++++++--------- emotion-wnaffect/emotion-wnaffect.senpy | 3 +- emotion-wnaffect/emotion.py | 4 +- sentiment-140/sentiment-140.py | 4 +- sentiment-140/sentiment-140.senpy | 2 +- sentiment-basic/sentiment-basic.py | 6 +- sentiment-basic/sentiment-basic.senpy | 4 +- 7 files changed, 119 insertions(+), 78 deletions(-) diff --git a/emotion-wnaffect/emotion-wnaffect.py b/emotion-wnaffect/emotion-wnaffect.py index 42cc12c..6256883 100644 --- a/emotion-wnaffect/emotion-wnaffect.py +++ b/emotion-wnaffect/emotion-wnaffect.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - from __future__ import division import re import nltk @@ -9,32 +7,34 @@ import string import xml.etree.ElementTree as ET from nltk.corpus import stopwords from nltk.corpus import WordNetCorpusReader +from nltk.stem import wordnet from emotion import Emotion as Emo -from pattern.en import parse -from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin +from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin from senpy.models import Results, EmotionSet, Entry, Emotion class EmotionTextPlugin(EmotionPlugin, ShelfMixin): - - def _load_synsets(self, synsets_path): """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" tree = ET.parse(synsets_path) root = tree.getroot() - pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" } + pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"} synsets = {} for pos in ["noun", "adj", "verb", "adv"]: tag = pos_map[pos] synsets[tag] = {} - for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)): - offset = int(elem.get("id")[2:]) + for elem in root.findall( + ".//{0}-syn-list//{0}-syn".format(pos, pos)): + offset = int(elem.get("id")[2:]) if not offset: continue if elem.get("categ"): - synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None + synsets[tag][offset] = Emo.emotions[elem.get( + "categ")] if elem.get( + "categ") in Emo.emotions else None elif elem.get("noun-id"): - synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])] + synsets[tag][offset] = synsets[pos_map["noun"]][int( + elem.get("noun-id")[2:])] return synsets def _load_emotions(self, hierarchy_path): @@ -50,45 +50,59 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin): Emo.emotions[name] = Emo(name, elem.get("isa")) def activate(self, *args, **kwargs): - - nltk.download('stopwords') + + nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet']) self._stopwords = stopwords.words('english') - #local_path=os.path.dirname(os.path.abspath(__file__)) - self._categories = {'anger': ['general-dislike',], - 'fear': ['negative-fear',], - 'disgust': ['shame',], - 'joy': ['gratitude','affective','enthusiasm','love','joy','liking'], - 'sadness': ['ingrattitude','daze','humility','compassion','despair','anxiety','sadness']} + self._wnlemma = wordnet.WordNetLemmatizer() + self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'} + local_path = os.path.dirname(os.path.abspath(__file__)) + self._categories = { + 'anger': [ + 'general-dislike', + ], + 'fear': [ + 'negative-fear', + ], + 'disgust': [ + 'shame', + ], + 'joy': + ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'], + 'sadness': [ + 'ingrattitude', 'daze', 'humility', 'compassion', 'despair', + 'anxiety', 'sadness' + ] + } - self._wnaffect_mappings = {'anger': 'anger', - 'fear': 'negative-fear', - 'disgust': 'disgust', - 'joy': 'joy', - 'sadness': 'sadness'} + self._wnaffect_mappings = { + 'anger': 'anger', + 'fear': 'negative-fear', + 'disgust': 'disgust', + 'joy': 'joy', + 'sadness': 'sadness' + } + self._load_emotions(local_path + self.hierarchy_path) - self._load_emotions(self.hierarchy_path) - if 'total_synsets' not in self.sh: - total_synsets = self._load_synsets(self.synsets_path) + total_synsets = self._load_synsets(local_path + self.synsets_path) self.sh['total_synsets'] = total_synsets - + self._total_synsets = self.sh['total_synsets'] - - if 'wn16' not in self.sh: - self._wn16_path = self.wn16_path - wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(self._wn16_path)), nltk.data.find(self._wn16_path)) - self.sh['wn16'] = wn16 - - self._wn16 = self.sh['wn16'] + + self._wn16_path = self.wn16_path + self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path)) + def deactivate(self, *args, **kwargs): self.save() def _my_preprocessor(self, text): - regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') - regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') + regHttp = re.compile( + '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') + regHttps = re.compile( + '(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*') text = re.sub(regHttp, '', text) text = re.sub(regAt, '', text) @@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin): unigrams_lemmas = [] pos_tagged = [] unigrams_words = [] - sentences = parse(text,lemmata=True).split() - for sentence in sentences: - for token in sentence: - if token[0].lower() not in self._stopwords: - unigrams_words.append(token[0].lower()) - unigrams_lemmas.append(token[4]) - pos_tagged.append(token[1]) + tokens = text.split() + for token in nltk.pos_tag(tokens): + unigrams_words.append(token[0]) + pos_tagged.append(token[1]) + if token[1][0] in self._syntactics.keys(): + unigrams_lemmas.append( + self._wnlemma.lemmatize(token[0], self._syntactics[token[1] + [0]])) + else: + unigrams_lemmas.append(token[0]) - return unigrams_words,unigrams_lemmas,pos_tagged + return unigrams_words, unigrams_lemmas, pos_tagged def _find_ngrams(self, input_list, n): return zip(*[input_list[i:] for i in range(n)]) def _clean_pos(self, pos_tagged): - pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', - 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'} + pos_tags = { + 'NN': 'NN', + 'NNP': 'NN', + 'NNP-LOC': 'NN', + 'NNS': 'NN', + 'JJ': 'JJ', + 'JJR': 'JJ', + 'JJS': 'JJ', + 'RB': 'RB', + 'RBR': 'RB', + 'RBS': 'RB', + 'VB': 'VB', + 'VBD': 'VB', + 'VGB': 'VB', + 'VBN': 'VB', + 'VBP': 'VB', + 'VBZ': 'VB' + } for i in range(len(pos_tagged)): if pos_tagged[i] in pos_tags: - pos_tagged[i]=pos_tags[pos_tagged[i]] + pos_tagged[i] = pos_tags[pos_tagged[i]] return pos_tagged - + def _extract_features(self, text): - feature_set={k:0 for k in self._categories} - ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text) - matches=0 - pos_tagged=self._clean_pos(pos_tagged) + feature_set = {k: 0 for k in self._categories} + ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text) + matches = 0 + pos_tagged = self._clean_pos(pos_tagged) - tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV} + tag_wn = { + 'NN': self._wn16.NOUN, + 'JJ': self._wn16.ADJ, + 'VB': self._wn16.VERB, + 'RB': self._wn16.ADV + } for i in range(len(pos_tagged)): if pos_tagged[i] in tag_wn: - synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]]) + synsets = self._wn16.synsets(ngrams_words[i], + tag_wn[pos_tagged[i]]) if synsets: offset = synsets[0].offset() if offset in self._total_synsets[pos_tagged[i]]: if self._total_synsets[pos_tagged[i]][offset] is None: continue else: - emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name - matches+=1 + emotion = self._total_synsets[pos_tagged[i]][ + offset].get_level(5).name + matches += 1 for i in self._categories: if emotion in self._categories[i]: - feature_set[i]+=1 + feature_set[i] += 1 if matches == 0: - matches=1 + matches = 1 for i in feature_set: - feature_set[i] = (feature_set[i]/matches)*100 + feature_set[i] = (feature_set[i] / matches) * 100 return feature_set @@ -166,19 +206,19 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin): text_input = entry.get("text", None) - text=self._my_preprocessor(text_input) + text = self._my_preprocessor(text_input) - feature_text=self._extract_features(text) - - response = Results() + feature_text = self._extract_features(text) emotionSet = EmotionSet(id="Emotions0") emotions = emotionSet.onyx__hasEmotion for i in feature_text: - emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i], - onyx__hasEmotionIntensity=feature_text[i])) + emotions.append( + Emotion( + onyx__hasEmotionCategory=self._wnaffect_mappings[i], + onyx__hasEmotionIntensity=feature_text[i])) entry.emotions = [emotionSet] - yield entry \ No newline at end of file + yield entry diff --git a/emotion-wnaffect/emotion-wnaffect.senpy b/emotion-wnaffect/emotion-wnaffect.senpy index e526f8f..0da2b88 100644 --- a/emotion-wnaffect/emotion-wnaffect.senpy +++ b/emotion-wnaffect/emotion-wnaffect.senpy @@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6 requirements: - nltk>=3.0.5 - lxml>=3.4.2 -- pattern -async: false \ No newline at end of file +async: false diff --git a/emotion-wnaffect/emotion.py b/emotion-wnaffect/emotion.py index 31a4534..f1635fd 100644 --- a/emotion-wnaffect/emotion.py +++ b/emotion-wnaffect/emotion.py @@ -1,6 +1,4 @@ -# coding: utf-8 - """ Clement Michard (c) 2015 """ @@ -85,7 +83,7 @@ class Emotion: end_shape = '┐' else: end_shape = '' - print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape) + print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)) for leaf in down: next_last = 'down' if down.index(leaf) is len(down) - 1 else '' next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name)) diff --git a/sentiment-140/sentiment-140.py b/sentiment-140/sentiment-140.py index dc345cb..b2d9259 100644 --- a/sentiment-140/sentiment-140.py +++ b/sentiment-140/sentiment-140.py @@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin): polarity_value = self.maxPolarityValue*int(res.json()["data"][0] ["polarity"]) * 0.25 polarity = "marl:Neutral" - neutral_value = self.maxPolarityValue / 2.0 + neutral_value = 0 if polarity_value > neutral_value: polarity = "marl:Positive" elif polarity_value < neutral_value: @@ -33,4 +33,4 @@ class Sentiment140Plugin(SentimentPlugin): marl__polarityValue=polarity_value) entry.sentiments.append(sentiment) - yield entry \ No newline at end of file + yield entry diff --git a/sentiment-140/sentiment-140.senpy b/sentiment-140/sentiment-140.senpy index 3766c98..b8e01be 100644 --- a/sentiment-140/sentiment-140.senpy +++ b/sentiment-140/sentiment-140.senpy @@ -14,5 +14,5 @@ }, "requirements": {}, "maxPolarityValue": "1", - "minPolarityValue": "0" + "minPolarityValue": "-1" } diff --git a/sentiment-basic/sentiment-basic.py b/sentiment-basic/sentiment-basic.py index ccaea53..9dbe4a9 100644 --- a/sentiment-basic/sentiment-basic.py +++ b/sentiment-basic/sentiment-basic.py @@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin): if n_pos == 0 and n_neg == 0: g_score = 0.5 polarity = 'marl:Neutral' + polarity_value = 0 if g_score > 0.5: polarity = 'marl:Positive' + polarity_value = 1 elif g_score < 0.5: polarity = 'marl:Negative' - + polarity_value = -1 opinion = Sentiment(id="Opinion0"+'_'+str(i), marl__hasPolarity=polarity, - marL__polarityValue=float("{0:.2f}".format(g_score))) + marl__polarityValue=polarity_value) entry.sentiments.append(opinion) diff --git a/sentiment-basic/sentiment-basic.senpy b/sentiment-basic/sentiment-basic.senpy index f6d8a7d..04fd0e9 100644 --- a/sentiment-basic/sentiment-basic.senpy +++ b/sentiment-basic/sentiment-basic.senpy @@ -18,5 +18,7 @@ }, }, "sentiword_path": "SentiWordNet_3.0.txt", - "pos_path": "unigram_spanish.pickle" + "pos_path": "unigram_spanish.pickle", + "maxPolarityValue": "1", + "minPolarityValue": "-1" }