1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-22 08:12:27 +00:00

Add support for py3 in emotion-wnaffect

Normalize polarity values in sentiment-basic and sentiment-140
This commit is contained in:
militarpancho 2017-07-14 11:13:59 +02:00
parent dee007eacf
commit b671ff51f9
7 changed files with 119 additions and 78 deletions

View File

@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import division from __future__ import division
import re import re
import nltk import nltk
@ -9,32 +7,34 @@ import string
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from nltk.corpus import stopwords from nltk.corpus import stopwords
from nltk.corpus import WordNetCorpusReader from nltk.corpus import WordNetCorpusReader
from nltk.stem import wordnet
from emotion import Emotion as Emo from emotion import Emotion as Emo
from pattern.en import parse from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin
from senpy.models import Results, EmotionSet, Entry, Emotion from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(EmotionPlugin, ShelfMixin): class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
def _load_synsets(self, synsets_path): def _load_synsets(self, synsets_path):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse(synsets_path) tree = ET.parse(synsets_path)
root = tree.getroot() root = tree.getroot()
pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" } pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
synsets = {} synsets = {}
for pos in ["noun", "adj", "verb", "adv"]: for pos in ["noun", "adj", "verb", "adv"]:
tag = pos_map[pos] tag = pos_map[pos]
synsets[tag] = {} synsets[tag] = {}
for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)): for elem in root.findall(
offset = int(elem.get("id")[2:]) ".//{0}-syn-list//{0}-syn".format(pos, pos)):
offset = int(elem.get("id")[2:])
if not offset: continue if not offset: continue
if elem.get("categ"): if elem.get("categ"):
synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None synsets[tag][offset] = Emo.emotions[elem.get(
"categ")] if elem.get(
"categ") in Emo.emotions else None
elif elem.get("noun-id"): elif elem.get("noun-id"):
synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])] synsets[tag][offset] = synsets[pos_map["noun"]][int(
elem.get("noun-id")[2:])]
return synsets return synsets
def _load_emotions(self, hierarchy_path): def _load_emotions(self, hierarchy_path):
@ -50,45 +50,59 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
Emo.emotions[name] = Emo(name, elem.get("isa")) Emo.emotions[name] = Emo(name, elem.get("isa"))
def activate(self, *args, **kwargs): def activate(self, *args, **kwargs):
nltk.download('stopwords') nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
self._stopwords = stopwords.words('english') self._stopwords = stopwords.words('english')
#local_path=os.path.dirname(os.path.abspath(__file__)) self._wnlemma = wordnet.WordNetLemmatizer()
self._categories = {'anger': ['general-dislike',], self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
'fear': ['negative-fear',], local_path = os.path.dirname(os.path.abspath(__file__))
'disgust': ['shame',], self._categories = {
'joy': ['gratitude','affective','enthusiasm','love','joy','liking'], 'anger': [
'sadness': ['ingrattitude','daze','humility','compassion','despair','anxiety','sadness']} 'general-dislike',
],
'fear': [
'negative-fear',
],
'disgust': [
'shame',
],
'joy':
['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
'sadness': [
'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
'anxiety', 'sadness'
]
}
self._wnaffect_mappings = {'anger': 'anger', self._wnaffect_mappings = {
'fear': 'negative-fear', 'anger': 'anger',
'disgust': 'disgust', 'fear': 'negative-fear',
'joy': 'joy', 'disgust': 'disgust',
'sadness': 'sadness'} 'joy': 'joy',
'sadness': 'sadness'
}
self._load_emotions(local_path + self.hierarchy_path)
self._load_emotions(self.hierarchy_path)
if 'total_synsets' not in self.sh: if 'total_synsets' not in self.sh:
total_synsets = self._load_synsets(self.synsets_path) total_synsets = self._load_synsets(local_path + self.synsets_path)
self.sh['total_synsets'] = total_synsets self.sh['total_synsets'] = total_synsets
self._total_synsets = self.sh['total_synsets'] self._total_synsets = self.sh['total_synsets']
if 'wn16' not in self.sh: self._wn16_path = self.wn16_path
self._wn16_path = self.wn16_path self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(self._wn16_path)), nltk.data.find(self._wn16_path))
self.sh['wn16'] = wn16
self._wn16 = self.sh['wn16']
def deactivate(self, *args, **kwargs): def deactivate(self, *args, **kwargs):
self.save() self.save()
def _my_preprocessor(self, text): def _my_preprocessor(self, text):
regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') regHttp = re.compile(
regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttps = re.compile(
'(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*') regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
text = re.sub(regHttp, '', text) text = re.sub(regHttp, '', text)
text = re.sub(regAt, '', text) text = re.sub(regAt, '', text)
@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
unigrams_lemmas = [] unigrams_lemmas = []
pos_tagged = [] pos_tagged = []
unigrams_words = [] unigrams_words = []
sentences = parse(text,lemmata=True).split() tokens = text.split()
for sentence in sentences: for token in nltk.pos_tag(tokens):
for token in sentence: unigrams_words.append(token[0])
if token[0].lower() not in self._stopwords: pos_tagged.append(token[1])
unigrams_words.append(token[0].lower()) if token[1][0] in self._syntactics.keys():
unigrams_lemmas.append(token[4]) unigrams_lemmas.append(
pos_tagged.append(token[1]) self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
[0]]))
else:
unigrams_lemmas.append(token[0])
return unigrams_words,unigrams_lemmas,pos_tagged return unigrams_words, unigrams_lemmas, pos_tagged
def _find_ngrams(self, input_list, n): def _find_ngrams(self, input_list, n):
return zip(*[input_list[i:] for i in range(n)]) return zip(*[input_list[i:] for i in range(n)])
def _clean_pos(self, pos_tagged): def _clean_pos(self, pos_tagged):
pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', pos_tags = {
'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'} 'NN': 'NN',
'NNP': 'NN',
'NNP-LOC': 'NN',
'NNS': 'NN',
'JJ': 'JJ',
'JJR': 'JJ',
'JJS': 'JJ',
'RB': 'RB',
'RBR': 'RB',
'RBS': 'RB',
'VB': 'VB',
'VBD': 'VB',
'VGB': 'VB',
'VBN': 'VB',
'VBP': 'VB',
'VBZ': 'VB'
}
for i in range(len(pos_tagged)): for i in range(len(pos_tagged)):
if pos_tagged[i] in pos_tags: if pos_tagged[i] in pos_tags:
pos_tagged[i]=pos_tags[pos_tagged[i]] pos_tagged[i] = pos_tags[pos_tagged[i]]
return pos_tagged return pos_tagged
def _extract_features(self, text): def _extract_features(self, text):
feature_set={k:0 for k in self._categories} feature_set = {k: 0 for k in self._categories}
ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text) ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
matches=0 matches = 0
pos_tagged=self._clean_pos(pos_tagged) pos_tagged = self._clean_pos(pos_tagged)
tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV} tag_wn = {
'NN': self._wn16.NOUN,
'JJ': self._wn16.ADJ,
'VB': self._wn16.VERB,
'RB': self._wn16.ADV
}
for i in range(len(pos_tagged)): for i in range(len(pos_tagged)):
if pos_tagged[i] in tag_wn: if pos_tagged[i] in tag_wn:
synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]]) synsets = self._wn16.synsets(ngrams_words[i],
tag_wn[pos_tagged[i]])
if synsets: if synsets:
offset = synsets[0].offset() offset = synsets[0].offset()
if offset in self._total_synsets[pos_tagged[i]]: if offset in self._total_synsets[pos_tagged[i]]:
if self._total_synsets[pos_tagged[i]][offset] is None: if self._total_synsets[pos_tagged[i]][offset] is None:
continue continue
else: else:
emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name emotion = self._total_synsets[pos_tagged[i]][
matches+=1 offset].get_level(5).name
matches += 1
for i in self._categories: for i in self._categories:
if emotion in self._categories[i]: if emotion in self._categories[i]:
feature_set[i]+=1 feature_set[i] += 1
if matches == 0: if matches == 0:
matches=1 matches = 1
for i in feature_set: for i in feature_set:
feature_set[i] = (feature_set[i]/matches)*100 feature_set[i] = (feature_set[i] / matches) * 100
return feature_set return feature_set
@ -166,19 +206,19 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
text_input = entry.get("text", None) text_input = entry.get("text", None)
text=self._my_preprocessor(text_input) text = self._my_preprocessor(text_input)
feature_text=self._extract_features(text) feature_text = self._extract_features(text)
response = Results()
emotionSet = EmotionSet(id="Emotions0") emotionSet = EmotionSet(id="Emotions0")
emotions = emotionSet.onyx__hasEmotion emotions = emotionSet.onyx__hasEmotion
for i in feature_text: for i in feature_text:
emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i], emotions.append(
onyx__hasEmotionIntensity=feature_text[i])) Emotion(
onyx__hasEmotionCategory=self._wnaffect_mappings[i],
onyx__hasEmotionIntensity=feature_text[i]))
entry.emotions = [emotionSet] entry.emotions = [emotionSet]
yield entry yield entry

View File

@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6
requirements: requirements:
- nltk>=3.0.5 - nltk>=3.0.5
- lxml>=3.4.2 - lxml>=3.4.2
- pattern async: false
async: false

View File

@ -1,6 +1,4 @@
# coding: utf-8
""" """
Clement Michard (c) 2015 Clement Michard (c) 2015
""" """
@ -85,7 +83,7 @@ class Emotion:
end_shape = '' end_shape = ''
else: else:
end_shape = '' end_shape = ''
print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape) print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
for leaf in down: for leaf in down:
next_last = 'down' if down.index(leaf) is len(down) - 1 else '' next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '', " " * len(emotion.name)) next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '', " " * len(emotion.name))

View File

@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin):
polarity_value = self.maxPolarityValue*int(res.json()["data"][0] polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
["polarity"]) * 0.25 ["polarity"]) * 0.25
polarity = "marl:Neutral" polarity = "marl:Neutral"
neutral_value = self.maxPolarityValue / 2.0 neutral_value = 0
if polarity_value > neutral_value: if polarity_value > neutral_value:
polarity = "marl:Positive" polarity = "marl:Positive"
elif polarity_value < neutral_value: elif polarity_value < neutral_value:
@ -33,4 +33,4 @@ class Sentiment140Plugin(SentimentPlugin):
marl__polarityValue=polarity_value) marl__polarityValue=polarity_value)
entry.sentiments.append(sentiment) entry.sentiments.append(sentiment)
yield entry yield entry

View File

@ -14,5 +14,5 @@
}, },
"requirements": {}, "requirements": {},
"maxPolarityValue": "1", "maxPolarityValue": "1",
"minPolarityValue": "0" "minPolarityValue": "-1"
} }

View File

@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin):
if n_pos == 0 and n_neg == 0: if n_pos == 0 and n_neg == 0:
g_score = 0.5 g_score = 0.5
polarity = 'marl:Neutral' polarity = 'marl:Neutral'
polarity_value = 0
if g_score > 0.5: if g_score > 0.5:
polarity = 'marl:Positive' polarity = 'marl:Positive'
polarity_value = 1
elif g_score < 0.5: elif g_score < 0.5:
polarity = 'marl:Negative' polarity = 'marl:Negative'
polarity_value = -1
opinion = Sentiment(id="Opinion0"+'_'+str(i), opinion = Sentiment(id="Opinion0"+'_'+str(i),
marl__hasPolarity=polarity, marl__hasPolarity=polarity,
marL__polarityValue=float("{0:.2f}".format(g_score))) marl__polarityValue=polarity_value)
entry.sentiments.append(opinion) entry.sentiments.append(opinion)

View File

@ -18,5 +18,7 @@
}, },
}, },
"sentiword_path": "SentiWordNet_3.0.txt", "sentiword_path": "SentiWordNet_3.0.txt",
"pos_path": "unigram_spanish.pickle" "pos_path": "unigram_spanish.pickle",
"maxPolarityValue": "1",
"minPolarityValue": "-1"
} }