1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-10-24 04:08:19 +00:00

Add support for py3 in emotion-wnaffect

Normalize polarity values in sentiment-basic and sentiment-140
This commit is contained in:
militarpancho
2017-07-14 11:13:59 +02:00
parent dee007eacf
commit b671ff51f9
7 changed files with 119 additions and 78 deletions

View File

@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import division
import re
import nltk
@@ -9,32 +7,34 @@ import string
import xml.etree.ElementTree as ET
from nltk.corpus import stopwords
from nltk.corpus import WordNetCorpusReader
from nltk.stem import wordnet
from emotion import Emotion as Emo
from pattern.en import parse
from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin
from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
def _load_synsets(self, synsets_path):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse(synsets_path)
root = tree.getroot()
pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
synsets = {}
for pos in ["noun", "adj", "verb", "adv"]:
tag = pos_map[pos]
synsets[tag] = {}
for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
for elem in root.findall(
".//{0}-syn-list//{0}-syn".format(pos, pos)):
offset = int(elem.get("id")[2:])
if not offset: continue
if elem.get("categ"):
synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None
synsets[tag][offset] = Emo.emotions[elem.get(
"categ")] if elem.get(
"categ") in Emo.emotions else None
elif elem.get("noun-id"):
synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
synsets[tag][offset] = synsets[pos_map["noun"]][int(
elem.get("noun-id")[2:])]
return synsets
def _load_emotions(self, hierarchy_path):
@@ -51,44 +51,58 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
def activate(self, *args, **kwargs):
# Prepares everything the plugin needs before analysing text: NLTK resources,
# stopwords, lemmatizer, category tables, the emotion hierarchy, the synset
# table and the WordNet corpus reader.
# NOTE(review): this span is a rendered diff without +/- markers, so several
# statements appear twice (apparently the pre- and post-change form of the
# same line). Which of each pair is current cannot be told from here.
nltk.download('stopwords')
nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
self._stopwords = stopwords.words('english')
#local_path=os.path.dirname(os.path.abspath(__file__))
# Maps each reported emotion to the category names that count towards it;
# _extract_features tests membership of an emotion name in these lists.
# NOTE(review): 'ingrattitude' looks like a misspelling of 'ingratitude' --
# confirm against the names used in the hierarchy file before changing it.
self._categories = {'anger': ['general-dislike',],
'fear': ['negative-fear',],
'disgust': ['shame',],
'joy': ['gratitude','affective','enthusiasm','love','joy','liking'],
'sadness': ['ingrattitude','daze','humility','compassion','despair','anxiety','sadness']}
self._wnlemma = wordnet.WordNetLemmatizer()
# First letter of a POS tag -> POS code passed to the lemmatizer.
self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
# Directory containing this file; prefixed to the configured resource paths.
local_path = os.path.dirname(os.path.abspath(__file__))
self._categories = {
'anger': [
'general-dislike',
],
'fear': [
'negative-fear',
],
'disgust': [
'shame',
],
'joy':
['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
'sadness': [
'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
'anxiety', 'sadness'
]
}
# Maps each key of _categories to the value emitted as the emotion category
# in the analysis output.
self._wnaffect_mappings = {'anger': 'anger',
'fear': 'negative-fear',
'disgust': 'disgust',
'joy': 'joy',
'sadness': 'sadness'}
self._wnaffect_mappings = {
'anger': 'anger',
'fear': 'negative-fear',
'disgust': 'disgust',
'joy': 'joy',
'sadness': 'sadness'
}
self._load_emotions(self.hierarchy_path)
self._load_emotions(local_path + self.hierarchy_path)
# The parsed synset table is stored in self.sh and only rebuilt when the
# 'total_synsets' key is missing.
if 'total_synsets' not in self.sh:
total_synsets = self._load_synsets(self.synsets_path)
total_synsets = self._load_synsets(local_path + self.synsets_path)
self.sh['total_synsets'] = total_synsets
self._total_synsets = self.sh['total_synsets']
# NOTE(review): the lines below mix a variant that stores the corpus reader
# in self.sh with one that builds it directly on every activation -- confirm
# which one is intended; read literally, the last assignment wins.
if 'wn16' not in self.sh:
self._wn16_path = self.wn16_path
wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(self._wn16_path)), nltk.data.find(self._wn16_path))
self.sh['wn16'] = wn16
self._wn16_path = self.wn16_path
self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
self._wn16 = self.sh['wn16']
def deactivate(self, *args, **kwargs):
    """Persist the plugin's state when it is deactivated.

    All positional and keyword arguments are accepted and ignored; the
    method only delegates to ``self.save()`` and returns ``None``.
    ``save`` is not defined in this block -- presumably provided by
    ``ShelfMixin``; confirm.
    """
    self.save()
def _my_preprocessor(self, text):
regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttp = re.compile(
'(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttps = re.compile(
'(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
text = re.sub(regHttp, '', text)
text = re.sub(regAt, '', text)
@@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
unigrams_lemmas = []
pos_tagged = []
unigrams_words = []
sentences = parse(text,lemmata=True).split()
for sentence in sentences:
for token in sentence:
if token[0].lower() not in self._stopwords:
unigrams_words.append(token[0].lower())
unigrams_lemmas.append(token[4])
pos_tagged.append(token[1])
tokens = text.split()
for token in nltk.pos_tag(tokens):
unigrams_words.append(token[0])
pos_tagged.append(token[1])
if token[1][0] in self._syntactics.keys():
unigrams_lemmas.append(
self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
[0]]))
else:
unigrams_lemmas.append(token[0])
return unigrams_words,unigrams_lemmas,pos_tagged
return unigrams_words, unigrams_lemmas, pos_tagged
def _find_ngrams(self, input_list, n):
return zip(*[input_list[i:] for i in range(n)])
def _clean_pos(self, pos_tagged):
pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
pos_tags = {
'NN': 'NN',
'NNP': 'NN',
'NNP-LOC': 'NN',
'NNS': 'NN',
'JJ': 'JJ',
'JJR': 'JJ',
'JJS': 'JJ',
'RB': 'RB',
'RBR': 'RB',
'RBS': 'RB',
'VB': 'VB',
'VBD': 'VB',
'VGB': 'VB',
'VBN': 'VB',
'VBP': 'VB',
'VBZ': 'VB'
}
for i in range(len(pos_tagged)):
if pos_tagged[i] in pos_tags:
pos_tagged[i]=pos_tags[pos_tagged[i]]
pos_tagged[i] = pos_tags[pos_tagged[i]]
return pos_tagged
def _extract_features(self, text):
feature_set={k:0 for k in self._categories}
ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text)
matches=0
pos_tagged=self._clean_pos(pos_tagged)
feature_set = {k: 0 for k in self._categories}
ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
matches = 0
pos_tagged = self._clean_pos(pos_tagged)
tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV}
tag_wn = {
'NN': self._wn16.NOUN,
'JJ': self._wn16.ADJ,
'VB': self._wn16.VERB,
'RB': self._wn16.ADV
}
for i in range(len(pos_tagged)):
if pos_tagged[i] in tag_wn:
synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]])
synsets = self._wn16.synsets(ngrams_words[i],
tag_wn[pos_tagged[i]])
if synsets:
offset = synsets[0].offset()
if offset in self._total_synsets[pos_tagged[i]]:
if self._total_synsets[pos_tagged[i]][offset] is None:
continue
else:
emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name
matches+=1
emotion = self._total_synsets[pos_tagged[i]][
offset].get_level(5).name
matches += 1
for i in self._categories:
if emotion in self._categories[i]:
feature_set[i]+=1
feature_set[i] += 1
if matches == 0:
matches=1
matches = 1
for i in feature_set:
feature_set[i] = (feature_set[i]/matches)*100
feature_set[i] = (feature_set[i] / matches) * 100
return feature_set
@@ -166,18 +206,18 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
text_input = entry.get("text", None)
text=self._my_preprocessor(text_input)
text = self._my_preprocessor(text_input)
feature_text=self._extract_features(text)
response = Results()
feature_text = self._extract_features(text)
emotionSet = EmotionSet(id="Emotions0")
emotions = emotionSet.onyx__hasEmotion
for i in feature_text:
emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i],
onyx__hasEmotionIntensity=feature_text[i]))
emotions.append(
Emotion(
onyx__hasEmotionCategory=self._wnaffect_mappings[i],
onyx__hasEmotionIntensity=feature_text[i]))
entry.emotions = [emotionSet]

View File

@@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6
requirements:
- nltk>=3.0.5
- lxml>=3.4.2
- pattern
async: false

View File

@@ -1,6 +1,4 @@
# coding: utf-8
"""
Clement Michard (c) 2015
"""
@@ -85,7 +83,7 @@ class Emotion:
end_shape = ''
else:
end_shape = ''
print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)
print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
for leaf in down:
next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '', " " * len(emotion.name))

View File

@@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin):
polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
["polarity"]) * 0.25
polarity = "marl:Neutral"
neutral_value = self.maxPolarityValue / 2.0
neutral_value = 0
if polarity_value > neutral_value:
polarity = "marl:Positive"
elif polarity_value < neutral_value:

View File

@@ -14,5 +14,5 @@
},
"requirements": {},
"maxPolarityValue": "1",
"minPolarityValue": "0"
"minPolarityValue": "-1"
}

View File

@@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin):
if n_pos == 0 and n_neg == 0:
g_score = 0.5
polarity = 'marl:Neutral'
polarity_value = 0
if g_score > 0.5:
polarity = 'marl:Positive'
polarity_value = 1
elif g_score < 0.5:
polarity = 'marl:Negative'
polarity_value = -1
opinion = Sentiment(id="Opinion0"+'_'+str(i),
marl__hasPolarity=polarity,
marL__polarityValue=float("{0:.2f}".format(g_score)))
marl__polarityValue=polarity_value)
entry.sentiments.append(opinion)

View File

@@ -18,5 +18,7 @@
},
},
"sentiword_path": "SentiWordNet_3.0.txt",
"pos_path": "unigram_spanish.pickle"
"pos_path": "unigram_spanish.pickle",
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}