mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-14 04:32:29 +00:00
Add support for py3 in emotion-wnaffect
Normalize polarity values in sentiment-basic and sentiment-140
This commit is contained in:
parent
dee007eacf
commit
b671ff51f9
@ -1,5 +1,3 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import division
|
||||
import re
|
||||
import nltk
|
||||
@ -9,32 +7,34 @@ import string
|
||||
import xml.etree.ElementTree as ET
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.corpus import WordNetCorpusReader
|
||||
from nltk.stem import wordnet
|
||||
from emotion import Emotion as Emo
|
||||
from pattern.en import parse
|
||||
from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin
|
||||
from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
|
||||
from senpy.models import Results, EmotionSet, Entry, Emotion
|
||||
|
||||
|
||||
class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
|
||||
|
||||
|
||||
def _load_synsets(self, synsets_path):
|
||||
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
|
||||
tree = ET.parse(synsets_path)
|
||||
root = tree.getroot()
|
||||
pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
|
||||
pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
|
||||
|
||||
synsets = {}
|
||||
for pos in ["noun", "adj", "verb", "adv"]:
|
||||
tag = pos_map[pos]
|
||||
synsets[tag] = {}
|
||||
for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
|
||||
for elem in root.findall(
|
||||
".//{0}-syn-list//{0}-syn".format(pos, pos)):
|
||||
offset = int(elem.get("id")[2:])
|
||||
if not offset: continue
|
||||
if elem.get("categ"):
|
||||
synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None
|
||||
synsets[tag][offset] = Emo.emotions[elem.get(
|
||||
"categ")] if elem.get(
|
||||
"categ") in Emo.emotions else None
|
||||
elif elem.get("noun-id"):
|
||||
synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
|
||||
synsets[tag][offset] = synsets[pos_map["noun"]][int(
|
||||
elem.get("noun-id")[2:])]
|
||||
return synsets
|
||||
|
||||
def _load_emotions(self, hierarchy_path):
|
||||
@ -51,44 +51,58 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
|
||||
|
||||
def activate(self, *args, **kwargs):
    """Load the NLTK resources and WordNet-Affect data used by the plugin.

    Downloads the required NLTK corpora, builds the emotion category
    tables, loads the emotion hierarchy, loads the synset table (reusing
    the copy stored in ``self.sh`` when one exists) and opens the WordNet
    corpus reader. All data paths are resolved relative to the directory
    containing this module. Positional and keyword arguments are accepted
    but not used.
    """
    nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
    self._stopwords = stopwords.words('english')
    self._wnlemma = wordnet.WordNetLemmatizer()
    # First letter of a POS tag -> WordNet POS code given to the lemmatizer.
    self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
    local_path = os.path.dirname(os.path.abspath(__file__))

    # Output category -> emotion names that count towards it.
    self._categories = {
        'anger': [
            'general-dislike',
        ],
        'fear': [
            'negative-fear',
        ],
        'disgust': [
            'shame',
        ],
        'joy':
        ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
        'sadness': [
            # NOTE(review): 'ingrattitude' looks like a misspelling of
            # 'ingratitude'. The original key is kept and the dictionary
            # spelling added beside it - confirm against the hierarchy file.
            'ingrattitude', 'ingratitude', 'daze', 'humility', 'compassion',
            'despair', 'anxiety', 'sadness'
        ]
    }

    # Output category -> label reported in the analysis results.
    self._wnaffect_mappings = {
        'anger': 'anger',
        'fear': 'negative-fear',
        'disgust': 'disgust',
        'joy': 'joy',
        'sadness': 'sadness'
    }

    self._load_emotions(local_path + self.hierarchy_path)

    # Parsing the synset XML is only done when no stored copy exists.
    if 'total_synsets' not in self.sh:
        total_synsets = self._load_synsets(local_path + self.synsets_path)
        self.sh['total_synsets'] = total_synsets

    self._total_synsets = self.sh['total_synsets']

    # The corpus reader is rebuilt on every activation rather than being
    # stored in self.sh.
    self._wn16_path = self.wn16_path
    wn16_location = local_path + self._wn16_path
    self._wn16 = WordNetCorpusReader(
        os.path.abspath(wn16_location), nltk.data.find(wn16_location))
|
||||
|
||||
def deactivate(self, *args, **kwargs):
    """Persist the plugin state by calling ``self.save()``.

    Positional and keyword arguments are accepted but not used.
    """
    self.save()
|
||||
|
||||
def _my_preprocessor(self, text):
|
||||
|
||||
regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
|
||||
regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
|
||||
regHttp = re.compile(
|
||||
'(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
|
||||
regHttps = re.compile(
|
||||
'(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
|
||||
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
|
||||
text = re.sub(regHttp, '', text)
|
||||
text = re.sub(regAt, '', text)
|
||||
@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
|
||||
unigrams_lemmas = []
|
||||
pos_tagged = []
|
||||
unigrams_words = []
|
||||
sentences = parse(text,lemmata=True).split()
|
||||
for sentence in sentences:
|
||||
for token in sentence:
|
||||
if token[0].lower() not in self._stopwords:
|
||||
unigrams_words.append(token[0].lower())
|
||||
unigrams_lemmas.append(token[4])
|
||||
pos_tagged.append(token[1])
|
||||
tokens = text.split()
|
||||
for token in nltk.pos_tag(tokens):
|
||||
unigrams_words.append(token[0])
|
||||
pos_tagged.append(token[1])
|
||||
if token[1][0] in self._syntactics.keys():
|
||||
unigrams_lemmas.append(
|
||||
self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
|
||||
[0]]))
|
||||
else:
|
||||
unigrams_lemmas.append(token[0])
|
||||
|
||||
return unigrams_words,unigrams_lemmas,pos_tagged
|
||||
return unigrams_words, unigrams_lemmas, pos_tagged
|
||||
|
||||
def _find_ngrams(self, input_list, n):
|
||||
return zip(*[input_list[i:] for i in range(n)])
|
||||
|
||||
def _clean_pos(self, pos_tagged):
|
||||
|
||||
pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
|
||||
'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
|
||||
pos_tags = {
|
||||
'NN': 'NN',
|
||||
'NNP': 'NN',
|
||||
'NNP-LOC': 'NN',
|
||||
'NNS': 'NN',
|
||||
'JJ': 'JJ',
|
||||
'JJR': 'JJ',
|
||||
'JJS': 'JJ',
|
||||
'RB': 'RB',
|
||||
'RBR': 'RB',
|
||||
'RBS': 'RB',
|
||||
'VB': 'VB',
|
||||
'VBD': 'VB',
|
||||
'VGB': 'VB',
|
||||
'VBN': 'VB',
|
||||
'VBP': 'VB',
|
||||
'VBZ': 'VB'
|
||||
}
|
||||
|
||||
for i in range(len(pos_tagged)):
|
||||
if pos_tagged[i] in pos_tags:
|
||||
pos_tagged[i]=pos_tags[pos_tagged[i]]
|
||||
pos_tagged[i] = pos_tags[pos_tagged[i]]
|
||||
return pos_tagged
|
||||
|
||||
def _extract_features(self, text):
|
||||
|
||||
feature_set={k:0 for k in self._categories}
|
||||
ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text)
|
||||
matches=0
|
||||
pos_tagged=self._clean_pos(pos_tagged)
|
||||
feature_set = {k: 0 for k in self._categories}
|
||||
ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
|
||||
matches = 0
|
||||
pos_tagged = self._clean_pos(pos_tagged)
|
||||
|
||||
tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV}
|
||||
tag_wn = {
|
||||
'NN': self._wn16.NOUN,
|
||||
'JJ': self._wn16.ADJ,
|
||||
'VB': self._wn16.VERB,
|
||||
'RB': self._wn16.ADV
|
||||
}
|
||||
for i in range(len(pos_tagged)):
|
||||
if pos_tagged[i] in tag_wn:
|
||||
synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]])
|
||||
synsets = self._wn16.synsets(ngrams_words[i],
|
||||
tag_wn[pos_tagged[i]])
|
||||
if synsets:
|
||||
offset = synsets[0].offset()
|
||||
if offset in self._total_synsets[pos_tagged[i]]:
|
||||
if self._total_synsets[pos_tagged[i]][offset] is None:
|
||||
continue
|
||||
else:
|
||||
emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name
|
||||
matches+=1
|
||||
emotion = self._total_synsets[pos_tagged[i]][
|
||||
offset].get_level(5).name
|
||||
matches += 1
|
||||
for i in self._categories:
|
||||
if emotion in self._categories[i]:
|
||||
feature_set[i]+=1
|
||||
feature_set[i] += 1
|
||||
if matches == 0:
|
||||
matches=1
|
||||
matches = 1
|
||||
|
||||
for i in feature_set:
|
||||
feature_set[i] = (feature_set[i]/matches)*100
|
||||
feature_set[i] = (feature_set[i] / matches) * 100
|
||||
|
||||
return feature_set
|
||||
|
||||
@ -166,18 +206,18 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
|
||||
|
||||
text_input = entry.get("text", None)
|
||||
|
||||
text=self._my_preprocessor(text_input)
|
||||
text = self._my_preprocessor(text_input)
|
||||
|
||||
feature_text=self._extract_features(text)
|
||||
|
||||
response = Results()
|
||||
feature_text = self._extract_features(text)
|
||||
|
||||
emotionSet = EmotionSet(id="Emotions0")
|
||||
emotions = emotionSet.onyx__hasEmotion
|
||||
|
||||
for i in feature_text:
|
||||
emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i],
|
||||
onyx__hasEmotionIntensity=feature_text[i]))
|
||||
emotions.append(
|
||||
Emotion(
|
||||
onyx__hasEmotionCategory=self._wnaffect_mappings[i],
|
||||
onyx__hasEmotionIntensity=feature_text[i]))
|
||||
|
||||
entry.emotions = [emotionSet]
|
||||
|
||||
|
@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6
|
||||
requirements:
|
||||
- nltk>=3.0.5
|
||||
- lxml>=3.4.2
|
||||
- pattern
|
||||
async: false
|
@ -1,6 +1,4 @@
|
||||
|
||||
# coding: utf-8
|
||||
|
||||
"""
|
||||
Clement Michard (c) 2015
|
||||
"""
|
||||
@ -85,7 +83,7 @@ class Emotion:
|
||||
end_shape = '┐'
|
||||
else:
|
||||
end_shape = ''
|
||||
print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)
|
||||
print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
|
||||
for leaf in down:
|
||||
next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
|
||||
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
|
||||
|
@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin):
|
||||
polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
|
||||
["polarity"]) * 0.25
|
||||
polarity = "marl:Neutral"
|
||||
neutral_value = self.maxPolarityValue / 2.0
|
||||
neutral_value = 0
|
||||
if polarity_value > neutral_value:
|
||||
polarity = "marl:Positive"
|
||||
elif polarity_value < neutral_value:
|
||||
|
@ -14,5 +14,5 @@
|
||||
},
|
||||
"requirements": {},
|
||||
"maxPolarityValue": "1",
|
||||
"minPolarityValue": "0"
|
||||
"minPolarityValue": "-1"
|
||||
}
|
||||
|
@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin):
|
||||
if n_pos == 0 and n_neg == 0:
|
||||
g_score = 0.5
|
||||
polarity = 'marl:Neutral'
|
||||
polarity_value = 0
|
||||
if g_score > 0.5:
|
||||
polarity = 'marl:Positive'
|
||||
polarity_value = 1
|
||||
elif g_score < 0.5:
|
||||
polarity = 'marl:Negative'
|
||||
|
||||
polarity_value = -1
|
||||
opinion = Sentiment(id="Opinion0"+'_'+str(i),
|
||||
marl__hasPolarity=polarity,
|
||||
marL__polarityValue=float("{0:.2f}".format(g_score)))
|
||||
marl__polarityValue=polarity_value)
|
||||
|
||||
|
||||
entry.sentiments.append(opinion)
|
||||
|
@ -18,5 +18,7 @@
|
||||
},
|
||||
},
|
||||
"sentiword_path": "SentiWordNet_3.0.txt",
|
||||
"pos_path": "unigram_spanish.pickle"
|
||||
"pos_path": "unigram_spanish.pickle",
|
||||
"maxPolarityValue": "1",
|
||||
"minPolarityValue": "-1"
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user