Repository: https://github.com/gsi-upm/senpy
Add support for py3 in emotion-wnaffect
Normalize polarity values in sentiment-basic and sentiment-140
Commit b671ff51f9 (parent dee007eacf)
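As the hunks below show, the emotion-wnaffect plugin drops the Python 2-only pattern library in favour of nltk's tagger and the WordNet lemmatizer, resolves its data files relative to the plugin directory, and stops caching the WordNet 1.6 reader in the shelf; its bundled emotion.py gets a Python 3-compatible print call; and both sentiment-140 and sentiment-basic move to a symmetric [-1, 1] polarity scale.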
emotion-wnaffect plugin code:

@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 from __future__ import division
 import re
 import nltk
@@ -9,32 +7,34 @@ import string
 import xml.etree.ElementTree as ET
 from nltk.corpus import stopwords
 from nltk.corpus import WordNetCorpusReader
+from nltk.stem import wordnet
 from emotion import Emotion as Emo
-from pattern.en import parse
-from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin
+from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
 from senpy.models import Results, EmotionSet, Entry, Emotion


 class EmotionTextPlugin(EmotionPlugin, ShelfMixin):

     def _load_synsets(self, synsets_path):
         """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
         tree = ET.parse(synsets_path)
         root = tree.getroot()
-        pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
+        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}

         synsets = {}
         for pos in ["noun", "adj", "verb", "adv"]:
             tag = pos_map[pos]
             synsets[tag] = {}
-            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
+            for elem in root.findall(
+                    ".//{0}-syn-list//{0}-syn".format(pos, pos)):
                 offset = int(elem.get("id")[2:])
                 if not offset: continue
                 if elem.get("categ"):
-                    synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None
+                    synsets[tag][offset] = Emo.emotions[elem.get(
+                        "categ")] if elem.get(
+                            "categ") in Emo.emotions else None
                 elif elem.get("noun-id"):
-                    synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
+                    synsets[tag][offset] = synsets[pos_map["noun"]][int(
+                        elem.get("noun-id")[2:])]
         return synsets

     def _load_emotions(self, hierarchy_path):
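A self-contained sketch (illustrative, not the plugin's code) of what that findall pattern matches. The XML below only mimics the shape of WN-Affect's a-synsets file; the ids and categories are made up. Note the plugin passes pos to format() twice although only {0} is referenced.

    import xml.etree.ElementTree as ET

    doc = """
    <syn-list>
      <noun-syn-list>
        <noun-syn id="n#05587878" categ="joy"/>
      </noun-syn-list>
      <adj-syn-list>
        <adj-syn id="a#00857155" noun-id="n#05587878"/>
      </adj-syn-list>
    </syn-list>
    """

    root = ET.fromstring(doc)
    for pos in ["noun", "adj"]:
        for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
            # ids look like "n#05587878"; the numeric offset starts at index 2.
            print(pos, int(elem.get("id")[2:]), elem.get("categ"))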
@@ -51,44 +51,58 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):

     def activate(self, *args, **kwargs):

-        nltk.download('stopwords')
+        nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
         self._stopwords = stopwords.words('english')
-        #local_path=os.path.dirname(os.path.abspath(__file__))
-        self._categories = {'anger': ['general-dislike',],
-                            'fear': ['negative-fear',],
-                            'disgust': ['shame',],
-                            'joy': ['gratitude','affective','enthusiasm','love','joy','liking'],
-                            'sadness': ['ingrattitude','daze','humility','compassion','despair','anxiety','sadness']}
+        self._wnlemma = wordnet.WordNetLemmatizer()
+        self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
+        local_path = os.path.dirname(os.path.abspath(__file__))
+        self._categories = {
+            'anger': [
+                'general-dislike',
+            ],
+            'fear': [
+                'negative-fear',
+            ],
+            'disgust': [
+                'shame',
+            ],
+            'joy':
+            ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
+            'sadness': [
+                'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
+                'anxiety', 'sadness'
+            ]
+        }

-        self._wnaffect_mappings = {'anger': 'anger',
-                                   'fear': 'negative-fear',
-                                   'disgust': 'disgust',
-                                   'joy': 'joy',
-                                   'sadness': 'sadness'}
+        self._wnaffect_mappings = {
+            'anger': 'anger',
+            'fear': 'negative-fear',
+            'disgust': 'disgust',
+            'joy': 'joy',
+            'sadness': 'sadness'
+        }

-        self._load_emotions(self.hierarchy_path)
+        self._load_emotions(local_path + self.hierarchy_path)

         if 'total_synsets' not in self.sh:
-            total_synsets = self._load_synsets(self.synsets_path)
+            total_synsets = self._load_synsets(local_path + self.synsets_path)
             self.sh['total_synsets'] = total_synsets

         self._total_synsets = self.sh['total_synsets']

-        if 'wn16' not in self.sh:
-            self._wn16_path = self.wn16_path
-            wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(self._wn16_path)), nltk.data.find(self._wn16_path))
-            self.sh['wn16'] = wn16
-
-        self._wn16 = self.sh['wn16']
+        self._wn16_path = self.wn16_path
+        self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))

     def deactivate(self, *args, **kwargs):
         self.save()

     def _my_preprocessor(self, text):

-        regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
-        regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
+        regHttp = re.compile(
+            '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
+        regHttps = re.compile(
+            '(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
         regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
         text = re.sub(regHttp, '', text)
         text = re.sub(regAt, '', text)
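activate() now computes local_path and prefixes every data file with it, so resources load relative to the plugin's own directory rather than the process working directory, and the WordNet 1.6 corpus reader is built directly instead of being cached in the shelf. A minimal sketch of the path idea, not plugin code; "a-hierarchy.xml" is a hypothetical file name. Because the plugin concatenates strings (local_path + self.hierarchy_path), its configured paths are expected to begin with a path separator.

    import os

    # Resolve a data file relative to this module, not the current working
    # directory. "a-hierarchy.xml" is a placeholder name for illustration.
    local_path = os.path.dirname(os.path.abspath(__file__))
    hierarchy_path = os.path.join(local_path, "a-hierarchy.xml")
    print(hierarchy_path)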
@@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
         unigrams_lemmas = []
         pos_tagged = []
         unigrams_words = []
-        sentences = parse(text,lemmata=True).split()
-        for sentence in sentences:
-            for token in sentence:
-                if token[0].lower() not in self._stopwords:
-                    unigrams_words.append(token[0].lower())
-                    unigrams_lemmas.append(token[4])
-                    pos_tagged.append(token[1])
+        tokens = text.split()
+        for token in nltk.pos_tag(tokens):
+            unigrams_words.append(token[0])
+            pos_tagged.append(token[1])
+            if token[1][0] in self._syntactics.keys():
+                unigrams_lemmas.append(
+                    self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
+                                                                       [0]]))
+            else:
+                unigrams_lemmas.append(token[0])

-        return unigrams_words,unigrams_lemmas,pos_tagged
+        return unigrams_words, unigrams_lemmas, pos_tagged

     def _find_ngrams(self, input_list, n):
         return zip(*[input_list[i:] for i in range(n)])

     def _clean_pos(self, pos_tagged):

-        pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
-                  'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
+        pos_tags = {
+            'NN': 'NN',
+            'NNP': 'NN',
+            'NNP-LOC': 'NN',
+            'NNS': 'NN',
+            'JJ': 'JJ',
+            'JJR': 'JJ',
+            'JJS': 'JJ',
+            'RB': 'RB',
+            'RBR': 'RB',
+            'RBS': 'RB',
+            'VB': 'VB',
+            'VBD': 'VB',
+            'VGB': 'VB',
+            'VBN': 'VB',
+            'VBP': 'VB',
+            'VBZ': 'VB'
+        }

         for i in range(len(pos_tagged)):
             if pos_tagged[i] in pos_tags:
-                pos_tagged[i]=pos_tags[pos_tagged[i]]
+                pos_tagged[i] = pos_tags[pos_tagged[i]]
         return pos_tagged

     def _extract_features(self, text):

-        feature_set={k:0 for k in self._categories}
-        ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text)
-        matches=0
-        pos_tagged=self._clean_pos(pos_tagged)
+        feature_set = {k: 0 for k in self._categories}
+        ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
+        matches = 0
+        pos_tagged = self._clean_pos(pos_tagged)

-        tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV}
+        tag_wn = {
+            'NN': self._wn16.NOUN,
+            'JJ': self._wn16.ADJ,
+            'VB': self._wn16.VERB,
+            'RB': self._wn16.ADV
+        }
         for i in range(len(pos_tagged)):
             if pos_tagged[i] in tag_wn:
-                synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]])
+                synsets = self._wn16.synsets(ngrams_words[i],
+                                             tag_wn[pos_tagged[i]])
                 if synsets:
                     offset = synsets[0].offset()
                     if offset in self._total_synsets[pos_tagged[i]]:
                         if self._total_synsets[pos_tagged[i]][offset] is None:
                             continue
                         else:
-                            emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name
-                            matches+=1
+                            emotion = self._total_synsets[pos_tagged[i]][
+                                offset].get_level(5).name
+                            matches += 1
                             for i in self._categories:
                                 if emotion in self._categories[i]:
-                                    feature_set[i]+=1
+                                    feature_set[i] += 1
         if matches == 0:
-            matches=1
+            matches = 1

         for i in feature_set:
-            feature_set[i] = (feature_set[i]/matches)*100
+            feature_set[i] = (feature_set[i] / matches) * 100

         return feature_set
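The rewritten _extract_ngrams is the core of the py3 move: pattern.en.parse, which never gained Python 3 support, is replaced by nltk's perceptron tagger plus the WordNet lemmatizer. A standalone restatement of that pipeline, assuming the nltk data packages are available (the printed lemmas are what the tagger typically yields, so treat them as approximate):

    import nltk
    from nltk.stem import wordnet

    # Resource names as the plugin downloads them in activate().
    nltk.download(['averaged_perceptron_tagger', 'wordnet'], quiet=True)

    # Penn Treebank tag prefix -> WordNet POS, mirroring self._syntactics.
    syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
    wnlemma = wordnet.WordNetLemmatizer()

    words, lemmas, tags = [], [], []
    for token, tag in nltk.pos_tag("cats are chasing mice".split()):
        words.append(token)
        tags.append(tag)
        if tag[0] in syntactics:  # lemmatize only POS classes WordNet knows
            lemmas.append(wnlemma.lemmatize(token, syntactics[tag[0]]))
        else:
            lemmas.append(token)

    print(lemmas)  # typically ['cat', 'be', 'chase', 'mouse']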
@@ -166,18 +206,18 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):

         text_input = entry.get("text", None)

-        text=self._my_preprocessor(text_input)
+        text = self._my_preprocessor(text_input)

-        feature_text=self._extract_features(text)
+        feature_text = self._extract_features(text)

-        response = Results()

         emotionSet = EmotionSet(id="Emotions0")
         emotions = emotionSet.onyx__hasEmotion

         for i in feature_text:
-            emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i],
-                                    onyx__hasEmotionIntensity=feature_text[i]))
+            emotions.append(
+                Emotion(
+                    onyx__hasEmotionCategory=self._wnaffect_mappings[i],
+                    onyx__hasEmotionIntensity=feature_text[i]))

         entry.emotions = [emotionSet]
emotion-wnaffect plugin definition (YAML):

@@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6
 requirements:
 - nltk>=3.0.5
 - lxml>=3.4.2
-- pattern
 async: false
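Dropping pattern from the requirements list is the dependency side of the same change: with pattern.en gone from the plugin code, only nltk and lxml remain as third-party requirements here.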
emotion.py (the WNAffect emotion hierarchy bundled with the plugin):

@@ -1,6 +1,4 @@
-# coding: utf-8
-
 """
 Clement Michard (c) 2015
 """

@@ -85,7 +83,7 @@ class Emotion:
             end_shape = '┐'
         else:
             end_shape = ''
-        print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)
+        print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
         for leaf in down:
             next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
             next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
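The parenthesised print is the minimal change that parses on both interpreters: Python 2 reads the parentheses as grouping around a single argument, Python 3 as a function call. The usual companion import (not part of this diff) turns print into a real function on Python 2 as well:

    from __future__ import print_function  # a no-op on Python 3

    print('{0}{1}'.format('├', 'joy'))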
sentiment-140 plugin code:

@@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin):
         polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
                                                    ["polarity"]) * 0.25
         polarity = "marl:Neutral"
-        neutral_value = self.maxPolarityValue / 2.0
+        neutral_value = 0
         if polarity_value > neutral_value:
             polarity = "marl:Positive"
         elif polarity_value < neutral_value:
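With the plugin now declaring a [-1, 1] range (see the JSON hunk below), zero is the natural neutral point, replacing the old midpoint of maxPolarityValue / 2 on the [0, 1] scale. A sketch of the resulting labelling rule:

    def polarity_label(polarity_value, neutral_value=0):
        # On a symmetric scale, anything above zero is positive, below is
        # negative, and exactly zero stays neutral.
        if polarity_value > neutral_value:
            return "marl:Positive"
        if polarity_value < neutral_value:
            return "marl:Negative"
        return "marl:Neutral"

    print(polarity_label(0.5), polarity_label(-0.25), polarity_label(0))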
sentiment-140 plugin definition (JSON):

@@ -14,5 +14,5 @@
     },
     "requirements": {},
     "maxPolarityValue": "1",
-    "minPolarityValue": "0"
+    "minPolarityValue": "-1"
 }
sentiment-basic plugin code:

@@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin):
             if n_pos == 0 and n_neg == 0:
                 g_score = 0.5
                 polarity = 'marl:Neutral'
+                polarity_value = 0
             if g_score > 0.5:
                 polarity = 'marl:Positive'
+                polarity_value = 1
             elif g_score < 0.5:
                 polarity = 'marl:Negative'
+                polarity_value = -1
             opinion = Sentiment(id="Opinion0"+'_'+str(i),
                                 marl__hasPolarity=polarity,
-                                marL__polarityValue=float("{0:.2f}".format(g_score)))
+                                marl__polarityValue=polarity_value)


             entry.sentiments.append(opinion)
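sentiment-basic keeps its internal g_score on [0, 1] with 0.5 as the midpoint, but now reports a normalized polarity value in {-1, 0, 1} instead of the raw score; the hunk also fixes the marL__polarityValue typo in the keyword argument. A standalone restatement of the mapping:

    def discretize(g_score):
        # Map the internal [0, 1] score onto the normalized polarity values.
        if g_score > 0.5:
            return 1   # marl:Positive
        if g_score < 0.5:
            return -1  # marl:Negative
        return 0       # marl:Neutral

    print(discretize(0.8), discretize(0.5), discretize(0.1))  # 1 0 -1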
sentiment-basic plugin definition (JSON):

@@ -18,5 +18,7 @@
     },
   },
   "sentiword_path": "SentiWordNet_3.0.txt",
-  "pos_path": "unigram_spanish.pickle"
+  "pos_path": "unigram_spanish.pickle",
+  "maxPolarityValue": "1",
+  "minPolarityValue": "-1"
 }
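With these two keys added, sentiment-basic declares the same [-1, 1] range that sentiment-140 now uses, so clients can interpret the polarity values of both plugins uniformly.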