mirror of
https://github.com/gsi-upm/senpy
synced 2025-08-24 02:22:20 +00:00
Add plugins as submodules
This commit is contained in:
1
emotion-wnaffect
Submodule
1
emotion-wnaffect
Submodule
Submodule emotion-wnaffect added at 74c40d7e97
@@ -1,41 +0,0 @@
|
||||
# WordNet-Affect plugin
|
||||
|
||||
This plugin uses WordNet-Affect (http://wndomains.fbk.eu/wnaffect.html) to calculate the percentage of each emotion. The plugin classifies among five diferent emotions: anger, fear, disgust, joy and sadness. It is has been used a emotion mapping enlarge the emotions:
|
||||
|
||||
- anger : general-dislike
|
||||
- fear : negative-fear
|
||||
- disgust : shame
|
||||
- joy : gratitude, affective, enthusiasm, love, joy, liking
|
||||
- sadness : ingrattitude, daze, humlity, compassion, despair, anxiety, sadness
|
||||
|
||||
## Usage
|
||||
|
||||
The parameters accepted are:
|
||||
|
||||
- Language: English (en).
|
||||
- Input: Text to analyse.
|
||||
|
||||
Example request:
|
||||
```
|
||||
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-wnaffect&language=en&input=I%20love%20Madrid
|
||||
```
|
||||
|
||||
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
|
||||
|
||||
|
||||
The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM for semantic web.
|
||||
|
||||
This plugin uses WNAffect labels for emotion analysis.
|
||||
|
||||
The emotion-wnaffect.senpy file can be copied and modified to use different versions of wnaffect with the same python code.
|
||||
|
||||
|
||||
## Known issues
|
||||
|
||||
- This plugin uses the pattern library, which means it will only run on python 2.7
|
||||
- Wnaffect and corpora files are not included in the repository, but can be easily added either to the docker image (using a volume) or in a new docker image.
|
||||
- You can download Wordnet 1.6 here: <http://wordnetcode.princeton.edu/1.6/wn16.unix.tar.gz> and extract the dict folder.
|
||||
- The hierarchy and synsets files can be found here: <https://github.com/larsmans/wordnet-domains-sentiwords/tree/master/wn-domains/wn-affect-1.1>
|
||||
|
||||
![alt GSI Logo][logoGSI]
|
||||
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
|
@@ -1,224 +0,0 @@
|
||||
from __future__ import division
|
||||
import re
|
||||
import nltk
|
||||
import logging
|
||||
import os
|
||||
import string
|
||||
import xml.etree.ElementTree as ET
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.corpus import WordNetCorpusReader
|
||||
from nltk.stem import wordnet
|
||||
from emotion import Emotion as Emo
|
||||
from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
|
||||
from senpy.models import Results, EmotionSet, Entry, Emotion
|
||||
|
||||
|
||||
class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
|
||||
def _load_synsets(self, synsets_path):
|
||||
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
|
||||
tree = ET.parse(synsets_path)
|
||||
root = tree.getroot()
|
||||
pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
|
||||
|
||||
synsets = {}
|
||||
for pos in ["noun", "adj", "verb", "adv"]:
|
||||
tag = pos_map[pos]
|
||||
synsets[tag] = {}
|
||||
for elem in root.findall(
|
||||
".//{0}-syn-list//{0}-syn".format(pos, pos)):
|
||||
offset = int(elem.get("id")[2:])
|
||||
if not offset: continue
|
||||
if elem.get("categ"):
|
||||
synsets[tag][offset] = Emo.emotions[elem.get(
|
||||
"categ")] if elem.get(
|
||||
"categ") in Emo.emotions else None
|
||||
elif elem.get("noun-id"):
|
||||
synsets[tag][offset] = synsets[pos_map["noun"]][int(
|
||||
elem.get("noun-id")[2:])]
|
||||
return synsets
|
||||
|
||||
def _load_emotions(self, hierarchy_path):
|
||||
"""Loads the hierarchy of emotions from the WordNet-Affect xml."""
|
||||
|
||||
tree = ET.parse(hierarchy_path)
|
||||
root = tree.getroot()
|
||||
for elem in root.findall("categ"):
|
||||
name = elem.get("name")
|
||||
if name == "root":
|
||||
Emo.emotions["root"] = Emo("root")
|
||||
else:
|
||||
Emo.emotions[name] = Emo(name, elem.get("isa"))
|
||||
|
||||
def activate(self, *args, **kwargs):
|
||||
|
||||
nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
|
||||
self._stopwords = stopwords.words('english')
|
||||
self._wnlemma = wordnet.WordNetLemmatizer()
|
||||
self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
|
||||
local_path = os.path.dirname(os.path.abspath(__file__))
|
||||
self._categories = {
|
||||
'anger': [
|
||||
'general-dislike',
|
||||
],
|
||||
'fear': [
|
||||
'negative-fear',
|
||||
],
|
||||
'disgust': [
|
||||
'shame',
|
||||
],
|
||||
'joy':
|
||||
['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
|
||||
'sadness': [
|
||||
'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
|
||||
'anxiety', 'sadness'
|
||||
]
|
||||
}
|
||||
|
||||
self._wnaffect_mappings = {
|
||||
'anger': 'anger',
|
||||
'fear': 'negative-fear',
|
||||
'disgust': 'disgust',
|
||||
'joy': 'joy',
|
||||
'sadness': 'sadness'
|
||||
}
|
||||
|
||||
self._load_emotions(local_path + self.hierarchy_path)
|
||||
|
||||
if 'total_synsets' not in self.sh:
|
||||
total_synsets = self._load_synsets(local_path + self.synsets_path)
|
||||
self.sh['total_synsets'] = total_synsets
|
||||
|
||||
self._total_synsets = self.sh['total_synsets']
|
||||
|
||||
self._wn16_path = self.wn16_path
|
||||
self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
|
||||
|
||||
|
||||
def deactivate(self, *args, **kwargs):
|
||||
self.save()
|
||||
|
||||
def _my_preprocessor(self, text):
|
||||
|
||||
regHttp = re.compile(
|
||||
'(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
|
||||
regHttps = re.compile(
|
||||
'(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
|
||||
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
|
||||
text = re.sub(regHttp, '', text)
|
||||
text = re.sub(regAt, '', text)
|
||||
text = re.sub('RT : ', '', text)
|
||||
text = re.sub(regHttps, '', text)
|
||||
text = re.sub('[0-9]', '', text)
|
||||
text = self._delete_punctuation(text)
|
||||
return text
|
||||
|
||||
def _delete_punctuation(self, text):
|
||||
|
||||
exclude = set(string.punctuation)
|
||||
s = ''.join(ch for ch in text if ch not in exclude)
|
||||
return s
|
||||
|
||||
def _extract_ngrams(self, text):
|
||||
|
||||
unigrams_lemmas = []
|
||||
pos_tagged = []
|
||||
unigrams_words = []
|
||||
tokens = text.split()
|
||||
for token in nltk.pos_tag(tokens):
|
||||
unigrams_words.append(token[0])
|
||||
pos_tagged.append(token[1])
|
||||
if token[1][0] in self._syntactics.keys():
|
||||
unigrams_lemmas.append(
|
||||
self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
|
||||
[0]]))
|
||||
else:
|
||||
unigrams_lemmas.append(token[0])
|
||||
|
||||
return unigrams_words, unigrams_lemmas, pos_tagged
|
||||
|
||||
def _find_ngrams(self, input_list, n):
|
||||
return zip(*[input_list[i:] for i in range(n)])
|
||||
|
||||
def _clean_pos(self, pos_tagged):
|
||||
|
||||
pos_tags = {
|
||||
'NN': 'NN',
|
||||
'NNP': 'NN',
|
||||
'NNP-LOC': 'NN',
|
||||
'NNS': 'NN',
|
||||
'JJ': 'JJ',
|
||||
'JJR': 'JJ',
|
||||
'JJS': 'JJ',
|
||||
'RB': 'RB',
|
||||
'RBR': 'RB',
|
||||
'RBS': 'RB',
|
||||
'VB': 'VB',
|
||||
'VBD': 'VB',
|
||||
'VGB': 'VB',
|
||||
'VBN': 'VB',
|
||||
'VBP': 'VB',
|
||||
'VBZ': 'VB'
|
||||
}
|
||||
|
||||
for i in range(len(pos_tagged)):
|
||||
if pos_tagged[i] in pos_tags:
|
||||
pos_tagged[i] = pos_tags[pos_tagged[i]]
|
||||
return pos_tagged
|
||||
|
||||
def _extract_features(self, text):
|
||||
|
||||
feature_set = {k: 0 for k in self._categories}
|
||||
ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
|
||||
matches = 0
|
||||
pos_tagged = self._clean_pos(pos_tagged)
|
||||
|
||||
tag_wn = {
|
||||
'NN': self._wn16.NOUN,
|
||||
'JJ': self._wn16.ADJ,
|
||||
'VB': self._wn16.VERB,
|
||||
'RB': self._wn16.ADV
|
||||
}
|
||||
for i in range(len(pos_tagged)):
|
||||
if pos_tagged[i] in tag_wn:
|
||||
synsets = self._wn16.synsets(ngrams_words[i],
|
||||
tag_wn[pos_tagged[i]])
|
||||
if synsets:
|
||||
offset = synsets[0].offset()
|
||||
if offset in self._total_synsets[pos_tagged[i]]:
|
||||
if self._total_synsets[pos_tagged[i]][offset] is None:
|
||||
continue
|
||||
else:
|
||||
emotion = self._total_synsets[pos_tagged[i]][
|
||||
offset].get_level(5).name
|
||||
matches += 1
|
||||
for i in self._categories:
|
||||
if emotion in self._categories[i]:
|
||||
feature_set[i] += 1
|
||||
if matches == 0:
|
||||
matches = 1
|
||||
|
||||
for i in feature_set:
|
||||
feature_set[i] = (feature_set[i] / matches) * 100
|
||||
|
||||
return feature_set
|
||||
|
||||
def analyse_entry(self, entry, params):
|
||||
|
||||
text_input = entry.get("text", None)
|
||||
|
||||
text = self._my_preprocessor(text_input)
|
||||
|
||||
feature_text = self._extract_features(text)
|
||||
|
||||
emotionSet = EmotionSet(id="Emotions0")
|
||||
emotions = emotionSet.onyx__hasEmotion
|
||||
|
||||
for i in feature_text:
|
||||
emotions.append(
|
||||
Emotion(
|
||||
onyx__hasEmotionCategory=self._wnaffect_mappings[i],
|
||||
onyx__hasEmotionIntensity=feature_text[i]))
|
||||
|
||||
entry.emotions = [emotionSet]
|
||||
|
||||
yield entry
|
@@ -1,25 +0,0 @@
|
||||
---
|
||||
name: emotion-wnaffect
|
||||
module: emotion-wnaffect
|
||||
description: 'Emotion classifier using WordNet-Affect to calculate the percentage
|
||||
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
|
||||
or neutral. The only available language is English (en)'
|
||||
author: "@icorcuera @balkian"
|
||||
version: '0.2'
|
||||
extra_params:
|
||||
language:
|
||||
"@id": lang_wnaffect
|
||||
aliases:
|
||||
- language
|
||||
- l
|
||||
required: false
|
||||
options:
|
||||
- en
|
||||
synsets_path: "/data/emotion-wnaffect/a-synsets.xml"
|
||||
hierarchy_path: "/data/emotion-wnaffect/a-hierarchy.xml"
|
||||
wn16_path: "/data/emotion-wnaffect/wordnet1.6/dict"
|
||||
onyx:usesEmotionModel: emoml:big6
|
||||
requirements:
|
||||
- nltk>=3.0.5
|
||||
- lxml>=3.4.2
|
||||
async: false
|
@@ -1,95 +0,0 @@
|
||||
|
||||
"""
|
||||
Clement Michard (c) 2015
|
||||
"""
|
||||
|
||||
class Emotion:
|
||||
"""Defines an emotion."""
|
||||
|
||||
emotions = {} # name to emotion (str -> Emotion)
|
||||
|
||||
def __init__(self, name, parent_name=None):
|
||||
"""Initializes an Emotion object.
|
||||
name -- name of the emotion (str)
|
||||
parent_name -- name of the parent emotion (str)
|
||||
"""
|
||||
|
||||
self.name = name
|
||||
self.parent = None
|
||||
self.level = 0
|
||||
self.children = []
|
||||
|
||||
if parent_name:
|
||||
self.parent = Emotion.emotions[parent_name] if parent_name else None
|
||||
self.parent.children.append(self)
|
||||
self.level = self.parent.level + 1
|
||||
|
||||
|
||||
def get_level(self, level):
|
||||
"""Returns the parent of self at the given level.
|
||||
level -- level in the hierarchy (int)
|
||||
"""
|
||||
|
||||
em = self
|
||||
while em.level > level and em.level >= 0:
|
||||
em = em.parent
|
||||
return em
|
||||
|
||||
|
||||
def __str__(self):
|
||||
"""Returns the emotion string formatted."""
|
||||
|
||||
return self.name
|
||||
|
||||
|
||||
def nb_children(self):
|
||||
"""Returns the number of children of the emotion."""
|
||||
|
||||
return sum(child.nb_children() for child in self.children) + 1
|
||||
|
||||
|
||||
@staticmethod
|
||||
def printTree(emotion=None, indent="", last='updown'):
|
||||
"""Prints the hierarchy of emotions.
|
||||
emotion -- root emotion (Emotion)
|
||||
"""
|
||||
|
||||
if not emotion:
|
||||
emotion = Emotion.emotions["root"]
|
||||
|
||||
size_branch = {child: child.nb_children() for child in emotion.children}
|
||||
leaves = sorted(emotion.children, key=lambda emotion: emotion.nb_children())
|
||||
up, down = [], []
|
||||
if leaves:
|
||||
while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves):
|
||||
down.append(leaves.pop())
|
||||
up = leaves
|
||||
|
||||
for leaf in up:
|
||||
next_last = 'up' if up.index(leaf) is 0 else ''
|
||||
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '│', " " * len(emotion.name))
|
||||
Emotion.printTree(leaf, indent=next_indent, last=next_last)
|
||||
if last == 'up':
|
||||
start_shape = '┌'
|
||||
elif last == 'down':
|
||||
start_shape = '└'
|
||||
elif last == 'updown':
|
||||
start_shape = ' '
|
||||
else:
|
||||
start_shape = '├'
|
||||
if up:
|
||||
end_shape = '┤'
|
||||
elif down:
|
||||
end_shape = '┐'
|
||||
else:
|
||||
end_shape = ''
|
||||
print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
|
||||
for leaf in down:
|
||||
next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
|
||||
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
|
||||
Emotion.printTree(leaf, indent=next_indent, last=next_last)
|
||||
|
||||
|
||||
|
||||
|
||||
|
@@ -1,42 +0,0 @@
|
||||
import os
|
||||
import logging
|
||||
logging.basicConfig()
|
||||
try:
|
||||
import unittest.mock as mock
|
||||
except ImportError:
|
||||
import mock
|
||||
from senpy.extensions import Senpy
|
||||
from flask import Flask
|
||||
import unittest
|
||||
|
||||
class emoTextWAFTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.app = Flask("test_plugin")
|
||||
self.dir = os.path.join(os.path.dirname(__file__))
|
||||
self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
|
||||
self.senpy.init_app(self.app)
|
||||
|
||||
def tearDown(self):
|
||||
self.senpy.deactivate_plugin("EmoTextWAF", sync=True)
|
||||
|
||||
def test_analyse(self):
|
||||
plugin = self.senpy.plugins["EmoTextWAF"]
|
||||
plugin.activate()
|
||||
|
||||
texts = {'I hate you': 'anger',
|
||||
'i am sad': 'sadness',
|
||||
'i am happy with my marks': 'joy',
|
||||
'This movie is scary': 'negative-fear'}
|
||||
|
||||
for text in texts:
|
||||
response = plugin.analyse(input=text)
|
||||
expected = texts[text]
|
||||
emotionSet = response.entries[0].emotions[0]
|
||||
max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity'])
|
||||
assert max_emotion['onyx:hasEmotionCategory'] == expected
|
||||
|
||||
plugin.deactivate()
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@@ -1,92 +0,0 @@
|
||||
|
||||
# coding: utf-8
|
||||
|
||||
# In[1]:
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Clement Michard (c) 2015
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import nltk
|
||||
from emotion import Emotion
|
||||
from nltk.corpus import WordNetCorpusReader
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
class WNAffect:
|
||||
"""WordNet-Affect ressource."""
|
||||
|
||||
def __init__(self, wordnet16_dir, wn_domains_dir):
|
||||
"""Initializes the WordNet-Affect object."""
|
||||
|
||||
cwd = os.getcwd()
|
||||
nltk.data.path.append(cwd)
|
||||
wn16_path = "{0}/dict".format(wordnet16_dir)
|
||||
self.wn16 = WordNetCorpusReader(os.path.abspath("{0}/{1}".format(cwd, wn16_path)), nltk.data.find(wn16_path))
|
||||
self.flat_pos = {'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
|
||||
self.wn_pos = {'NN':self.wn16.NOUN, 'JJ':self.wn16.ADJ, 'VB':self.wn16.VERB, 'RB':self.wn16.ADV}
|
||||
self._load_emotions(wn_domains_dir)
|
||||
self.synsets = self._load_synsets(wn_domains_dir)
|
||||
|
||||
|
||||
|
||||
def _load_synsets(self, wn_domains_dir):
|
||||
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
|
||||
|
||||
tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir))
|
||||
root = tree.getroot()
|
||||
pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
|
||||
|
||||
synsets = {}
|
||||
for pos in ["noun", "adj", "verb", "adv"]:
|
||||
tag = pos_map[pos]
|
||||
synsets[tag] = {}
|
||||
for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
|
||||
offset = int(elem.get("id")[2:])
|
||||
if not offset: continue
|
||||
if elem.get("categ"):
|
||||
synsets[tag][offset] = Emotion.emotions[elem.get("categ")] if elem.get("categ") in Emotion.emotions else None
|
||||
elif elem.get("noun-id"):
|
||||
synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
|
||||
|
||||
return synsets
|
||||
|
||||
def _load_emotions(self, wn_domains_dir):
|
||||
"""Loads the hierarchy of emotions from the WordNet-Affect xml."""
|
||||
|
||||
tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir))
|
||||
root = tree.getroot()
|
||||
for elem in root.findall("categ"):
|
||||
name = elem.get("name")
|
||||
if name == "root":
|
||||
Emotion.emotions["root"] = Emotion("root")
|
||||
else:
|
||||
Emotion.emotions[name] = Emotion(name, elem.get("isa"))
|
||||
|
||||
def get_emotion(self, word, pos):
|
||||
"""Returns the emotion of the word.
|
||||
word -- the word (str)
|
||||
pos -- part-of-speech (str)
|
||||
"""
|
||||
|
||||
if pos in self.flat_pos:
|
||||
pos = self.flat_pos[pos]
|
||||
synsets = self.wn16.synsets(word, self.wn_pos[pos])
|
||||
if synsets:
|
||||
offset = synsets[0].offset()
|
||||
if offset in self.synsets[pos]:
|
||||
return self.synsets[pos][offset]
|
||||
return None
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
wordnet16, wndomains32, word, pos = sys.argv[1:5]
|
||||
wna = WNAffect(wordnet16, wndomains32)
|
||||
print wna.get_emotion(word, pos)
|
||||
|
||||
|
Reference in New Issue
Block a user