1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-08-24 02:22:20 +00:00

Add plugins as submodules

This commit is contained in:
Manuel Garcia Amado
2018-05-14 11:32:56 +02:00
parent 582ae8a340
commit fbde8a9462
40 changed files with 28 additions and 222452 deletions

1
emotion-wnaffect Submodule

Submodule emotion-wnaffect added at 74c40d7e97

View File

@@ -1,41 +0,0 @@
# WordNet-Affect plugin
This plugin uses WordNet-Affect (http://wndomains.fbk.eu/wnaffect.html) to calculate the percentage of each emotion. The plugin classifies among five different emotions: anger, fear, disgust, joy and sadness. An emotion mapping is used to group the WordNet-Affect categories under these emotions:
- anger : general-dislike
- fear : negative-fear
- disgust : shame
- joy : gratitude, affective, enthusiasm, love, joy, liking
- sadness : ingratitude, daze, humility, compassion, despair, anxiety, sadness
## Usage
The parameters accepted are:
- Language: English (en).
- Input: Text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-wnaffect&language=en&input=I%20love%20Madrid
```
Example response: This plugin follows the standard senpy plugin response format. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM for semantic web.
This plugin uses WNAffect labels for emotion analysis.
The emotion-wnaffect.senpy file can be copied and modified to use different versions of wnaffect with the same python code.
## Known issues
- This plugin uses the pattern library, which means it will only run on python 2.7
- Wnaffect and corpora files are not included in the repository, but can be easily added either to the docker image (using a volume) or in a new docker image.
- You can download Wordnet 1.6 here: <http://wordnetcode.princeton.edu/1.6/wn16.unix.tar.gz> and extract the dict folder.
- The hierarchy and synsets files can be found here: <https://github.com/larsmans/wordnet-domains-sentiwords/tree/master/wn-domains/wn-affect-1.1>
![alt GSI Logo][logoGSI]
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

View File

@@ -1,224 +0,0 @@
from __future__ import division
import re
import nltk
import logging
import os
import string
import xml.etree.ElementTree as ET
from nltk.corpus import stopwords
from nltk.corpus import WordNetCorpusReader
from nltk.stem import wordnet
from emotion import Emotion as Emo
from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
    """Senpy emotion plugin that scores text with WordNet-Affect labels.

    Each analysed entry receives one intensity (a percentage of the matched
    words) for every key of ``self._categories``.

    The attributes ``hierarchy_path``, ``synsets_path`` and ``wn16_path`` are
    read in ``activate`` but not set in this class; presumably they are
    provided by the plugin's ``.senpy`` descriptor -- confirm against senpy.
    """

    def _load_synsets(self, synsets_path):
        """Parse the WordNet-Affect synsets XML file.

        Returns a dictionary POS tag -> synset offset -> emotion
        (str -> int -> Emo, or None when no emotion could be resolved).
        ``_load_emotions`` must have filled ``Emo.emotions`` beforehand.
        """
        root = ET.parse(synsets_path).getroot()
        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
        synsets = {}
        # Nouns are processed first because other parts of speech may point
        # at a noun synset through their "noun-id" attribute.
        for pos in ["noun", "adj", "verb", "adv"]:
            tag = pos_map[pos]
            synsets[tag] = {}
            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
                # Ids look like "<letter>#<offset>"; drop the two-char prefix.
                offset = int(elem.get("id")[2:])
                if not offset:
                    continue
                categ = elem.get("categ")
                noun_id = elem.get("noun-id")
                if categ:
                    # Unknown categories are stored as None.
                    synsets[tag][offset] = Emo.emotions.get(categ)
                elif noun_id:
                    # A reference to a noun synset that was not loaded is
                    # stored as None instead of aborting the load with a
                    # KeyError; _extract_features skips None entries.
                    synsets[tag][offset] = synsets[pos_map["noun"]].get(
                        int(noun_id[2:]))
        return synsets

    def _load_emotions(self, hierarchy_path):
        """Loads the hierarchy of emotions from the WordNet-Affect xml.

        Registers every ``categ`` element in ``Emo.emotions``. Elements are
        processed in document order, so a parent has to appear before its
        children (``Emo`` looks the parent up on construction).
        """
        root = ET.parse(hierarchy_path).getroot()
        for elem in root.findall("categ"):
            name = elem.get("name")
            if name == "root":
                Emo.emotions["root"] = Emo("root")
            else:
                Emo.emotions[name] = Emo(name, elem.get("isa"))

    def activate(self, *args, **kwargs):
        """Download the NLTK resources and load the WordNet-Affect data."""
        nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
        self._stopwords = stopwords.words('english')
        self._wnlemma = wordnet.WordNetLemmatizer()
        # First letter of a POS tag -> part-of-speech code for the lemmatizer.
        self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
        local_path = os.path.dirname(os.path.abspath(__file__))
        # Output emotion -> WordNet-Affect labels counted towards it.
        self._categories = {
            'anger': [
                'general-dislike',
            ],
            'fear': [
                'negative-fear',
            ],
            'disgust': [
                'shame',
            ],
            'joy': [
                'gratitude', 'affective', 'enthusiasm', 'love', 'joy',
                'liking',
            ],
            'sadness': [
                # 'ingrattitude' (sic) is the spelling the list originally
                # carried; 'ingratitude' is added as the correctly spelled
                # label. NOTE(review): confirm the exact label used in the
                # hierarchy xml and drop the other one.
                'ingratitude', 'ingrattitude', 'daze', 'humility',
                'compassion', 'despair', 'anxiety', 'sadness',
            ],
        }
        # Output emotion -> value reported as onyx__hasEmotionCategory.
        self._wnaffect_mappings = {
            'anger': 'anger',
            'fear': 'negative-fear',
            'disgust': 'disgust',
            'joy': 'joy',
            'sadness': 'sadness',
        }
        # Plain concatenation is kept on purpose: os.path.join would discard
        # local_path if the configured path starts with "/".
        self._load_emotions(local_path + self.hierarchy_path)
        # The parsed synsets are cached in the shelf (self.sh) so the XML is
        # only parsed when the cache has no entry yet.
        if 'total_synsets' not in self.sh:
            self.sh['total_synsets'] = self._load_synsets(
                local_path + self.synsets_path)
        self._total_synsets = self.sh['total_synsets']
        self._wn16_path = self.wn16_path
        wn16_location = local_path + self._wn16_path
        self._wn16 = WordNetCorpusReader(
            os.path.abspath("{0}".format(wn16_location)),
            nltk.data.find(wn16_location))

    def deactivate(self, *args, **kwargs):
        """Persist the shelf when the plugin is deactivated."""
        self.save()

    def _my_preprocessor(self, text):
        """Strip URLs, @mentions, 'RT : ', digits and punctuation from text."""
        # NOTE(review): the "." in these URL patterns is unescaped and so
        # matches any character; the patterns are kept as they were to
        # preserve the existing behaviour.
        regHttp = re.compile(
            '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regHttps = re.compile(
            '(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
        text = re.sub(regHttp, '', text)
        text = re.sub(regAt, '', text)
        text = re.sub('RT : ', '', text)
        text = re.sub(regHttps, '', text)
        text = re.sub('[0-9]', '', text)
        return self._delete_punctuation(text)

    def _delete_punctuation(self, text):
        """Return text without any character of string.punctuation."""
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def _extract_ngrams(self, text):
        """POS-tag the whitespace-separated tokens of text.

        Returns three parallel lists: the words, their lemmas and their POS
        tags. A word is lemmatized only when the first letter of its tag is a
        key of ``self._syntactics``; otherwise the word itself is used.
        """
        unigrams_words = []
        unigrams_lemmas = []
        pos_tagged = []
        for word, tag in nltk.pos_tag(text.split()):
            unigrams_words.append(word)
            pos_tagged.append(tag)
            if tag[0] in self._syntactics:
                unigrams_lemmas.append(
                    self._wnlemma.lemmatize(word, self._syntactics[tag[0]]))
            else:
                unigrams_lemmas.append(word)
        return unigrams_words, unigrams_lemmas, pos_tagged

    def _find_ngrams(self, input_list, n):
        """Return the n-grams of input_list as produced by zip()."""
        return zip(*[input_list[i:] for i in range(n)])

    def _clean_pos(self, pos_tagged):
        """Collapse detailed POS tags to NN / JJ / RB / VB, in place.

        Tags without a mapping are left unchanged. The (mutated) input list
        is also returned.
        """
        pos_tags = {
            'NN': 'NN',
            'NNP': 'NN',
            'NNP-LOC': 'NN',
            'NNS': 'NN',
            'JJ': 'JJ',
            'JJR': 'JJ',
            'JJS': 'JJ',
            'RB': 'RB',
            'RBR': 'RB',
            'RBS': 'RB',
            'VB': 'VB',
            'VBD': 'VB',
            # 'VBG' is the Penn Treebank gerund tag; the table only had the
            # transposed 'VGB', so gerunds were never treated as verbs.
            # 'VGB' is kept so nothing that relied on it changes.
            'VBG': 'VB',
            'VGB': 'VB',
            'VBN': 'VB',
            'VBP': 'VB',
            'VBZ': 'VB',
        }
        for index, tag in enumerate(pos_tagged):
            if tag in pos_tags:
                pos_tagged[index] = pos_tags[tag]
        return pos_tagged

    def _extract_features(self, text):
        """Return the percentage of matched words per output emotion.

        For every word with a noun/adjective/verb/adverb tag, the first
        WordNet 1.6 synset is looked up in the WordNet-Affect table; its
        emotion is lifted to hierarchy level 5 and counted for each category
        listing that label. Counts are divided by the number of matched words
        (1 when nothing matched) and multiplied by 100.
        """
        feature_set = {k: 0 for k in self._categories}
        ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
        matches = 0
        pos_tagged = self._clean_pos(pos_tagged)
        tag_wn = {
            'NN': self._wn16.NOUN,
            'JJ': self._wn16.ADJ,
            'VB': self._wn16.VERB,
            'RB': self._wn16.ADV
        }
        for word, tag in zip(ngrams_words, pos_tagged):
            if tag not in tag_wn:
                continue
            synsets = self._wn16.synsets(word, tag_wn[tag])
            if not synsets:
                continue
            offset = synsets[0].offset()
            # Missing offsets and offsets stored as None both mean "no
            # emotion known for this synset".
            node = self._total_synsets[tag].get(offset)
            if node is None:
                continue
            emotion = node.get_level(5).name
            matches += 1
            # Distinct loop names: the inner loop used to reuse the outer
            # loop's index variable.
            for category, labels in self._categories.items():
                if emotion in labels:
                    feature_set[category] += 1
        if matches == 0:
            # Avoid dividing by zero; every score stays 0.
            matches = 1
        for category in feature_set:
            feature_set[category] = (feature_set[category] / matches) * 100
        return feature_set

    def analyse_entry(self, entry, params):
        """Attach an EmotionSet with one Emotion per category to entry.

        entry -- the entry to analyse; its "text" value is the input
        params -- request parameters (not used here)

        Yields the same entry with ``entry.emotions`` set.
        """
        # An entry without text is analysed as an empty string instead of
        # failing inside re.sub with a TypeError.
        text_input = entry.get("text", None) or ""
        text = self._my_preprocessor(text_input)
        feature_text = self._extract_features(text)
        emotionSet = EmotionSet(id="Emotions0")
        emotions = emotionSet.onyx__hasEmotion
        for category, intensity in feature_text.items():
            emotions.append(
                Emotion(
                    onyx__hasEmotionCategory=self._wnaffect_mappings[category],
                    onyx__hasEmotionIntensity=intensity))
        entry.emotions = [emotionSet]
        yield entry

View File

@@ -1,25 +0,0 @@
---
name: emotion-wnaffect
module: emotion-wnaffect
description: 'Emotion classifier using WordNet-Affect to calculate the percentage
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
or neutral. The only available language is English (en)'
author: "@icorcuera @balkian"
version: '0.2'
extra_params:
language:
"@id": lang_wnaffect
aliases:
- language
- l
required: false
options:
- en
synsets_path: "/data/emotion-wnaffect/a-synsets.xml"
hierarchy_path: "/data/emotion-wnaffect/a-hierarchy.xml"
wn16_path: "/data/emotion-wnaffect/wordnet1.6/dict"
onyx:usesEmotionModel: emoml:big6
requirements:
- nltk>=3.0.5
- lxml>=3.4.2
async: false

View File

@@ -1,95 +0,0 @@
"""
Clement Michard (c) 2015
"""
class Emotion:
    """Defines an emotion as a node of the emotion hierarchy."""

    emotions = {}  # name to emotion (str -> Emotion)

    def __init__(self, name, parent_name=None):
        """Initializes an Emotion object.

        name -- name of the emotion (str)
        parent_name -- name of the parent emotion (str); it must already be
                       registered in Emotion.emotions, otherwise KeyError

        The new object is NOT added to Emotion.emotions here; callers
        register it themselves.
        """
        self.name = name
        self.parent = None
        self.level = 0
        self.children = []
        if parent_name:
            self.parent = Emotion.emotions[parent_name]
            self.parent.children.append(self)
            self.level = self.parent.level + 1

    def get_level(self, level):
        """Returns the ancestor of self at the given level.

        level -- level in the hierarchy (int)

        Returns self when self is already at or above that level, and the
        top-most ancestor when level is below the top of the hierarchy.
        """
        em = self
        # Stop at a node without parent so that a negative level yields the
        # top-most node instead of dereferencing None.
        while em.level > level and em.parent is not None:
            em = em.parent
        return em

    def __str__(self):
        """Returns the emotion string formatted."""
        return self.name

    def nb_children(self):
        """Returns the size of the subtree rooted here (self included)."""
        return sum(child.nb_children() for child in self.children) + 1

    @staticmethod
    def printTree(emotion=None, indent="", last='updown'):
        """Prints the hierarchy of emotions.

        emotion -- root emotion (Emotion); defaults to Emotion.emotions["root"]
        indent -- prefix printed before the node (str)
        last -- position marker of the node: 'up', 'down', 'updown' or ''

        Children are split into an "up" group printed before the node and a
        "down" group printed after it.
        """
        if not emotion:
            emotion = Emotion.emotions["root"]

        size_branch = {child: child.nb_children() for child in emotion.children}
        leaves = sorted(emotion.children, key=lambda child: child.nb_children())
        up, down = [], []
        if leaves:
            # Move the biggest branches to "down" until it outweighs the rest.
            while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves):
                down.append(leaves.pop())
            up = leaves

        padding = " " * len(emotion.name)

        # NOTE(review): every shape/connector string below except the one for
        # 'updown' is empty, which makes the branches indistinguishable; the
        # drawing characters look like they were lost -- confirm against the
        # upstream file. The strings are kept exactly as found.
        for position, leaf in enumerate(up):
            next_last = 'up' if position == 0 else ''
            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '', padding)
            Emotion.printTree(leaf, indent=next_indent, last=next_last)

        if last == 'up':
            start_shape = ''
        elif last == 'down':
            start_shape = ''
        elif last == 'updown':
            start_shape = ' '
        else:
            start_shape = ''

        if up:
            end_shape = ''
        elif down:
            end_shape = ''
        else:
            end_shape = ''

        print('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))

        final = len(down) - 1
        for position, leaf in enumerate(down):
            # Compare positions by value; "is" on integers only works by
            # accident of small-int caching.
            next_last = 'down' if position == final else ''
            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '', padding)
            Emotion.printTree(leaf, indent=next_indent, last=next_last)

View File

@@ -1,42 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
class emoTextWAFTest(unittest.TestCase):
    """Checks the dominant emotion reported by the plugin for sample texts."""

    def setUp(self):
        """Create a Flask app and a Senpy instance loading plugins from here."""
        self.app = Flask("test_plugin")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the plugin synchronously after each test."""
        self.senpy.deactivate_plugin("EmoTextWAF", sync=True)

    def test_analyse(self):
        """The strongest emotion of each sample text is the expected one."""
        plugin = self.senpy.plugins["EmoTextWAF"]
        plugin.activate()

        texts = {'I hate you': 'anger',
                 'i am sad': 'sadness',
                 'i am happy with my marks': 'joy',
                 'This movie is scary': 'negative-fear'}

        for text, expected in texts.items():
            response = plugin.analyse(input=text)
            emotionSet = response.entries[0].emotions[0]
            max_emotion = max(emotionSet['onyx:hasEmotion'],
                              key=lambda x: x['onyx:hasEmotionIntensity'])
            # assertEqual is not stripped under "python -O" and reports both
            # values (plus the offending text) on failure.
            self.assertEqual(max_emotion['onyx:hasEmotionCategory'], expected,
                             "unexpected emotion for {0!r}".format(text))

        plugin.deactivate()


if __name__ == '__main__':
    unittest.main()

View File

@@ -1,92 +0,0 @@
# coding: utf-8
# In[1]:
# -*- coding: utf-8 -*-
"""
Clement Michard (c) 2015
"""
import os
import sys
import nltk
from emotion import Emotion
from nltk.corpus import WordNetCorpusReader
import xml.etree.ElementTree as ET
class WNAffect:
    """WordNet-Affect resource."""

    # Detailed POS tag -> flattened tag. 'VBG' is the Penn Treebank gerund
    # tag; the table only had the transposed 'VGB', so gerunds were never
    # recognised. 'VGB' is kept so nothing that relied on it changes.
    _FLAT_POS = {
        'NN': 'NN', 'NNS': 'NN',
        'JJ': 'JJ', 'JJR': 'JJ', 'JJS': 'JJ',
        'RB': 'RB', 'RBR': 'RB', 'RBS': 'RB',
        'VB': 'VB', 'VBD': 'VB', 'VBG': 'VB', 'VGB': 'VB', 'VBN': 'VB',
        'VBP': 'VB', 'VBZ': 'VB',
    }

    def __init__(self, wordnet16_dir, wn_domains_dir):
        """Initializes the WordNet-Affect object.

        wordnet16_dir -- directory holding the WordNet 1.6 "dict" folder (str)
        wn_domains_dir -- directory holding a-hierarchy.xml and
                          a-synsets.xml (str)
        """
        cwd = os.getcwd()
        # Register the working directory once, not on every instantiation.
        if cwd not in nltk.data.path:
            nltk.data.path.append(cwd)
        wn16_path = "{0}/dict".format(wordnet16_dir)
        # os.path.join leaves an absolute wordnet16_dir intact instead of
        # gluing it onto the working directory.
        self.wn16 = WordNetCorpusReader(
            os.path.abspath(os.path.join(cwd, wn16_path)),
            nltk.data.find(wn16_path))
        self.flat_pos = dict(self._FLAT_POS)
        self.wn_pos = {'NN': self.wn16.NOUN, 'JJ': self.wn16.ADJ,
                       'VB': self.wn16.VERB, 'RB': self.wn16.ADV}
        # The hierarchy has to be loaded first: the synsets refer to it.
        self._load_emotions(wn_domains_dir)
        self.synsets = self._load_synsets(wn_domains_dir)

    def _load_synsets(self, wn_domains_dir):
        """Parse <wn_domains_dir>/a-synsets.xml.

        Returns a dictionary POS tag -> synset offset -> emotion
        (str -> int -> Emotion, or None when no emotion could be resolved).
        """
        tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir))
        root = tree.getroot()
        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
        synsets = {}
        # Nouns come first because other parts of speech may point at a noun
        # synset through their "noun-id" attribute.
        for pos in ["noun", "adj", "verb", "adv"]:
            tag = pos_map[pos]
            synsets[tag] = {}
            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
                # Ids look like "<letter>#<offset>"; drop the two-char prefix.
                offset = int(elem.get("id")[2:])
                if not offset:
                    continue
                categ = elem.get("categ")
                noun_id = elem.get("noun-id")
                if categ:
                    # Unknown categories are stored as None.
                    synsets[tag][offset] = Emotion.emotions.get(categ)
                elif noun_id:
                    # A reference to a noun synset that was not loaded is
                    # stored as None instead of raising KeyError.
                    synsets[tag][offset] = synsets[pos_map["noun"]].get(
                        int(noun_id[2:]))
        return synsets

    def _load_emotions(self, wn_domains_dir):
        """Loads the hierarchy of emotions from the WordNet-Affect xml.

        Reads <wn_domains_dir>/a-hierarchy.xml and registers every "categ"
        element in Emotion.emotions, in document order.
        """
        tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir))
        root = tree.getroot()
        for elem in root.findall("categ"):
            name = elem.get("name")
            if name == "root":
                Emotion.emotions["root"] = Emotion("root")
            else:
                Emotion.emotions[name] = Emotion(name, elem.get("isa"))

    def get_emotion(self, word, pos):
        """Returns the emotion of the word, or None when there is none.

        word -- the word (str)
        pos -- part-of-speech (str)

        Only the first synset returned for the word is considered.
        """
        flat = self.flat_pos.get(pos)
        if flat is None:
            return None
        synsets = self.wn16.synsets(word, self.wn_pos[flat])
        if not synsets:
            return None
        return self.synsets[flat].get(synsets[0].offset())
if __name__ == "__main__":
    # Command-line use: look up the emotion of one word.
    if len(sys.argv) < 5:
        # Exit with a usage message rather than an unpacking traceback.
        sys.exit("usage: {0} <wordnet-1.6-dir> <wn-domains-dir> <word> <pos>"
                 .format(sys.argv[0]))
    wordnet16, wndomains32, word, pos = sys.argv[1:5]
    wna = WNAffect(wordnet16, wndomains32)
    # print() with one argument behaves the same on Python 2 and Python 3;
    # the former print statement was a syntax error on Python 3.
    print(wna.get_emotion(word, pos))