1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-25 01:22:28 +00:00

Squashed 'emotion-anew/' content from commit e8a3c83

git-subtree-dir: emotion-anew
git-subtree-split: e8a3c837e3543a5f5f19086e1fcaa34b22be639e
This commit is contained in:
J. Fernando Sánchez 2018-06-12 10:01:45 +02:00
commit 98ec4817cf
7 changed files with 329 additions and 0 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "data"]
path = data
url = ../data/emotion-anew

60
README.md Executable file
View File

@ -0,0 +1,60 @@
# Plugin emotion-anew
This plugin consists of an **emotion classifier** that detects six possible emotions:
- Anger : general-dislike.
- Fear : negative-fear.
- Disgust : shame.
- Joy : gratitude, affective, enthusiasm, love, joy, liking.
- Sadness : ingratitude, daze, humility, compassion, despair, anxiety, sadness.
- Neutral: no particular emotion was detected.
The plugin uses the **ANEW lexicon** dictionary to calculate the VAD (valence-arousal-dominance) of the sentence and determine which emotion is closest to this value. To make this comparison, each emotion is assigned a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208.
The plugin looks for the words in the sentence that appear in the ANEW dictionary and calculates the average VAD score for the sentence. Once this score is calculated, it looks for the emotion whose centroid is closest to this value.
The response of this plugin uses the [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/), developed at GSI UPM, to express the information.
## Installation
* Download
```
git clone https://lab.cluster.gsi.dit.upm.es/senpy/emotion-anew.git
```
* Get data
```
cd emotion-anew
git submodule update --init --recursive
```
* Run
```
docker run -p 5000:5000 -v $PWD:/plugins gsiupm/senpy:python2.7 -f /plugins
```
## Data format
`data/Corpus/affective-isear.tsv` contains data from ISEAR Databank: http://emotion-research.net/toolbox/toolboxdatabase.2006-10-13.2581092615
## Usage
Params accepted:
- Language: English (en) and Spanish (es).
- Input: input text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-anew&language=en&input=I%20love%20Madrid
```
Example response: This plugin follows the standard senpy plugin response format. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
# Known issues
- The ANEW dictionary can be downloaded from here: <https://github.com/hcorona/SMC2015/blob/master/resources/ANEW2010All.txt>
- This plugin only supports **Python2**
![alt GSI Logo][logoGSI]
[logoES]: https://www.gsi.dit.upm.es/ontologies/onyx/img/eurosentiment_logo.png "EuroSentiment logo"
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

1
data Submodule

@ -0,0 +1 @@
Subproject commit 76b75e348a0251a66ff8f6eb44eb1d872d4990c2

156
emotion-anew.py Normal file
View File

@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
import re
import nltk
import csv
import sys
import os
import unicodedata
import string
import xml.etree.ElementTree as ET
import math
from sklearn.svm import LinearSVC
from sklearn.feature_extraction import DictVectorizer
from nltk import bigrams
from nltk import trigrams
from nltk.corpus import stopwords
from pattern.en import parse as parse_en
from pattern.es import parse as parse_es
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(SentimentPlugin):
    """Senpy plugin that labels a text with one emotion using ANEW VAD scores.

    The text is cleaned and tokenised, every token found in the ANEW lexicon
    contributes its valence/arousal/dominance (VAD) values, the values are
    averaged, and the emotion whose centroid is nearest (Euclidean distance)
    to that average is reported.

    The attributes ``centroids``, ``emotions_ontology``, ``anew_path_es`` and
    ``anew_path_en`` are not set in this class; they are expected to come
    from the plugin definition file (``emotion-anew.senpy``).
    """

    def activate(self, *args, **kwargs):
        """Download the NLTK stopword list and prepare per-instance state."""
        nltk.download('stopwords')
        # NOTE(review): the English list is used for every language, so
        # Spanish stopwords are never filtered -- confirm this is intended.
        self._stopwords = stopwords.words('english')
        self._local_path = os.path.dirname(os.path.abspath(__file__))
        # Parsed lexicons keyed by language; filled lazily on first use.
        self._dictionaries = {}

    def _my_preprocessor(self, text):
        """Strip URLs, @-mentions, 'RT : ' markers, digits and punctuation.

        The substitutions are applied in the same order as they always were;
        the patterns themselves are unchanged.
        """
        regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
        text = re.sub(regHttp, '', text)
        text = re.sub(regAt, '', text)
        text = re.sub('RT : ', '', text)
        text = re.sub(regHttps, '', text)
        text = re.sub('[0-9]', '', text)
        text = self._delete_punctuation(text)
        return text

    def _delete_punctuation(self, text):
        """Return ``text`` without any character of ``string.punctuation``."""
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def _extract_ngrams(self, text, lang):
        """Tokenise ``text`` and return ``(lemmas, words, pos_tags)``.

        The Spanish parser is used when ``lang == 'es'``; any other value
        falls back to the English parser.  Tokens whose lower-cased word is
        in ``self._stopwords`` are dropped, so the three lists always have
        the same length.
        """
        unigrams_lemmas = []
        unigrams_words = []
        pos_tagged = []
        if lang == 'es':
            sentences = parse_es(text, lemmata=True).split()
        else:
            sentences = parse_en(text, lemmata=True).split()
        for sentence in sentences:
            for token in sentence:
                # presumably token = [word, POS, chunk, PNP, lemma] as
                # produced by pattern's parse(lemmata=True) -- TODO confirm
                word = token[0].lower()
                if word not in self._stopwords:
                    unigrams_words.append(word)
                    unigrams_lemmas.append(token[4])
                    pos_tagged.append(token[1])
        return unigrams_lemmas, unigrams_words, pos_tagged

    def _find_ngrams(self, input_list, n):
        """Return the ``zip`` of ``n`` successively shifted slices of the list."""
        return zip(*[input_list[i:] for i in range(n)])

    def _emotion_calculate(self, VAD):
        """Return the name of the centroid closest to ``VAD``.

        ``VAD`` is indexed as ``[valence, arousal, dominance]``.  Distance is
        Euclidean; on a tie the first centroid encountered wins.  Returns
        ``''`` when ``self.centroids`` is empty.
        """
        emotion = ''
        value = float('inf')
        for state in self.centroids:
            centroid = self.centroids[state]
            valence = VAD[0] - centroid['V']
            arousal = VAD[1] - centroid['A']
            dominance = VAD[2] - centroid['D']
            new_value = math.sqrt((valence * valence) +
                                  (arousal * arousal) +
                                  (dominance * dominance))
            if new_value < value:
                value = new_value
                emotion = state
        return emotion

    def _extract_features(self, tweet, dictionary, lang):
        """Compute the average VAD of ``tweet`` and the matching emotion.

        ``dictionary`` maps a word to a mapping with ``'V'``, ``'A'`` and
        ``'D'`` entries convertible with ``float``.  For each token the lemma
        is looked up first and the lower-cased word second.

        Returns a dict with keys ``'emotion'``, ``'V'``, ``'A'`` and ``'D'``.
        When no token is found in ``dictionary`` the emotion is ``'neutral'``
        and the three scores are ``0``.
        """
        ngrams_lemmas, ngrams_words, _ = self._extract_ngrams(tweet, lang)
        totalVAD = [0, 0, 0]
        matches = 0
        for lemma, word in zip(ngrams_lemmas, ngrams_words):
            if lemma in dictionary:
                scores = dictionary[lemma]
            elif word in dictionary:
                scores = dictionary[word]
            else:
                continue
            matches += 1
            totalVAD = [totalVAD[0] + float(scores['V']),
                        totalVAD[1] + float(scores['A']),
                        totalVAD[2] + float(scores['D'])]
        if matches == 0:
            emotion = 'neutral'
        else:
            totalVAD = [total / matches for total in totalVAD]
            emotion = self._emotion_calculate(totalVAD)
        return {
            'emotion': emotion,
            'V': totalVAD[0],
            'A': totalVAD[1],
            'D': totalVAD[2],
        }

    def _load_dictionary(self, lang):
        """Return the lexicon for ``lang``, reading it from disk only once.

        ``'es'`` selects ``self.anew_path_es``; every other value (including
        the ``"auto"`` default) selects ``self.anew_path_en``.  Both files
        are read as tab-separated values.  The result maps each word to
        ``{'V': ..., 'A': ..., 'D': ...}`` holding the raw strings from the
        file.
        """
        if lang == 'es':
            key, relative_path = 'es', self.anew_path_es
            word_col, v_col, a_col, d_col = 2, 3, 5, 7
        else:
            key, relative_path = 'en', self.anew_path_en
            word_col, v_col, a_col, d_col = 0, 2, 4, 6

        # activate() creates the cache, but tolerate being called without it.
        cache = getattr(self, '_dictionaries', None)
        if cache is None:
            cache = {}
            self._dictionaries = cache

        if key not in cache:
            dictionary = {}
            # The configured paths start with '/', so they are concatenated;
            # os.path.join would discard self._local_path.
            # 'rb' is the mode Python 2's csv module expects (the README
            # states that only Python 2 is supported).
            with open(self._local_path + relative_path, 'rb') as tabfile:
                for row in csv.reader(tabfile, delimiter='\t'):
                    if len(row) <= d_col:
                        # Blank or truncated line: nothing usable in it.
                        continue
                    dictionary[row[word_col]] = {
                        'V': row[v_col],
                        'A': row[a_col],
                        'D': row[d_col],
                    }
            cache[key] = dictionary
        return cache[key]

    def analyse_entry(self, entry, params):
        """Annotate ``entry`` with a single emotion and yield it.

        Reads the entry's ``"text"`` (a missing or ``None`` text is treated
        as empty and therefore classified as neutral) and the ``"language"``
        parameter, then stores one ``EmotionSet`` containing one ``Emotion``
        in ``entry.emotions``.
        """
        text_input = entry.get("text", None) or ''
        text = self._my_preprocessor(text_input)
        lang = params.get("language", "auto")
        dictionary = self._load_dictionary(lang)
        feature_set = self._extract_features(text, dictionary, lang)

        emotions = EmotionSet()
        emotions.id = "Emotions0"
        emotion1 = Emotion(id="Emotion0")
        emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']]
        emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V']
        emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A']
        emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D']
        emotions.onyx__hasEmotion.append(emotion1)
        entry.emotions = [emotions, ]
        yield entry

BIN
emotion-anew.pyc Normal file

Binary file not shown.

64
emotion-anew.senpy Normal file
View File

@ -0,0 +1,64 @@
{
"name": "emotion-anew",
"module": "emotion-anew",
"description": "This plugin consists of an emotion classifier using the ANEW lexicon dictionary to calculate the VAD (valence-arousal-dominance) of the sentence and determine which emotion is closest to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin looks for the words in the sentence that appear in the ANEW dictionary and calculates the average VAD score for the sentence. Once this score is calculated, it looks for the emotion whose centroid is closest to this value.",
"author": "@icorcuera",
"version": "0.5",
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["es","en"],
"default": "en"
}
},
"requirements": {},
"anew_path_es": "/data/Dictionary/Redondo(2007).csv",
"anew_path_en": "/data/Dictionary/ANEW2010All.txt",
"centroids": {
"anger": {
"A": 6.95,
"D": 5.1,
"V": 2.7
},
"disgust": {
"A": 5.3,
"D": 8.05,
"V": 2.7
},
"fear": {
"A": 6.5,
"D": 3.6,
"V": 3.2
},
"joy": {
"A": 7.22,
"D": 6.28,
"V": 8.6
},
"sadness": {
"A": 5.21,
"D": 2.82,
"V": 2.21
}
},
"emotions_ontology": {
"anger": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#anger",
"disgust": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#disgust",
"fear": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#negative-fear",
"joy": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#joy",
"neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion",
"sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness"
},
"requirements": [
"numpy",
"pandas",
"nltk",
"scipy",
"scikit-learn",
"textblob",
"pattern",
"lxml"
],
"onyx:usesEmotionModel": "emoml:big6",
}

45
test.py Normal file
View File

@ -0,0 +1,45 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
import re
class emoTextANEWTest(unittest.TestCase):
    """End-to-end check of the emotion-anew plugin through a Senpy instance."""

    # Must match the "name" field of emotion-anew.senpy.
    # NOTE(review): assumes senpy keys its plugins by that name -- confirm.
    PLUGIN_NAME = "emotion-anew"

    def setUp(self):
        """Create a Flask app and a Senpy instance that loads only this folder."""
        self.app = Flask("test_plugin")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the plugin synchronously after each test."""
        self.senpy.deactivate_plugin(self.PLUGIN_NAME, sync=True)

    def test_analyse(self):
        """Each sample sentence must be tagged with the expected category."""
        plugin = self.senpy.plugins[self.PLUGIN_NAME]
        plugin.activate()

        ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
        # NOTE(review): the 'disgusting' sample expects 'negative-fear', not
        # 'disgust' -- presumably the classifier's actual output; confirm.
        texts = {'I hate you': 'anger',
                 'i am sad': 'sadness',
                 'i am happy with my marks': 'joy',
                 'This movie is scary': 'negative-fear',
                 'this cake is disgusting': 'negative-fear'}

        for text, expected in texts.items():
            response = plugin.analyse(input=text)
            emotionSet = response.entries[0].emotions[0]
            category = emotionSet['onyx:hasEmotion'][0]['onyx:hasEmotionCategory']
            # assertEqual reports both values on failure, unlike bare assert.
            self.assertEqual(category, ontology + expected)

        plugin.deactivate()
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()