From 98ec4817cff3abd06f961fbbdb5c860aeb887bca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Tue, 12 Jun 2018 10:01:45 +0200 Subject: [PATCH] Squashed 'emotion-anew/' content from commit e8a3c83 git-subtree-dir: emotion-anew git-subtree-split: e8a3c837e3543a5f5f19086e1fcaa34b22be639e --- .gitmodules | 3 + README.md | 60 +++++++++++++++++ data | 1 + emotion-anew.py | 156 +++++++++++++++++++++++++++++++++++++++++++++ emotion-anew.pyc | Bin 0 -> 6222 bytes emotion-anew.senpy | 64 +++++++++++++++++++ test.py | 45 +++++++++++++ 7 files changed, 329 insertions(+) create mode 100644 .gitmodules create mode 100755 README.md create mode 160000 data create mode 100644 emotion-anew.py create mode 100644 emotion-anew.pyc create mode 100644 emotion-anew.senpy create mode 100644 test.py diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e7aadbd --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "data"] + path = data + url = ../data/emotion-anew diff --git a/README.md b/README.md new file mode 100755 index 0000000..3b47978 --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +# Plugin emotion-anew + +This plugin consists of an **emotion classifier** that detects six possible emotions: +- Anger : general-dislike. +- Fear : negative-fear. +- Disgust : shame. +- Joy : gratitude, affective, enthusiasm, love, joy, liking. +- Sadness : ingratitude, daze, humility, compassion, despair, anxiety, sadness. +- Neutral: no particular emotion detected. + +The plugin uses the **ANEW lexicon** dictionary to calculate the VAD (valence-arousal-dominance) of the sentence and determine which emotion is closest to this value. To make this comparison, each emotion is assigned a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. + +The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. 
Once this score is calculated, the plugin selects the emotion whose centroid is closest to this value. + +The response of this plugin uses the [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/), developed at GSI UPM, to express the information. + +## Installation + +* Download +``` +git clone https://lab.cluster.gsi.dit.upm.es/senpy/emotion-anew.git +``` +* Get data +``` +cd emotion-anew +git submodule update --init --recursive +``` +* Run +``` +docker run -p 5000:5000 -v $PWD:/plugins gsiupm/senpy:python2.7 -f /plugins +``` + +## Data format + +`data/Corpus/affective-isear.tsv` contains data from the ISEAR Databank: http://emotion-research.net/toolbox/toolboxdatabase.2006-10-13.2581092615 + +## Usage + +Params accepted: +- Language: English (en) and Spanish (es). +- Input: input text to analyse. + + +Example request: +``` +http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-anew&language=en&input=I%20love%20Madrid +``` + +Example response: This plugin follows the standard for the senpy plugin response. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section. 
+# Known issues + +- To obtain Anew dictionary you can download from here: + +- This plugin only supports **Python2** + + +![alt GSI Logo][logoGSI] + +[logoES]: https://www.gsi.dit.upm.es/ontologies/onyx/img/eurosentiment_logo.png "EuroSentiment logo" +[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo" diff --git a/data b/data new file mode 160000 index 0000000..76b75e3 --- /dev/null +++ b/data @@ -0,0 +1 @@ +Subproject commit 76b75e348a0251a66ff8f6eb44eb1d872d4990c2 diff --git a/emotion-anew.py b/emotion-anew.py new file mode 100644 index 0000000..1ff42f4 --- /dev/null +++ b/emotion-anew.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- + +import re +import nltk +import csv +import sys +import os +import unicodedata +import string +import xml.etree.ElementTree as ET +import math + +from sklearn.svm import LinearSVC +from sklearn.feature_extraction import DictVectorizer + +from nltk import bigrams +from nltk import trigrams +from nltk.corpus import stopwords + +from pattern.en import parse as parse_en +from pattern.es import parse as parse_es +from senpy.plugins import SentimentPlugin, SenpyPlugin +from senpy.models import Results, EmotionSet, Entry, Emotion + + +class EmotionTextPlugin(SentimentPlugin): + + def activate(self, *args, **kwargs): + nltk.download('stopwords') + self._stopwords = stopwords.words('english') + self._local_path=os.path.dirname(os.path.abspath(__file__)) + + def _my_preprocessor(self, text): + + regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') + regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') + regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*') + text = re.sub(regHttp, '', text) + text = re.sub(regAt, '', text) + text = re.sub('RT : ', '', text) + text = re.sub(regHttps, '', text) + text = re.sub('[0-9]', '', text) + text = self._delete_punctuation(text) + return text + + def _delete_punctuation(self, text): + + exclude = set(string.punctuation) + s = 
''.join(ch for ch in text if ch not in exclude) + return s + + def _extract_ngrams(self, text, lang): + + unigrams_lemmas = [] + unigrams_words = [] + pos_tagged = [] + if lang == 'es': + sentences = parse_es(text,lemmata=True).split() + else: + sentences = parse_en(text,lemmata=True).split() + + for sentence in sentences: + for token in sentence: + if token[0].lower() not in self._stopwords: + unigrams_words.append(token[0].lower()) + unigrams_lemmas.append(token[4]) + pos_tagged.append(token[1]) + + return unigrams_lemmas,unigrams_words,pos_tagged + + def _find_ngrams(self, input_list, n): + return zip(*[input_list[i:] for i in range(n)]) + + def _emotion_calculate(self, VAD): + emotion='' + value=10000000000000000000000.0 + for state in self.centroids: + valence=VAD[0]-self.centroids[state]['V'] + arousal=VAD[1]-self.centroids[state]['A'] + dominance=VAD[2]-self.centroids[state]['D'] + new_value=math.sqrt((valence*valence)+(arousal*arousal)+(dominance*dominance)) + if new_value < value: + value=new_value + emotion=state + return emotion + + def _extract_features(self, tweet,dictionary,lang): + feature_set={} + ngrams_lemmas,ngrams_words,pos_tagged = self._extract_ngrams(tweet,lang) + pos_tags={'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', + 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'} + totalVAD=[0,0,0] + matches=0 + for word in range(len(ngrams_lemmas)): + VAD=[] + if ngrams_lemmas[word] in dictionary: + matches+=1 + totalVAD = [totalVAD[0]+float(dictionary[ngrams_lemmas[word]]['V']), + totalVAD[1]+float(dictionary[ngrams_lemmas[word]]['A']), + totalVAD[2]+float(dictionary[ngrams_lemmas[word]]['D'])] + elif ngrams_words[word] in dictionary: + matches+=1 + totalVAD = [totalVAD[0]+float(dictionary[ngrams_words[word]]['V']), + totalVAD[1]+float(dictionary[ngrams_words[word]]['A']), + totalVAD[2]+float(dictionary[ngrams_words[word]]['D'])] + if matches==0: + emotion='neutral' + else: + 
totalVAD=[totalVAD[0]/matches,totalVAD[1]/matches,totalVAD[2]/matches] + emotion=self._emotion_calculate(totalVAD) + feature_set['emotion']=emotion + feature_set['V']=totalVAD[0] + feature_set['A']=totalVAD[1] + feature_set['D']=totalVAD[2] + return feature_set + + def analyse_entry(self, entry, params): + + text_input = entry.get("text", None) + + text= self._my_preprocessor(text_input) + dictionary={} + lang = params.get("language", "auto") + if lang == 'es': + with open(self._local_path + self.anew_path_es,'rb') as tabfile: + reader = csv.reader(tabfile, delimiter='\t') + for row in reader: + dictionary[row[2]]={} + dictionary[row[2]]['V']=row[3] + dictionary[row[2]]['A']=row[5] + dictionary[row[2]]['D']=row[7] + else: + with open(self._local_path + self.anew_path_en,'rb') as tabfile: + reader = csv.reader(tabfile, delimiter='\t') + for row in reader: + dictionary[row[0]]={} + dictionary[row[0]]['V']=row[2] + dictionary[row[0]]['A']=row[4] + dictionary[row[0]]['D']=row[6] + + feature_set=self._extract_features(text,dictionary,lang) + + emotions = EmotionSet() + emotions.id = "Emotions0" + + emotion1 = Emotion(id="Emotion0") + + emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']] + emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V'] + emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A'] + emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D'] + + emotions.onyx__hasEmotion.append(emotion1) + entry.emotions = [emotions,] + + yield entry diff --git a/emotion-anew.pyc b/emotion-anew.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a3e0427b7cc116d719462b3292baa4cceac5be22 GIT binary patch literal 6222 zcmb_gOK%*<5w4kCF1dV*;!_Vxw)|KV5Gf~$92<&lQB-Uw3F8a}(q4rgjCP01p=M{6 z)3YL1AcFwP6EI*Pry$4Na>yZnBbS_f&rb-DACP=s^(;wQwj(FmM^9JvtGeo|s_rTM 
zb8PsJpLRZONbxD*`5yk}x+BuW&yiT9eq48??)3St)ZKo)B=u6iUY2^fUmuYAK)+s* zdZk|Wh}K<+N2NU`^)YcMK}q6qX-`Of!qR1l9cfQWebUkcN}rMX z8B13ro|5)ish_p^^~FJxubbjpD<&+lrEW#PT{lOV`Tm$kn{ibmQFQ!>HeBr+Jhn>tUXA$~#G(^|Vvb zke@l}bT2$8<~~LaH@kwr`4B}2%qUKv9D>B5mWU8nD}YByE5JrsD*(oTRtBY6(aMlC z2encaZ@orMG`Iu-%Kwk?H>)UekzFk1$P-7L+?8Ej1;n9gkf~;lo3dTI#XSIf!Xkq42 zTkfc}&2nfhc{NDF{iRM%1rHi|^eD)~3+QM#g*ao*l+&Q3+y-}f8-MdB6sL81y+6);!-211;Cr=` z$+s5oFZ#=`zH;Tgmof3;=?16=TBVA;dzat7thM_T&UzvBpIJ~e(sl<($O%T%eUP8U z({G00gnp-+H1cjh2d(K`G`M0ShbtP9^o#I5{!u`PeyeE&ru(&M$aLjbLhG%fjiyxa z^nH-{Y|zoE7*0vs8cG23Z_O2k{|#JVP%=fshO?)sXI*3Fn%yWzs~0X^(??_HB*caX zjkwzk|Fh#5M%$aMFbNMj**kAg5YAf>YdWUN{QUmR#QKG)=(PkdE z0@YxMm6TPF)vxto?=pHUv6uQT>~P6xPymWZK-^~A=O|i2A7xqAazI_vk%z8iZ_zH$ zJi;(Uc;YxT5PDW5nHS*hOPdaIIjqR)RxC$@a?p^&K@j)lXh;qbqf4@|TjnJ6&H=Sb zD2HVR-KUyiXa`PSu*~v-)D&Gexe74>l}2BL*0tQ@2GR$op;9&0|q56ZWN)us}bh^2Z44Ny@6D_0X z-5d{4NQp_a*xCD_)$hc{nx_!|XEY-BBZdIeko({d3PhZ*wmzc=5Yd1r!3l^0c1S1- z^G9$BGyrs`8Y*l6YSA$GRKpM(aDdQw20-FB4UfQYdJfKj-}D?bXB`4sdx(fWOY~EZ zWy-cU%ikt6ysoHP^DaQ{*6%+5QRQ#{`1z`WSe3M)DAww&NOetAuiB#kH{Jk(vNUR1 zT*Cu)6yRnr%TMsC8D!(usya$>7*Jz93gW&a0u)CSQ>R~o`!BieVN1gM9W zv?f$sK5^xzIA#nma=|juI5B|V4L)2vkPu4tQMQi(JaC$**vFuK4B=4~{PZzwA0zfL zY9C|vF)lY?-sVK{YhiH=oRFhoIhb}}FN7w*0w_=y66a_{gYPK0ha-TKD-SD@z3Cj{ z#5pN%V&I|9u)wgZgV}ANc}4@_92+O)i;Yt!jc1=}oVJZtw!Oa)pQly^{S_+eihqc)e28j!{B;O$G}7UaFCpp zn}y0qov{>jL~6=XoCVZ0s4`q_4k)roV{J`+Vr^|*bI9GhO5VNe6;I3dR+RKsEZbYL z>S9UQMc9tLAAn&`a^GXm zciGOk3TIST-Nr2=_dNEZL)hTTz@6`hICJpA*2EPO=fNQBc{kX=+sbwr3M zw#lKNr5IWZj^-%uEsmqJ6^Y6wraaAqxF0_dV;Va}9_1j9K5WHD<$10j%&W)Qsu0j@ zf@vr=Bf)v+I=&OAE#N=t3_BO`ef7Aui1<)-Uqg>!XI#IU%eX-eikxc-kpi(BVV*$% zcXG`Kgc7`~e^+5BfkWb!h;GOk;G-LGtTzoFk^ZjdAPYd012Ts+fli+tG1B08fU6`T zK7+|1Pf!R82cIz5uWoHHC}0J;Jlq#)LjhX_D33V{cypj!#)MPcQOg~BnmaBx_l{(9 z6WjU8N6GJ>7H#Oi7j2>t4I&mofO9Mweaeca+(I-2rqf)CHt!UoQOnihxCT}73OFpl zD2{~)gz!!TB7a2kQE8r~dp<#cL}snPKX!s{2OtOm*pqE@=IFkTv?X_4+bvJf`X_#XA#YZy!-povdJ9Q|~d1PHECR zczY)>MaylZ$X1&545uybv-|t|ODz*EHKTl~+i5R_W;sprG)`NQR(l7_kMPdm0rG1` 
z4ln)7iMdkb_5a6I#j(JA@V`rSe5$DFX`6b5G1MAsX%c#`fT2{1Be+Ku0W>1ql)e<< ziJpTR=8+!NvoL6e*%$jHUK@Qp#s<%@&=)yeu{em6&o_!T>UdSODnF*CVc)<5O-ecCeg)H6ow%O%pv1+AywpzmTh1yGCdKXz}GVGzIIy61w@uaD_5BIM|+6-2nixYS6qPS@DihEyls@FB(|OF+4X6u#zAJs&q1?;3eDGH!~U z-cs>;&ts7B6jD^0ZG&yX*my&1QrL41g}uj4-^3JZ`kU6+?{E|5WfY@8(#aRN0#+vZ LFUncxrP99vzgfhP literal 0 HcmV?d00001 diff --git a/emotion-anew.senpy b/emotion-anew.senpy new file mode 100644 index 0000000..5bce13e --- /dev/null +++ b/emotion-anew.senpy @@ -0,0 +1,64 @@ +{ + "name": "emotion-anew", + "module": "emotion-anew", + "description": "This plugin consists on an emotion classifier using ANEW lexicon dictionary to calculate VAD (valence-arousal-dominance) of the sentence and determinate which emotion is closer to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. 
Once this score is calculated, it is going to seek the emotion that is closest to this value.", + "author": "@icorcuera", + "version": "0.5", + "extra_params": { + "language": { + "aliases": ["language", "l"], + "required": true, + "options": ["es","en"], + "default": "en" + } + }, + "requirements": {}, + "anew_path_es": "/data/Dictionary/Redondo(2007).csv", + "anew_path_en": "/data/Dictionary/ANEW2010All.txt", + "centroids": { + "anger": { + "A": 6.95, + "D": 5.1, + "V": 2.7 + }, + "disgust": { + "A": 5.3, + "D": 8.05, + "V": 2.7 + }, + "fear": { + "A": 6.5, + "D": 3.6, + "V": 3.2 + }, + "joy": { + "A": 7.22, + "D": 6.28, + "V": 8.6 + }, + "sadness": { + "A": 5.21, + "D": 2.82, + "V": 2.21 + } + }, + "emotions_ontology": { + "anger": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#anger", + "disgust": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#disgust", + "fear": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#negative-fear", + "joy": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#joy", + "neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion", + "sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness" + }, + "requirements": [ + "numpy", + "pandas", + "nltk", + "scipy", + "scikit-learn", + "textblob", + "pattern", + "lxml" + ], + "onyx:usesEmotionModel": "emoml:big6", +} diff --git a/test.py b/test.py new file mode 100644 index 0000000..cdfdf9d --- /dev/null +++ b/test.py @@ -0,0 +1,45 @@ +import os +import logging +logging.basicConfig() +try: + import unittest.mock as mock +except ImportError: + import mock +from senpy.extensions import Senpy +from flask import Flask +import unittest +import re + +class emoTextANEWTest(unittest.TestCase): + + def setUp(self): + self.app = Flask("test_plugin") + self.dir = os.path.join(os.path.dirname(__file__)) + self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False) + self.senpy.init_app(self.app) + + def tearDown(self): + self.senpy.deactivate_plugin("EmoTextANEW", sync=True) + + def 
test_analyse(self): + plugin = self.senpy.plugins["EmoTextANEW"] + plugin.activate() + + ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#" + texts = {'I hate you': 'anger', + 'i am sad': 'sadness', + 'i am happy with my marks': 'joy', + 'This movie is scary': 'negative-fear', + 'this cake is disgusting' : 'negative-fear'} + + for text in texts: + response = plugin.analyse(input=text) + expected = texts[text] + emotionSet = response.entries[0].emotions[0] + + assert emotionSet['onyx:hasEmotion'][0]['onyx:hasEmotionCategory'] == ontology+expected + + plugin.deactivate() + +if __name__ == '__main__': + unittest.main()