Merge commit '98ec4817cff3abd06f961fbbdb5c860aeb887bca' as 'emotion-anew'

2026-06-02 05:21:59 +00:00 · 2018-06-12 10:01:45 +02:00
parent 402b49f43f 98ec4817cf
commit 15ac26428a
7 changed files with 329 additions and 0 deletions
--- a/emotion-anew/.gitmodules
+++ b/emotion-anew/.gitmodules
@@ -0,0 +1,3 @@
 [submodule "data"]
 	path = data
 	url = ../data/emotion-anew
--- a/emotion-anew/README.md
+++ b/emotion-anew/README.md
@@ -0,0 +1,60 @@
 # Plugin emotion-anew 
 This plugin consists of an **emotion classifier** that detects six possible emotions:
 - Anger : general-dislike.
 - Fear : negative-fear.
 - Disgust : shame.
 - Joy : gratitude, affective, enthusiasm, love, joy, liking.
 - Sadness : ingratitude, daze, humility, compassion, despair, anxiety, sadness.
 - Neutral: no particular emotion detected. 
 The plugin uses the **ANEW lexicon** dictionary to calculate the VAD (valence-arousal-dominance) of the sentence and determine which emotion is closest to this value. To make this comparison, each emotion is assigned a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. 
 The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value.
 The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM, to express the information.
 ## Installation
 * Download
 ```
 git clone https://lab.cluster.gsi.dit.upm.es/senpy/emotion-anew.git
 ```
 * Get data
 ```
 cd emotion-anew
 git submodule update --init --recursive
 ```
 * Run
 ```
 docker run -p 5000:5000 -v $PWD:/plugins gsiupm/senpy:python2.7 -f /plugins
 ```
 ## Data format
 `data/Corpus/affective-isear.tsv` contains data from ISEAR Databank: http://emotion-research.net/toolbox/toolboxdatabase.2006-10-13.2581092615 
 ## Usage
 Params accepted:
 - Language: English (en) and Spanish (es).
 - Input: input text to analyse.
 Example request: 
 ```
 http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-anew&language=en&input=I%20love%20Madrid
 ```
 Example response: This plugin follows the standard senpy plugin response format. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
 # Known issues
 - To obtain Anew dictionary you can download from here: <https://github.com/hcorona/SMC2015/blob/master/resources/ANEW2010All.txt> 
 - This plugin only supports **Python2**
 ![alt GSI Logo][logoGSI]
 [logoES]: https://www.gsi.dit.upm.es/ontologies/onyx/img/eurosentiment_logo.png "EuroSentiment logo"
 [logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
--- a/emotion-anew/data
+++ b/emotion-anew/data
--- a/emotion-anew/emotion-anew.py
+++ b/emotion-anew/emotion-anew.py
@@ -0,0 +1,156 @@
 # -*- coding: utf-8 -*-
 import re
 import nltk
 import csv
 import sys
 import os
 import unicodedata
 import string
 import xml.etree.ElementTree as ET
 import math
 from sklearn.svm import LinearSVC
 from sklearn.feature_extraction import DictVectorizer
 from nltk import bigrams
 from nltk import trigrams
 from nltk.corpus import stopwords
 from pattern.en import parse as parse_en
 from pattern.es import parse as parse_es
 from senpy.plugins import SentimentPlugin, SenpyPlugin
 from senpy.models import Results, EmotionSet, Entry, Emotion
 class EmotionTextPlugin(SentimentPlugin):
    def activate(self, *args, **kwargs):
        """Prepare the resources the plugin needs before analysing text.

        Fetches the NLTK stopword corpus, stores the English stopword list
        on the instance and records the directory that contains this module
        (the dictionary paths are appended to it in ``analyse_entry``).
        """
        nltk.download('stopwords')
        english_stopwords = stopwords.words('english')
        self._stopwords = english_stopwords
        module_file = os.path.abspath(__file__)
        self._local_path = os.path.dirname(module_file)
    def _my_preprocessor(self, text):
        regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
        text = re.sub(regHttp, '', text)
        text = re.sub(regAt, '', text)
        text = re.sub('RT : ', '', text)
        text = re.sub(regHttps, '', text)
        text = re.sub('[0-9]', '', text)
        text = self._delete_punctuation(text)
        return text
    def _delete_punctuation(self, text):
        exclude = set(string.punctuation)
        s = ''.join(ch for ch in text if ch not in exclude)
        return s
    def _extract_ngrams(self, text, lang):
        """Parse *text* and collect per-token data for non-stopword tokens.

        The Spanish parser is used when *lang* is ``'es'``; any other value
        falls back to the English parser.  A token is skipped when its
        lower-cased first field is in ``self._stopwords``.

        :return: a ``(lemmas, words, pos_tags)`` tuple of parallel lists.
        """
        # NOTE(review): the field positions (0 = word, 1 = POS tag,
        # 4 = lemma) are presumably pattern's tagged-token layout when
        # lemmata=True -- confirm against the pattern documentation.
        parse = parse_es if lang == 'es' else parse_en
        lemmas, words, tags = [], [], []
        for sentence in parse(text, lemmata=True).split():
            for token in sentence:
                lowered = token[0].lower()
                if lowered in self._stopwords:
                    continue
                words.append(lowered)
                lemmas.append(token[4])
                tags.append(token[1])
        return lemmas, words, tags
    def _find_ngrams(self, input_list, n):
        return zip(*[input_list[i:] for i in range(n)])
    def _emotion_calculate(self, VAD):
        emotion=''
        value=10000000000000000000000.0
        for state in self.centroids:
            valence=VAD[0]-self.centroids[state]['V']
            arousal=VAD[1]-self.centroids[state]['A']
            dominance=VAD[2]-self.centroids[state]['D']
            new_value=math.sqrt((valence*valence)+(arousal*arousal)+(dominance*dominance))
            if new_value < value:
                value=new_value
                emotion=state
        return emotion
    def _extract_features(self, tweet,dictionary,lang):
        feature_set={}
        ngrams_lemmas,ngrams_words,pos_tagged = self._extract_ngrams(tweet,lang)
        pos_tags={'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
         'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
        totalVAD=[0,0,0]
        matches=0
        for word in range(len(ngrams_lemmas)):
            VAD=[]
            if ngrams_lemmas[word] in dictionary:
                matches+=1
                totalVAD = [totalVAD[0]+float(dictionary[ngrams_lemmas[word]]['V']),
                            totalVAD[1]+float(dictionary[ngrams_lemmas[word]]['A']),
                            totalVAD[2]+float(dictionary[ngrams_lemmas[word]]['D'])]
            elif ngrams_words[word] in dictionary:
                matches+=1
                totalVAD = [totalVAD[0]+float(dictionary[ngrams_words[word]]['V']),
                            totalVAD[1]+float(dictionary[ngrams_words[word]]['A']),
                            totalVAD[2]+float(dictionary[ngrams_words[word]]['D'])]
        if matches==0:
            emotion='neutral'
        else:
            totalVAD=[totalVAD[0]/matches,totalVAD[1]/matches,totalVAD[2]/matches]
            emotion=self._emotion_calculate(totalVAD)
        feature_set['emotion']=emotion
        feature_set['V']=totalVAD[0]
        feature_set['A']=totalVAD[1]
        feature_set['D']=totalVAD[2]
        return feature_set
    def analyse_entry(self, entry, params):
        """Annotate *entry* with the emotion detected in its text.

        The text is cleaned, scored against the ANEW dictionary of the
        requested language and the result is attached to the entry as a
        single ``EmotionSet`` holding one ``Emotion`` with its category and
        the valence/arousal/dominance values.

        :param entry: entry to analyse; its ``"text"`` value is read.
        :param params: request parameters; ``"language"`` selects the
            dictionary (``'es'`` for Spanish, anything else for English).
        :return: generator yielding the annotated entry once.
        """
        text = self._my_preprocessor(entry.get("text", None))
        lang = params.get("language", "auto")
        # NOTE(review): the dictionary file is re-read for every entry;
        # caching it per language looks worthwhile if throughput matters.
        dictionary = self._load_dictionary(lang)
        feature_set = self._extract_features(text, dictionary, lang)
        emotions = EmotionSet()
        emotions.id = "Emotions0"
        emotion1 = Emotion(id="Emotion0")
        emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']]
        emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V']
        emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A']
        emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D']
        emotions.onyx__hasEmotion.append(emotion1)
        entry.emotions = [emotions,]
        yield entry

    def _load_dictionary(self, lang):
        """Read the tab-separated ANEW file for *lang* into a dict.

        :return: mapping ``word -> {'V': ..., 'A': ..., 'D': ...}`` with the
            raw (string) cell values.
        """
        # Column positions of (word, valence, arousal, dominance) differ
        # between the Spanish and the English file.
        if lang == 'es':
            path, columns = self.anew_path_es, (2, 3, 5, 7)
        else:
            path, columns = self.anew_path_en, (0, 2, 4, 6)
        word_col, v_col, a_col, d_col = columns
        needed = max(columns) + 1
        dictionary = {}
        with open(self._local_path + path, 'rb') as tabfile:
            for row in csv.reader(tabfile, delimiter='\t'):
                # Blank or truncated lines would otherwise raise IndexError.
                if len(row) < needed:
                    continue
                dictionary[row[word_col]] = {'V': row[v_col],
                                             'A': row[a_col],
                                             'D': row[d_col]}
        return dictionary
--- a/emotion-anew/emotion-anew.pyc
+++ b/emotion-anew/emotion-anew.pyc
--- a/emotion-anew/emotion-anew.senpy
+++ b/emotion-anew/emotion-anew.senpy
@@ -0,0 +1,64 @@
 {
    "name": "emotion-anew",
    "module": "emotion-anew",
    "description": "This plugin consists of an emotion classifier that uses the ANEW lexicon dictionary to calculate the VAD (valence-arousal-dominance) of the sentence and determine which emotion is closest to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value.",
    "author": "@icorcuera",
    "version": "0.5",
    "extra_params": {
        "language": {
            "aliases": ["language", "l"],
            "required": true,
            "options": ["es","en"],
            "default": "en"
        }
    },
    "requirements": {},
    "anew_path_es": "/data/Dictionary/Redondo(2007).csv",
    "anew_path_en": "/data/Dictionary/ANEW2010All.txt",
    "centroids": {
        "anger": {
            "A": 6.95, 
            "D": 5.1, 
            "V": 2.7
        }, 
        "disgust": {
            "A": 5.3, 
            "D": 8.05, 
            "V": 2.7
        }, 
        "fear": {
            "A": 6.5, 
            "D": 3.6, 
            "V": 3.2
        }, 
        "joy": {
            "A": 7.22, 
            "D": 6.28, 
            "V": 8.6
        }, 
        "sadness": {
            "A": 5.21, 
            "D": 2.82, 
            "V": 2.21
        }
    },
    "emotions_ontology": {
        "anger": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#anger", 
        "disgust": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#disgust", 
        "fear": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#negative-fear", 
        "joy": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#joy", 
        "neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion", 
        "sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness"
    },
    "requirements": [
        "numpy",
        "pandas",
        "nltk",
        "scipy",
        "scikit-learn",
        "textblob",
        "pattern",
        "lxml"
            ],
    "onyx:usesEmotionModel": "emoml:big6"
 }
--- a/emotion-anew/test.py
+++ b/emotion-anew/test.py
@@ -0,0 +1,45 @@
 import os
 import logging
 logging.basicConfig()
 try:
    import unittest.mock as mock
 except ImportError:
    import mock
 from senpy.extensions import Senpy
 from flask import Flask
 import unittest
 import re
 class emoTextANEWTest(unittest.TestCase):
    # End-to-end check: loads the plugin found next to this file through a
    # Senpy instance and compares the detected emotion category for a few
    # sample sentences.
    def setUp(self):
        # Only plugins located in this test's own directory are loaded.
        self.dir = os.path.join(os.path.dirname(__file__))
        self.app = Flask("test_plugin")
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        self.senpy.deactivate_plugin("EmoTextANEW", sync=True)

    def test_analyse(self):
        # NOTE(review): the plugin is looked up as "EmoTextANEW" while the
        # .senpy definition names it "emotion-anew" -- confirm which key
        # Senpy registers the plugin under.
        ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
        plugin = self.senpy.plugins["EmoTextANEW"]
        plugin.activate()
        # Input sentence -> expected category suffix within the ontology.
        texts = {
            'I hate you': 'anger',
            'i am sad': 'sadness',
            'i am happy with my marks': 'joy',
            'This movie is scary': 'negative-fear',
            'this cake is disgusting': 'negative-fear',
        }
        for text, expected in texts.items():
            response = plugin.analyse(input=text)
            emotionSet = response.entries[0].emotions[0]
            detected = emotionSet['onyx:hasEmotion'][0]['onyx:hasEmotionCategory']
            assert detected == ontology+expected
        plugin.deactivate()
 if __name__ == '__main__':
    unittest.main()