1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-08-24 10:32:20 +00:00

Add 'community-plugins/' from commit '4c73797246c6aff8d055abfef73d3f0d34b933a8'

git-subtree-dir: community-plugins
git-subtree-mainline: 7f712952be
git-subtree-split: 4c73797246
This commit is contained in:
J. Fernando Sánchez
2023-09-20 13:32:30 +02:00
77 changed files with 11412 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
# Plugin emotion-anew
This plugin consists of an **emotion classifier** that detects six possible emotions:
- Anger : general-dislike.
- Fear : negative-fear.
- Disgust : shame.
- Joy : gratitude, affective, enthusiasm, love, joy, liking.
- Sadness : ingratitude, daze, humility, compassion, despair, anxiety, sadness.
- Neutral: no particular emotion detected.
The plugin uses the **ANEW lexicon** dictionary to calculate the VAD (valence-arousal-dominance) of the sentence and determine which emotion is closest to this value. To make this comparison, each emotion is assigned a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208.
The plugin looks for the words in the sentence that appear in the ANEW dictionary and calculates the average VAD score for the sentence. Once this score is calculated, it seeks the emotion that is closest to this value.
The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM, to express the information.
## Installation
* Download
```
git clone https://lab.cluster.gsi.dit.upm.es/senpy/emotion-anew.git
```
* Get data
```
cd emotion-anew
git submodule update --init --recursive
```
* Run
```
docker run -p 5000:5000 -v $PWD:/plugins gsiupm/senpy:python2.7 -f /plugins
```
## Data format
`data/Corpus/affective-isear.tsv` contains data from ISEAR Databank: http://emotion-research.net/toolbox/toolboxdatabase.2006-10-13.2581092615
## Usage
Params accepted:
- Language: English (en) and Spanish (es).
- Input: input text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-anew&language=en&input=I%20love%20Madrid
```
Example response: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
# Known issues
- To obtain the ANEW dictionary, download it from here: <https://github.com/hcorona/SMC2015/blob/master/resources/ANEW2010All.txt>
- This plugin only supports **Python2**
![alt GSI Logo][logoGSI]
[logoES]: https://www.gsi.dit.upm.es/ontologies/onyx/img/eurosentiment_logo.png "EuroSentiment logo"
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

View File

@@ -0,0 +1,227 @@
# -*- coding: utf-8 -*-
import re
import nltk
import csv
import sys
import os
import unicodedata
import string
import xml.etree.ElementTree as ET
import math
from sklearn.svm import LinearSVC
from sklearn.feature_extraction import DictVectorizer
from nltk import bigrams
from nltk import trigrams
from nltk.corpus import stopwords
from pattern.en import parse as parse_en
from pattern.es import parse as parse_es
from senpy.plugins import EmotionPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion
class ANEW(EmotionPlugin):
    # Emotion plugin that scores a text on the valence/arousal/dominance (VAD)
    # dimensions. It averages the lexicon values of every word (or lemma) of
    # the text that is present in the ANEW dictionary for the requested
    # language and attaches the result to the entry as an EmotionSet.

    description = ("This plugin consists on an emotion classifier using ANEW lexicon "
                   "dictionary. It averages the VAD (valence-arousal-dominance) value of "
                   "each word in the text that is also in the ANEW dictionary. To obtain a "
                   "categorical value (e.g., happy) use the emotion conversion API (e.g., "
                   "`emotion-model=emoml:big6`).")
    author = "@icorcuera"
    version = "0.5.2"
    name = "emotion-anew"

    extra_params = {
        "language": {
            "description": "language of the input",
            "aliases": ["language", "l"],
            "required": True,
            "options": ["es", "en"],
            "default": "en"
        }
    }

    # Lexicon files, opened through self.open() in activate().
    anew_path_es = "Dictionary/Redondo(2007).csv"
    anew_path_en = "Dictionary/ANEW2010All.txt"
    onyx__usesEmotionModel = "emoml:pad-dimensions"
    nltk_resources = ['stopwords']

    def activate(self, *args, **kwargs):
        """Load the stopword list and the Spanish and English lexicons."""
        # A frozenset makes the per-token membership test in _extract_ngrams
        # O(1) instead of a linear scan over a list.
        # NOTE(review): English stopwords are also applied to Spanish input;
        # confirm whether that is intended.
        self._stopwords = frozenset(stopwords.words('english'))
        # Column positions differ between the two lexicon files.
        self._dictionary = {
            'es': self._load_lexicon(self.anew_path_es, 2, 3, 5, 7),
            'en': self._load_lexicon(self.anew_path_en, 0, 2, 4, 6),
        }

    def _load_lexicon(self, path, word_col, v_col, a_col, d_col):
        """Read a tab-separated lexicon into ``{word: {'V': .., 'A': .., 'D': ..}}``.

        ``word_col``, ``v_col``, ``a_col`` and ``d_col`` are the column indexes
        of the word and of its valence, arousal and dominance scores. Scores
        are stored as the raw strings read from the file and only converted to
        ``float`` when a word is matched, so a non-numeric row does not fail
        at load time.
        """
        lexicon = {}
        with self.open(path, 'r') as tabfile:
            for row in csv.reader(tabfile, delimiter='\t'):
                lexicon[row[word_col]] = {
                    'V': row[v_col],
                    'A': row[a_col],
                    'D': row[d_col],
                }
        return lexicon

    def _my_preprocessor(self, text):
        """Return ``text`` without URLs, @mentions, 'RT : ' marks, digits and punctuation."""
        # Remove the whole URL token. The previous patterns used an unescaped
        # '.', which also matched a space and therefore deleted the word that
        # followed the URL.
        text = re.sub(r'https?://\S+', '', text)
        # Mentions: '@' followed by any run of alphanumerics and these symbols.
        text = re.sub(r'@[a-zA-Z0-9*_/&%#@$]*', '', text)
        # Must run after mention removal: "RT @user: ..." has become "RT : ...".
        text = re.sub('RT : ', '', text)
        text = re.sub('[0-9]', '', text)
        return self._delete_punctuation(text)

    def _delete_punctuation(self, text):
        """Return ``text`` with every ``string.punctuation`` character removed."""
        punctuation = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in punctuation)

    def _extract_ngrams(self, text, lang):
        """Tokenise ``text`` and return ``(lemmas, words, pos_tags)`` lists.

        The Spanish parser is used when ``lang`` is ``'es'`` and the English
        one otherwise. Tokens whose lower-cased form is a stopword are
        skipped; the three lists are aligned by position.
        """
        parse = parse_es if lang == 'es' else parse_en
        unigrams_lemmas = []
        unigrams_words = []
        pos_tagged = []
        for sentence in parse(text, lemmata=True).split():
            for token in sentence:
                # Token fields used here: [0] surface form, [1] POS tag,
                # [4] lemma.
                word = token[0].lower()
                if word not in self._stopwords:
                    unigrams_words.append(word)
                    unigrams_lemmas.append(token[4])
                    pos_tagged.append(token[1])
        return unigrams_lemmas, unigrams_words, pos_tagged

    def _find_ngrams(self, input_list, n):
        """Return the consecutive ``n``-item tuples of ``input_list`` (via ``zip``)."""
        return zip(*[input_list[i:] for i in range(n)])

    def _extract_features(self, tweet, dictionary, lang):
        """Average the V/A/D scores of the tokens of ``tweet`` found in ``dictionary``.

        Each token is looked up by lemma first and by lower-cased word second.
        Returns a dict with keys ``'V'``, ``'A'`` and ``'D'``; when no token
        matches, all three values are 0.
        """
        lemmas, words, _ = self._extract_ngrams(tweet, lang)
        totals = [0, 0, 0]
        matches = 0
        for lemma, word in zip(lemmas, words):
            if lemma in dictionary:
                scores = dictionary[lemma]
            elif word in dictionary:
                scores = dictionary[word]
            else:
                continue
            matches += 1
            totals = [totals[0] + float(scores['V']),
                      totals[1] + float(scores['A']),
                      totals[2] + float(scores['D'])]
        if matches:
            totals = [totals[0] / matches,
                      totals[1] / matches,
                      totals[2] / matches]
        # Always populated, so analyse_entry can index the result safely.
        return {'V': totals[0], 'A': totals[1], 'D': totals[2]}

    def analyse_entry(self, entry, activity):
        """Annotate ``entry`` with one emotion carrying its average VAD values.

        Reads the ``language`` parameter from ``activity.params``, sets
        ``entry.emotions`` to a single EmotionSet and yields the entry.
        """
        params = activity.params
        text = self._my_preprocessor(entry.text)
        dictionary = self._dictionary[params['language']]
        feature_set = self._extract_features(text, dictionary, params['language'])

        emotions = EmotionSet()
        emotions.id = "Emotions0"

        emotion1 = Emotion(id="Emotion0")
        emotion1["emoml:pad-dimensions_pleasure"] = feature_set['V']
        emotion1["emoml:pad-dimensions_arousal"] = feature_set['A']
        emotion1["emoml:pad-dimensions_dominance"] = feature_set['D']

        emotion1.prov(activity)
        emotions.prov(activity)

        emotions.onyx__hasEmotion.append(emotion1)
        entry.emotions = [emotions, ]

        yield entry

    ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"

    # NOTE(review): the case names 'negative-feat' and the second
    # 'negative-fear' (a disgust sentence) look mislabelled; they are kept
    # verbatim because they are runtime identifiers.
    test_cases = [
        {
            'name': 'anger with VAD=(2.12, 6.95, 5.05)',
            'input': 'I hate you',
            'expected': {
                'onyx:hasEmotionSet': [{
                    'onyx:hasEmotion': [{
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.95,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 5.05,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 2.12,
                    }]
                }]
            }
        }, {
            'input': 'i am sad',
            'expected': {
                'onyx:hasEmotionSet': [{
                    'onyx:hasEmotion': [{
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 4.13,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 3.45,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 1.61,
                    }]
                }]
            }
        }, {
            'name': 'joy',
            'input': 'i am happy with my marks',
            'expected': {
                'onyx:hasEmotionSet': [{
                    'onyx:hasEmotion': [{
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.49,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 6.63,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 8.21,
                    }]
                }]
            }
        }, {
            'name': 'negative-feat',
            'input': 'This movie is scary',
            'expected': {
                'onyx:hasEmotionSet': [{
                    'onyx:hasEmotion': [{
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.8100000000000005,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.33,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.050000000000001,
                    }]
                }]
            }
        }, {
            'name': 'negative-fear',
            'input': 'this cake is disgusting',
            'expected': {
                'onyx:hasEmotionSet': [{
                    'onyx:hasEmotion': [{
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.09,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.4,
                        "http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.109999999999999,
                    }]
                }]
            }
        }
    ]

View File

@@ -0,0 +1,11 @@
---
# Plugin descriptor. `module` presumably names the Python module that
# implements the plugin — confirm against the senpy plugin loader.
module: emotion-anew
# Packages declared as requirements of this plugin.
# NOTE(review): numpy, pandas, scipy, textblob and lxml are not imported by
# the plugin code shown alongside this file; confirm they are needed before
# pruning.
requirements:
- numpy
- pandas
- nltk
- scipy
- scikit-learn
- textblob
- pattern
- lxml