1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-22 00:02:28 +00:00

Add plugins as submodules

This commit is contained in:
Manuel Garcia Amado 2018-05-14 11:32:56 +02:00
parent 582ae8a340
commit fbde8a9462
40 changed files with 28 additions and 222452 deletions

21
.gitmodules vendored Normal file
View File

@ -0,0 +1,21 @@
[submodule "sentiment-meaningCloud"]
path = sentiment-meaningCloud
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-meaningCloud/
[submodule "sentiment-vader"]
path = sentiment-vader
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-vader/
[submodule "emotion-wnaffect"]
path = emotion-wnaffect
url = https://lab.cluster.gsi.dit.upm.es/senpy/emotion-wnaffect
[submodule "emotion-anew"]
path = emotion-anew
url = https://lab.cluster.gsi.dit.upm.es/senpy/emotion-anew
[submodule "sentiment-tass"]
path = sentiment-tass
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-tass
[submodule "sentiment-basic"]
path = sentiment-basic
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-basic
[submodule "affect"]
path = affect
url = https://lab.cluster.gsi.dit.upm.es/senpy/affect

1
affect Submodule

@ -0,0 +1 @@
Subproject commit b236a1c9bb14b4008787645a3ac260696e9fd471

View File

@ -1,10 +0,0 @@
# Senpy Plugin for Sentiment and Emotion
This plugin allows to choose sentiment and emotion plugins to retrieve a response with the total sentiment and emotion analysis.
You can adjust the parameters of the plugin as follows:
- Sentiments plugins: emotion-wnaffect, emotion-anew(only english)
- Emotions plugins: sentiment-tass, sentiment-vader,sentiment-meaningCloud(requires API KEY)
- Endpoint: the endpoint where the used plugins are. (default: http://senpy.cluster.gsi.dit.upm.es/api/)
- Language: english(en) or spanish(es)

View File

@ -1,55 +0,0 @@
import time
import requests
import json
import string
import os
import json
import logging
from os import path
import time
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment, Error
logger = logging.getLogger(__name__)
class unifiedPlugin(SentimentPlugin):
def analyse(self, **kwargs):
params = dict(kwargs)
txt = params["input"]
logger.info('TXT:%s' % txt)
endpoint = params["endpoint"]
lang = params.get("language")
key = params["apiKey"]
sentiplug = params["sentiments-plugin"]
s_params = params.copy()
s_params.update({'algo':sentiplug,'language':lang, 'meaningCloud-key':key})
senti_response = requests.get(endpoint, params=s_params).json()
logger.info('SENTIPARAMS: %s' % s_params)
logger.info('SENTIRESPONSE: %s' % senti_response)
if 'entries' not in senti_response:
raise Error(senti_response)
senti_response = Results(senti_response)
logger.info('SENTI: %s' % senti_response)
logger.info(senti_response)
emoplug = params["emotions-plugin"]
e_params = params.copy()
e_params.update({'algo':emoplug,'language':lang})
emo_response = requests.get(endpoint, params=e_params).json()
if 'entries' not in emo_response:
raise Error(emo_response)
emo_response = Results(emo_response)
logger.info('EMO: %s' % emo_response)
logger.info(emo_response)
#Senpy Response
response = Results()
response.analysis = [senti_response.analysis, emo_response.analysis]
unified = senti_response.entries[0]
unified["emotions"] = emo_response.entries[0]["emotions"]
response.entries.append(unified)
return response

View File

@ -1,39 +0,0 @@
{
"name": "affect",
"module": "affect",
"description": "Sentiment Analysis and Emotion Recognition. This plugins uses emotion and sentiment plugins to return an unified response with the sentiment analysis and the emotion Recognition. You have to choose as parameters one sentiment plugin and one emotion plugin.",
"author": "GSI UPM",
"version": "0.1",
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["en","es"],
"default": "en"
},
"sentiments-plugin": {
"aliases": ["sentiplug"],
"required": true,
"options": ["sentiment-meaningCloud","sentiment-tass","sentiment-vader"],
"default": "sentiment-tass"
},
"emotions-plugin": {
"aliases": ["emoplug"],
"required": true,
"options": ["emotion-wnaffect","emotion-anew"],
"default": "emotion-anew"
},
"endpoint": {
"aliases": ["endpoint"],
"required": true,
"default": "http://senpy.cluster.gsi.dit.upm.es/api/"
},
"apiKey": {
"aliases": ["meaningCloud-key","apiKey"],
"required": false
}
},
"requirements": {}
}

1
emotion-anew Submodule

@ -0,0 +1 @@
Subproject commit e8a3c837e3543a5f5f19086e1fcaa34b22be639e

View File

@ -1,39 +0,0 @@
# Plugin emotion-anew
This plugin consists on an **emotion classifier** that detects six possible emotions:
- Anger : general-dislike.
- Fear : negative-fear.
- Disgust : shame.
- Joy : gratitude, affective, enthusiasm, love, joy, liking.
- Sadness : ingrattitude, daze, humlity, compassion, despair, anxiety, sadness.
- Neutral: not detected a particulary emotion.
The plugin uses **ANEW lexicon** dictionary to calculate VAD (valence-arousal-dominance) of the sentence and determinate which emotion is closer to this value. To do this comparision, it is defined that each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208.
The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value.
The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM, to express the information.
##Usage
Params accepted:
- Language: English (en) and Spanish (es).
- Input: input text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-anew&language=en&input=I%20love%20Madrid
```
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
# Known issues
- To obtain Anew dictionary you can download from here: <https://github.com/hcorona/SMC2015/blob/master/resources/ANEW2010All.txt>
![alt GSI Logo][logoGSI]
[logoES]: https://www.gsi.dit.upm.es/ontologies/onyx/img/eurosentiment_logo.png "EuroSentiment logo"
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

View File

@ -1,155 +0,0 @@
# -*- coding: utf-8 -*-
import re
import nltk
import csv
import sys
import os
import unicodedata
import string
import xml.etree.ElementTree as ET
import math
from sklearn.svm import LinearSVC
from sklearn.feature_extraction import DictVectorizer
from nltk import bigrams
from nltk import trigrams
from nltk.corpus import stopwords
from pattern.en import parse as parse_en
from pattern.es import parse as parse_es
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(SentimentPlugin):
def activate(self, *args, **kwargs):
self._stopwords = stopwords.words('english')
self._local_path=os.path.dirname(os.path.abspath(__file__))
def _my_preprocessor(self, text):
regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
text = re.sub(regHttp, '', text)
text = re.sub(regAt, '', text)
text = re.sub('RT : ', '', text)
text = re.sub(regHttps, '', text)
text = re.sub('[0-9]', '', text)
text = self._delete_punctuation(text)
return text
def _delete_punctuation(self, text):
exclude = set(string.punctuation)
s = ''.join(ch for ch in text if ch not in exclude)
return s
def _extract_ngrams(self, text, lang):
unigrams_lemmas = []
unigrams_words = []
pos_tagged = []
if lang == 'es':
sentences = parse_es(text,lemmata=True).split()
else:
sentences = parse_en(text,lemmata=True).split()
for sentence in sentences:
for token in sentence:
if token[0].lower() not in self._stopwords:
unigrams_words.append(token[0].lower())
unigrams_lemmas.append(token[4])
pos_tagged.append(token[1])
return unigrams_lemmas,unigrams_words,pos_tagged
def _find_ngrams(self, input_list, n):
return zip(*[input_list[i:] for i in range(n)])
def _emotion_calculate(self, VAD):
emotion=''
value=10000000000000000000000.0
for state in self.centroids:
valence=VAD[0]-self.centroids[state]['V']
arousal=VAD[1]-self.centroids[state]['A']
dominance=VAD[2]-self.centroids[state]['D']
new_value=math.sqrt((valence*valence)+(arousal*arousal)+(dominance*dominance))
if new_value < value:
value=new_value
emotion=state
return emotion
def _extract_features(self, tweet,dictionary,lang):
feature_set={}
ngrams_lemmas,ngrams_words,pos_tagged = self._extract_ngrams(tweet,lang)
pos_tags={'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
totalVAD=[0,0,0]
matches=0
for word in range(len(ngrams_lemmas)):
VAD=[]
if ngrams_lemmas[word] in dictionary:
matches+=1
totalVAD = [totalVAD[0]+float(dictionary[ngrams_lemmas[word]]['V']),
totalVAD[1]+float(dictionary[ngrams_lemmas[word]]['A']),
totalVAD[2]+float(dictionary[ngrams_lemmas[word]]['D'])]
elif ngrams_words[word] in dictionary:
matches+=1
totalVAD = [totalVAD[0]+float(dictionary[ngrams_words[word]]['V']),
totalVAD[1]+float(dictionary[ngrams_words[word]]['A']),
totalVAD[2]+float(dictionary[ngrams_words[word]]['D'])]
if matches==0:
emotion='neutral'
else:
totalVAD=[totalVAD[0]/matches,totalVAD[1]/matches,totalVAD[2]/matches]
emotion=self._emotion_calculate(totalVAD)
feature_set['emotion']=emotion
feature_set['V']=totalVAD[0]
feature_set['A']=totalVAD[1]
feature_set['D']=totalVAD[2]
return feature_set
def analyse_entry(self, entry, params):
text_input = entry.get("text", None)
text= self._my_preprocessor(text_input)
dictionary={}
lang = params.get("language", "auto")
if lang == 'es':
with open(self.anew_path_es,'rb') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary[row[2]]={}
dictionary[row[2]]['V']=row[3]
dictionary[row[2]]['A']=row[5]
dictionary[row[2]]['D']=row[7]
else:
with open(self.anew_path_en,'rb') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary[row[0]]={}
dictionary[row[0]]['V']=row[2]
dictionary[row[0]]['A']=row[4]
dictionary[row[0]]['D']=row[6]
feature_set=self._extract_features(text,dictionary,lang)
emotions = EmotionSet()
emotions.id = "Emotions0"
emotion1 = Emotion(id="Emotion0")
emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']]
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V']
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A']
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D']
emotions.onyx__hasEmotion.append(emotion1)
entry.emotions = [emotions,]
yield entry

View File

@ -1,64 +0,0 @@
{
"name": "emotion-anew",
"module": "emotion-anew",
"description": "This plugin consists on an emotion classifier using ANEW lexicon dictionary to calculate VAD (valence-arousal-dominance) of the sentence and determinate which emotion is closer to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value.",
"author": "@icorcuera",
"version": "0.5",
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["es","en"],
"default": "en"
}
},
"requirements": {},
"anew_path_es": "/data/emotion-anew/Dictionary/Redondo(2007).csv",
"anew_path_en": "/data/emotion-anew/Dictionary/ANEW2010All.txt",
"centroids": {
"anger": {
"A": 6.95,
"D": 5.1,
"V": 2.7
},
"disgust": {
"A": 5.3,
"D": 8.05,
"V": 2.7
},
"fear": {
"A": 6.5,
"D": 3.6,
"V": 3.2
},
"joy": {
"A": 7.22,
"D": 6.28,
"V": 8.6
},
"sadness": {
"A": 5.21,
"D": 2.82,
"V": 2.21
}
},
"emotions_ontology": {
"anger": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#anger",
"disgust": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#disgust",
"fear": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#negative-fear",
"joy": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#joy",
"neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion",
"sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness"
},
"requirements": [
"numpy",
"pandas",
"nltk",
"scipy",
"scikit-learn",
"textblob",
"pattern",
"lxml"
],
"onyx:usesEmotionModel": "emoml:big6",
}

View File

@ -1,45 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
import re
class emoTextANEWTest(unittest.TestCase):
def setUp(self):
self.app = Flask("test_plugin")
self.dir = os.path.join(os.path.dirname(__file__))
self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
self.senpy.init_app(self.app)
def tearDown(self):
self.senpy.deactivate_plugin("EmoTextANEW", sync=True)
def test_analyse(self):
plugin = self.senpy.plugins["EmoTextANEW"]
plugin.activate()
ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
texts = {'I hate you': 'anger',
'i am sad': 'sadness',
'i am happy with my marks': 'joy',
'This movie is scary': 'negative-fear',
'this cake is disgusting' : 'negative-fear'}
for text in texts:
response = plugin.analyse(input=text)
expected = texts[text]
emotionSet = response.entries[0].emotions[0]
assert emotionSet['onyx:hasEmotion'][0]['onyx:hasEmotionCategory'] == ontology+expected
plugin.deactivate()
if __name__ == '__main__':
unittest.main()

1
emotion-wnaffect Submodule

@ -0,0 +1 @@
Subproject commit 74c40d7e97d54d3c3e30739a85cf9322c92d5a87

View File

@ -1,41 +0,0 @@
# WordNet-Affect plugin
This plugin uses WordNet-Affect (http://wndomains.fbk.eu/wnaffect.html) to calculate the percentage of each emotion. The plugin classifies among five diferent emotions: anger, fear, disgust, joy and sadness. It is has been used a emotion mapping enlarge the emotions:
- anger : general-dislike
- fear : negative-fear
- disgust : shame
- joy : gratitude, affective, enthusiasm, love, joy, liking
- sadness : ingrattitude, daze, humlity, compassion, despair, anxiety, sadness
## Usage
The parameters accepted are:
- Language: English (en).
- Input: Text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-wnaffect&language=en&input=I%20love%20Madrid
```
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM for semantic web.
This plugin uses WNAffect labels for emotion analysis.
The emotion-wnaffect.senpy file can be copied and modified to use different versions of wnaffect with the same python code.
## Known issues
- This plugin uses the pattern library, which means it will only run on python 2.7
- Wnaffect and corpora files are not included in the repository, but can be easily added either to the docker image (using a volume) or in a new docker image.
- You can download Wordnet 1.6 here: <http://wordnetcode.princeton.edu/1.6/wn16.unix.tar.gz> and extract the dict folder.
- The hierarchy and synsets files can be found here: <https://github.com/larsmans/wordnet-domains-sentiwords/tree/master/wn-domains/wn-affect-1.1>
![alt GSI Logo][logoGSI]
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

View File

@ -1,224 +0,0 @@
from __future__ import division
import re
import nltk
import logging
import os
import string
import xml.etree.ElementTree as ET
from nltk.corpus import stopwords
from nltk.corpus import WordNetCorpusReader
from nltk.stem import wordnet
from emotion import Emotion as Emo
from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
def _load_synsets(self, synsets_path):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse(synsets_path)
root = tree.getroot()
pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
synsets = {}
for pos in ["noun", "adj", "verb", "adv"]:
tag = pos_map[pos]
synsets[tag] = {}
for elem in root.findall(
".//{0}-syn-list//{0}-syn".format(pos, pos)):
offset = int(elem.get("id")[2:])
if not offset: continue
if elem.get("categ"):
synsets[tag][offset] = Emo.emotions[elem.get(
"categ")] if elem.get(
"categ") in Emo.emotions else None
elif elem.get("noun-id"):
synsets[tag][offset] = synsets[pos_map["noun"]][int(
elem.get("noun-id")[2:])]
return synsets
def _load_emotions(self, hierarchy_path):
"""Loads the hierarchy of emotions from the WordNet-Affect xml."""
tree = ET.parse(hierarchy_path)
root = tree.getroot()
for elem in root.findall("categ"):
name = elem.get("name")
if name == "root":
Emo.emotions["root"] = Emo("root")
else:
Emo.emotions[name] = Emo(name, elem.get("isa"))
def activate(self, *args, **kwargs):
nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
self._stopwords = stopwords.words('english')
self._wnlemma = wordnet.WordNetLemmatizer()
self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
local_path = os.path.dirname(os.path.abspath(__file__))
self._categories = {
'anger': [
'general-dislike',
],
'fear': [
'negative-fear',
],
'disgust': [
'shame',
],
'joy':
['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
'sadness': [
'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
'anxiety', 'sadness'
]
}
self._wnaffect_mappings = {
'anger': 'anger',
'fear': 'negative-fear',
'disgust': 'disgust',
'joy': 'joy',
'sadness': 'sadness'
}
self._load_emotions(local_path + self.hierarchy_path)
if 'total_synsets' not in self.sh:
total_synsets = self._load_synsets(local_path + self.synsets_path)
self.sh['total_synsets'] = total_synsets
self._total_synsets = self.sh['total_synsets']
self._wn16_path = self.wn16_path
self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
def deactivate(self, *args, **kwargs):
self.save()
def _my_preprocessor(self, text):
regHttp = re.compile(
'(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttps = re.compile(
'(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
text = re.sub(regHttp, '', text)
text = re.sub(regAt, '', text)
text = re.sub('RT : ', '', text)
text = re.sub(regHttps, '', text)
text = re.sub('[0-9]', '', text)
text = self._delete_punctuation(text)
return text
def _delete_punctuation(self, text):
exclude = set(string.punctuation)
s = ''.join(ch for ch in text if ch not in exclude)
return s
def _extract_ngrams(self, text):
unigrams_lemmas = []
pos_tagged = []
unigrams_words = []
tokens = text.split()
for token in nltk.pos_tag(tokens):
unigrams_words.append(token[0])
pos_tagged.append(token[1])
if token[1][0] in self._syntactics.keys():
unigrams_lemmas.append(
self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
[0]]))
else:
unigrams_lemmas.append(token[0])
return unigrams_words, unigrams_lemmas, pos_tagged
def _find_ngrams(self, input_list, n):
return zip(*[input_list[i:] for i in range(n)])
def _clean_pos(self, pos_tagged):
pos_tags = {
'NN': 'NN',
'NNP': 'NN',
'NNP-LOC': 'NN',
'NNS': 'NN',
'JJ': 'JJ',
'JJR': 'JJ',
'JJS': 'JJ',
'RB': 'RB',
'RBR': 'RB',
'RBS': 'RB',
'VB': 'VB',
'VBD': 'VB',
'VGB': 'VB',
'VBN': 'VB',
'VBP': 'VB',
'VBZ': 'VB'
}
for i in range(len(pos_tagged)):
if pos_tagged[i] in pos_tags:
pos_tagged[i] = pos_tags[pos_tagged[i]]
return pos_tagged
def _extract_features(self, text):
feature_set = {k: 0 for k in self._categories}
ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
matches = 0
pos_tagged = self._clean_pos(pos_tagged)
tag_wn = {
'NN': self._wn16.NOUN,
'JJ': self._wn16.ADJ,
'VB': self._wn16.VERB,
'RB': self._wn16.ADV
}
for i in range(len(pos_tagged)):
if pos_tagged[i] in tag_wn:
synsets = self._wn16.synsets(ngrams_words[i],
tag_wn[pos_tagged[i]])
if synsets:
offset = synsets[0].offset()
if offset in self._total_synsets[pos_tagged[i]]:
if self._total_synsets[pos_tagged[i]][offset] is None:
continue
else:
emotion = self._total_synsets[pos_tagged[i]][
offset].get_level(5).name
matches += 1
for i in self._categories:
if emotion in self._categories[i]:
feature_set[i] += 1
if matches == 0:
matches = 1
for i in feature_set:
feature_set[i] = (feature_set[i] / matches) * 100
return feature_set
def analyse_entry(self, entry, params):
text_input = entry.get("text", None)
text = self._my_preprocessor(text_input)
feature_text = self._extract_features(text)
emotionSet = EmotionSet(id="Emotions0")
emotions = emotionSet.onyx__hasEmotion
for i in feature_text:
emotions.append(
Emotion(
onyx__hasEmotionCategory=self._wnaffect_mappings[i],
onyx__hasEmotionIntensity=feature_text[i]))
entry.emotions = [emotionSet]
yield entry

View File

@ -1,25 +0,0 @@
---
name: emotion-wnaffect
module: emotion-wnaffect
description: 'Emotion classifier using WordNet-Affect to calculate the percentage
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
or neutral. The only available language is English (en)'
author: "@icorcuera @balkian"
version: '0.2'
extra_params:
language:
"@id": lang_wnaffect
aliases:
- language
- l
required: false
options:
- en
synsets_path: "/data/emotion-wnaffect/a-synsets.xml"
hierarchy_path: "/data/emotion-wnaffect/a-hierarchy.xml"
wn16_path: "/data/emotion-wnaffect/wordnet1.6/dict"
onyx:usesEmotionModel: emoml:big6
requirements:
- nltk>=3.0.5
- lxml>=3.4.2
async: false

View File

@ -1,95 +0,0 @@
"""
Clement Michard (c) 2015
"""
class Emotion:
"""Defines an emotion."""
emotions = {} # name to emotion (str -> Emotion)
def __init__(self, name, parent_name=None):
"""Initializes an Emotion object.
name -- name of the emotion (str)
parent_name -- name of the parent emotion (str)
"""
self.name = name
self.parent = None
self.level = 0
self.children = []
if parent_name:
self.parent = Emotion.emotions[parent_name] if parent_name else None
self.parent.children.append(self)
self.level = self.parent.level + 1
def get_level(self, level):
"""Returns the parent of self at the given level.
level -- level in the hierarchy (int)
"""
em = self
while em.level > level and em.level >= 0:
em = em.parent
return em
def __str__(self):
"""Returns the emotion string formatted."""
return self.name
def nb_children(self):
"""Returns the number of children of the emotion."""
return sum(child.nb_children() for child in self.children) + 1
@staticmethod
def printTree(emotion=None, indent="", last='updown'):
"""Prints the hierarchy of emotions.
emotion -- root emotion (Emotion)
"""
if not emotion:
emotion = Emotion.emotions["root"]
size_branch = {child: child.nb_children() for child in emotion.children}
leaves = sorted(emotion.children, key=lambda emotion: emotion.nb_children())
up, down = [], []
if leaves:
while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves):
down.append(leaves.pop())
up = leaves
for leaf in up:
next_last = 'up' if up.index(leaf) is 0 else ''
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '', " " * len(emotion.name))
Emotion.printTree(leaf, indent=next_indent, last=next_last)
if last == 'up':
start_shape = ''
elif last == 'down':
start_shape = ''
elif last == 'updown':
start_shape = ' '
else:
start_shape = ''
if up:
end_shape = ''
elif down:
end_shape = ''
else:
end_shape = ''
print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
for leaf in down:
next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '', " " * len(emotion.name))
Emotion.printTree(leaf, indent=next_indent, last=next_last)

View File

@ -1,42 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
class emoTextWAFTest(unittest.TestCase):
def setUp(self):
self.app = Flask("test_plugin")
self.dir = os.path.join(os.path.dirname(__file__))
self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
self.senpy.init_app(self.app)
def tearDown(self):
self.senpy.deactivate_plugin("EmoTextWAF", sync=True)
def test_analyse(self):
plugin = self.senpy.plugins["EmoTextWAF"]
plugin.activate()
texts = {'I hate you': 'anger',
'i am sad': 'sadness',
'i am happy with my marks': 'joy',
'This movie is scary': 'negative-fear'}
for text in texts:
response = plugin.analyse(input=text)
expected = texts[text]
emotionSet = response.entries[0].emotions[0]
max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity'])
assert max_emotion['onyx:hasEmotionCategory'] == expected
plugin.deactivate()
if __name__ == '__main__':
unittest.main()

View File

@ -1,92 +0,0 @@
# coding: utf-8
# In[1]:
# -*- coding: utf-8 -*-
"""
Clement Michard (c) 2015
"""
import os
import sys
import nltk
from emotion import Emotion
from nltk.corpus import WordNetCorpusReader
import xml.etree.ElementTree as ET
class WNAffect:
"""WordNet-Affect ressource."""
def __init__(self, wordnet16_dir, wn_domains_dir):
"""Initializes the WordNet-Affect object."""
cwd = os.getcwd()
nltk.data.path.append(cwd)
wn16_path = "{0}/dict".format(wordnet16_dir)
self.wn16 = WordNetCorpusReader(os.path.abspath("{0}/{1}".format(cwd, wn16_path)), nltk.data.find(wn16_path))
self.flat_pos = {'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
self.wn_pos = {'NN':self.wn16.NOUN, 'JJ':self.wn16.ADJ, 'VB':self.wn16.VERB, 'RB':self.wn16.ADV}
self._load_emotions(wn_domains_dir)
self.synsets = self._load_synsets(wn_domains_dir)
def _load_synsets(self, wn_domains_dir):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir))
root = tree.getroot()
pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
synsets = {}
for pos in ["noun", "adj", "verb", "adv"]:
tag = pos_map[pos]
synsets[tag] = {}
for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
offset = int(elem.get("id")[2:])
if not offset: continue
if elem.get("categ"):
synsets[tag][offset] = Emotion.emotions[elem.get("categ")] if elem.get("categ") in Emotion.emotions else None
elif elem.get("noun-id"):
synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
return synsets
def _load_emotions(self, wn_domains_dir):
"""Loads the hierarchy of emotions from the WordNet-Affect xml."""
tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir))
root = tree.getroot()
for elem in root.findall("categ"):
name = elem.get("name")
if name == "root":
Emotion.emotions["root"] = Emotion("root")
else:
Emotion.emotions[name] = Emotion(name, elem.get("isa"))
def get_emotion(self, word, pos):
"""Returns the emotion of the word.
word -- the word (str)
pos -- part-of-speech (str)
"""
if pos in self.flat_pos:
pos = self.flat_pos[pos]
synsets = self.wn16.synsets(word, self.wn_pos[pos])
if synsets:
offset = synsets[0].offset()
if offset in self.synsets[pos]:
return self.synsets[pos][offset]
return None
if __name__ == "__main__":
wordnet16, wndomains32, word, pos = sys.argv[1:5]
wna = WNAffect(wordnet16, wndomains32)
print wna.get_emotion(word, pos)

View File

@ -1,36 +0,0 @@
import requests
import json
from senpy.plugins import SentimentPlugin
from senpy.models import Results, Sentiment, Entry
class Sentiment140Plugin(SentimentPlugin):
def analyse_entry(self,entry,params):
lang = params.get("language", "auto")
res = requests.post("http://www.sentiment140.com/api/bulkClassifyJson",
json.dumps({"language": lang,
"data": [{"text": entry.get("text",None)}]
}
)
)
p = params.get("prefix", None)
polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
["polarity"]) * 0.25
polarity = "marl:Neutral"
neutral_value = 0
if polarity_value > neutral_value:
polarity = "marl:Positive"
elif polarity_value < neutral_value:
polarity = "marl:Negative"
sentiment = Sentiment(id="Sentiment0",
prefix=p,
marl__hasPolarity=polarity,
marl__polarityValue=polarity_value)
entry.sentiments.append(sentiment)
yield entry

View File

@ -1,18 +0,0 @@
{
"name": "sentiment-140",
"module": "sentiment-140",
"description": "Sentiment classifier using rule-based classification for English and Spanish. This plugin uses sentiment140 data to perform classification. For more information: http://help.sentiment140.com/for-students/",
"author": "@balkian",
"version": "0.1",
"extra_params": {
"language": {
"@id": "lang_sentiment140",
"aliases": ["language", "l"],
"required": false,
"options": ["es", "en", "auto"]
}
},
"requirements": {},
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}

1
sentiment-basic Submodule

@ -0,0 +1 @@
Subproject commit beb8e311619059a0c660411edef1cf95b3826c0a

View File

@ -1,26 +0,0 @@
# Sentiment basic plugin
This plugin is based on the classifier developed for the TASS 2015 competition. It has been developed for Spanish and English. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es
There is more information avaliable in:
- Aspect based Sentiment Analysis of Spanish Tweets, Oscar Araque and Ignacio Corcuera-Platas and Constantino Román-Gómez and Carlos A. Iglesias and J. Fernando Sánchez-Rada. http://gsi.dit.upm.es/es/investigacion/publicaciones?view=publication&task=show&id=376
## Usage
Params accepted:
- Language: Spanish (es).
- Input: text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-basic&language=es&input=I%20love%20Madrid
```
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
![alt GSI Logo][logoGSI]
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

File diff suppressed because it is too large Load Diff

View File

@ -1,148 +0,0 @@
import os
import logging
import string
import nltk
import pickle
from sentiwn import SentiWordNet
from nltk.corpus import wordnet as wn
from textblob import TextBlob
from scipy.interpolate import interp1d
from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment
logger = logging.getLogger(__name__)
class SentiTextPlugin(SentimentPlugin):
def _load_swn(self):
self.swn_path = path.join(path.abspath(path.dirname(__file__)), self.sentiword_path)
swn = SentiWordNet(self.swn_path)
return swn
def _load_pos_tagger(self):
self.pos_path = path.join(path.abspath(path.dirname(__file__)), self.pos_path)
with open(self.pos_path, 'r') as f:
tagger = pickle.load(f)
return tagger
def activate(self, *args, **kwargs):
nltk.download(['punkt','wordnet'])
self._swn = self._load_swn()
self._pos_tagger = self._load_pos_tagger()
def _remove_punctuation(self, tokens):
return [t for t in tokens if t not in string.punctuation]
def _tokenize(self, text):
data = {}
sentences = nltk.sent_tokenize(text)
for i, sentence in enumerate(sentences):
sentence_ = {}
words = nltk.word_tokenize(sentence)
sentence_['sentence'] = sentence
tokens_ = [w.lower() for w in words]
sentence_['tokens'] = self._remove_punctuation(tokens_)
data[i] = sentence_
return data
def _pos(self, tokens):
for i in tokens:
tokens[i]['tokens'] = self._pos_tagger.tag(tokens[i]['tokens'])
return tokens
# def _stopwords(sentences, lang='english'):
# for i in sentences:
# sentences[i]['tokens'] = [t for t in sentences[i]['tokens'] if t not in nltk.corpus.stopwords.words(lang)]
# return sentences
def _compare_synsets(self, synsets, tokens, i):
for synset in synsets:
for word in tokens[i]['lemmas']:
for lemma in tokens[i]['lemmas'][word]:
synset_ = lemma.synset()
if synset == synset_:
return synset
return None
def analyse_entry(self, entry, params):
language = params.get("language")
text = entry.get("text", None)
tokens = self._tokenize(text)
tokens = self._pos(tokens)
sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
for i in tokens:
tokens[i]['lemmas'] = {}
for w in tokens[i]['tokens']:
lemmas = wn.lemmas(w[0], lang=sufixes[language])
if len(lemmas) == 0:
continue
tokens[i]['lemmas'][w[0]] = lemmas
if language == "en":
trans = TextBlob(unicode(text))
else:
trans = TextBlob(unicode(text)).translate(from_lang=language,to='en')
useful_synsets = {}
for s_i, t_s in enumerate(trans.sentences):
useful_synsets[s_i] = {}
for w_i, t_w in enumerate(trans.sentences[s_i].words):
synsets = wn.synsets(trans.sentences[s_i].words[w_i])
if len(synsets) == 0:
continue
eq_synset = self._compare_synsets(synsets, tokens, s_i)
useful_synsets[s_i][t_w] = eq_synset
scores = {}
for i in tokens:
scores[i] = {}
if useful_synsets != None:
for word in useful_synsets[i]:
if useful_synsets[i][word] is None:
continue
temp_scores = self._swn.get_score(useful_synsets[i][word].name().split('.')[0].replace(' ',' '))
for score in temp_scores:
if score['synset'] == useful_synsets[i][word]:
t_score = score['pos'] - score['neg']
f_score = 'neu'
if t_score > 0:
f_score = 'pos'
elif t_score < 0:
f_score = 'neg'
score['score'] = f_score
scores[i][word] = score
break
p = params.get("prefix", None)
for i in scores:
n_pos = 0.0
n_neg = 0.0
for w in scores[i]:
if scores[i][w]['score'] == 'pos':
n_pos += 1.0
elif scores[i][w]['score'] == 'neg':
n_neg += 1.0
inter = interp1d([-1.0, 1.0], [0.0, 1.0])
try:
g_score = (n_pos - n_neg) / (n_pos + n_neg)
g_score = float(inter(g_score))
except:
if n_pos == 0 and n_neg == 0:
g_score = 0.5
polarity = 'marl:Neutral'
polarity_value = 0
if g_score > 0.5:
polarity = 'marl:Positive'
polarity_value = 1
elif g_score < 0.5:
polarity = 'marl:Negative'
polarity_value = -1
opinion = Sentiment(id="Opinion0"+'_'+str(i),
marl__hasPolarity=polarity,
marl__polarityValue=polarity_value)
entry.sentiments.append(opinion)
yield entry

View File

@ -1,24 +0,0 @@
{
"name": "sentiment-basic",
"module": "sentiment-basic",
"description": "Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.",
"author": "github.com/nachtkatze",
"version": "0.1",
"requirements": [
"nltk>=3.0.5",
"scipy>=0.14.0",
"textblob"
],
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["en","es", "it", "fr", "auto"],
"default": "auto"
},
},
"sentiword_path": "SentiWordNet_3.0.txt",
"pos_path": "unigram_spanish.pickle",
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}

View File

@ -1,70 +0,0 @@
#!/usr/bin/env python
"""
Author : Jaganadh Gopinadhan <jaganadhg@gmail.com>
Copywright (C) : Jaganadh Gopinadhan
Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import sys,os
import re
from nltk.corpus import wordnet
class SentiWordNet(object):
"""
Interface to SentiWordNet
"""
def __init__(self,swn_file):
"""
"""
self.swn_file = swn_file
self.pos_synset = self.__parse_swn_file()
def __parse_swn_file(self):
"""
Parse the SentiWordNet file and populate the POS and SynsetID hash
"""
pos_synset_hash = {}
swn_data = open(self.swn_file,'r').readlines()
head_less_swn_data = filter((lambda line: not re.search(r"^\s*#",\
line)), swn_data)
for data in head_less_swn_data:
fields = data.strip().split("\t")
try:
pos,syn_set_id,pos_score,neg_score,syn_set_score,\
gloss = fields
except:
print "Found data without all details"
pass
if pos and syn_set_score:
pos_synset_hash[(pos,int(syn_set_id))] = (float(pos_score),\
float(neg_score))
return pos_synset_hash
def get_score(self,word,pos=None):
"""
Get score for a given word/word pos combination
"""
senti_scores = []
synsets = wordnet.synsets(word,pos)
for synset in synsets:
if self.pos_synset.has_key((synset.pos(), synset.offset())):
pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())]
senti_scores.append({"pos":pos_val,"neg":neg_val,\
"obj": 1.0 - (pos_val - neg_val),'synset':synset})
return senti_scores

View File

@ -1,42 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
class SentiTextTest(unittest.TestCase):
def setUp(self):
self.app = Flask("test_plugin")
self.dir = os.path.join(os.path.dirname(__file__))
self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
self.senpy.init_app(self.app)
def tearDown(self):
self.senpy.deactivate_plugin("SentiText", sync=True)
def test_analyse(self):
plugin = self.senpy.plugins["SentiText"]
plugin.activate()
texts = {'Odio ir al cine' : 'marl:Neutral',
'El cielo esta nublado' : 'marl:Positive',
'Esta tarta esta muy buena' : 'marl:Neutral'}
for text in texts:
response = plugin.analyse(input=text)
sentimentSet = response.entries[0].sentiments[0]
print sentimentSet
expected = texts[text]
assert sentimentSet['marl:hasPolarity'] == expected
plugin.deactivate()
if __name__ == '__main__':
unittest.main()

File diff suppressed because it is too large Load Diff

@ -0,0 +1 @@
Subproject commit 56d21c3525a555236e3f400cfa15091a1f197dd3

View File

@ -1,32 +0,0 @@
# Senpy Plugin MeaningCloud
MeaningCloud plugin uses API from Meaning Cloud to perform sentiment analysis.
For more information about Meaning Cloud and its services, please visit: https://www.meaningcloud.com/developer/apis
## Usage
To use this plugin, you need to obtain an API key from meaningCloud signing up here: https://www.meaningcloud.com/developer/login
When you had obtained the meaningCloud API Key, you have to provide it to the plugin, using the param **apiKey**.
To use this plugin, you should use a GET Requests with the following possible params:
Params:
- Language: English (en) and Spanish (es). (default: en)
- API Key: the API key from Meaning Cloud. Aliases: ["apiKey","meaningCloud-key"]. (required)
- Input: text to analyse.(required)
- Model: model provided to Meaning Cloud API (for general domain). (default: general)
## Example of Usage
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=<put here your API key>&input=I%20love%20Madrid
```
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
![alt GSI Logo][logoGSI]
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

View File

@ -1,76 +0,0 @@
import time
import logging
import requests
import json
import string
import os
from os import path
import time
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment,Error
logger = logging.getLogger(__name__)
class DaedalusPlugin(SentimentPlugin):
def _polarity(self, value):
if 'NONE' in value:
polarity = 'marl:Neutral'
polarityValue = 0
elif 'N' in value:
polarity = 'marl:Negative'
polarityValue = -1
elif 'P' in value:
polarity = 'marl:Positive'
polarityValue = 1
return polarity, polarityValue
def analyse_entry(self, entry, params):
txt = entry.get("text",None)
model = "general" # general_es / general_es / general_fr
api = 'http://api.meaningcloud.com/sentiment-2.1'
lang = params.get("language")
key = params["apiKey"]
parameters = {'key': key,
'model': model,
'lang': lang,
'of': 'json',
'txt': txt,
'src': 'its-not-a-real-python-sdk'
}
try:
r = requests.post(api, params=parameters, timeout=3)
except requests.exceptions.Timeout:
raise Error("Meaning Cloud API does not response")
api_response = r.json()
if not api_response.get('score_tag'):
raise Error(r.json())
logger.info(api_response)
response = Results()
agg_polarity, agg_polarityValue = self._polarity(api_response.get('score_tag', None))
agg_opinion = Sentiment(id="Opinion0",
marl__hasPolarity=agg_polarity,
marl__polarityValue = agg_polarityValue,
marl__opinionCount = len(api_response['sentence_list']))
entry.sentiments.append(agg_opinion)
logger.info(api_response['sentence_list'])
count = 1
for sentence in api_response['sentence_list']:
for nopinion in sentence['segment_list']:
logger.info(nopinion)
polarity, polarityValue = self._polarity(nopinion.get('score_tag', None))
opinion = Sentiment(id="Opinion{}".format(count),
marl__hasPolarity=polarity,
marl__polarityValue=polarityValue,
marl__aggregatesOpinion=agg_opinion.get('id'),
nif__anchorOf=nopinion.get('text', None),
nif__beginIndex=nopinion.get('inip', None),
nif__endIndex=nopinion.get('endp', None)
)
count += 1
entry.sentiments.append(opinion)
yield entry

View File

@ -1,24 +0,0 @@
{
"name": "sentiment-meaningCloud",
"module": "sentiment-meaningCloud",
"description": "Sentiment analysis with meaningCloud service. To use this plugin, you need to obtain an API key from meaningCloud signing up here: https://www.meaningcloud.com/developer/login. When you had obtained the meaningCloud API Key, you have to provide it to the plugin, using param apiKey. Example request: http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=<put here your API key>&input=I%20love%20Madrid.",
"author": "GSI UPM",
"version": "1.0",
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["en","es"],
"default": "en"
},
"apiKey":{
"aliases":["meaningCloud-key","apiKey"],
"required":true
}
},
"requirements": {},
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}

1
sentiment-tass Submodule

@ -0,0 +1 @@
Subproject commit 9922c6cadd68139580983b629f371aec3d173168

1
sentiment-vader Submodule

@ -0,0 +1 @@
Subproject commit ddb7432d260fd2d8fca719f1b3ee46117019f475

View File

@ -1,39 +0,0 @@
# Sentimet-vader plugin
=========
Vader is a plugin developed at GSI UPM for sentiment analysis.
For developing this plugin, it has been used the module vaderSentiment, which is described in the paper:
VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text
C.J. Hutto and Eric Gilbert
Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
If you use this plugin in your research, please cite the above paper.
For more information about the functionality, check the official repository
https://github.com/cjhutto/vaderSentiment
The response of this plugin uses [Marl ontology](https://www.gsi.dit.upm.es/ontologies/marl/) developed at GSI UPM for semantic web.
## Usage
Params accepted:
- Language: es (Spanish), en(English).
- Input: Text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-vader&language=en&input=I%20love%20Madrid
```
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
![alt GSI Logo][logoGSI]
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
========

View File

@ -1,16 +0,0 @@
==========
This README file describes the plugin vaderSentiment.
For developing this plugin, it has been used the module vaderSentiment, which is described in the paper:
VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text
C.J. Hutto and Eric Gilbert
Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
If you use this plugin in your research, please cite the above paper
For more information about the functionality, check the official repository
https://github.com/cjhutto/vaderSentiment
========

View File

@ -1,49 +0,0 @@
# -*- coding: utf-8 -*-
from vaderSentiment import sentiment
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Sentiment, Entry
import logging
logger = logging.getLogger(__name__)
class vaderSentimentPlugin(SentimentPlugin):
def analyse_entry(self,entry,params):
logger.debug("Analysing with params {}".format(params))
text_input = entry.get("text", None)
aggregate = params['aggregate']
score = sentiment(text_input)
opinion0 = Sentiment(id= "Opinion_positive",
marl__hasPolarity= "marl:Positive",
marl__algorithmConfidence= score['pos']
)
opinion1 = Sentiment(id= "Opinion_negative",
marl__hasPolarity= "marl:Negative",
marl__algorithmConfidence= score['neg']
)
opinion2 = Sentiment(id= "Opinion_neutral",
marl__hasPolarity = "marl:Neutral",
marl__algorithmConfidence = score['neu']
)
if aggregate == 'true':
res = None
confident = max(score['neg'],score['neu'],score['pos'])
if opinion0.marl__algorithmConfidence == confident:
res = opinion0
elif opinion1.marl__algorithmConfidence == confident:
res = opinion1
elif opinion2.marl__algorithmConfidence == confident:
res = opinion2
entry.sentiments.append(res)
else:
entry.sentiments.append(opinion0)
entry.sentiments.append(opinion1)
entry.sentiments.append(opinion2)
yield entry

View File

@ -1,25 +0,0 @@
{
"name": "sentiment-vader",
"module": "sentiment-vader",
"description": "Sentiment classifier using vaderSentiment module. Params accepted: Language: {en, es}. The output uses Marl ontology developed at GSI UPM for semantic web.",
"author": "@icorcuera",
"version": "0.1",
"extra_params": {
"language": {
"@id": "lang_rand",
"aliases": ["language", "l"],
"required": false,
"options": ["es", "en", "auto"]
},
"aggregate": {
"aliases": ["aggregate","agg"],
"options": ["true", "false"],
"required": false,
"default": false
}
},
"requirements": {}
}

View File

@ -1,44 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
from flask.ext.testing import TestCase
import unittest
class vaderTest(unittest.TestCase):
def setUp(self):
self.app = Flask("test_plugin")
self.dir = os.path.join(os.path.dirname(__file__))
self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
self.senpy.init_app(self.app)
def tearDown(self):
self.senpy.deactivate_plugin("vaderSentiment", sync=True)
def test_analyse(self):
plugin = self.senpy.plugins["vaderSentiment"]
plugin.activate()
texts = {'I am tired :(' : 'marl:Negative',
'I love pizza' : 'marl:Positive',
'I like going to the cinema :)' : 'marl:Positive',
'This cake is disgusting' : 'marl:Negative'}
for text in texts:
response = plugin.analyse(input=text)
expected = texts[text]
sentimentSet = response.entries[0].sentiments
max_sentiment = max(sentimentSet, key=lambda x: x['marl:polarityValue'])
assert max_sentiment['marl:hasPolarity'] == expected
plugin.deactivate()
if __name__ == '__main__':
unittest.main()

View File

@ -1,363 +0,0 @@
#!/usr/bin/python
# coding: utf-8
'''
Created on July 04, 2013
@author: C.J. Hutto
Citation Information
If you use any of the VADER sentiment analysis tools
(VADER sentiment lexicon or Python code for rule-based sentiment
analysis engine) in your work or research, please cite the paper.
For example:
Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
Sentiment Analysis of Social Media Text. Eighth International Conference on
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
'''
import os, math, re, sys, fnmatch, string
reload(sys)
def make_lex_dict(f):
return dict(map(lambda (w, m): (w, float(m)), [wmsr.strip().split('\t')[0:2] for wmsr in open(f) ]))
f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms
try:
word_valence_dict = make_lex_dict(f)
except:
f = os.path.join(os.path.dirname(__file__),'vader_sentiment_lexicon.txt')
word_valence_dict = make_lex_dict(f)
# for removing punctuation
regex_remove_punctuation = re.compile('[%s]' % re.escape(string.punctuation))
def sentiment(text):
"""
Returns a float for sentiment strength based on the input text.
Positive values are positive valence, negative value are negative valence.
"""
wordsAndEmoticons = str(text).split() #doesn't separate words from adjacent punctuation (keeps emoticons & contractions)
text_mod = regex_remove_punctuation.sub('', text) # removes punctuation (but loses emoticons & contractions)
wordsOnly = str(text_mod).split()
# get rid of empty items or single letter "words" like 'a' and 'I' from wordsOnly
for word in wordsOnly:
if len(word) <= 1:
wordsOnly.remove(word)
# now remove adjacent & redundant punctuation from [wordsAndEmoticons] while keeping emoticons and contractions
puncList = [".", "!", "?", ",", ";", ":", "-", "'", "\"",
"!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"]
for word in wordsOnly:
for p in puncList:
pword = p + word
x1 = wordsAndEmoticons.count(pword)
while x1 > 0:
i = wordsAndEmoticons.index(pword)
wordsAndEmoticons.remove(pword)
wordsAndEmoticons.insert(i, word)
x1 = wordsAndEmoticons.count(pword)
wordp = word + p
x2 = wordsAndEmoticons.count(wordp)
while x2 > 0:
i = wordsAndEmoticons.index(wordp)
wordsAndEmoticons.remove(wordp)
wordsAndEmoticons.insert(i, word)
x2 = wordsAndEmoticons.count(wordp)
# get rid of residual empty items or single letter "words" like 'a' and 'I' from wordsAndEmoticons
for word in wordsAndEmoticons:
if len(word) <= 1:
wordsAndEmoticons.remove(word)
# remove stopwords from [wordsAndEmoticons]
#stopwords = [str(word).strip() for word in open('stopwords.txt')]
#for word in wordsAndEmoticons:
# if word in stopwords:
# wordsAndEmoticons.remove(word)
# check for negation
negate = ["aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
"ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
"dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
"don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
"neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing", "nowhere",
"oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
"oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
"without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite"]
def negated(list, nWords=[], includeNT=True):
nWords.extend(negate)
for word in nWords:
if word in list:
return True
if includeNT:
for word in list:
if "n't" in word:
return True
if "least" in list:
i = list.index("least")
if i > 0 and list[i-1] != "at":
return True
return False
def normalize(score, alpha=15):
# normalize the score to be between -1 and 1 using an alpha that approximates the max expected value
normScore = score/math.sqrt( ((score*score) + alpha) )
return normScore
def wildCardMatch(patternWithWildcard, listOfStringsToMatchAgainst):
listOfMatches = fnmatch.filter(listOfStringsToMatchAgainst, patternWithWildcard)
return listOfMatches
def isALLCAP_differential(wordList):
countALLCAPS= 0
for w in wordList:
if str(w).isupper():
countALLCAPS += 1
cap_differential = len(wordList) - countALLCAPS
if cap_differential > 0 and cap_differential < len(wordList):
isDiff = True
else: isDiff = False
return isDiff
isCap_diff = isALLCAP_differential(wordsAndEmoticons)
b_incr = 0.293 #(empirically derived mean sentiment intensity rating increase for booster words)
b_decr = -0.293
# booster/dampener 'intensifiers' or 'degree adverbs' http://en.wiktionary.org/wiki/Category:English_degree_adverbs
booster_dict = {"absolutely": b_incr, "amazingly": b_incr, "awfully": b_incr, "completely": b_incr, "considerably": b_incr,
"decidedly": b_incr, "deeply": b_incr, "effing": b_incr, "enormously": b_incr,
"entirely": b_incr, "especially": b_incr, "exceptionally": b_incr, "extremely": b_incr,
"fabulously": b_incr, "flipping": b_incr, "flippin": b_incr,
"fricking": b_incr, "frickin": b_incr, "frigging": b_incr, "friggin": b_incr, "fully": b_incr, "fucking": b_incr,
"greatly": b_incr, "hella": b_incr, "highly": b_incr, "hugely": b_incr, "incredibly": b_incr,
"intensely": b_incr, "majorly": b_incr, "more": b_incr, "most": b_incr, "particularly": b_incr,
"purely": b_incr, "quite": b_incr, "really": b_incr, "remarkably": b_incr,
"so": b_incr, "substantially": b_incr,
"thoroughly": b_incr, "totally": b_incr, "tremendously": b_incr,
"uber": b_incr, "unbelievably": b_incr, "unusually": b_incr, "utterly": b_incr,
"very": b_incr,
"almost": b_decr, "barely": b_decr, "hardly": b_decr, "just enough": b_decr,
"kind of": b_decr, "kinda": b_decr, "kindof": b_decr, "kind-of": b_decr,
"less": b_decr, "little": b_decr, "marginally": b_decr, "occasionally": b_decr, "partly": b_decr,
"scarcely": b_decr, "slightly": b_decr, "somewhat": b_decr,
"sort of": b_decr, "sorta": b_decr, "sortof": b_decr, "sort-of": b_decr}
sentiments = []
for item in wordsAndEmoticons:
v = 0
i = wordsAndEmoticons.index(item)
if (i < len(wordsAndEmoticons)-1 and str(item).lower() == "kind" and \
str(wordsAndEmoticons[i+1]).lower() == "of") or str(item).lower() in booster_dict:
sentiments.append(v)
continue
item_lowercase = str(item).lower()
if item_lowercase in word_valence_dict:
#get the sentiment valence
v = float(word_valence_dict[item_lowercase])
#check if sentiment laden word is in ALLCAPS (while others aren't)
c_incr = 0.733 #(empirically derived mean sentiment intensity rating increase for using ALLCAPs to emphasize a word)
if str(item).isupper() and isCap_diff:
if v > 0: v += c_incr
else: v -= c_incr
#check if the preceding words increase, decrease, or negate/nullify the valence
def scalar_inc_dec(word, valence):
scalar = 0.0
word_lower = str(word).lower()
if word_lower in booster_dict:
scalar = booster_dict[word_lower]
if valence < 0: scalar *= -1
#check if booster/dampener word is in ALLCAPS (while others aren't)
if str(word).isupper() and isCap_diff:
if valence > 0: scalar += c_incr
else: scalar -= c_incr
return scalar
n_scalar = -0.74
if i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict:
s1 = scalar_inc_dec(wordsAndEmoticons[i-1], v)
v = v+s1
if negated([wordsAndEmoticons[i-1]]): v = v*n_scalar
if i > 1 and str(wordsAndEmoticons[i-2]).lower() not in word_valence_dict:
s2 = scalar_inc_dec(wordsAndEmoticons[i-2], v)
if s2 != 0: s2 = s2*0.95
v = v+s2
# check for special use of 'never' as valence modifier instead of negation
if wordsAndEmoticons[i-2] == "never" and (wordsAndEmoticons[i-1] == "so" or wordsAndEmoticons[i-1] == "this"):
v = v*1.5
# otherwise, check for negation/nullification
elif negated([wordsAndEmoticons[i-2]]): v = v*n_scalar
if i > 2 and str(wordsAndEmoticons[i-3]).lower() not in word_valence_dict:
s3 = scalar_inc_dec(wordsAndEmoticons[i-3], v)
if s3 != 0: s3 = s3*0.9
v = v+s3
# check for special use of 'never' as valence modifier instead of negation
if wordsAndEmoticons[i-3] == "never" and \
(wordsAndEmoticons[i-2] == "so" or wordsAndEmoticons[i-2] == "this") or \
(wordsAndEmoticons[i-1] == "so" or wordsAndEmoticons[i-1] == "this"):
v = v*1.25
# otherwise, check for negation/nullification
elif negated([wordsAndEmoticons[i-3]]): v = v*n_scalar
# check for special case idioms using a sentiment-laden keyword known to SAGE
special_case_idioms = {"the shit": 3, "the bomb": 3, "bad ass": 1.5, "yeah right": -2,
"cut the mustard": 2, "kiss of death": -1.5, "hand to mouth": -2}
# future work: consider other sentiment-laden idioms
#other_idioms = {"back handed": -2, "blow smoke": -2, "blowing smoke": -2, "upper hand": 1, "break a leg": 2,
# "cooking with gas": 2, "in the black": 2, "in the red": -2, "on the ball": 2,"under the weather": -2}
onezero = "{} {}".format(str(wordsAndEmoticons[i-1]), str(wordsAndEmoticons[i]))
twoonezero = "{} {} {}".format(str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]), str(wordsAndEmoticons[i]))
twoone = "{} {}".format(str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]))
threetwoone = "{} {} {}".format(str(wordsAndEmoticons[i-3]), str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]))
threetwo = "{} {}".format(str(wordsAndEmoticons[i-3]), str(wordsAndEmoticons[i-2]))
if onezero in special_case_idioms: v = special_case_idioms[onezero]
elif twoonezero in special_case_idioms: v = special_case_idioms[twoonezero]
elif twoone in special_case_idioms: v = special_case_idioms[twoone]
elif threetwoone in special_case_idioms: v = special_case_idioms[threetwoone]
elif threetwo in special_case_idioms: v = special_case_idioms[threetwo]
if len(wordsAndEmoticons)-1 > i:
zeroone = "{} {}".format(str(wordsAndEmoticons[i]), str(wordsAndEmoticons[i+1]))
if zeroone in special_case_idioms: v = special_case_idioms[zeroone]
if len(wordsAndEmoticons)-1 > i+1:
zeroonetwo = "{} {}".format(str(wordsAndEmoticons[i]), str(wordsAndEmoticons[i+1]), str(wordsAndEmoticons[i+2]))
if zeroonetwo in special_case_idioms: v = special_case_idioms[zeroonetwo]
# check for booster/dampener bi-grams such as 'sort of' or 'kind of'
if threetwo in booster_dict or twoone in booster_dict:
v = v+b_decr
# check for negation case using "least"
if i > 1 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict \
and str(wordsAndEmoticons[i-1]).lower() == "least":
if (str(wordsAndEmoticons[i-2]).lower() != "at" and str(wordsAndEmoticons[i-2]).lower() != "very"):
v = v*n_scalar
elif i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict \
and str(wordsAndEmoticons[i-1]).lower() == "least":
v = v*n_scalar
sentiments.append(v)
# check for modification in sentiment due to contrastive conjunction 'but'
if 'but' in wordsAndEmoticons or 'BUT' in wordsAndEmoticons:
try: bi = wordsAndEmoticons.index('but')
except: bi = wordsAndEmoticons.index('BUT')
for s in sentiments:
si = sentiments.index(s)
if si < bi:
sentiments.pop(si)
sentiments.insert(si, s*0.5)
elif si > bi:
sentiments.pop(si)
sentiments.insert(si, s*1.5)
if sentiments:
sum_s = float(sum(sentiments))
#print sentiments, sum_s
# check for added emphasis resulting from exclamation points (up to 4 of them)
ep_count = str(text).count("!")
if ep_count > 4: ep_count = 4
ep_amplifier = ep_count*0.292 #(empirically derived mean sentiment intensity rating increase for exclamation points)
if sum_s > 0: sum_s += ep_amplifier
elif sum_s < 0: sum_s -= ep_amplifier
# check for added emphasis resulting from question marks (2 or 3+)
qm_count = str(text).count("?")
qm_amplifier = 0
if qm_count > 1:
if qm_count <= 3: qm_amplifier = qm_count*0.18
else: qm_amplifier = 0.96
if sum_s > 0: sum_s += qm_amplifier
elif sum_s < 0: sum_s -= qm_amplifier
compound = normalize(sum_s)
# want separate positive versus negative sentiment scores
pos_sum = 0.0
neg_sum = 0.0
neu_count = 0
for sentiment_score in sentiments:
if sentiment_score > 0:
pos_sum += (float(sentiment_score) +1) # compensates for neutral words that are counted as 1
if sentiment_score < 0:
neg_sum += (float(sentiment_score) -1) # when used with math.fabs(), compensates for neutrals
if sentiment_score == 0:
neu_count += 1
if pos_sum > math.fabs(neg_sum): pos_sum += (ep_amplifier+qm_amplifier)
elif pos_sum < math.fabs(neg_sum): neg_sum -= (ep_amplifier+qm_amplifier)
total = pos_sum + math.fabs(neg_sum) + neu_count
pos = math.fabs(pos_sum / total)
neg = math.fabs(neg_sum / total)
neu = math.fabs(neu_count / total)
else:
compound = 0.0; pos = 0.0; neg = 0.0; neu = 0.0
s = {"neg" : round(neg, 3),
"neu" : round(neu, 3),
"pos" : round(pos, 3),
"compound" : round(compound, 4)}
return s
if __name__ == '__main__':
# --- examples -------
sentences = [
"VADER is smart, handsome, and funny.", # positive sentence example
"VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted)
"VADER is very smart, handsome, and funny.", # booster words handled correctly (sentiment intensity adjusted)
"VADER is VERY SMART, handsome, and FUNNY.", # emphasis for ALLCAPS handled
"VADER is VERY SMART, handsome, and FUNNY!!!",# combination of signals - VADER appropriately adjusts intensity
"VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",# booster words & punctuation make this close to ceiling for score
"The book was good.", # positive sentence
"The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted)
"The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence
"A really bad, horrible book.", # negative sentence with booster words
"At least it isn't a horrible book.", # negated negative sentence with contraction
":) and :D", # emoticons handled
"", # an empty string is correctly handled
"Today sux", # negative slang handled
"Today sux!", # negative slang with punctuation emphasis handled
"Today SUX!", # negative slang with capitalization emphasis
"Today kinda sux! But I'll get by, lol" # mixed sentiment example with slang and constrastive conjunction "but"
]
paragraph = "It was one of the worst movies I've seen, despite good reviews. \
Unbelievably bad acting!! Poor direction. VERY poor production. \
The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!"
from nltk import tokenize
lines_list = tokenize.sent_tokenize(paragraph)
sentences.extend(lines_list)
tricky_sentences = [
"Most automated sentiment analysis tools are shit.",
"VADER sentiment analysis is the shit.",
"Sentiment analysis has never been good.",
"Sentiment analysis with VADER has never been this good.",
"Warren Beatty has never been so entertaining.",
"I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either.",
"I like to hate Michael Bay films, but I couldn't fault this one",
"It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it",
"The movie was too good",
"This movie was actually neither that funny, nor super witty.",
"This movie doesn't care about cleverness, wit or any other kind of intelligent humor.",
"Those who find ugly meanings in beautiful things are corrupt without being charming.",
"There are slow and repetitive parts, BUT it has just enough spice to keep it interesting.",
"The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT!",
"Roger Dodger is one of the most compelling variations on this theme.",
"Roger Dodger is one of the least compelling variations on this theme.",
"Roger Dodger is at least compelling as a variation on the theme.",
"they fall in love with the product",
"but then it breaks",
"usually around the time the 90 day warranty expires",
"the twin towers collapsed today",
"However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.''"
]
sentences.extend(tricky_sentences)
for sentence in sentences:
print sentence,
ss = sentiment(sentence)
print "\t" + str(ss)
print "\n\n Done!"

File diff suppressed because it is too large Load Diff