1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-12-22 04:58:12 +00:00

Update to senpy 0.20

This commit is contained in:
J. Fernando Sánchez 2019-04-04 12:56:46 +02:00
parent fa993c6e2a
commit 4f286057c9
13 changed files with 239 additions and 247 deletions

View File

@ -1 +1 @@
from gsiupm/senpy:0.11.4-python2.7
from gsiupm/senpy:0.20-python3.6

View File

@ -1,4 +1,4 @@
PYVERSION=2.7
PYVERSION=3.7
NAME=senpycommunity
REPO=gsiupm
PLUGINS= $(filter %/, $(wildcard */))
@ -19,7 +19,7 @@ test-fast: test-fast-/
test: docker-build test-fast
dev:
dev: docker-build
docker run -p $(DEV_PORT):5000 $(DOCKER_FLAGS) -ti $(DOCKER_FLAGS) -v $$PWD/$*:/senpy-plugins/ --entrypoint /bin/bash -v $$PWD/data:/data/ --rm $(IMAGEWTAG)
.PHONY:: test test-fast dev

View File

@ -19,11 +19,11 @@ from nltk.corpus import stopwords
from pattern.en import parse as parse_en
from pattern.es import parse as parse_es
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.plugins import EmotionPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion
class ANEW(SentimentPlugin):
class ANEW(EmotionPlugin):
description = "This plugin consists on an emotion classifier using ANEW lexicon dictionary to calculate VAD (valence-arousal-dominance) of the sentence and determinate which emotion is closer to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value."
author = "@icorcuera"
version = "0.5.1"
@ -31,6 +31,7 @@ class ANEW(SentimentPlugin):
extra_params = {
"language": {
"description": "language of the input",
"aliases": ["language", "l"],
"required": True,
"options": ["es","en"],
@ -75,7 +76,7 @@ class ANEW(SentimentPlugin):
"neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion",
"sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness"
}
onyx__usesEmotionModel = "emoml:big6"
onyx__usesEmotionModel = "emoml:pad"
nltk_resources = ['stopwords']
def activate(self, *args, **kwargs):
@ -119,14 +120,13 @@ class ANEW(SentimentPlugin):
return s
def _extract_ngrams(self, text, lang):
unigrams_lemmas = []
unigrams_words = []
pos_tagged = []
if lang == 'es':
sentences = parse_es(text,lemmata=True).split()
sentences = list(parse_es(text, lemmata=True).split())
else:
sentences = parse_en(text,lemmata=True).split()
sentences = list(parse_en(text, lemmata=True).split())
for sentence in sentences:
for token in sentence:
@ -140,19 +140,6 @@ class ANEW(SentimentPlugin):
def _find_ngrams(self, input_list, n):
return zip(*[input_list[i:] for i in range(n)])
def _emotion_calculate(self, VAD):
emotion=''
value=10000000000000000000000.0
for state in self.centroids:
valence=VAD[0]-self.centroids[state]['V']
arousal=VAD[1]-self.centroids[state]['A']
dominance=VAD[2]-self.centroids[state]['D']
new_value=math.sqrt((valence*valence)+(arousal*arousal)+(dominance*dominance))
if new_value < value:
value=new_value
emotion=state
return emotion
def _extract_features(self, tweet,dictionary,lang):
feature_set={}
ngrams_lemmas,ngrams_words,pos_tagged = self._extract_ngrams(tweet,lang)
@ -176,14 +163,13 @@ class ANEW(SentimentPlugin):
emotion='neutral'
else:
totalVAD=[totalVAD[0]/matches,totalVAD[1]/matches,totalVAD[2]/matches]
emotion=self._emotion_calculate(totalVAD)
feature_set['emotion']=emotion
feature_set['V']=totalVAD[0]
feature_set['A']=totalVAD[1]
feature_set['D']=totalVAD[2]
feature_set['V'] = totalVAD[0]
feature_set['A'] = totalVAD[1]
feature_set['D'] = totalVAD[2]
return feature_set
def analyse_entry(self, entry, params):
def analyse_entry(self, entry, activity):
params = activity.params
text_input = entry.text
@ -196,13 +182,12 @@ class ANEW(SentimentPlugin):
emotions.id = "Emotions0"
emotion1 = Emotion(id="Emotion0")
emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']]
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V']
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A']
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D']
emotion1.prov(self)
emotions.prov(self)
emotion1.prov(activity)
emotions.prov(activity)
emotions.onyx__hasEmotion.append(emotion1)
entry.emotions = [emotions, ]
@ -212,47 +197,64 @@ class ANEW(SentimentPlugin):
ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
test_cases = [
{
'name': 'anger with VAD=(2.12, 6.95, 5.05)',
'input': 'I hate you',
'expected': {
'emotions': [{
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'anger',
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.95,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 5.05,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 2.12,
}]
}]
}
}, {
'input': 'i am sad',
'expected': {
'emotions': [{
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'sadness',
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 4.13,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 3.45,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 1.61,
}]
}]
}
}, {
'name': 'joy',
'input': 'i am happy with my marks',
'expected': {
'emotions': [{
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'joy',
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 6.49,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 6.63,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 8.21,
}]
}]
}
}, {
'name': 'negative-feat',
'input': 'This movie is scary',
'expected': {
'emotions': [{
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'negative-fear',
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.8100000000000005,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.33,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.050000000000001,
}]
}]
}
}, {
'name': 'negative-fear',
'input': 'this cake is disgusting' ,
'expected': {
'emotions': [{
'onyx:hasEmotionSet': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'negative-fear',
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal": 5.09,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance": 4.4,
"http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence": 5.109999999999999,
}]
}]
}

View File

@ -8,5 +8,4 @@ requirements:
- scikit-learn
- textblob
- pattern
- lxml
onyx:usesEmotionModel: "emoml:big6"
- lxml

View File

@ -1,49 +1,56 @@
#!/usr/local/bin/python
# coding: utf-8
from future import standard_library
standard_library.install_aliases()
import os
import re
import sys
import string
import numpy as np
import pandas as pd
from six.moves import urllib
from nltk.corpus import stopwords
from senpy import EmotionPlugin, TextBox, models
from senpy import EmotionBox, models
def ignore(dchars):
deletechars = "".join(dchars)
if sys.version_info[0] >= 3:
tbl = str.maketrans("", "", deletechars)
ignore = lambda s: s.translate(tbl)
else:
def ignore(s):
return string.translate(s, None, deletechars)
tbl = str.maketrans("", "", deletechars)
ignore = lambda s: s.translate(tbl)
return ignore
class DepecheMood(TextBox, EmotionPlugin):
'''Plugin that uses the DepecheMood++ emotion lexicon.'''
class DepecheMood(EmotionBox):
'''
Plugin that uses the DepecheMood emotion lexicon.
DepecheMood is an emotion lexicon automatically generated from news articles where users expressed their associated emotions. It contains two languages (English and Italian), as well as three types of word representations (token, lemma and lemma#PoS). For English, the lexicon contains 165k tokens, while the Italian version contains 116k. Unsupervised techniques can be applied to generate simple but effective baselines. To learn more, please visit https://github.com/marcoguerini/DepecheMood and http://www.depechemood.eu/
'''
author = 'Oscar Araque'
name = 'emotion-depechemood'
version = '0.1'
requirements = ['pandas']
nltk_resources = ["stopwords"]
onyx__usesEmotionModel = 'wna:WNAModel'
EMOTIONS = ['wna:negative-fear',
'wna:amusement',
'wna:anger',
'wna:annoyance',
'wna:indifference',
'wna:joy',
'wna:awe',
'wna:sadness']
DM_EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',]
def __init__(self, *args, **kwargs):
super(DepecheMood, self).__init__(*args, **kwargs)
self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"
self.EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',]
self._mapping = {
'AFRAID': 'wna:negative-fear',
'AMUSED': 'wna:amusement',
'ANGRY': 'wna:anger',
'ANNOYED': 'wna:annoyance',
'DONT_CARE': 'wna:indifference',
'HAPPY': 'wna:joy',
'INSPIRED': 'wna:awe',
'SAD': 'wna:sadness',
}
self._denoise = ignore(set(string.punctuation)|set('«»'))
self._stop_words = []
self._lex_vocab = None
@ -89,19 +96,21 @@ class DepecheMood(TextBox, EmotionPlugin):
return S
def estimate_all_emotions(self, tokens):
S = {}
S = []
intersection = set(tokens) & self._lex_vocab
for emotion in self.EMOTIONS:
for emotion in self.DM_EMOTIONS:
s = self.estimate_emotion(intersection, emotion)
emotion_mapped = self._mapping[emotion]
S[emotion_mapped] = s
S.append(s)
return S
def download_lex(self, file_path='DepecheMood_english_token_full.tsv', freq_threshold=10):
import pandas as pd
try:
file_path = self.find_file(file_path)
except IOError:
file_path = self.path(file_path)
filename, _ = urllib.request.urlretrieve(self.LEXICON_URL, file_path)
lexicon = pd.read_csv(file_path, sep='\t', index_col=0)
@ -110,18 +119,8 @@ class DepecheMood(TextBox, EmotionPlugin):
lexicon = lexicon.T.to_dict()
return lexicon
def output(self, output, entry, **kwargs):
s = models.EmotionSet()
s.prov__wasGeneratedBy = self.id
entry.emotions.append(s)
for label, value in output.items():
e = models.Emotion(onyx__hasEmotionCategory=label,
onyx__hasEmotionIntensity=value)
s.onyx__hasEmotion.append(e)
return entry
def predict_one(self, input, **kwargs):
tokens = self.preprocess(input)
def predict_one(self, features, **kwargs):
tokens = self.preprocess(features[0])
estimation = self.estimate_all_emotions(tokens)
return estimation
@ -131,26 +130,41 @@ class DepecheMood(TextBox, EmotionPlugin):
'nif:isString': 'My cat is very happy',
},
'expected': {
'emotions': [
'onyx:hasEmotionSet': [
{
'@type': 'emotionSet',
'onyx:hasEmotion': [
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:negative-fear',
'onyx:hasEmotionIntensity': 0.05278117640010922, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:amusement',
'onyx:hasEmotionIntensity': 0.2114806151413433, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:anger',
'onyx:hasEmotionIntensity': 0.05726119426520887, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:annoyance',
'onyx:hasEmotionIntensity': 0.12295990731053638, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:indifference',
'onyx:hasEmotionIntensity': 0.1860159893608025, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:joy',
'onyx:hasEmotionIntensity': 0.12904050973724163, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:awe',
'onyx:hasEmotionIntensity': 0.17973650399862967, },
{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:sadness',
'onyx:hasEmotionIntensity': 0.060724103786128455, },
{
'onyx:hasEmotionCategory': 'wna:negative-fear',
'onyx:hasEmotionIntensity': 0.05278117640010922
},
{
'onyx:hasEmotionCategory': 'wna:amusement',
'onyx:hasEmotionIntensity': 0.2114806151413433,
},
{
'onyx:hasEmotionCategory': 'wna:anger',
'onyx:hasEmotionIntensity': 0.05726119426520887
},
{
'onyx:hasEmotionCategory': 'wna:annoyance',
'onyx:hasEmotionIntensity': 0.12295990731053638,
},
{
'onyx:hasEmotionCategory': 'wna:indifference',
'onyx:hasEmotionIntensity': 0.1860159893608025,
},
{
'onyx:hasEmotionCategory': 'wna:joy',
'onyx:hasEmotionIntensity': 0.12904050973724163,
},
{
'onyx:hasEmotionCategory': 'wna:awe',
'onyx:hasEmotionIntensity': 0.17973650399862967,
},
{
'onyx:hasEmotionCategory': 'wna:sadness',
'onyx:hasEmotionIntensity': 0.060724103786128455,
},
]
}
]
@ -164,4 +178,4 @@ if __name__ == '__main__':
# sp, app = easy_load()
# for plug in sp.analysis_plugins:
# plug.test()
easy()
easy_test(debug=False)

View File

@ -25,6 +25,7 @@ class WNAffect(EmotionPlugin, ShelfMixin):
extra_params = {
'language': {
"@id": 'lang_wnaffect',
'description': 'language of the input',
'aliases': ['language', 'l'],
'required': True,
'options': ['en',]
@ -223,7 +224,8 @@ class WNAffect(EmotionPlugin, ShelfMixin):
return feature_set
def analyse_entry(self, entry, params):
def analyse_entry(self, entry, activity):
params = activity.params
text_input = entry['nif:isString']
@ -262,14 +264,14 @@ class WNAffect(EmotionPlugin, ShelfMixin):
'algorithm': 'emotion-wnaffect'}
self.activate()
res = next(self.analyse_entry(Entry(nif__isString="This text make me sad"), params))
texts = {'I hate you': 'anger',
'i am sad': 'sadness',
'i am happy with my marks': 'joy',
'This movie is scary': 'negative-fear'}
for text in texts:
response = next(self.analyse_entry(Entry(nif__isString=text), params))
response = next(self.analyse_entry(Entry(nif__isString=text),
self.activity(params)))
expected = texts[text]
emotionSet = response.emotions[0]
max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity'])

View File

@ -1,8 +1,8 @@
from senpy.plugins import Analysis
from senpy.plugins import AnalysisPlugin
from senpy.models import Response, Entry
class ExamplePlugin(Analysis):
class ExamplePlugin(AnalysisPlugin):
'''A *VERY* simple plugin that exemplifies the development of Senpy Plugins'''
name = "example-plugin"
author = "@balkian"
@ -10,6 +10,7 @@ class ExamplePlugin(Analysis):
extra_params = {
"parameter": {
"@id": "parameter",
"description": "this parameter does nothing, it is only an example",
"aliases": ["parameter", "param"],
"required": True,
"default": 42
@ -17,7 +18,8 @@ class ExamplePlugin(Analysis):
}
custom_attribute = "42"
def analyse_entry(self, entry, params):
def analyse_entry(self, entry, activity):
params = activity.params
self.log.debug('Analysing with the example.')
self.log.debug('The answer to this response is: %s.' % params['parameter'])
resp = Response()

View File

@ -15,11 +15,9 @@ spec:
- name: senpy-latest
image: $IMAGEWTAG
imagePullPolicy: Always
args:
- "--default-plugins"
resources:
limits:
memory: "512Mi"
memory: "2048Mi"
cpu: "1000m"
ports:
- name: web

View File

@ -12,14 +12,14 @@ from textblob import TextBlob
from scipy.interpolate import interp1d
from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment
from senpy.plugins import SentimentBox, SenpyPlugin
from senpy.models import Results, Entry, Sentiment, Error
if sys.version_info[0] >= 3:
unicode = str
class SentimentBasic(SentimentPlugin):
class SentimentBasic(SentimentBox):
'''
Sentiment classifier using rule-based classification for Spanish. Based on English-to-Spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the fully functional classifier, you can use the service at: http://senpy.cluster.gsi.dit.upm.es.
'''
@ -28,10 +28,11 @@ class SentimentBasic(SentimentPlugin):
version = "0.1.1"
extra_params = {
"language": {
"description": "language of the text",
"aliases": ["language", "l"],
"required": True,
"options": ["en","es", "it", "fr", "auto"],
"default": "auto"
"options": ["en","es", "it", "fr"],
"default": "en"
}
}
sentiword_path = "SentiWordNet_3.0.txt"
@ -40,6 +41,8 @@ class SentimentBasic(SentimentPlugin):
minPolarityValue = -1
nltk_resources = ['punkt','wordnet', 'omw']
with_polarity = False
def _load_swn(self):
self.swn_path = self.find_file(self.sentiword_path)
swn = SentiWordNet(self.swn_path)
@ -59,128 +62,116 @@ class SentimentBasic(SentimentPlugin):
return [t for t in tokens if t not in string.punctuation]
def _tokenize(self, text):
data = {}
sentences = nltk.sent_tokenize(text)
for i, sentence in enumerate(sentences):
sentence_ = {}
words = nltk.word_tokenize(sentence)
sentence_['sentence'] = sentence
tokens_ = [w.lower() for w in words]
sentence_['tokens'] = self._remove_punctuation(tokens_)
data[i] = sentence_
return data
sentence_ = {}
words = nltk.word_tokenize(text)
sentence_['sentence'] = text
tokens_ = [w.lower() for w in words]
sentence_['tokens'] = self._remove_punctuation(tokens_)
return sentence_
def _pos(self, tokens):
for i in tokens:
tokens[i]['tokens'] = self._pos_tagger.tag(tokens[i]['tokens'])
tokens['tokens'] = self._pos_tagger.tag(tokens['tokens'])
return tokens
def _compare_synsets(self, synsets, tokens, i):
def _compare_synsets(self, synsets, tokens):
for synset in synsets:
for word in tokens[i]['lemmas']:
for lemma in tokens[i]['lemmas'][word]:
for word, lemmas in tokens['lemmas'].items():
for lemma in lemmas:
synset_ = lemma.synset()
if synset == synset_:
return synset
return None
def analyse_entry(self, entry, params):
language = params.get("language")
text = entry.text
def predict_one(self, features, activity):
language = activity.param("language")
text = features[0]
tokens = self._tokenize(text)
tokens = self._pos(tokens)
sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
for i in tokens:
tokens[i]['lemmas'] = {}
for w in tokens[i]['tokens']:
lemmas = wn.lemmas(w[0], lang=sufixes[language])
if len(lemmas) == 0:
continue
tokens[i]['lemmas'][w[0]] = lemmas
tokens['lemmas'] = {}
for w in tokens['tokens']:
lemmas = wn.lemmas(w[0], lang=sufixes[language])
if len(lemmas) == 0:
continue
tokens['lemmas'][w[0]] = lemmas
if language == "en":
trans = TextBlob(unicode(text))
else:
trans = TextBlob(unicode(text)).translate(from_lang=language,to='en')
try:
trans = TextBlob(unicode(text)).translate(from_lang=language,to='en')
except Exception as ex:
raise Error('Could not translate the text from "{}" to "{}": {}'.format(language,
'en',
str(ex)))
useful_synsets = {}
for s_i, t_s in enumerate(trans.sentences):
useful_synsets[s_i] = {}
for w_i, t_w in enumerate(trans.sentences[s_i].words):
synsets = wn.synsets(trans.sentences[s_i].words[w_i])
if len(synsets) == 0:
continue
eq_synset = self._compare_synsets(synsets, tokens, s_i)
useful_synsets[s_i][t_w] = eq_synset
for w_i, t_w in enumerate(trans.sentences[0].words):
synsets = wn.synsets(trans.sentences[0].words[w_i])
if len(synsets) == 0:
continue
eq_synset = self._compare_synsets(synsets, tokens)
useful_synsets[t_w] = eq_synset
scores = {}
for i in tokens:
scores[i] = {}
if useful_synsets != None:
for word in useful_synsets[i]:
if useful_synsets[i][word] is None:
continue
temp_scores = self._swn.get_score(useful_synsets[i][word].name().split('.')[0].replace(' ',' '))
for score in temp_scores:
if score['synset'] == useful_synsets[i][word]:
t_score = score['pos'] - score['neg']
f_score = 'neu'
if t_score > 0:
f_score = 'pos'
elif t_score < 0:
f_score = 'neg'
score['score'] = f_score
scores[i][word] = score
break
p = params.get("prefix", None)
scores = {}
if useful_synsets != None:
for word in useful_synsets:
if useful_synsets[word] is None:
continue
temp_scores = self._swn.get_score(useful_synsets[word].name().split('.')[0].replace(' ',' '))
for score in temp_scores:
if score['synset'] == useful_synsets[word]:
t_score = score['pos'] - score['neg']
f_score = 'neu'
if t_score > 0:
f_score = 'pos'
elif t_score < 0:
f_score = 'neg'
score['score'] = f_score
scores[word] = score
break
g_score = 0.5
for i in scores:
n_pos = 0.0
n_neg = 0.0
for w in scores[i]:
if scores[i][w]['score'] == 'pos':
for w in scores:
if scores[w]['score'] == 'pos':
n_pos += 1.0
elif scores[i][w]['score'] == 'neg':
elif scores[w]['score'] == 'neg':
n_neg += 1.0
inter = interp1d([-1.0, 1.0], [0.0, 1.0])
try:
g_score = (n_pos - n_neg) / (n_pos + n_neg)
g_score = float(inter(g_score))
except:
if n_pos == 0 and n_neg == 0:
g_score = 0.5
if g_score >= 0.5:
polarity = 'marl:Positive'
polarity_value = 1
elif g_score < 0.5:
polarity = 'marl:Negative'
polarity_value = -1
else:
polarity = 'marl:Neutral'
polarity_value = 0
opinion = Sentiment(id="Opinion0"+'_'+str(i),
marl__hasPolarity=polarity,
marl__polarityValue=polarity_value)
opinion.prov(self)
entry.sentiments.append(opinion)
if g_score > 0.5: # Positive
return [1, 0, 0]
elif g_score < 0.5: # Negative
return [0, 0, 1]
else:
return [0, 1, 0]
yield entry
test_cases = [
{
'input': u'Odio ir al cine',
'input': 'Odio ir al cine',
'params': {'language': 'es'},
'polarity': 'marl:Negative'
},
{
'input': u'El cielo está nublado',
'input': 'El cielo está nublado',
'params': {'language': 'es'},
'polarity': 'marl:Positive'
'polarity': 'marl:Neutral'
},
{
'input': u'Esta tarta está muy buena',
'input': 'Esta tarta está muy buena',
'params': {'language': 'es'},
'polarity': 'marl:Negative'
'polarity': 'marl:Negative' # SURPRISINGLY!
}
]

View File

@ -21,7 +21,7 @@ class MeaningCloudPlugin(SentimentPlugin):
Once you have obtained the MeaningCloud API key, you have to provide it to the plugin using the apiKey parameter.
Example request:
http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=<API key>&input=I%20love%20Madrid.
http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=YOUR_API_KEY&input=I%20love%20Madrid.
'''
name = 'sentiment-meaningcloud'
author = 'GSI UPM'
@ -31,12 +31,14 @@ class MeaningCloudPlugin(SentimentPlugin):
extra_params = {
"language": {
"description": "language of the input",
"aliases": ["language", "l"],
"required": True,
"options": ["en","es","ca","it","pt","fr","auto"],
"default": "auto"
},
"apikey":{
"description": "API key for the meaningcloud service. See https://www.meaningcloud.com/developer/login",
"aliases": ["apiKey", "meaningcloud-key", "meaningcloud-apikey"],
"required": True
}
@ -55,7 +57,8 @@ class MeaningCloudPlugin(SentimentPlugin):
polarityValue = 1
return polarity, polarityValue
def analyse_entry(self, entry, params):
def analyse_entry(self, entry, activity):
params = activity.params
txt = entry['nif:isString']
api = 'http://api.meaningcloud.com/'
@ -129,7 +132,9 @@ class MeaningCloudPlugin(SentimentPlugin):
sementity = sent_entity['sementity'].get('type', None).split(">")[-1]
entity['@type'] = "ODENTITY_{}".format(sementity)
entity.prov(self)
entry.entities.append(entity)
if 'senpy:hasEntity' not in entry:
entry['senpy:hasEntity'] = []
entry['senpy:hasEntity'].append(entity)
for topic in api_response_topics['concept_list']:
if 'semtheme_list' in topic:
@ -139,7 +144,9 @@ class MeaningCloudPlugin(SentimentPlugin):
concept['@type'] = "ODTHEME_{}".format(theme['type'].split(">")[-1])
concept['fam:topic-reference'] = "http://dbpedia.org/resource/{}".format(theme['type'].split('>')[-1])
entry.prov(self)
entry.topics.append(concept)
if 'senpy:hasTopic' not in entry:
entry['senpy:hasTopic'] = []
entry['senpy:hasTopic'].append(concept)
yield entry
test_cases = [
@ -160,11 +167,11 @@ class MeaningCloudPlugin(SentimentPlugin):
},
'input': 'Hello World Obama',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Neutral'}],
'entities': [
'senpy:hasEntity': [
{'itsrdf:taIdentRef': 'http://dbpedia.org/resource/Obama'}],
'topics': [
'senpy:hasTopic': [
{'fam:topic-reference': 'http://dbpedia.org/resource/Astronomy'}]
},
'responses': [

View File

@ -43,7 +43,8 @@ class TaigerPlugin3cats(SentimentPlugin):
raise ValueError('unknown polarity: {}'.format(value))
return polarity, value
def analyse_entry(self, entry, params):
def analyse_entry(self, entry, activity):
params = activity.params
txt = entry['nif:isString']
api = TAIGER_ENDPOINT
@ -89,7 +90,7 @@ class TaigerPlugin3cats(SentimentPlugin):
},
'input': 'I hate to say this',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Negative'}],
},
'responses': [
@ -116,7 +117,7 @@ class TaigerPlugin3cats(SentimentPlugin):
},
'input': 'This is amazing',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Positive'}],
},
'responses': [
@ -143,7 +144,7 @@ class TaigerPlugin3cats(SentimentPlugin):
},
'input': 'The pillow is in the wardrobe',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Neutral'}],
},
'responses': [

View File

@ -38,7 +38,8 @@ class TaigerPlugin(SentimentPlugin):
polarity = 'marl:Positive'
return polarity
def analyse_entry(self, entry, params):
def analyse_entry(self, entry, activity):
params = activity.params
txt = entry['nif:isString']
api = TAIGER_ENDPOINT
@ -77,7 +78,6 @@ class TaigerPlugin(SentimentPlugin):
'expanded-jsonld': 0,
'informat': 'text',
'prefix': '',
'plugin_type': 'analysisPlugin',
'urischeme': 'RFC5147String',
'outformat': 'json-ld',
'conversion': 'full',
@ -87,7 +87,7 @@ class TaigerPlugin(SentimentPlugin):
},
'input': 'I hate to say this',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Negative'}],
},
'responses': [
@ -109,7 +109,6 @@ class TaigerPlugin(SentimentPlugin):
'expanded-jsonld': 0,
'informat': 'text',
'prefix': '',
'plugin_type': 'analysisPlugin',
'urischeme': 'RFC5147String',
'outformat': 'json-ld',
'conversion': 'full',
@ -119,7 +118,7 @@ class TaigerPlugin(SentimentPlugin):
},
'input': 'This is amazing',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Positive'}],
},
'responses': [
@ -141,7 +140,6 @@ class TaigerPlugin(SentimentPlugin):
'expanded-jsonld': 0,
'informat': 'text',
'prefix': '',
'plugin_type': 'analysisPlugin',
'urischeme': 'RFC5147String',
'outformat': 'json-ld',
'conversion': 'full',
@ -151,7 +149,7 @@ class TaigerPlugin(SentimentPlugin):
},
'input': 'The pillow is in the wardrobe',
'expected': {
'sentiments': [
'marl:hasOpinion': [
{'marl:hasPolarity': 'marl:Neutral'}],
},
'responses': [

View File

@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
from vaderSentiment import sentiment
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.plugins import SentimentBox, SenpyPlugin
from senpy.models import Results, Sentiment, Entry
import logging
class VaderSentimentPlugin(SentimentPlugin):
class VaderSentimentPlugin(SentimentBox):
'''
Sentiment classifier using the vaderSentiment module. Params accepted: Language: {en, es}. The output uses the Marl ontology developed at GSI UPM for the semantic web.
'''
@ -16,6 +16,7 @@ class VaderSentimentPlugin(SentimentPlugin):
version = "0.1.1"
extra_params = {
"language": {
"description": "language of the input",
"@id": "lang_rand",
"aliases": ["language", "l"],
"default": "auto",
@ -23,55 +24,32 @@ class VaderSentimentPlugin(SentimentPlugin):
},
"aggregate": {
"description": "Show only the strongest sentiment (aggregate) or all sentiments",
"aliases": ["aggregate","agg"],
"options": ["true", "false"],
"options": [True, False],
"default": False
}
}
requirements = {}
def analyse_entry(self, entry, params):
_VADER_KEYS = ['pos', 'neu', 'neg']
binary = False
self.log.debug("Analysing with params {}".format(params))
text_input = entry.text
aggregate = params['aggregate']
def predict_one(self, features, activity):
text_input = ' '.join(features)
scores = sentiment(text_input)
score = sentiment(text_input)
sentiments = []
for k in self._VADER_KEYS:
sentiments.append(scores[k])
opinion0 = Sentiment(id= "Opinion_positive",
marl__hasPolarity= "marl:Positive",
marl__algorithmConfidence= score['pos']
)
opinion0.prov(self)
opinion1 = Sentiment(id= "Opinion_negative",
marl__hasPolarity= "marl:Negative",
marl__algorithmConfidence= score['neg']
)
opinion1.prov(self)
opinion2 = Sentiment(id= "Opinion_neutral",
marl__hasPolarity = "marl:Neutral",
marl__algorithmConfidence = score['neu']
)
opinion2.prov(self)
if activity.param('aggregate'):
m = max(sentiments)
sentiments = [k if k==m else None for k in sentiments]
if aggregate == 'true':
res = None
confident = max(score['neg'],score['neu'],score['pos'])
if opinion0.marl__algorithmConfidence == confident:
res = opinion0
elif opinion1.marl__algorithmConfidence == confident:
res = opinion1
elif opinion2.marl__algorithmConfidence == confident:
res = opinion2
entry.sentiments.append(res)
else:
entry.sentiments.append(opinion0)
entry.sentiments.append(opinion1)
entry.sentiments.append(opinion2)
yield entry
return sentiments
test_cases = []