1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-14 12:42:27 +00:00

Merged into monorepo

This commit is contained in:
J. Fernando Sánchez 2018-06-14 19:38:08 +02:00
parent e51b659030
commit c52a894017
29 changed files with 406 additions and 493 deletions

24
.gitmodules vendored
View File

@ -1,15 +1,9 @@
[submodule "sentiment-meaningCloud"] [submodule "emotion-anew/data"]
path = sentiment-meaningCloud path = emotion-anew/data
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-meaningCloud/ url = https://lab.cluster.gsi.dit.upm.es/senpy/data/emotion-anew.git
[submodule "sentiment-vader"] [submodule "emotion-wnaffect/data"]
path = sentiment-vader path = emotion-wnaffect/data
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-vader/ url = https://lab.cluster.gsi.dit.upm.es/senpy/data/emotion-wnaffect.git
[submodule "emotion-wnaffect"] [submodule "sentiment-basic/data"]
path = emotion-wnaffect path = sentiment-basic/data
url = https://lab.cluster.gsi.dit.upm.es/senpy/emotion-wnaffect url = https://lab.cluster.gsi.dit.upm.es/senpy/data/sentiment-basic.git
[submodule "emotion-anew"]
path = emotion-anew
url = https://lab.cluster.gsi.dit.upm.es/senpy/emotion-anew
[submodule "sentiment-basic"]
path = sentiment-basic
url = https://lab.cluster.gsi.dit.upm.es/senpy/sentiment-basic

View File

@ -1,9 +1,10 @@
from gsiupm/senpy:0.8.7-python2.7 from gsiupm/senpy:0.10.5-python2.7
RUN python -m nltk.downloader stopwords RUN python -m nltk.downloader stopwords
RUN python -m nltk.downloader punkt RUN python -m nltk.downloader punkt
RUN python -m nltk.downloader maxent_treebank_pos_tagger RUN python -m nltk.downloader maxent_treebank_pos_tagger
RUN python -m nltk.downloader wordnet RUN python -m nltk.downloader wordnet
RUN python -m nltk.downloader omw
ADD . /senpy-plugins ADD . /senpy-plugins

View File

@ -3,6 +3,11 @@ NAME=senpycommunity
REPO=gsiupm REPO=gsiupm
VERSION=test VERSION=test
PLUGINS= $(filter %/, $(wildcard */)) PLUGINS= $(filter %/, $(wildcard */))
DOCKER_FLAGS=
ifdef SENPY_FOLDER
DOCKER_FLAGS+=-v $(realpath $(SENPY_FOLDER)):/usr/src/app/
endif
all: build run all: build run
@ -11,15 +16,15 @@ build: clean Dockerfile
docker build -t '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)' -f Dockerfile .; docker build -t '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)' -f Dockerfile .;
test-%: test-%:
docker run -v $$PWD/$*:/senpy-plugins/ --rm --entrypoint=/usr/local/bin/py.test -ti '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)' test.py docker run $(DOCKER_FLAGS) -v $$PWD/$*:/senpy-plugins/ --rm -ti '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)' --only-test $(TEST_FLAGS)
test: $(addprefix test-,$(PLUGINS)) test: test-.
clean: clean:
@docker ps -a | awk '/$(REPO)\/$(NAME)/{ split($$2, vers, "-"); if(vers[1] != "${VERSION}"){ print $$1;}}' | xargs docker rm 2>/dev/null|| true @docker ps -a | awk '/$(REPO)\/$(NAME)/{ split($$2, vers, "-"); if(vers[1] != "${VERSION}"){ print $$1;}}' | xargs docker rm 2>/dev/null|| true
@docker images | awk '/$(REPO)\/$(NAME)/{ split($$2, vers, "-"); if(vers[1] != "${VERSION}"){ print $$1":"$$2;}}' | xargs docker rmi 2>/dev/null|| true @docker images | awk '/$(REPO)\/$(NAME)/{ split($$2, vers, "-"); if(vers[1] != "${VERSION}"){ print $$1":"$$2;}}' | xargs docker rmi 2>/dev/null|| true
run: build run: build
docker run --rm -p 5000:5000 -ti '$(REPO)/$(NAME):$(VERSION)-python$(PYMAIN)' docker run $(DOCKER_FLAGS) --rm -p 5000:5000 -ti '$(REPO)/$(NAME):$(VERSION)-python$(PYMAIN)'
.PHONY: test test-% build-% build test test_pip run clean .PHONY: test test-% build-% build test test_pip run clean

View File

@ -1,3 +0,0 @@
[submodule "data"]
path = data
url = ../data/emotion-anew

View File

@ -23,12 +23,81 @@ from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(SentimentPlugin): class ANEW(SentimentPlugin):
description = "This plugin consists on an emotion classifier using ANEW lexicon dictionary to calculate VAD (valence-arousal-dominance) of the sentence and determinate which emotion is closer to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value."
author = "@icorcuera"
version = "0.5.1"
name = "emotion-anew"
extra_params = {
"language": {
"aliases": ["language", "l"],
"required": True,
"options": ["es","en"],
"default": "en"
}
}
anew_path_es = "Dictionary/Redondo(2007).csv"
anew_path_en = "Dictionary/ANEW2010All.txt"
centroids = {
"anger": {
"A": 6.95,
"D": 5.1,
"V": 2.7
},
"disgust": {
"A": 5.3,
"D": 8.05,
"V": 2.7
},
"fear": {
"A": 6.5,
"D": 3.6,
"V": 3.2
},
"joy": {
"A": 7.22,
"D": 6.28,
"V": 8.6
},
"sadness": {
"A": 5.21,
"D": 2.82,
"V": 2.21
}
}
emotions_ontology = {
"anger": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#anger",
"disgust": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#disgust",
"fear": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#negative-fear",
"joy": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#joy",
"neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion",
"sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness"
}
onyx__usesEmotionModel = "emoml:big6"
nltk_resources = ['stopwords']
def activate(self, *args, **kwargs): def activate(self, *args, **kwargs):
nltk.download('stopwords')
self._stopwords = stopwords.words('english') self._stopwords = stopwords.words('english')
self._local_path=os.path.dirname(os.path.abspath(__file__)) dictionary={}
dictionary['es'] = {}
with self.open(self.anew_path_es,'rb') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary['es'][row[2]]={}
dictionary['es'][row[2]]['V']=row[3]
dictionary['es'][row[2]]['A']=row[5]
dictionary['es'][row[2]]['D']=row[7]
dictionary['en'] = {}
with self.open(self.anew_path_en,'rb') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary['en'][row[0]]={}
dictionary['en'][row[0]]['V']=row[2]
dictionary['en'][row[0]]['A']=row[4]
dictionary['en'][row[0]]['D']=row[6]
self._dictionary = dictionary
def _my_preprocessor(self, text): def _my_preprocessor(self, text):
@ -116,41 +185,76 @@ class EmotionTextPlugin(SentimentPlugin):
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
text_input = entry.get("text", None) text_input = entry.text
text= self._my_preprocessor(text_input) text = self._my_preprocessor(text_input)
dictionary={} dictionary = self._dictionary[params['language']]
lang = params.get("language", "auto")
if lang == 'es':
with open(self._local_path + self.anew_path_es,'rb') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary[row[2]]={}
dictionary[row[2]]['V']=row[3]
dictionary[row[2]]['A']=row[5]
dictionary[row[2]]['D']=row[7]
else:
with open(self._local_path + self.anew_path_en,'rb') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary[row[0]]={}
dictionary[row[0]]['V']=row[2]
dictionary[row[0]]['A']=row[4]
dictionary[row[0]]['D']=row[6]
feature_set=self._extract_features(text,dictionary,lang) feature_set=self._extract_features(text, dictionary, params['language'])
emotions = EmotionSet() emotions = EmotionSet()
emotions.id = "Emotions0" emotions.id = "Emotions0"
emotion1 = Emotion(id="Emotion0") emotion1 = Emotion(id="Emotion0")
emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']] emotion1["onyx:hasEmotionCategory"] = self.emotions_ontology[feature_set['emotion']]
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V'] emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#valence"] = feature_set['V']
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A'] emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#arousal"] = feature_set['A']
emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D'] emotion1["http://www.gsi.dit.upm.es/ontologies/onyx/vocabularies/anew/ns#dominance"] = feature_set['D']
emotion1.prov(self)
emotions.prov(self)
emotions.onyx__hasEmotion.append(emotion1) emotions.onyx__hasEmotion.append(emotion1)
entry.emotions = [emotions,] entry.emotions = [emotions, ]
yield entry yield entry
ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
test_cases = [
{
'input': 'I hate you',
'expected': {
'emotions': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'anger',
}]
}]
}
}, {
'input': 'i am sad',
'expected': {
'emotions': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'sadness',
}]
}]
}
}, {
'input': 'i am happy with my marks',
'expected': {
'emotions': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'joy',
}]
}]
}
}, {
'input': 'This movie is scary',
'expected': {
'emotions': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'negative-fear',
}]
}]
}
}, {
'input': 'this cake is disgusting' ,
'expected': {
'emotions': [{
'onyx:hasEmotion': [{
'onyx:hasEmotionCategory': ontology + 'negative-fear',
}]
}]
}
}
]

Binary file not shown.

View File

@ -1,64 +1,12 @@
{ ---
"name": "emotion-anew", module: emotion-anew
"module": "emotion-anew", requirements:
"description": "This plugin consists on an emotion classifier using ANEW lexicon dictionary to calculate VAD (valence-arousal-dominance) of the sentence and determinate which emotion is closer to this value. Each emotion has a centroid, calculated according to this article: http://www.aclweb.org/anthology/W10-0208. The plugin is going to look for the words in the sentence that appear in the ANEW dictionary and calculate the average VAD score for the sentence. Once this score is calculated, it is going to seek the emotion that is closest to this value.", - numpy
"author": "@icorcuera", - pandas
"version": "0.5", - nltk
"extra_params": { - scipy
"language": { - scikit-learn
"aliases": ["language", "l"], - textblob
"required": true, - pattern
"options": ["es","en"], - lxml
"default": "en" onyx:usesEmotionModel: "emoml:big6"
}
},
"requirements": {},
"anew_path_es": "/data/Dictionary/Redondo(2007).csv",
"anew_path_en": "/data/Dictionary/ANEW2010All.txt",
"centroids": {
"anger": {
"A": 6.95,
"D": 5.1,
"V": 2.7
},
"disgust": {
"A": 5.3,
"D": 8.05,
"V": 2.7
},
"fear": {
"A": 6.5,
"D": 3.6,
"V": 3.2
},
"joy": {
"A": 7.22,
"D": 6.28,
"V": 8.6
},
"sadness": {
"A": 5.21,
"D": 2.82,
"V": 2.21
}
},
"emotions_ontology": {
"anger": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#anger",
"disgust": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#disgust",
"fear": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#negative-fear",
"joy": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#joy",
"neutral": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#neutral-emotion",
"sadness": "http://gsi.dit.upm.es/ontologies/wnaffect/ns#sadness"
},
"requirements": [
"numpy",
"pandas",
"nltk",
"scipy",
"scikit-learn",
"textblob",
"pattern",
"lxml"
],
"onyx:usesEmotionModel": "emoml:big6",
}

View File

@ -1,45 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
import re
class emoTextANEWTest(unittest.TestCase):
    # Integration test: loads the plugin found next to this file into a Senpy
    # instance and checks the emotion category returned for sample sentences.

    def setUp(self):
        """Create a Flask app and a Senpy instance that scans this directory."""
        self.app = Flask("test_plugin")
        self.dir = os.path.dirname(__file__)
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the plugin synchronously after each test."""
        self.senpy.deactivate_plugin("EmoTextANEW", sync=True)

    def test_analyse(self):
        """Each sample text must be tagged with its expected category URI."""
        anew = self.senpy.plugins["EmoTextANEW"]
        anew.activate()

        ontology = "http://gsi.dit.upm.es/ontologies/wnaffect/ns#"
        # sample sentence -> expected category (suffix appended to `ontology`)
        texts = {
            'I hate you': 'anger',
            'i am sad': 'sadness',
            'i am happy with my marks': 'joy',
            'This movie is scary': 'negative-fear',
            'this cake is disgusting': 'negative-fear',
        }

        for sentence, category in texts.items():
            result = anew.analyse(input=sentence)
            first_set = result.entries[0].emotions[0]
            found = first_set['onyx:hasEmotion'][0]['onyx:hasEmotionCategory']
            assert found == ontology + category

        anew.deactivate()


if __name__ == '__main__':
    unittest.main()

View File

@ -1,3 +0,0 @@
[submodule "data"]
path = data
url = ../data/emotion-wnaffect

View File

@ -2,7 +2,6 @@
from __future__ import division from __future__ import division
import re import re
import nltk import nltk
import logging
import os import os
import string import string
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
@ -14,11 +13,29 @@ from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
from senpy.models import Results, EmotionSet, Entry, Emotion from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(EmotionPlugin, ShelfMixin): class WNAffect(EmotionPlugin, ShelfMixin):
'''Emotion classifier using WordNet-Affect to calculate the percentage '''
Emotion classifier using WordNet-Affect to calculate the percentage
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
or neutral. The only available language is English (en) or neutral. The only available language is English (en)
''' '''
name = 'emotion-wnaffect'
author = ["@icorcuera", "@balkian"]
version = '0.2'
extra_params = {
'language': {
"@id": 'lang_wnaffect',
'aliases': ['language', 'l'],
'required': True,
'options': ['en',]
}
}
synsets_path = "a-synsets.xml"
hierarchy_path = "a-hierarchy.xml"
wn16_path = "wordnet1.6/dict"
onyx__usesEmotionModel = "emoml:big6"
nltk_resources = ['stopwords', 'averaged_perceptron_tagger', 'wordnet']
def _load_synsets(self, synsets_path): def _load_synsets(self, synsets_path):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse(synsets_path) tree = ET.parse(synsets_path)
@ -56,7 +73,6 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
def activate(self, *args, **kwargs): def activate(self, *args, **kwargs):
nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
self._stopwords = stopwords.words('english') self._stopwords = stopwords.words('english')
self._wnlemma = wordnet.WordNetLemmatizer() self._wnlemma = wordnet.WordNetLemmatizer()
self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'} self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
@ -87,16 +103,16 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
'sadness': 'sadness' 'sadness': 'sadness'
} }
self._load_emotions(local_path + self.hierarchy_path) self._load_emotions(self.find_file(self.hierarchy_path))
if 'total_synsets' not in self.sh: if 'total_synsets' not in self.sh:
total_synsets = self._load_synsets(local_path + self.synsets_path) total_synsets = self._load_synsets(self.find_file(self.synsets_path))
self.sh['total_synsets'] = total_synsets self.sh['total_synsets'] = total_synsets
self._total_synsets = self.sh['total_synsets'] self._total_synsets = self.sh['total_synsets']
self._wn16_path = self.wn16_path self._wn16_path = self.wn16_path
self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path)) self._wn16 = WordNetCorpusReader(self.find_file(self._wn16_path), nltk.data.find(self.find_file(self._wn16_path)))
def deactivate(self, *args, **kwargs): def deactivate(self, *args, **kwargs):

View File

@ -1,24 +1,5 @@
--- ---
name: emotion-wnaffect
module: emotion-wnaffect module: emotion-wnaffect
description: 'Emotion classifier using WordNet-Affect to calculate the percentage
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
or neutral. The only available language is English (en)'
author: "@icorcuera @balkian"
version: '0.2'
extra_params:
language:
"@id": lang_wnaffect
aliases:
- language
- l
required: false
options:
- en
synsets_path: "/a-synsets.xml"
hierarchy_path: "/a-hierarchy.xml"
wn16_path: "/wordnet1.6/dict"
onyx:usesEmotionModel: emoml:big6
requirements: requirements:
- nltk>=3.0.5 - nltk>=3.0.5
- lxml>=3.4.2 - lxml>=3.4.2

View File

@ -17,7 +17,9 @@ from nltk.corpus import WordNetCorpusReader
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
class WNAffect: class WNAffect:
"""WordNet-Affect ressource.""" """WordNet-Affect resource."""
nltk_resources = ['averaged_perceptron_tagger']
def __init__(self, wordnet16_dir, wn_domains_dir): def __init__(self, wordnet16_dir, wn_domains_dir):
"""Initializes the WordNet-Affect object.""" """Initializes the WordNet-Affect object."""

View File

@ -1,19 +0,0 @@
from senpy.plugins import SentimentPlugin
from senpy.models import Response, Entry
import logging
logger = logging.getLogger(__name__)
class ExamplePlugin(SentimentPlugin):
    # Minimal demonstration plugin: echoes the input reversed together with
    # the value of the 'parameter' argument.

    def analyse(self, *args, **kwargs):
        """Return a Response holding one Entry built from ``kwargs['input']``.

        Reads ``kwargs['input']`` and ``kwargs['parameter']``; a missing key
        raises ``KeyError``. The entry gets two extra fields:
        ``example:reversed`` (the input reversed) and ``example:the_answer``
        (the parameter value).
        """
        # Logger.warn is a deprecated alias; use warning(). Passing the value
        # as a logging argument defers formatting until the record is emitted.
        logger.warning('Analysing with the example.')
        logger.warning('The answer to this response is: %s.', kwargs['parameter'])
        resp = Response()
        ent = Entry(kwargs['input'])
        ent['example:reversed'] = kwargs['input'][::-1]
        ent['example:the_answer'] = kwargs['parameter']
        resp.entries.append(ent)
        return resp

View File

@ -1,17 +0,0 @@
{
"name": "ExamplePlugin",
"module": "example",
"description": "I am just an example",
"author": "@balkian",
"version": "0.1",
"extra_params": {
"parameter": {
"@id": "parameter",
"aliases": ["parameter", "param"],
"required": true,
"default": 42
}
},
"requirements": ["noop"],
"custom_attribute": "42"
}

View File

@ -0,0 +1,37 @@
from senpy.plugins import Analysis
from senpy.models import Response, Entry
import logging
logger = logging.getLogger(__name__)
class ExamplePlugin(Analysis):
    '''A *VERY* simple plugin that exemplifies the development of Senpy Plugins'''

    # Plugin metadata, declared as class attributes.
    name = "example-plugin"
    author = "@balkian"
    version = "0.1"
    # Extra request parameter declared by this plugin (required, default 42).
    extra_params = {
        "parameter": {
            "@id": "parameter",
            "aliases": ["parameter", "param"],
            "required": True,
            "default": 42
        }
    }
    custom_attribute = "42"

    def analyse_entry(self, entry, params):
        # Annotates `entry` in place and yields it once:
        #   example:reversed   -> entry.text reversed
        #   example:the_answer -> params['parameter']
        logger.debug('Analysing with the example.')
        # Pass the value as a logging argument so formatting only happens
        # when debug logging is enabled.
        logger.debug('The answer to this response is: %s.', params['parameter'])
        entry['example:reversed'] = entry.text[::-1]
        entry['example:the_answer'] = params['parameter']

        yield entry

    # Declarative test data: input text and the fields expected in the result.
    test_cases = [{
        'input': 'hello',
        'expected': {
            'example:reversed': 'olleh'
        }
    }]

View File

@ -1,23 +0,0 @@
import unittest
from flask import Flask
import os
from senpy.extensions import Senpy
class emoTextWAFTest(unittest.TestCase):
    # Smoke test: a Senpy instance pointed at this directory must discover
    # exactly one plugin.

    def setUp(self):
        """Create a Flask app and a Senpy instance that scans this directory."""
        self.app = Flask("Example")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the example plugin synchronously after each test."""
        self.senpy.deactivate_plugin("ExamplePlugin", sync=True)

    def test_analyse(self):
        """Exactly one plugin is loaded from the plugin folder."""
        # assertEqual reports both values on failure and is not stripped by
        # `python -O`; the former trailing `assert True` checked nothing.
        self.assertEqual(len(self.senpy.plugins.keys()), 1)


if __name__ == '__main__':
    unittest.main()

View File

@ -1,3 +0,0 @@
[submodule "data"]
path = data
url = ../data/sentiment-basic

View File

@ -1,5 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os import os
import logging
import string import string
import nltk import nltk
import pickle import pickle
@ -13,24 +14,40 @@ from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment from senpy.models import Results, Entry, Sentiment
logger = logging.getLogger(__name__)
class SentimentBasic(SentimentPlugin):
class SentiTextPlugin(SentimentPlugin): '''
Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.
'''
name = "sentiment-basic"
author = "github.com/nachtkatze"
version = "0.1.1"
extra_params = {
"language": {
"aliases": ["language", "l"],
"required": True,
"options": ["en","es", "it", "fr", "auto"],
"default": "auto"
}
}
sentiword_path = "SentiWordNet_3.0.txt"
pos_path = "unigram_spanish.pickle"
maxPolarityValue = 1
minPolarityValue = -1
nltk_resources = ['punkt','wordnet']
def _load_swn(self): def _load_swn(self):
self.swn_path = path.join(path.abspath(path.dirname(__file__)), self.sentiword_path) self.swn_path = self.find_file(self.sentiword_path)
swn = SentiWordNet(self.swn_path) swn = SentiWordNet(self.swn_path)
return swn return swn
def _load_pos_tagger(self): def _load_pos_tagger(self):
self.pos_path = path.join(path.abspath(path.dirname(__file__)), self.pos_path) self.pos_path = self.find_file(self.pos_path)
with open(self.pos_path, 'r') as f: with open(self.pos_path, 'r') as f:
tagger = pickle.load(f) tagger = pickle.load(f)
return tagger return tagger
def activate(self, *args, **kwargs): def activate(self, *args, **kwargs):
nltk.download(['punkt','wordnet'])
self._swn = self._load_swn() self._swn = self._load_swn()
self._pos_tagger = self._load_pos_tagger() self._pos_tagger = self._load_pos_tagger()
@ -54,11 +71,6 @@ class SentiTextPlugin(SentimentPlugin):
tokens[i]['tokens'] = self._pos_tagger.tag(tokens[i]['tokens']) tokens[i]['tokens'] = self._pos_tagger.tag(tokens[i]['tokens'])
return tokens return tokens
# def _stopwords(sentences, lang='english'):
# for i in sentences:
# sentences[i]['tokens'] = [t for t in sentences[i]['tokens'] if t not in nltk.corpus.stopwords.words(lang)]
# return sentences
def _compare_synsets(self, synsets, tokens, i): def _compare_synsets(self, synsets, tokens, i):
for synset in synsets: for synset in synsets:
for word in tokens[i]['lemmas']: for word in tokens[i]['lemmas']:
@ -71,7 +83,7 @@ class SentiTextPlugin(SentimentPlugin):
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
language = params.get("language") language = params.get("language")
text = entry.get("text", None) text = entry.text
tokens = self._tokenize(text) tokens = self._tokenize(text)
tokens = self._pos(tokens) tokens = self._pos(tokens)
sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'} sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
@ -130,19 +142,41 @@ class SentiTextPlugin(SentimentPlugin):
except: except:
if n_pos == 0 and n_neg == 0: if n_pos == 0 and n_neg == 0:
g_score = 0.5 g_score = 0.5
polarity = 'marl:Neutral' if g_score >= 0.5:
polarity_value = 0
if g_score > 0.5:
polarity = 'marl:Positive' polarity = 'marl:Positive'
polarity_value = 1 polarity_value = 1
elif g_score < 0.5: elif g_score < 0.5:
polarity = 'marl:Negative' polarity = 'marl:Negative'
polarity_value = -1 polarity_value = -1
else:
polarity = 'marl:Neutral'
polarity_value = 0
opinion = Sentiment(id="Opinion0"+'_'+str(i), opinion = Sentiment(id="Opinion0"+'_'+str(i),
marl__hasPolarity=polarity, marl__hasPolarity=polarity,
marl__polarityValue=polarity_value) marl__polarityValue=polarity_value)
opinion.prov(self)
entry.sentiments.append(opinion) entry.sentiments.append(opinion)
yield entry yield entry
test_cases = [
{
'input': u'Odio ir al cine',
'params': {'language': 'es'},
'polarity': 'marl:Negative'
},
{
'input': u'El cielo está nublado',
'params': {'language': 'es'},
'polarity': 'marl:Positive'
},
{
'input': u'Esta tarta está muy buena',
'params': {'language': 'es'},
'polarity': 'marl:Negative'
}
]

View File

@ -1,24 +1,7 @@
{ ---
"name": "sentiment-basic", module: sentiment-basic
"module": "sentiment-basic", requirements:
"description": "Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.", - nltk>=3.0.5
"author": "github.com/nachtkatze", - scipy>=0.14.0
"version": "0.1", - textblob
"requirements": [
"nltk>=3.0.5",
"scipy>=0.14.0",
"textblob"
],
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["en","es", "it", "fr", "auto"],
"default": "auto"
},
},
"sentiword_path": "data/SentiWordNet_3.0.txt",
"pos_path": "data/unigram_spanish.pickle",
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}

View File

@ -46,7 +46,7 @@ class SentiWordNet(object):
pos,syn_set_id,pos_score,neg_score,syn_set_score,\ pos,syn_set_id,pos_score,neg_score,syn_set_score,\
gloss = fields gloss = fields
except: except:
print "Found data without all details" print("Found data without all details")
pass pass
if pos and syn_set_score: if pos and syn_set_score:

View File

@ -1,42 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
class SentiTextTest(unittest.TestCase):
    # Integration test: loads the plugin found next to this file into a Senpy
    # instance and checks the polarity returned for sample Spanish sentences.

    def setUp(self):
        """Create a Flask app and a Senpy instance that scans this directory."""
        self.app = Flask("test_plugin")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the plugin synchronously after each test."""
        self.senpy.deactivate_plugin("SentiText", sync=True)

    def test_analyse(self):
        """The first sentiment of each sample must carry the expected polarity."""
        plugin = self.senpy.plugins["SentiText"]
        plugin.activate()

        # sample sentence -> expected marl polarity
        texts = {'Odio ir al cine': 'marl:Neutral',
                 'El cielo esta nublado': 'marl:Positive',
                 'Esta tarta esta muy buena': 'marl:Neutral'}

        for text in texts:
            response = plugin.analyse(input=text)
            sentimentSet = response.entries[0].sentiments[0]
            # Call form of print: valid on Python 2 (single argument) and 3,
            # unlike the print statement, which is a SyntaxError on Python 3.
            print(sentimentSet)
            expected = texts[text]

            assert sentimentSet['marl:hasPolarity'] == expected

        plugin.deactivate()


if __name__ == '__main__':
    unittest.main()

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import time import time
import logging
import requests import requests
import json import json
import string import string
@ -9,20 +8,33 @@ import os
from os import path from os import path
import time import time
from senpy.plugins import SentimentPlugin from senpy.plugins import SentimentPlugin
from senpy.models import Results, Entry, Sentiment, Error from senpy.models import Results, Entry, Entity, Topic, Sentiment, Error
from senpy.utils import check_template
import mocked_request from mocked_request import mocked_requests_post
try: try:
from unittest import mock from unittest import mock
except ImportError: except ImportError:
import mock import mock
logger = logging.getLogger(__name__)
class MeaningCloudPlugin(SentimentPlugin): class MeaningCloudPlugin(SentimentPlugin):
version = "0.1" '''
Sentiment analysis with meaningCloud service.
To use this plugin, you need to obtain an API key from meaningCloud signing up here:
https://www.meaningcloud.com/developer/login
When you had obtained the meaningCloud API Key, you have to provide it to the plugin, using param apiKey.
Example request:
http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=<API key>&input=I%20love%20Madrid.
'''
name = 'sentiment-meaningcloud'
author = 'GSI UPM'
version = "1.1"
maxPolarityValue = 1
minPolarityValue = -1
extra_params = { extra_params = {
"language": { "language": {
@ -37,7 +49,6 @@ class MeaningCloudPlugin(SentimentPlugin):
} }
} }
"""MeaningCloud plugin uses API from Meaning Cloud to perform sentiment analysis."""
def _polarity(self, value): def _polarity(self, value):
if 'NONE' in value: if 'NONE' in value:
@ -81,7 +92,7 @@ class MeaningCloudPlugin(SentimentPlugin):
if not api_response.get('score_tag'): if not api_response.get('score_tag'):
raise Error(r.json()) raise Error(r.json())
entry['language_detected'] = lang entry['language_detected'] = lang
logger.info(api_response) self.log.debug(api_response)
agg_polarity, agg_polarityValue = self._polarity( agg_polarity, agg_polarityValue = self._polarity(
api_response.get('score_tag', None)) api_response.get('score_tag', None))
agg_opinion = Sentiment( agg_opinion = Sentiment(
@ -89,13 +100,14 @@ class MeaningCloudPlugin(SentimentPlugin):
marl__hasPolarity=agg_polarity, marl__hasPolarity=agg_polarity,
marl__polarityValue=agg_polarityValue, marl__polarityValue=agg_polarityValue,
marl__opinionCount=len(api_response['sentence_list'])) marl__opinionCount=len(api_response['sentence_list']))
agg_opinion.prov(self)
entry.sentiments.append(agg_opinion) entry.sentiments.append(agg_opinion)
logger.info(api_response['sentence_list']) self.log.debug(api_response['sentence_list'])
count = 1 count = 1
for sentence in api_response['sentence_list']: for sentence in api_response['sentence_list']:
for nopinion in sentence['segment_list']: for nopinion in sentence['segment_list']:
logger.info(nopinion) self.log.debug(nopinion)
polarity, polarityValue = self._polarity( polarity, polarityValue = self._polarity(
nopinion.get('score_tag', None)) nopinion.get('score_tag', None))
opinion = Sentiment( opinion = Sentiment(
@ -107,39 +119,37 @@ class MeaningCloudPlugin(SentimentPlugin):
nif__beginIndex=nopinion.get('inip', None), nif__beginIndex=nopinion.get('inip', None),
nif__endIndex=nopinion.get('endp', None)) nif__endIndex=nopinion.get('endp', None))
count += 1 count += 1
opinion.prov(self)
entry.sentiments.append(opinion) entry.sentiments.append(opinion)
mapper = {'es': 'es.', 'en': '', 'ca': 'es.', 'it':'it.', 'fr':'fr.', 'pt':'pt.'} mapper = {'es': 'es.', 'en': '', 'ca': 'es.', 'it':'it.', 'fr':'fr.', 'pt':'pt.'}
for sent_entity in api_response_topics['entity_list']: for sent_entity in api_response_topics['entity_list']:
resource = "_".join(sent_entity.get('form', None).split()) resource = "_".join(sent_entity.get('form', None).split())
entity = Sentiment( entity = Entity(
id="Entity{}".format(sent_entity.get('id')), id="Entity{}".format(sent_entity.get('id')),
marl__describesObject="http://{}dbpedia.org/resource/{}".format( itsrdf__taIdentRef="http://{}dbpedia.org/resource/{}".format(
mapper[lang], resource), mapper[lang], resource),
nif__anchorOf=sent_entity.get('form', None), nif__anchorOf=sent_entity.get('form', None),
nif__beginIndex=sent_entity['variant_list'][0].get('inip', None), nif__beginIndex=sent_entity['variant_list'][0].get('inip', None),
nif__endIndex=sent_entity['variant_list'][0].get('endp', None)) nif__endIndex=sent_entity['variant_list'][0].get('endp', None))
entity[ sementity = sent_entity['sementity'].get('type', None).split(">")[-1]
'@type'] = "ODENTITY_{}".format( entity['@type'] = "ODENTITY_{}".format(sementity)
sent_entity['sementity'].get('type', None).split(">")[-1]) entity.prov(self)
entry.entities.append(entity) entry.entities.append(entity)
for topic in api_response_topics['concept_list']: for topic in api_response_topics['concept_list']:
if 'semtheme_list' in topic: if 'semtheme_list' in topic:
for theme in topic['semtheme_list']: for theme in topic['semtheme_list']:
concept = Sentiment( concept = Topic()
id="Topic{}".format(topic.get('id')), concept.id = "Topic{}".format(topic.get('id'))
prov__wasDerivedFrom="http://dbpedia.org/resource/{}". concept['@type'] = "ODTHEME_{}".format(theme['type'].split(">")[-1])
format(theme['type'].split('>')[-1])) concept['fam:topic-reference'] = "http://dbpedia.org/resource/{}".format(theme['type'].split('>')[-1])
concept[ entry.prov(self)
'@type'] = "ODTHEME_{}".format(
theme['type'].split(">")[-1])
entry.topics.append(concept) entry.topics.append(concept)
yield entry yield entry
@mock.patch('requests.post', side_effect=mocked_request.mocked_requests_post) @mock.patch('requests.post', side_effect=mocked_requests_post)
def test(self, *args, **kwargs): def test(self, *args, **kwargs):
results = list() results = list()
params = {'algo': 'sentiment-meaningCloud', params = {'algo': 'sentiment-meaningCloud',
@ -156,15 +166,16 @@ class MeaningCloudPlugin(SentimentPlugin):
'language': 'en', 'language': 'en',
'apikey': '00000', 'apikey': '00000',
'algorithm': 'sentiment-meaningCloud'} 'algorithm': 'sentiment-meaningCloud'}
for i in range(100):
res = next(self.analyse_entry(Entry(nif__isString="Hello World Obama"), params)) res = next(self.analyse_entry(Entry(nif__isString="Hello World Obama"), params))
results.append(res.sentiments[0]['marl:hasPolarity'])
results.append(res.topics[0]['prov:wasDerivedFrom'])
results.append(res.entities[0]['prov:wasDerivedFrom'])
assert 'marl:Neutral' in results check_template(res,
assert 'http://dbpedia.org/resource/Astronomy' in results {'sentiments': [
assert 'http://dbpedia.org/resource/Obama' in results {'marl:hasPolarity': 'marl:Neutral'}],
'entities': [
{'itsrdf:taIdentRef': 'http://dbpedia.org/resource/Obama'}],
'topics': [
{'fam:topic-reference': 'http://dbpedia.org/resource/Astronomy'}]
})
if __name__ == '__main__': if __name__ == '__main__':
from senpy import easy_test from senpy import easy_test

View File

@ -7,7 +7,6 @@ def mocked_requests_post(*args, **kwargs):
def json(self): def json(self):
return self.json_data return self.json_data
print("Mocking request")
if args[0] == 'http://api.meaningcloud.com/sentiment-2.1': if args[0] == 'http://api.meaningcloud.com/sentiment-2.1':
return MockResponse({ return MockResponse({
'model': 'general_en', 'model': 'general_en',

View File

@ -1,11 +0,0 @@
{
"name": "sentiment-meaningcloud",
"module": "sentiment_meaningcloud",
"description": "Sentiment analysis with the meaningCloud service. To use this plugin, you need to obtain an API key from meaningCloud by signing up here: https://www.meaningcloud.com/developer/login. Once you have obtained the meaningCloud API key, provide it to the plugin using the apiKey parameter. Example request: http://senpy.cluster.gsi.dit.upm.es/api/?algo=meaningCloud&language=en&apiKey=<put here your API key>&input=I%20love%20Madrid.",
"author": "GSI UPM",
"version": "1.0",
"requirements": {},
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}

View File

@ -1,10 +1,12 @@
# Sentiment-vader plugin # Sentiment-vader plugin
=========
Vader is a plugin developed at GSI UPM for sentiment analysis. Vader is a plugin developed at GSI UPM for sentiment analysis.
The response of this plugin uses [Marl ontology](https://www.gsi.dit.upm.es/ontologies/marl/) developed at GSI UPM for semantic web.
## Acknowledgements
This plugin uses the vaderSentiment module underneath, which is described in the paper:
This plugin was developed using the vaderSentiment module, which is described in the paper:
VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text
C.J. Hutto and Eric Gilbert C.J. Hutto and Eric Gilbert
Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014. Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
@ -15,16 +17,16 @@ For more information about the functionality, check the official repository
https://github.com/cjhutto/vaderSentiment https://github.com/cjhutto/vaderSentiment
The response of this plugin uses [Marl ontology](https://www.gsi.dit.upm.es/ontologies/marl/) developed at GSI UPM for semantic web.
## Usage ## Usage
Params accepted: Parameters:
- Language: es (Spanish), en (English). - Language: es (Spanish), en (English).
- Input: Text to analyse. - Input: Text to analyse.
Example request: Example request:
``` ```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-vader&language=en&input=I%20love%20Madrid http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-vader&language=en&input=I%20love%20Madrid
``` ```

View File

@ -1,16 +0,0 @@
==========
This README file describes the plugin vaderSentiment.
This plugin was developed using the vaderSentiment module, which is described in the paper:
VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text
C.J. Hutto and Eric Gilbert
Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
If you use this plugin in your research, please cite the above paper
For more information about the functionality, check the official repository
https://github.com/cjhutto/vaderSentiment
========

View File

@ -1,25 +0,0 @@
{
"name": "sentiment-vader",
"module": "sentiment-vader",
"description": "Sentiment classifier using vaderSentiment module. Params accepted: Language: {en, es}. The output uses Marl ontology developed at GSI UPM for semantic web.",
"author": "@icorcuera",
"version": "0.1",
"extra_params": {
"language": {
"@id": "lang_rand",
"aliases": ["language", "l"],
"required": false,
"options": ["es", "en", "auto"]
},
"aggregate": {
"aliases": ["aggregate","agg"],
"options": ["true", "false"],
"required": false,
"default": false
}
},
"requirements": {}
}

View File

@ -1,44 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
from flask.ext.testing import TestCase
import unittest
class vaderTest(unittest.TestCase):
    """Checks the polarity reported by the "vaderSentiment" plugin for sample sentences."""

    def setUp(self):
        """Create a Flask app and bind to it a Senpy instance that reads plugins from this file's folder."""
        self.app = Flask("test_plugin")
        # os.path.join() with a single argument returns it unchanged, so
        # dirname() alone yields the same folder.
        self.dir = os.path.dirname(__file__)
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the plugin through Senpy after each test."""
        self.senpy.deactivate_plugin("vaderSentiment", sync=True)

    def test_analyse(self):
        """For each sample text, the sentiment with the highest
        marl:polarityValue must carry the expected marl:hasPolarity."""
        plugin = self.senpy.plugins["vaderSentiment"]
        plugin.activate()
        texts = {'I am tired :(': 'marl:Negative',
                 'I love pizza': 'marl:Positive',
                 'I like going to the cinema :)': 'marl:Positive',
                 'This cake is disgusting': 'marl:Negative'}
        try:
            for text, expected in texts.items():
                response = plugin.analyse(input=text)
                sentiment_set = response.entries[0].sentiments
                max_sentiment = max(sentiment_set,
                                    key=lambda x: x['marl:polarityValue'])
                # assertEqual reports both values on failure and is not
                # stripped when Python runs with -O, unlike a bare assert.
                self.assertEqual(max_sentiment['marl:hasPolarity'], expected)
        finally:
            # Deactivate even when an assertion above fails.
            plugin.deactivate()
if __name__ == '__main__':
unittest.main()

View File

@ -5,15 +5,37 @@ from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Sentiment, Entry from senpy.models import Results, Sentiment, Entry
import logging import logging
logger = logging.getLogger(__name__)
class vaderSentimentPlugin(SentimentPlugin): class VaderSentimentPlugin(SentimentPlugin):
'''
Sentiment classifier using vaderSentiment module. Params accepted: Language: {en, es}. The output uses Marl ontology developed at GSI UPM for semantic web.
'''
name = "sentiment-vader"
module = "sentiment-vader"
author = "@icorcuera"
version = "0.1.1"
extra_params = {
"language": {
"@id": "lang_rand",
"aliases": ["language", "l"],
"default": "auto",
"options": ["es", "en", "auto"]
},
def analyse_entry(self,entry,params): "aggregate": {
"aliases": ["aggregate","agg"],
"options": ["true", "false"],
"default": False
}
logger.debug("Analysing with params {}".format(params)) }
requirements = {}
text_input = entry.get("text", None) def analyse_entry(self, entry, params):
self.log.debug("Analysing with params {}".format(params))
text_input = entry.text
aggregate = params['aggregate'] aggregate = params['aggregate']
score = sentiment(text_input) score = sentiment(text_input)
@ -22,14 +44,17 @@ class vaderSentimentPlugin(SentimentPlugin):
marl__hasPolarity= "marl:Positive", marl__hasPolarity= "marl:Positive",
marl__algorithmConfidence= score['pos'] marl__algorithmConfidence= score['pos']
) )
opinion0.prov(self)
opinion1 = Sentiment(id= "Opinion_negative", opinion1 = Sentiment(id= "Opinion_negative",
marl__hasPolarity= "marl:Negative", marl__hasPolarity= "marl:Negative",
marl__algorithmConfidence= score['neg'] marl__algorithmConfidence= score['neg']
) )
opinion1.prov(self)
opinion2 = Sentiment(id= "Opinion_neutral", opinion2 = Sentiment(id= "Opinion_neutral",
marl__hasPolarity = "marl:Neutral", marl__hasPolarity = "marl:Neutral",
marl__algorithmConfidence = score['neu'] marl__algorithmConfidence = score['neu']
) )
opinion2.prov(self)
if aggregate == 'true': if aggregate == 'true':
res = None res = None
@ -47,3 +72,25 @@ class vaderSentimentPlugin(SentimentPlugin):
entry.sentiments.append(opinion2) entry.sentiments.append(opinion2)
yield entry yield entry
# Sample inputs paired with the polarity label expected for each one.
# The earlier empty-list assignment was dead code (immediately overwritten
# by this list), so only the populated definition is kept.
# NOTE(review): 'I love pizza :(' expects marl:Positive and
# 'I enjoy going to the cinema :)' expects marl:Negative — confirm that
# these labels are intentional.
test_cases = [
    {
        'input': 'I am tired :(',
        'polarity': 'marl:Negative'
    },
    {
        'input': 'I love pizza :(',
        'polarity': 'marl:Positive'
    },
    {
        'input': 'I enjoy going to the cinema :)',
        'polarity': 'marl:Negative'
    },
    {
        'input': 'This cake is disgusting',
        'polarity': 'marl:Negative'
    },
]