From 675a905ab4daf647863ac27cf328f368c1621e34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?=
Date: Fri, 14 Dec 2018 18:50:35 +0100
Subject: [PATCH 1/2] Add depeche mood

---
 emotion-depechemood/depechemood_plugin.py | 117 ++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 emotion-depechemood/depechemood_plugin.py

diff --git a/emotion-depechemood/depechemood_plugin.py b/emotion-depechemood/depechemood_plugin.py
new file mode 100644
index 0000000..8b243af
--- /dev/null
+++ b/emotion-depechemood/depechemood_plugin.py
@@ -0,0 +1,117 @@
+#!/usr/local/bin/python
+# coding: utf-8
+
+import os
+import re
+import string
+import numpy as np
+import pandas as pd
+from six.moves import urllib
+from nltk.corpus import stopwords
+
+from senpy import EmotionPlugin, TextBox, models
+
+
+class DepecheMood(TextBox, EmotionPlugin):
+    '''Plugin that uses the DepecheMood++ emotion lexicon.'''
+
+    author = 'Oscar Araque'
+    version = '0.1'
+
+    def __init__(self, *args, **kwargs):
+        super(DepecheMood, self).__init__(*args, **kwargs)
+        self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"
+        self.EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',]
+        self.noise = self._noise()
+        self.stop_words = stopwords.words('english') + ['']
+
+    def _noise(self):
+        '''Build the translation table that deletes punctuation characters.'''
+        noise = set(string.punctuation) | set('«»')
+        return {ord(c): None for c in noise}
+
+    def activate(self):
+        '''Load the lexicon (downloading it if needed) and cache its vocabulary.'''
+        self._lex = self.download_lex()
+        self._lex_vocab = set(self._lex)
+
+    def clean_str(self, string):
+        '''Normalize text: digits become "num", punctuation is padded, result is lowercased.'''
+        string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
+        string = re.sub(r"[0-9]+", " num ", string)
+        string = re.sub(r"\'s", " \'s", string)
+        string = re.sub(r"\'ve", " \'ve", string)
+        string = re.sub(r"n\'t", " n\'t", string)
+        string = re.sub(r"\'re", " \'re", string)
+        string = re.sub(r"\'d", " \'d", string)
+        string = re.sub(r"\'ll", " \'ll", string)
+        string = re.sub(r"\.", " . ", string)
+        string = re.sub(r",", " , ", string)
+        string = re.sub(r"!", " ! ", string)
+        string = re.sub(r"\(", " ( ", string)
+        string = re.sub(r"\)", " ) ", string)
+        string = re.sub(r"\?", " ? ", string)
+        string = re.sub(r"\s{2,}", " ", string)
+        return string.strip().lower()
+
+    def preprocess(self, text):
+        '''Return the non-stopword tokens of `text`, or None when `text` is None.'''
+        if text is None:
+            return None
+        stop_words = set(self.stop_words)  # set membership instead of a list scan per token
+        tokens = self.clean_str(text).translate(self.noise).split(' ')
+        return [tok for tok in tokens if tok not in stop_words]
+
+    def estimate_emotion(self, tokens, emotion):
+        '''Return the mean lexicon score of `emotion` over `tokens` (0.0 if there are none).'''
+        scores = [self._lex[tok][emotion] for tok in tokens]
+        if not scores:
+            return 0.0
+        return np.sum(scores) / len(scores)
+
+    def estimate_all_emotions(self, tokens):
+        '''Return {emotion: mean score} computed over the tokens present in the lexicon.'''
+        # preprocess() yields None for a None text; treat that as "no tokens".
+        intersection = set(tokens or ()) & self._lex_vocab
+        return {emotion: self.estimate_emotion(intersection, emotion)
+                for emotion in self.EMOTIONS}
+
+    def download_lex(self, file_path='./DepecheMood_english_token_full.tsv', freq_threshold=10):
+        '''Load the lexicon as {token: {emotion: score}}, downloading it if missing.
+
+        Rows whose `freq` column is below `freq_threshold` are discarded.
+        '''
+        if not os.path.exists(file_path):
+            urllib.request.urlretrieve(self.LEXICON_URL, file_path)
+
+        lexicon = pd.read_csv(file_path, sep='\t', index_col=0)
+        lexicon = lexicon[lexicon['freq'] >= freq_threshold]
+        lexicon = lexicon.drop('freq', axis=1)
+        return lexicon.T.to_dict()
+
+    def output(self, output, entry, **kwargs):
+        '''Attach one EmotionSet holding an Emotion per (label, value) of `output`.'''
+        s = models.EmotionSet()
+        entry.emotions.append(s)
+        for label, value in output.items():
+            e = models.Emotion(onyx__hasEmotionCategory=label,
+                               onyx__hasEmotionIntensity=value)
+            s.onyx__hasEmotion.append(e)
+        return entry
+
+    def predict_one(self, input, **kwargs):
+        '''Tokenize `input` and return its {emotion: score} estimation.'''
+        tokens = self.preprocess(input)
+        estimation = self.estimate_all_emotions(tokens)
+        return estimation
+
+    test_cases = [
+        {
+            'text': ''
+        }
+    ]
+
+
+if __name__ == '__main__':
+    from senpy.utils import easy
+    easy()
From
d5f9ef88b241c3e48f035a44bb7cfb62c5c92157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Wed, 9 Jan 2019 16:18:12 +0100 Subject: [PATCH 2/2] Add new taiger plugin --- sentiment-taiger/README.md | 19 ++-- sentiment-taiger/docker-compose.yml | 2 +- sentiment-taiger/taiger3c_plugin.py | 164 ++++++++++++++++++++++++++++ sentiment-taiger/taiger_plugin.py | 4 +- 4 files changed, 179 insertions(+), 10 deletions(-) create mode 100644 sentiment-taiger/taiger3c_plugin.py diff --git a/sentiment-taiger/README.md b/sentiment-taiger/README.md index 7199d4f..cc1f175 100644 --- a/sentiment-taiger/README.md +++ b/sentiment-taiger/README.md @@ -1,6 +1,9 @@ # Senpy Plugin Taiger -Service that analyzes sentiments from social posts written in Spanish or English. +Proxy for two of Taiger's sentiment analysis services for social media posts: + +* taiger-plugin: proxy for a service that normalizes the text, and gives both a polarity and a polarity value for the text. It works for Spanish and English text. +* taiger3c-plugin: it uses a simpler service that only returns a polarity (positive, negative or none). It only works for Spanish. ## Usage @@ -8,18 +11,19 @@ Service that analyzes sentiments from social posts written in Spanish or English To use this plugin, you should use a GET Requests with the following possible params: Params: - Input: text to analyse.(required) -- Endpoint: Enpoint to the Taiger service. ## Example of Usage Example request: ``` -http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-taiger&inputText=This%20is%20amazing +curl "http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-taiger&inputText=This%20is%20amazing" + +# Or, for the taiger3c plugin: +curl "http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-taiger3c&inputText=Me%20encanta" ``` -Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). 
Specifically, NIF API section. - -For example, this would be the example respond for the request done. +This plugin follows the senpy schema and vocabularies, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, the NIF API section. +It should look like this: ``` { @@ -53,7 +57,8 @@ For example, this would be the example respond for the request done. } ``` -As it can be seen, this plugin analyzes sentiment givin three categories or tags: `marl:Positive`, `marl:Neutral` or `marl:Negative`, that will be held in the `marl:hasPolarity` field. Moreover, the plugin retrieves a `marl:polarityValue`. +As can be seen, this plugin analyzes sentiment giving three categories or tags: `marl:Positive`, `marl:Neutral` or `marl:Negative`, that will be held in the `marl:hasPolarity` field. +Moreover, the plugin retrieves a `marl:polarityValue` (a value between -1 and 1). This plugin supports **python2.7** and **python3**. ![alt GSI Logo][logoGSI] diff --git a/sentiment-taiger/docker-compose.yml b/sentiment-taiger/docker-compose.yml index fbcaf03..3d53c10 100644 --- a/sentiment-taiger/docker-compose.yml +++ b/sentiment-taiger/docker-compose.yml @@ -8,4 +8,4 @@ services: volumes: - ".:/senpy-plugins" environment: - TAIGER_ENDPOINT: 'http://34.244.91.7:8080/sentiment/classifyPositivity' + TAIGER_ENDPOINT: "${TAIGER_ENDPOINT:-http://somedi-taiger.hopto.org:5406/es_sentiment_analyzer_3classes}" diff --git a/sentiment-taiger/taiger3c_plugin.py b/sentiment-taiger/taiger3c_plugin.py new file mode 100644 index 0000000..2c0d790 --- /dev/null +++ b/sentiment-taiger/taiger3c_plugin.py @@ -0,0 +1,164 @@ +# -*- coding: utf-8 -*- + +import time +import requests +import json +import string +import os +from os import path +import time +from senpy.plugins import SentimentPlugin +from senpy.models import Results, Entry, Entity, Topic, Sentiment, Error + + +TAIGER_ENDPOINT = os.environ.get("TAIGER3C_ENDPOINT", 
'http://somedi-taiger.hopto.org:5406/es_sentiment_analyzer_3classes')
+
+
+class TaigerPlugin3cats(SentimentPlugin):
+    '''
+    Service that analyzes sentiments from social posts written in Spanish.
+
+    Example request:
+
+    http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-taiger3c&inputText=Me%20encanta
+    '''
+    name = 'sentiment-taiger3c'
+    author = 'GSI UPM'
+    version = "0.1"
+    maxPolarityValue = 1
+    minPolarityValue = -1
+
+    def _polarity(self, value):
+        '''Map the service label (NONE, N or P) to a (marl polarity, numeric value) pair.'''
+        if 'NONE' == value:
+            polarity = 'marl:Neutral'
+            value = 0
+        elif 'N' == value:
+            polarity = 'marl:Negative'
+            value = -1
+        elif 'P' == value:
+            polarity = 'marl:Positive'
+            value = 1
+        else:
+            raise ValueError('unknown polarity: {}'.format(value))
+
+        return polarity, value
+
+    def analyse_entry(self, entry, params):
+        '''Send the entry text to the Taiger endpoint and attach the resulting Sentiment.'''
+        txt = entry['nif:isString']
+        api = TAIGER_ENDPOINT
+        parameters = {
+            'text': txt
+        }
+        try:
+            r = requests.get(
+                api, params=parameters, timeout=3)
+            agg_polarity, value = self._polarity(r.text.strip())
+        except requests.exceptions.Timeout:
+            raise Error("No response from the API")
+        except Exception as ex:
+            raise Error("There was a problem with the endpoint: {}".format(ex))
+        if not agg_polarity:
+            raise Error('No category in response: {}'.format(r.text))
+        self.log.debug(agg_polarity)
+        agg_opinion = Sentiment(
+            id="Opinion0",
+            marl__hasPolarity=agg_polarity,
+            marl__polarityValue=value,
+        )
+        agg_opinion.prov(self)
+        entry.sentiments.append(agg_opinion)
+
+        yield entry
+
+    test_cases = [
+        {
+            'params': {
+                'algo': 'sentiment-taiger',
+                'intype': 'direct',
+                'expanded-jsonld': 0,
+                'informat': 'text',
+                'prefix': '',
+                'plugin_type': 'analysisPlugin',
+                'urischeme': 'RFC5147String',
+                'outformat': 'json-ld',
+                'conversion': 'full',
+                'language': 'en',
+                'apikey': '00000',
+                'algorithm': 'sentiment-taiger'
+            },
+            'input': 'I hate to say this',
+            'expected': {
+                'sentiments': [
+                    {'marl:hasPolarity': 'marl:Negative'}],
+            },
+            'responses': [
+                {
+                    'url': TAIGER_ENDPOINT,
+                    'body': 'N',
+                }
+            ]
+        },
+        {
+            'params': {
+                'algo': 'sentiment-taiger',
+                'intype': 'direct',
+                'expanded-jsonld': 0,
+                'informat': 'text',
+                'prefix': '',
+                'plugin_type': 'analysisPlugin',
+                'urischeme': 'RFC5147String',
+                'outformat': 'json-ld',
+                'conversion': 'full',
+                'language': 'en',
+                'apikey': '00000',
+                'algorithm': 'sentiment-taiger'
+            },
+            'input': 'This is amazing',
+            'expected': {
+                'sentiments': [
+                    {'marl:hasPolarity': 'marl:Positive'}],
+            },
+            'responses': [
+                {
+                    'url': TAIGER_ENDPOINT,
+                    'body': 'P',
+                }
+            ]
+        },
+        {
+            'params': {
+                'algo': 'sentiment-taiger',
+                'intype': 'direct',
+                'expanded-jsonld': 0,
+                'informat': 'text',
+                'prefix': '',
+                'plugin_type': 'analysisPlugin',
+                'urischeme': 'RFC5147String',
+                'outformat': 'json-ld',
+                'conversion': 'full',
+                'language': 'en',
+                'apikey': '00000',
+                'algorithm': 'sentiment-taiger'
+            },
+            'input': 'The pillow is in the wardrobe',
+            'expected': {
+                'sentiments': [
+                    {'marl:hasPolarity': 'marl:Neutral'}],
+            },
+            'responses': [
+                {
+                    'url': TAIGER_ENDPOINT,
+                    'body': 'NONE',
+                }
+            ]
+        }
+
+
+    ]
+
+
+if __name__ == '__main__':
+    from senpy import easy_test
+    easy_test(debug=False)
diff --git a/sentiment-taiger/taiger_plugin.py b/sentiment-taiger/taiger_plugin.py
index 14be1ae..7395d5a 100644
--- a/sentiment-taiger/taiger_plugin.py
+++ b/sentiment-taiger/taiger_plugin.py
@@ -25,8 +25,8 @@ class TaigerPlugin(SentimentPlugin):
     name = 'sentiment-taiger'
     author = 'GSI UPM'
     version = "0.1"
-    maxPolarityValue = 0
-    minPolarityValue = -10
+    maxPolarityValue = 1
+    minPolarityValue = -1
 
     def _polarity(self, value):
 