2019-01-09 16:19:22 +00:00
|
|
|
#!/usr/local/bin/python
|
|
|
|
# coding: utf-8
|
|
|
|
|
|
|
|
import os
|
|
|
|
import re
|
2019-01-09 18:29:24 +00:00
|
|
|
import sys
|
2019-01-09 16:19:22 +00:00
|
|
|
import string
|
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
from six.moves import urllib
|
|
|
|
from nltk.corpus import stopwords
|
|
|
|
|
|
|
|
from senpy import EmotionPlugin, TextBox, models
|
|
|
|
|
|
|
|
|
2019-01-09 18:29:24 +00:00
|
|
|
def ignore(dchars):
    '''Build a function that deletes every character of ``dchars`` from a string.

    :param dchars: iterable of single-character strings to remove.
    :return: a one-argument callable that returns its input string with all
        of those characters removed.
    '''
    unwanted = "".join(dchars)

    if sys.version_info[0] < 3:
        # Python 2: string.translate accepts the characters to delete directly.
        def strip_chars(text):
            return string.translate(text, None, unwanted)

        return strip_chars

    # Python 3: build the deletion table once and reuse it on every call.
    table = str.maketrans("", "", unwanted)

    def strip_chars(text):
        return text.translate(table)

    return strip_chars
|
|
|
|
|
|
|
|
|
2019-01-09 16:19:22 +00:00
|
|
|
class DepecheMood(TextBox, EmotionPlugin):
    '''Plugin that uses the DepecheMood++ emotion lexicon.

    A text is cleaned and tokenized, stop words are discarded, and for each
    emotion in ``EMOTIONS`` the lexicon scores of the distinct tokens found
    in the lexicon are averaged.  Results are keyed by the ``wna:``
    categories listed in ``_mapping``.
    '''

    author = 'Oscar Araque'
    version = '0.1'

    def __init__(self, *args, **kwargs):
        '''Set up static configuration; the lexicon itself is loaded by ``activate``.'''
        super(DepecheMood, self).__init__(*args, **kwargs)
        self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"
        self.EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',]
        # Lexicon column name -> emotion category reported in the output.
        self._mapping = {
            'AFRAID': 'wna:negative-fear',
            'AMUSED': 'wna:amusement',
            'ANGRY': 'wna:anger',
            'ANNOYED': 'wna:annoyance',
            'DONT_CARE': 'wna:indifference',
            'HAPPY': 'wna:joy',
            'INSPIRED': 'wna:awe',
            'SAD': 'wna:sadness',
        }
        # Callable that removes ASCII punctuation and guillemets from a string.
        self._denoise = ignore(set(string.punctuation)|set('«»'))
        # Filled in by activate(); a set so membership tests are O(1).
        self._stop_words = set()
        self._lex_vocab = None
        self._lex = None

    def activate(self):
        '''Load the lexicon, its vocabulary and the English stop-word list.'''
        self._lex = self.download_lex()
        self._lex_vocab = set(self._lex)
        # The empty string is included so that the empty tokens produced by
        # split(' ') in preprocess() are filtered out with the stop words.
        self._stop_words = set(stopwords.words('english')) | {''}

    def clean_str(self, string):
        '''Normalize raw text into lower-case, space-separated tokens.

        Note: the parameter name shadows the ``string`` module inside this
        method; it is kept because it is part of the method's signature.

        :param string: text to clean.
        :return: the cleaned, stripped and lower-cased text.
        '''
        # Replace every character outside the allowed set with a space.
        string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
        # Collapse each run of digits into the placeholder token "num".
        string = re.sub(r"[0-9]+", " num ", string)
        # Split contraction suffixes off the preceding word.
        string = re.sub(r"\'s", " \'s", string)
        string = re.sub(r"\'ve", " \'ve", string)
        string = re.sub(r"n\'t", " n\'t", string)
        string = re.sub(r"\'re", " \'re", string)
        string = re.sub(r"\'d", " \'d", string)
        string = re.sub(r"\'ll", " \'ll", string)
        # Surround punctuation with spaces so each mark becomes its own token.
        string = re.sub(r"\.", " . ", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", " ( ", string)
        string = re.sub(r"\)", " ) ", string)
        string = re.sub(r"\?", " ? ", string)
        # Squeeze whitespace runs introduced by the substitutions above.
        string = re.sub(r"\s{2,}", " ", string)
        return string.strip().lower()

    def preprocess(self, text):
        '''Turn ``text`` into a list of tokens with stop words removed.

        :param text: raw text, or ``None``.
        :return: list of tokens, or ``None`` when ``text`` is ``None``.
        '''
        if text is None:
            return None
        tokens = self._denoise(self.clean_str(text)).split(' ')
        return [tok for tok in tokens if tok not in self._stop_words]

    def estimate_emotion(self, tokens, emotion):
        '''Average the lexicon score of ``emotion`` over ``tokens``.

        :param tokens: iterable of tokens, all of which must be keys of the
            loaded lexicon.
        :param emotion: lexicon column name (one of ``EMOTIONS``).
        :return: the mean score; ``0.0`` when ``tokens`` is empty.
        '''
        scores = [self._lex[tok][emotion] for tok in tokens]
        # Avoid dividing by zero: np.sum([]) is 0.0, so an empty input gives 0.0.
        divisor = len(scores) if scores else 1
        return np.sum(scores) / divisor

    def estimate_all_emotions(self, tokens):
        '''Score every emotion in ``EMOTIONS`` for the given tokens.

        Only the distinct tokens that appear in the lexicon contribute.

        :param tokens: iterable of tokens, or ``None`` (as returned by
            ``preprocess`` for missing text), which is scored as no tokens.
        :return: dict mapping each mapped emotion category to its score.
        '''
        if tokens is None:
            tokens = ()
        known = set(tokens) & self._lex_vocab
        return {
            self._mapping[emotion]: self.estimate_emotion(known, emotion)
            for emotion in self.EMOTIONS
        }

    def download_lex(self, file_path='DepecheMood_english_token_full.tsv', freq_threshold=10):
        '''Load the lexicon TSV, downloading it first if it cannot be located.

        :param file_path: name or path of the lexicon file.
        :param freq_threshold: rows whose ``freq`` column is below this value
            are discarded.
        :return: dict mapping each value of the first TSV column to a dict of
            the remaining columns (``freq`` excluded).
        '''
        try:
            file_path = self.find_file(file_path)
        except IOError:
            # find_file could not provide the file: fetch it from LEXICON_URL
            # and read the copy that urlretrieve just wrote.
            file_path, _ = urllib.request.urlretrieve(self.LEXICON_URL, file_path)

        lexicon = pd.read_csv(file_path, sep='\t', index_col=0)
        lexicon = lexicon[lexicon['freq'] >= freq_threshold]
        # Non-inplace drop: the filtered frame is a selection of the original,
        # and mutating it in place can trigger pandas' SettingWithCopyWarning.
        lexicon = lexicon.drop('freq', axis=1)
        return lexicon.T.to_dict()

    def output(self, output, entry, **kwargs):
        '''Attach the estimated emotions to ``entry`` as a new emotion set.

        :param output: dict of emotion category -> intensity, as returned by
            ``predict_one``.
        :param entry: entry whose ``emotions`` list receives the new set.
        :return: the same ``entry``.
        '''
        s = models.EmotionSet()
        s.prov__wasGeneratedBy = self.id
        entry.emotions.append(s)
        for label, value in output.items():
            e = models.Emotion(onyx__hasEmotionCategory=label,
                               onyx__hasEmotionIntensity=value)
            s.onyx__hasEmotion.append(e)
        return entry

    def predict_one(self, input, **kwargs):
        '''Estimate all emotion scores for a single input text.

        :param input: raw text to analyse.
        :return: dict of emotion category -> score.
        '''
        tokens = self.preprocess(input)
        return self.estimate_all_emotions(tokens)

    test_cases = [
        {
            'entry': {
                'nif:isString': 'My cat is very happy',
            },
            'expected': {
                'emotions': [
                    {
                        '@type': 'emotionSet',
                        'onyx:hasEmotion': [
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:negative-fear',
                                'onyx:hasEmotionIntensity': 0.05278117640010922, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:amusement',
                                'onyx:hasEmotionIntensity': 0.2114806151413433, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:anger',
                                'onyx:hasEmotionIntensity': 0.05726119426520887, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:annoyance',
                                'onyx:hasEmotionIntensity': 0.12295990731053638, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:indifference',
                                'onyx:hasEmotionIntensity': 0.1860159893608025, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:joy',
                                'onyx:hasEmotionIntensity': 0.12904050973724163, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:awe',
                                'onyx:hasEmotionIntensity': 0.17973650399862967, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:sadness',
                                'onyx:hasEmotionIntensity': 0.060724103786128455, },
                        ]
                    }
                ]
            }
        }
    ]
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Only `easy` is used below, so it is the only name imported.
    from senpy.utils import easy

    # An alternative entry point is to load the plugins with
    # senpy.utils.easy_load() (``sp, app = easy_load()``) and call ``.test()``
    # on each item of ``sp.analysis_plugins``.
    easy()
|