mirror of https://github.com/gsi-upm/senpy, synced 2024-11-22 08:12:27 +00:00
depechemood updated
This commit is contained in:
parent 4507449266
commit 94394af20b
emotion-depechemood/depechemood_plugin.py (new file, 159 lines)

@@ -0,0 +1,159 @@
#!/usr/local/bin/python
# coding: utf-8

import os
import re
import string
import numpy as np
import pandas as pd
from six.moves import urllib
from nltk.corpus import stopwords

from senpy import EmotionPlugin, TextBox, models

class DepecheMood(TextBox, EmotionPlugin):
    '''Plugin that uses the DepecheMood++ emotion lexicon.'''

    author = 'Oscar Araque'
    version = '0.1'

    def __init__(self, *args, **kwargs):
        super(DepecheMood, self).__init__(*args, **kwargs)
        self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"
        self.EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD']
        self._mapping = {
            'AFRAID': 'wna:negative-fear',
            'AMUSED': 'wna:amusement',
            'ANGRY': 'wna:anger',
            'ANNOYED': 'wna:annoyance',
            'DONT_CARE': 'wna:indifference',
            'HAPPY': 'wna:joy',
            'INSPIRED': 'wna:awe',
            'SAD': 'wna:sadness',
        }
        self._noise = self.__noise()
        self._stop_words = stopwords.words('english') + ['']
        self._lex_vocab = None
        self._lex = None
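
    # The 'wna:' values map DepecheMood++'s column names onto WordNet-Affect
    # emotion categories, the vocabulary used for the onyx annotations below.
    # 'INSPIRED' -> 'wna:awe' and 'DONT_CARE' -> 'wna:indifference' are
    # approximate rather than literal matches.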

    def __noise(self):
        noise = set(string.punctuation) | set('«»')
        noise = {ord(c): None for c in noise}
        return noise

    def activate(self):
        self._lex = self.download_lex()
        self._lex_vocab = set(list(self._lex.keys()))
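
    # __noise() builds a str.translate() table that maps every punctuation
    # codepoint (plus the guillemets '«' and '»') to None, so translating a
    # string through it simply drops those characters. Illustrative only:
    #   'hello, world!'.translate(self._noise)  ->  'hello world'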

    def clean_str(self, string):
        string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
        string = re.sub(r"[0-9]+", " num ", string)
        string = re.sub(r"\'s", " \'s", string)
        string = re.sub(r"\'ve", " \'ve", string)
        string = re.sub(r"n\'t", " n\'t", string)
        string = re.sub(r"\'re", " \'re", string)
        string = re.sub(r"\'d", " \'d", string)
        string = re.sub(r"\'ll", " \'ll", string)
        string = re.sub(r"\.", " . ", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", " ( ", string)
        string = re.sub(r"\)", " ) ", string)
        string = re.sub(r"\?", " ? ", string)
        string = re.sub(r"\s{2,}", " ", string)
        return string.strip().lower()
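
    # clean_str() is a light normalisation pass: it whitelists a small character
    # set, collapses digit runs into the literal token 'num', pads contractions
    # and punctuation with spaces, squeezes repeated whitespace and lower-cases
    # the result. Illustrative only:
    #   clean_str("My cat's fur is 2 colors!")  ->  "my cat 's fur is num colors !"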

    def preprocess(self, text):
        if text is None:
            return None
        tokens = self.clean_str(text).translate(self._noise).split(' ')
        tokens = [tok for tok in tokens if tok not in self._stop_words]
        return tokens
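
    # preprocess() chains clean_str(), punctuation removal via the translate
    # table and whitespace splitting, then filters out NLTK English stopwords.
    # With NLTK's default English stopword list, for example:
    #   self.preprocess('My cat is very happy')  ->  ['cat', 'happy']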

    def estimate_emotion(self, tokens, emotion):
        s = []
        for tok in tokens:
            s.append(self._lex[tok][emotion])
        dividend = np.sum(s) if np.sum(s) > 0 else 0
        divisor = len(s) if len(s) > 0 else 1
        S = dividend / divisor
        return S
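
    # estimate_emotion() returns the arithmetic mean of the lexicon scores for
    # one emotion over the given tokens (sum / count), guarded so that an empty
    # token set yields 0 rather than a division by zero.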

    def estimate_all_emotions(self, tokens):
        S = {}
        intersection = set(tokens) & self._lex_vocab
        for emotion in self.EMOTIONS:
            s = self.estimate_emotion(intersection, emotion)
            emotion_mapped = self._mapping[emotion]
            S[emotion_mapped] = s
        return S
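
    # Scores are computed only over tokens that appear in the lexicon vocabulary
    # (the intersection with self._lex_vocab). Since DepecheMood++ rows are
    # normalised across the eight emotions, the eight mean scores returned here
    # sum to roughly 1 for any non-empty intersection (as the intensities in the
    # test case below do).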

    def download_lex(self, file_path='DepecheMood_english_token_full.tsv', freq_threshold=10):
        try:
            file_path = self.find_file(file_path)
        except IOError:
            filename, _ = urllib.request.urlretrieve(self.LEXICON_URL, file_path)

        lexicon = pd.read_csv(file_path, sep='\t', index_col=0)
        lexicon = lexicon[lexicon['freq'] >= freq_threshold]
        lexicon.drop('freq', axis=1, inplace=True)
        lexicon = lexicon.T.to_dict()
        return lexicon
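
    # The lexicon is a tab-separated table with one row per token and one column
    # per emotion plus a 'freq' column. download_lex() looks for a local copy via
    # find_file() and downloads it otherwise, drops tokens below freq_threshold,
    # removes the 'freq' column and transposes the frame into a dict of the form
    # {token: {EMOTION: score, ...}}.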

    def output(self, output, entry, **kwargs):
        s = models.EmotionSet()
        s.prov__wasGeneratedBy = self.id
        entry.emotions.append(s)
        for label, value in output.items():
            e = models.Emotion(onyx__hasEmotionCategory=label,
                               onyx__hasEmotionIntensity=value)
            s.onyx__hasEmotion.append(e)
        return entry
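
    # output() converts the raw scores returned by predict_one() into senpy
    # models: an onyx EmotionSet carrying provenance for this plugin is appended
    # to the entry, with one Emotion per category/intensity pair.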

    def predict_one(self, input, **kwargs):
        tokens = self.preprocess(input)
        estimation = self.estimate_all_emotions(tokens)
        return estimation

    test_cases = [
        {
            'entry': {
                'nif:isString': 'My cat is very happy',
            },
            'expected': {
                'emotions': [
                    {
                        '@type': 'emotionSet',
                        'onyx:hasEmotion': [
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:negative-fear',
                             'onyx:hasEmotionIntensity': 0.05278117640010922, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:amusement',
                             'onyx:hasEmotionIntensity': 0.2114806151413433, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:anger',
                             'onyx:hasEmotionIntensity': 0.05726119426520887, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:annoyance',
                             'onyx:hasEmotionIntensity': 0.12295990731053638, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:indifference',
                             'onyx:hasEmotionIntensity': 0.1860159893608025, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:joy',
                             'onyx:hasEmotionIntensity': 0.12904050973724163, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:awe',
                             'onyx:hasEmotionIntensity': 0.17973650399862967, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:sadness',
                             'onyx:hasEmotionIntensity': 0.060724103786128455, },
                        ]
                    }
                ]
            }
        }
    ]


if __name__ == '__main__':
    from senpy.utils import easy, easy_load, easy_test
    # sp, app = easy_load()
    # for plug in sp.analysis_plugins:
    #     plug.test()
    easy()
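
# Running this file directly starts a local senpy instance via easy(). Once it
# is up, the plugin can be queried over HTTP. Assuming senpy's default dev
# host/port and its standard API parameters (and that the plugin is registered
# under the name 'depechemood', which may differ), something like:
#   curl "http://localhost:5000/api/?algo=depechemood&i=My+cat+is+very+happy"
# should return the entry annotated with the eight wna emotion intensities.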