senpy/emotion-depechemood/depechemood_plugin.py

#!/usr/local/bin/python
# coding: utf-8

import os
import re
import sys
import string
import numpy as np
import pandas as pd
from six.moves import urllib
from nltk.corpus import stopwords

from senpy import EmotionPlugin, TextBox, models


def ignore(dchars):
    '''Build a function that removes every character of ``dchars`` from a string.

    :param dchars: iterable of single-character strings to delete.
    :return: a one-argument callable that takes a string and returns a copy
        with all of the given characters removed.
    '''
    unwanted = "".join(dchars)

    if sys.version_info[0] < 3:
        # Python 2: string.translate with a ``None`` table only deletes.
        def strip_chars(s):
            return string.translate(s, None, unwanted)

        return strip_chars

    # Python 3: the third maketrans argument lists the characters to drop.
    # The table is built once and reused by every call of the closure.
    table = str.maketrans("", "", unwanted)

    def strip_chars(s):
        return s.translate(table)

    return strip_chars


class DepecheMood(TextBox, EmotionPlugin):
    '''Plugin that uses the DepecheMood++ emotion lexicon.

    A text is cleaned and tokenized, stop words are removed, and for each
    emotion the lexicon scores of the distinct tokens present in the lexicon
    are averaged. Results are reported under the ``wna:`` categories listed
    in ``_mapping``.
    '''

    author = 'Oscar Araque'
    version = '0.1'

    def __init__(self, *args, **kwargs):
        '''Set up constants and empty placeholders.

        The lexicon and the stop words are only loaded in :meth:`activate`.
        '''
        super(DepecheMood, self).__init__(*args, **kwargs)
        self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"
        # Emotion column names as they are looked up in the lexicon.
        self.EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',]
        # Lexicon emotion name -> category label used in the output.
        self._mapping = {
            'AFRAID': 'wna:negative-fear',
            'AMUSED': 'wna:amusement',
            'ANGRY': 'wna:anger',
            'ANNOYED': 'wna:annoyance',
            'DONT_CARE': 'wna:indifference',
            'HAPPY': 'wna:joy',
            'INSPIRED': 'wna:awe',
            'SAD': 'wna:sadness',
        }
        # Callable that strips ASCII punctuation and guillemets from a string.
        self._denoise = ignore(set(string.punctuation)|set('«»'))
        self._stop_words = []
        self._lex_vocab = None
        self._lex = None

    def activate(self):
        '''Load the lexicon, its vocabulary and the English stop words.

        NOTE(review): presumably invoked by senpy when the plugin is
        activated -- confirm against the senpy plugin lifecycle.
        '''
        self._lex = self.download_lex()
        # Iterating a dict yields its keys, so no intermediate list is needed.
        self._lex_vocab = set(self._lex)
        # The empty string is added so that empty tokens produced by
        # splitting on single spaces are filtered out in preprocess().
        self._stop_words = stopwords.words('english') + ['']

    def clean_str(self, string):
        '''Normalize raw text before tokenization.

        Replaces characters outside ``A-Za-z0-9().,!?'`` and the backtick
        with spaces, replaces digit runs with ``num``, separates common
        English contraction suffixes and punctuation marks with spaces,
        collapses whitespace runs, and returns the stripped, lower-cased
        result.

        :param string: text to clean. The parameter name shadows the
            ``string`` module inside this method; it is kept because it is
            part of the method's signature.
        :return: the cleaned, lower-cased string.
        '''
        string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
        string = re.sub(r"[0-9]+", " num ", string)
        # Detach contraction suffixes so they become separate tokens.
        string = re.sub(r"\'s", " \'s", string)
        string = re.sub(r"\'ve", " \'ve", string)
        string = re.sub(r"n\'t", " n\'t", string)
        string = re.sub(r"\'re", " \'re", string)
        string = re.sub(r"\'d", " \'d", string)
        string = re.sub(r"\'ll", " \'ll", string)
        # Surround punctuation marks with spaces.
        string = re.sub(r"\.", " . ", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", " ( ", string)
        string = re.sub(r"\)", " ) ", string)
        string = re.sub(r"\?", " ? ", string)
        # Collapse the whitespace runs introduced above.
        string = re.sub(r"\s{2,}", " ", string)
        return string.strip().lower()

    def preprocess(self, text):
        '''Turn a text into a list of tokens without stop words.

        :param text: raw text, or ``None``.
        :return: list of tokens, or ``None`` when ``text`` is ``None``.
        '''
        if text is None:
            return None
        tokens = self._denoise(self.clean_str(text)).split(' ')
        return [tok for tok in tokens if tok not in self._stop_words]

    def estimate_emotion(self, tokens, emotion):
        '''Average the lexicon score of ``emotion`` over ``tokens``.

        :param tokens: iterable of tokens; every token must be a key of the
            loaded lexicon (a missing token raises ``KeyError``).
        :param emotion: emotion column name, e.g. one of ``self.EMOTIONS``.
        :return: the mean score, or ``0.0`` when ``tokens`` is empty.
        '''
        scores = [self._lex[tok][emotion] for tok in tokens]
        # Dividing by at least 1 maps the empty case to 0.0 instead of
        # dividing by zero.
        return np.sum(scores) / max(len(scores), 1)

    def estimate_all_emotions(self, tokens):
        '''Estimate every emotion in ``self.EMOTIONS`` for a token list.

        Only the distinct tokens that appear in the lexicon contribute, so
        repeated tokens count once.

        :param tokens: iterable of tokens, or ``None`` (which is what
            :meth:`preprocess` returns for a ``None`` text); ``None`` is
            treated as an empty token list.
        :return: dict mapping each mapped category label to its score.
        '''
        # ``tokens or ()`` keeps a None input from crashing set().
        known = set(tokens or ()) & self._lex_vocab
        return {
            self._mapping[emotion]: self.estimate_emotion(known, emotion)
            for emotion in self.EMOTIONS
        }

    def download_lex(self, file_path='DepecheMood_english_token_full.tsv', freq_threshold=10):
        '''Load the lexicon, downloading it first when it is not found.

        :param file_path: name of the lexicon TSV file. It is resolved with
            ``self.find_file``; if that raises ``IOError`` the file is
            downloaded from ``self.LEXICON_URL`` and saved under this path.
        :param freq_threshold: rows whose ``freq`` value is below this
            threshold are discarded.
        :return: dict of the form ``{token: {emotion: score}}``.
        '''
        try:
            file_path = self.find_file(file_path)
        except IOError:
            # urlretrieve returns the local file name it wrote to.
            file_path, _ = urllib.request.urlretrieve(self.LEXICON_URL, file_path)

        # The first column holds the tokens and becomes the index.
        lexicon = pd.read_csv(file_path, sep='\t', index_col=0)
        # A non-inplace drop avoids mutating a filtered frame in place.
        lexicon = lexicon[lexicon['freq'] >= freq_threshold].drop('freq', axis=1)
        return lexicon.T.to_dict()

    def output(self, output, entry, **kwargs):
        '''Attach the estimated emotions to ``entry``.

        :param output: dict mapping category labels to intensities, as
            returned by :meth:`predict_one`.
        :param entry: entry whose ``emotions`` list receives a new
            ``EmotionSet``.
        :return: the same ``entry``, with the emotion set appended.
        '''
        s = models.EmotionSet()
        s.prov__wasGeneratedBy = self.id
        entry.emotions.append(s)
        for label, value in output.items():
            e = models.Emotion(onyx__hasEmotionCategory=label,
                               onyx__hasEmotionIntensity=value)
            s.onyx__hasEmotion.append(e)
        return entry

    def predict_one(self, input, **kwargs):
        '''Estimate the emotions conveyed by a single text.

        :param input: raw text, or ``None``.
        :return: dict mapping category labels to estimated scores.
        '''
        return self.estimate_all_emotions(self.preprocess(input))

    # NOTE(review): presumably consumed by senpy's plugin test harness;
    # the expected intensities depend on the downloaded lexicon -- confirm.
    test_cases = [
        {
            'entry': {
                'nif:isString': 'My cat is very happy',
            },
            'expected': {
                'emotions': [
                    {
                        '@type': 'emotionSet',
                        'onyx:hasEmotion': [
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:negative-fear',
                             'onyx:hasEmotionIntensity': 0.05278117640010922, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:amusement',
                             'onyx:hasEmotionIntensity': 0.2114806151413433, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:anger',
                             'onyx:hasEmotionIntensity': 0.05726119426520887, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:annoyance',
                             'onyx:hasEmotionIntensity': 0.12295990731053638, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:indifference',
                             'onyx:hasEmotionIntensity': 0.1860159893608025, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:joy',
                             'onyx:hasEmotionIntensity': 0.12904050973724163, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:awe',
                             'onyx:hasEmotionIntensity': 0.17973650399862967, },
                            {'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:sadness',
                                'onyx:hasEmotionIntensity': 0.060724103786128455, },
                        ]
                    }
                ]
            }
        }
    ]


if __name__ == '__main__':
    # Only runs when this file is executed directly, not when it is imported.
    from senpy.utils import easy, easy_load, easy_test
    # Alternative kept for reference: load the plugins explicitly and run
    # each analysis plugin's own tests.
    # sp, app = easy_load()
    # for plug in sp.analysis_plugins:
    #     plug.test()
    # NOTE(review): easy() presumably loads and serves/tests the plugins
    # defined in this module -- confirm against senpy.utils.
    easy()
depechemood updated 2019-01-09 16:19:22 +00:00			`#!/usr/local/bin/python`
			`# coding: utf-8`

			`import os`
			`import re`
tweaks for py2/py3 compatibility 2019-01-09 18:29:24 +00:00			`import sys`
depechemood updated 2019-01-09 16:19:22 +00:00			`import string`
			`import numpy as np`
			`import pandas as pd`
			`from six.moves import urllib`
			`from nltk.corpus import stopwords`

			`from senpy import EmotionPlugin, TextBox, models`


tweaks for py2/py3 compatibility 2019-01-09 18:29:24 +00:00			`def ignore(dchars):`
			`deletechars = "".join(dchars)`
			`if sys.version_info[0] >= 3:`
			`tbl = str.maketrans("", "", deletechars)`
			`ignore = lambda s: s.translate(tbl)`
			`else:`
			`def ignore(s):`
			`return string.translate(s, None, deletechars)`
			`return ignore`


depechemood updated 2019-01-09 16:19:22 +00:00			`class DepecheMood(TextBox, EmotionPlugin):`
			`'''Plugin that uses the DepecheMood++ emotion lexicon.'''`

			`author = 'Oscar Araque'`
			`version = '0.1'`

			`def __init__(self, args, *kwargs):`
			`super(DepecheMood, self).__init__(args, *kwargs)`
			`self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"`
			`self.EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD',]`
			`self._mapping = {`
			`'AFRAID': 'wna:negative-fear',`
			`'AMUSED': 'wna:amusement',`
			`'ANGRY': 'wna:anger',`
			`'ANNOYED': 'wna:annoyance',`
			`'DONT_CARE': 'wna:indifference',`
			`'HAPPY': 'wna:joy',`
			`'INSPIRED': 'wna:awe',`
			`'SAD': 'wna:sadness',`
			`}`
tweaks for py2/py3 compatibility 2019-01-09 18:29:24 +00:00			`self._denoise = ignore(set(string.punctuation)\|set('«»'))`
			`self._stop_words = []`
depechemood updated 2019-01-09 16:19:22 +00:00			`self._lex_vocab = None`
			`self._lex = None`

			`def activate(self):`
			`self._lex = self.download_lex()`
			`self._lex_vocab = set(list(self._lex.keys()))`
tweaks for py2/py3 compatibility 2019-01-09 18:29:24 +00:00			`self._stop_words = stopwords.words('english') + ['']`
depechemood updated 2019-01-09 16:19:22 +00:00
			`def clean_str(self, string):`
			string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
			`string = re.sub(r"[0-9]+", " num ", string)`
			`string = re.sub(r"\'s", " \'s", string)`
			`string = re.sub(r"\'ve", " \'ve", string)`
			`string = re.sub(r"n\'t", " n\'t", string)`
			`string = re.sub(r"\'re", " \'re", string)`
			`string = re.sub(r"\'d", " \'d", string)`
			`string = re.sub(r"\'ll", " \'ll", string)`
			`string = re.sub(r"\.", " . ", string)`
			`string = re.sub(r",", " , ", string)`
			`string = re.sub(r"!", " ! ", string)`
			`string = re.sub(r"\(", " ( ", string)`
			`string = re.sub(r"\)", " ) ", string)`
			`string = re.sub(r"\?", " ? ", string)`
			`string = re.sub(r"\s{2,}", " ", string)`
			`return string.strip().lower()`

			`def preprocess(self, text):`
			`if text is None:`
			`return None`
tweaks for py2/py3 compatibility 2019-01-09 18:29:24 +00:00			`tokens = self._denoise(self.clean_str(text)).split(' ')`
depechemood updated 2019-01-09 16:19:22 +00:00			`tokens = [tok for tok in tokens if tok not in self._stop_words]`
			`return tokens`

			`def estimate_emotion(self, tokens, emotion):`
			`s = []`
			`for tok in tokens:`
			`s.append(self._lex[tok][emotion])`
			`dividend = np.sum(s) if np.sum(s) > 0 else 0`
			`divisor = len(s) if len(s) > 0 else 1`
			`S = np.sum(s) / divisor`
			`return S`

			`def estimate_all_emotions(self, tokens):`
			`S = {}`
			`intersection = set(tokens) & self._lex_vocab`
			`for emotion in self.EMOTIONS:`
			`s = self.estimate_emotion(intersection, emotion)`
			`emotion_mapped = self._mapping[emotion]`
			`S[emotion_mapped] = s`
			`return S`

			`def download_lex(self, file_path='DepecheMood_english_token_full.tsv', freq_threshold=10):`

			`try:`
			`file_path = self.find_file(file_path)`
			`except IOError:`
			`filename, _ = urllib.request.urlretrieve(self.LEXICON_URL, file_path)`

			`lexicon = pd.read_csv(file_path, sep='\t', index_col=0)`
			`lexicon = lexicon[lexicon['freq'] >= freq_threshold]`
			`lexicon.drop('freq', axis=1, inplace=True)`
			`lexicon = lexicon.T.to_dict()`
			`return lexicon`

			`def output(self, output, entry, **kwargs):`
			`s = models.EmotionSet()`
			`s.prov__wasGeneratedBy = self.id`
			`entry.emotions.append(s)`
			`for label, value in output.items():`
			`e = models.Emotion(onyx__hasEmotionCategory=label,`
			`onyx__hasEmotionIntensity=value)`
			`s.onyx__hasEmotion.append(e)`
			`return entry`

			`def predict_one(self, input, **kwargs):`
			`tokens = self.preprocess(input)`
			`estimation = self.estimate_all_emotions(tokens)`
			`return estimation`

			`test_cases = [`
			`{`
			`'entry': {`
			`'nif:isString': 'My cat is very happy',`
			`},`
			`'expected': {`
			`'emotions': [`
			`{`
			`'@type': 'emotionSet',`
			`'onyx:hasEmotion': [`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:negative-fear',`
			`'onyx:hasEmotionIntensity': 0.05278117640010922, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:amusement',`
			`'onyx:hasEmotionIntensity': 0.2114806151413433, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:anger',`
			`'onyx:hasEmotionIntensity': 0.05726119426520887, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:annoyance',`
			`'onyx:hasEmotionIntensity': 0.12295990731053638, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:indifference',`
			`'onyx:hasEmotionIntensity': 0.1860159893608025, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:joy',`
			`'onyx:hasEmotionIntensity': 0.12904050973724163, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:awe',`
			`'onyx:hasEmotionIntensity': 0.17973650399862967, },`
			`{'@type': 'emotion', 'onyx:hasEmotionCategory': 'wna:sadness',`
			`'onyx:hasEmotionIntensity': 0.060724103786128455, },`
			`]`
			`}`
			`]`
			`}`
			`}`
			`]`


			`if __name__ == '__main__':`
			`from senpy.utils import easy, easy_load, easy_test`
			`# sp, app = easy_load()`
			`# for plug in sp.analysis_plugins:`
			`# plug.test()`
			`easy()`