2019-01-09 16:19:22 +00:00
#!/usr/local/bin/python
# coding: utf-8
2019-04-04 10:56:46 +00:00
from future import standard_library
standard_library . install_aliases ( )
2019-01-09 16:19:22 +00:00
import os
import re
2019-01-09 18:29:24 +00:00
import sys
2019-01-09 16:19:22 +00:00
import string
import numpy as np
from six . moves import urllib
from nltk . corpus import stopwords
2019-04-04 10:56:46 +00:00
from senpy import EmotionBox , models
2019-01-09 16:19:22 +00:00
2019-01-09 18:29:24 +00:00
def ignore(dchars):
    '''
    Build a function that removes a fixed set of characters from a string.

    :param dchars: iterable of single-character strings to delete.
    :return: a callable that takes a string and returns a copy of it with
        every character of ``dchars`` removed.
    '''
    # Join with the empty string: any separator used here would itself end
    # up in the deletion set (a " " separator would strip all spaces and
    # make later whitespace tokenisation impossible).
    deletechars = "".join(dchars)
    # Third argument of maketrans lists the characters mapped to None,
    # i.e. deleted by str.translate.
    tbl = str.maketrans("", "", deletechars)

    def strip_chars(s):
        return s.translate(tbl)

    return strip_chars
2019-04-04 10:56:46 +00:00
class DepecheMood(EmotionBox):
    '''
    Plugin that uses the DepecheMood emotion lexicon.

    DepecheMood is an emotion lexicon automatically generated from news articles where users expressed their associated emotions. It contains two languages (English and Italian), as well as three types of word representations (token, lemma and lemma#PoS). For English, the lexicon contains 165k tokens, while the Italian version contains 116k. Unsupervised techniques can be applied to generate simple but effective baselines. To learn more, please visit https://github.com/marcoguerini/DepecheMood and http://www.depechemood.eu/
    '''

    author = 'Oscar Araque'
    name = 'emotion-depechemood'
    version = '0.1'
    requirements = ['pandas']
    nltk_resources = ["stopwords"]

    onyx__usesEmotionModel = 'wna:WNAModel'

    # Output labels. NOTE(review): presumably paired positionally with
    # DM_EMOTIONS by EmotionBox (same length and order as the expected
    # output in test_cases) -- confirm against senpy.
    EMOTIONS = ['wna:negative-fear',
                'wna:amusement',
                'wna:anger',
                'wna:annoyance',
                'wna:indifference',
                'wna:joy',
                'wna:awe',
                'wna:sadness']

    # Keys looked up in each lexicon entry, in the order the scores are
    # returned by estimate_all_emotions.
    DM_EMOTIONS = ['AFRAID', 'AMUSED', 'ANGRY', 'ANNOYED', 'DONT_CARE', 'HAPPY', 'INSPIRED', 'SAD', ]

    def __init__(self, *args, **kwargs):
        '''
        Set up the plugin without loading the lexicon.

        The lexicon, its vocabulary and the stop-word list are only filled
        in by activate().
        '''
        super(DepecheMood, self).__init__(*args, **kwargs)
        self.LEXICON_URL = "https://github.com/marcoguerini/DepecheMood/raw/master/DepecheMood%2B%2B/DepecheMood_english_token_full.tsv"
        # Callable that deletes ASCII punctuation and guillemets.
        self._denoise = ignore(set(string.punctuation) | set('«»'))
        self._stop_words = []
        self._lex_vocab = None
        self._lex = None

    def activate(self):
        '''
        Load the lexicon, its vocabulary and the English stop-word list.
        '''
        self._lex = self.download_lex()
        self._lex_vocab = set(self._lex)
        # The empty string is treated as a stop word so that the empty
        # tokens produced by split(' ') in preprocess() are discarded.
        self._stop_words = stopwords.words('english') + ['']

    def clean_str(self, string):
        '''
        Normalise raw text before tokenisation.

        Characters outside the allowed set become spaces, digit runs become
        the token "num", contractions and punctuation marks are separated
        from the neighbouring words, and runs of whitespace are collapsed.

        :param string: text to clean.
        :return: the cleaned text, stripped and lower-cased.
        '''
        # NOTE: the parameter name shadows the ``string`` module inside
        # this method only; it is kept for interface compatibility.
        string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
        string = re.sub(r"[0-9]+", " num ", string)
        # Split common English contractions off the preceding word.
        string = re.sub(r"\'s", " 's", string)
        string = re.sub(r"\'ve", " 've", string)
        string = re.sub(r"n\'t", " n't", string)
        string = re.sub(r"\'re", " 're", string)
        string = re.sub(r"\'d", " 'd", string)
        string = re.sub(r"\'ll", " 'll", string)
        # Surround punctuation with spaces so it becomes its own token.
        string = re.sub(r"\.", " . ", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", " ( ", string)
        string = re.sub(r"\)", " ) ", string)
        string = re.sub(r"\?", " ? ", string)
        # Collapse the extra whitespace introduced above.
        string = re.sub(r"\s{2,}", " ", string)
        return string.strip().lower()

    def preprocess(self, text):
        '''
        Turn raw text into a list of tokens without stop words.

        :param text: text to tokenise, or None.
        :return: list of tokens, or None when ``text`` is None.
        '''
        if text is None:
            return None
        tokens = self._denoise(self.clean_str(text)).split(' ')
        return [tok for tok in tokens if tok not in self._stop_words]

    def estimate_emotion(self, tokens, emotion):
        '''
        Average the lexicon score of one emotion over the given tokens.

        :param tokens: iterable of tokens; every one must be a key of the
            loaded lexicon (a missing token raises KeyError).
        :param emotion: lexicon column name, e.g. one of DM_EMOTIONS.
        :return: mean score, or 0.0 when ``tokens`` is empty.
        '''
        scores = [self._lex[tok][emotion] for tok in tokens]
        # Avoid dividing by zero when no token is given.
        divisor = len(scores) if scores else 1
        return np.sum(scores) / divisor

    def estimate_all_emotions(self, tokens):
        '''
        Compute the average score of every DepecheMood emotion.

        Only tokens present in the lexicon vocabulary are considered, and
        each distinct token counts once.

        :param tokens: iterable of tokens, or None (treated as no tokens,
            since preprocess() returns None for missing text).
        :return: list of scores in DM_EMOTIONS order.
        '''
        intersection = set(tokens or ()) & self._lex_vocab
        return [self.estimate_emotion(intersection, emotion)
                for emotion in self.DM_EMOTIONS]

    def download_lex(self, file_path='DepecheMood_english_token_full.tsv', freq_threshold=10):
        '''
        Load the lexicon, downloading it first when it is not found locally.

        :param file_path: name of the lexicon file.
        :param freq_threshold: entries whose ``freq`` column is below this
            value are discarded.
        :return: dict mapping each index value of the TSV (first column) to
            a dict of its remaining columns.
        '''
        # Imported here so that pandas is only needed when the lexicon is
        # actually loaded.
        import pandas as pd

        try:
            file_path = self.find_file(file_path)
        except IOError:
            # NOTE(review): find_file/path are inherited helpers not shown
            # here; presumably path() resolves a location inside the
            # plugin's data folder -- confirm in senpy.
            file_path = self.path(file_path)
            urllib.request.urlretrieve(self.LEXICON_URL, file_path)

        lexicon = pd.read_csv(file_path, sep='\t', index_col=0)
        lexicon = lexicon[lexicon['freq'] >= freq_threshold]
        # Non in-place drop: the filtered frame above may be a view, and an
        # in-place drop on it can trigger pandas' chained-assignment warning.
        lexicon = lexicon.drop('freq', axis=1)
        return lexicon.T.to_dict()

    def predict_one(self, features, **kwargs):
        '''
        Estimate the emotion scores of a single entry.

        :param features: sequence whose first element is the text to analyse.
        :return: list of scores in DM_EMOTIONS order.
        '''
        tokens = self.preprocess(features[0])
        estimation = self.estimate_all_emotions(tokens)
        return estimation

    test_cases = [
        {
            'entry': {
                'nif:isString': 'My cat is very happy',
            },
            'expected': {
                'onyx:hasEmotionSet': [
                    {
                        'onyx:hasEmotion': [
                            {
                                'onyx:hasEmotionCategory': 'wna:negative-fear',
                                'onyx:hasEmotionIntensity': 0.05278117640010922,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:amusement',
                                'onyx:hasEmotionIntensity': 0.2114806151413433,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:anger',
                                'onyx:hasEmotionIntensity': 0.05726119426520887,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:annoyance',
                                'onyx:hasEmotionIntensity': 0.12295990731053638,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:indifference',
                                'onyx:hasEmotionIntensity': 0.1860159893608025,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:joy',
                                'onyx:hasEmotionIntensity': 0.12904050973724163,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:awe',
                                'onyx:hasEmotionIntensity': 0.17973650399862967,
                            },
                            {
                                'onyx:hasEmotionCategory': 'wna:sadness',
                                'onyx:hasEmotionIntensity': 0.060724103786128455,
                            },
                        ]
                    }
                ]
            }
        }
    ]
if __name__ == '__main__':
    # Executed only when the file is run as a script. Only the helper that
    # is actually called is imported.
    from senpy.utils import easy_test

    easy_test(debug=False)