1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-22 00:02:28 +00:00

tweaks for py2/py3 compatibility

This commit is contained in:
J. Fernando Sánchez 2019-01-09 19:29:24 +01:00
parent 80acb9307c
commit bb6f9ee367
7 changed files with 35 additions and 18 deletions

View File

@ -82,7 +82,7 @@ class ANEW(SentimentPlugin):
self._stopwords = stopwords.words('english') self._stopwords = stopwords.words('english')
dictionary={} dictionary={}
dictionary['es'] = {} dictionary['es'] = {}
with self.open(self.anew_path_es,'rb') as tabfile: with self.open(self.anew_path_es,'r') as tabfile:
reader = csv.reader(tabfile, delimiter='\t') reader = csv.reader(tabfile, delimiter='\t')
for row in reader: for row in reader:
dictionary['es'][row[2]]={} dictionary['es'][row[2]]={}
@ -90,7 +90,7 @@ class ANEW(SentimentPlugin):
dictionary['es'][row[2]]['A']=row[5] dictionary['es'][row[2]]['A']=row[5]
dictionary['es'][row[2]]['D']=row[7] dictionary['es'][row[2]]['D']=row[7]
dictionary['en'] = {} dictionary['en'] = {}
with self.open(self.anew_path_en,'rb') as tabfile: with self.open(self.anew_path_en,'r') as tabfile:
reader = csv.reader(tabfile, delimiter='\t') reader = csv.reader(tabfile, delimiter='\t')
for row in reader: for row in reader:
dictionary['en'][row[0]]={} dictionary['en'][row[0]]={}

Binary file not shown.

View File

@ -3,6 +3,7 @@
import os import os
import re import re
import sys
import string import string
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -12,6 +13,18 @@ from nltk.corpus import stopwords
from senpy import EmotionPlugin, TextBox, models from senpy import EmotionPlugin, TextBox, models
def ignore(dchars):
    """Build a function that removes a fixed set of characters from a string.

    Works on both Python 2 and Python 3, whose ``translate`` APIs differ.

    :param dchars: iterable of single-character strings to delete.
    :return: a one-argument callable ``f(s)`` returning ``s`` with every
        character in ``dchars`` removed.
    """
    deletechars = "".join(dchars)
    if sys.version_info[0] >= 3:
        # Three-argument maketrans maps every char of the third argument to
        # None, which makes str.translate drop it. Build the table once.
        tbl = str.maketrans("", "", deletechars)

        def _strip(s):
            return s.translate(tbl)
    else:
        # Python 2: a ``None`` table means "delete only, translate nothing".
        def _strip(s):
            return string.translate(s, None, deletechars)
    return _strip
class DepecheMood(TextBox, EmotionPlugin): class DepecheMood(TextBox, EmotionPlugin):
'''Plugin that uses the DepecheMood++ emotion lexicon.''' '''Plugin that uses the DepecheMood++ emotion lexicon.'''
@ -32,19 +45,15 @@ class DepecheMood(TextBox, EmotionPlugin):
'INSPIRED': 'wna:awe', 'INSPIRED': 'wna:awe',
'SAD': 'wna:sadness', 'SAD': 'wna:sadness',
} }
self._noise = self.__noise() self._denoise = ignore(set(string.punctuation)|set('«»'))
self._stop_words = stopwords.words('english') + [''] self._stop_words = []
self._lex_vocab = None self._lex_vocab = None
self._lex = None self._lex = None
def __noise(self):
noise = set(string.punctuation) | set('«»')
noise = {ord(c): None for c in noise}
return noise
def activate(self): def activate(self):
self._lex = self.download_lex() self._lex = self.download_lex()
self._lex_vocab = set(list(self._lex.keys())) self._lex_vocab = set(list(self._lex.keys()))
self._stop_words = stopwords.words('english') + ['']
def clean_str(self, string): def clean_str(self, string):
string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string) string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
@ -67,7 +76,7 @@ class DepecheMood(TextBox, EmotionPlugin):
def preprocess(self, text): def preprocess(self, text):
if text is None: if text is None:
return None return None
tokens = self.clean_str(text).translate(self._noise).split(' ') tokens = self._denoise(self.clean_str(text)).split(' ')
tokens = [tok for tok in tokens if tok not in self._stop_words] tokens = [tok for tok in tokens if tok not in self._stop_words]
return tokens return tokens

View File

@ -1,6 +1,7 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os import os
import sys
import string import string
import nltk import nltk
import pickle import pickle
@ -14,6 +15,9 @@ from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment from senpy.models import Results, Entry, Sentiment
# On Python 3 (major version >= 3) bind the name ``unicode`` to ``str``;
# presumably so the rest of this module can keep using the Python 2 name.
if sys.version_info[0] >= 3:
unicode = str
class SentimentBasic(SentimentPlugin): class SentimentBasic(SentimentPlugin):
''' '''
@ -43,7 +47,7 @@ class SentimentBasic(SentimentPlugin):
def _load_pos_tagger(self): def _load_pos_tagger(self):
self.pos_path = self.find_file(self.pos_path) self.pos_path = self.find_file(self.pos_path)
with open(self.pos_path, 'r') as f: with open(self.pos_path, 'rb') as f:
tagger = pickle.load(f) tagger = pickle.load(f)
return tagger return tagger

View File

@ -62,7 +62,7 @@ class SentiWordNet(object):
senti_scores = [] senti_scores = []
synsets = wordnet.synsets(word,pos) synsets = wordnet.synsets(word,pos)
for synset in synsets: for synset in synsets:
if self.pos_synset.has_key((synset.pos(), synset.offset())): if (synset.pos(), synset.offset()) in self.pos_synset:
pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())] pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())]
senti_scores.append({"pos":pos_val,"neg":neg_val,\ senti_scores.append({"pos":pos_val,"neg":neg_val,\
"obj": 1.0 - (pos_val - neg_val),'synset':synset}) "obj": 1.0 - (pos_val - neg_val),'synset':synset})

View File

@ -41,7 +41,6 @@ class TaigerPlugin3cats(SentimentPlugin):
value = 1 value = 1
else: else:
raise ValueError('unknown polarity: {}'.format(value)) raise ValueError('unknown polarity: {}'.format(value))
print(value, 'whatsup')
return polarity, value return polarity, value
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):

View File

@ -17,10 +17,15 @@ For example:
''' '''
import os, math, re, sys, fnmatch, string import os, math, re, sys, fnmatch, string
reload(sys) import codecs
def make_lex_dict(f):
    """Load a tab-separated valence lexicon into a ``{token: score}`` dict.

    Each line is stripped and split on tabs; the first field becomes the key
    and the second field, converted to ``float``, becomes the value. Any
    further fields are ignored. The file is decoded as ISO-8859-1.

    :param f: path of the lexicon file.
    :return: dict mapping each token to its float score.
    :raises ValueError: if a non-blank line has fewer than two fields or its
        second field is not a number.
    """
    lexicon = {}
    # Separate name for the handle so the ``f`` argument is not rebound.
    with codecs.open(f, encoding='iso-8859-1') as lexfile:
        for line in lexfile:
            line = line.strip()
            if not line:
                # Tolerate blank (e.g. trailing) lines instead of failing
                # on the two-value unpack below.
                continue
            word, score = line.split('\t')[:2]
            # Store numbers, not raw text, as the pre-rewrite one-liner did.
            lexicon[word] = float(score)
    return lexicon
f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms
try: try:
@ -356,8 +361,8 @@ if __name__ == '__main__':
] ]
sentences.extend(tricky_sentences) sentences.extend(tricky_sentences)
for sentence in sentences: for sentence in sentences:
print sentence, print(sentence)
ss = sentiment(sentence) ss = sentiment(sentence)
print "\t" + str(ss) print("\t" + str(ss))
print "\n\n Done!" print("\n\n Done!")