1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-14 04:32:29 +00:00

tweaks for py2/py3 compatibility

This commit is contained in:
J. Fernando Sánchez 2019-01-09 19:29:24 +01:00
parent 80acb9307c
commit bb6f9ee367
7 changed files with 35 additions and 18 deletions

View File

@ -82,7 +82,7 @@ class ANEW(SentimentPlugin):
self._stopwords = stopwords.words('english')
dictionary={}
dictionary['es'] = {}
with self.open(self.anew_path_es,'rb') as tabfile:
with self.open(self.anew_path_es,'r') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary['es'][row[2]]={}
@ -90,7 +90,7 @@ class ANEW(SentimentPlugin):
dictionary['es'][row[2]]['A']=row[5]
dictionary['es'][row[2]]['D']=row[7]
dictionary['en'] = {}
with self.open(self.anew_path_en,'rb') as tabfile:
with self.open(self.anew_path_en,'r') as tabfile:
reader = csv.reader(tabfile, delimiter='\t')
for row in reader:
dictionary['en'][row[0]]={}

Binary file not shown.

View File

@ -3,6 +3,7 @@
import os
import re
import sys
import string
import numpy as np
import pandas as pd
@ -12,6 +13,18 @@ from nltk.corpus import stopwords
from senpy import EmotionPlugin, TextBox, models
def ignore(dchars):
    '''Build a function that removes every character in *dchars* from a string.

    The returned callable works on both Python 2 and Python 3, hiding the
    difference between ``str.translate`` (Python 3) and
    ``string.translate`` (Python 2).

    :param dchars: iterable of single-character strings to delete.
    :return: a one-argument callable ``f(s)`` returning ``s`` without any of
        the characters in *dchars*.
    '''
    deletechars = "".join(dchars)
    if sys.version_info[0] >= 3:
        # Build the deletion table once; every call then reuses it.
        table = str.maketrans("", "", deletechars)

        def strip_chars(s):
            return s.translate(table)
    else:
        # Python 2: a ``None`` table means "delete only, no remapping".
        # NOTE(review): this form only accepts byte strings; ``unicode``
        # input is not handled here -- confirm what callers pass in.
        def strip_chars(s):
            return string.translate(s, None, deletechars)
    return strip_chars
class DepecheMood(TextBox, EmotionPlugin):
'''Plugin that uses the DepecheMood++ emotion lexicon.'''
@ -32,19 +45,15 @@ class DepecheMood(TextBox, EmotionPlugin):
'INSPIRED': 'wna:awe',
'SAD': 'wna:sadness',
}
self._noise = self.__noise()
self._stop_words = stopwords.words('english') + ['']
self._denoise = ignore(set(string.punctuation)|set('«»'))
self._stop_words = []
self._lex_vocab = None
self._lex = None
def __noise(self):
noise = set(string.punctuation) | set('«»')
noise = {ord(c): None for c in noise}
return noise
def activate(self):
    '''Load the lexicon and derive the lookup data used during analysis.

    Stores the result of ``download_lex()`` in ``_lex``, its keys as a set in
    ``_lex_vocab``, and the English NLTK stop words plus the empty string
    in ``_stop_words``.
    '''
    lexicon = self.download_lex()
    self._lex = lexicon
    self._lex_vocab = set(lexicon.keys())
    # The empty string is listed so that empty tokens are filtered out too.
    self._stop_words = stopwords.words('english') + ['']
def clean_str(self, string):
string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
@ -67,7 +76,7 @@ class DepecheMood(TextBox, EmotionPlugin):
def preprocess(self, text):
    '''Split *text* into tokens, dropping noise characters and stop words.

    The text is first normalised with ``clean_str``, then passed through
    ``_denoise``, split on single spaces and filtered against
    ``_stop_words``.

    :param text: the raw input string, or ``None``.
    :return: a list of tokens, or ``None`` when *text* is ``None``.
    '''
    if text is None:
        return None
    # Only the ``_denoise`` callable is used; the older
    # ``.translate(self._noise)`` variant is superseded by it.
    cleaned = self._denoise(self.clean_str(text))
    return [tok for tok in cleaned.split(' ') if tok not in self._stop_words]

View File

@ -1,6 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import sys
import string
import nltk
import pickle
@ -14,6 +15,9 @@ from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment
if sys.version_info[0] >= 3:
unicode = str
class SentimentBasic(SentimentPlugin):
'''
@ -43,7 +47,7 @@ class SentimentBasic(SentimentPlugin):
def _load_pos_tagger(self):
    '''Resolve the tagger file path and return the unpickled tagger.

    ``pos_path`` is replaced by whatever ``find_file`` returns for it, and
    the file at that location is then unpickled.

    :return: the object stored in the pickle file.
    '''
    self.pos_path = self.find_file(self.pos_path)
    # Pickle streams are binary data: text mode ('r') makes pickle.load
    # fail on Python 3, so the file is always opened as 'rb'.
    # NOTE(security): unpickling can execute arbitrary code; only load
    # tagger files that come from a trusted source.
    with open(self.pos_path, 'rb') as tagger_file:
        return pickle.load(tagger_file)

View File

@ -62,7 +62,7 @@ class SentiWordNet(object):
senti_scores = []
synsets = wordnet.synsets(word,pos)
for synset in synsets:
if self.pos_synset.has_key((synset.pos(), synset.offset())):
if (synset.pos(), synset.offset()) in self.pos_synset:
pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())]
senti_scores.append({"pos":pos_val,"neg":neg_val,\
"obj": 1.0 - (pos_val - neg_val),'synset':synset})

View File

@ -41,7 +41,6 @@ class TaigerPlugin3cats(SentimentPlugin):
value = 1
else:
raise ValueError('unknown polarity: {}'.format(value))
print(value, 'whatsup')
return polarity, value
def analyse_entry(self, entry, params):

View File

@ -17,10 +17,15 @@ For example:
'''
import os, math, re, sys, fnmatch, string
reload(sys)
import codecs
def make_lex_dict(f):
    '''Load a tab-separated lexicon file into a ``{token: rating}`` dict.

    Only the first two tab-separated fields of each line are used: the
    first becomes the key and the second is converted to ``float``. Any
    further fields on the line are ignored. The file is decoded as
    ISO-8859-1.

    :param f: path of the lexicon file.
    :return: dict mapping each token to its rating as a float.
    :raises ValueError: if a line has fewer than two fields or its second
        field is not a valid number.
    '''
    lexicon = {}
    # codecs.open takes an explicit encoding on both Python 2 and 3.
    # A separate name is used for the handle so the path argument is not
    # rebound.
    with codecs.open(f, encoding='iso-8859-1') as lex_file:
        for line in lex_file:
            word, measure = line.strip().split('\t')[:2]
            # Store a number, as the one-line implementation this replaces
            # did, rather than the raw text field.
            lexicon[word] = float(measure)
    return lexicon
f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms
try:
@ -356,8 +361,8 @@ if __name__ == '__main__':
]
sentences.extend(tricky_sentences)
for sentence in sentences:
print sentence,
print(sentence)
ss = sentiment(sentence)
print "\t" + str(ss)
print("\t" + str(ss))
print "\n\n Done!"
print("\n\n Done!")