mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 08:12:27 +00:00
tweaks for py2/py3 compatibility
This commit is contained in:
parent
80acb9307c
commit
bb6f9ee367
@ -82,7 +82,7 @@ class ANEW(SentimentPlugin):
|
|||||||
self._stopwords = stopwords.words('english')
|
self._stopwords = stopwords.words('english')
|
||||||
dictionary={}
|
dictionary={}
|
||||||
dictionary['es'] = {}
|
dictionary['es'] = {}
|
||||||
with self.open(self.anew_path_es,'rb') as tabfile:
|
with self.open(self.anew_path_es,'r') as tabfile:
|
||||||
reader = csv.reader(tabfile, delimiter='\t')
|
reader = csv.reader(tabfile, delimiter='\t')
|
||||||
for row in reader:
|
for row in reader:
|
||||||
dictionary['es'][row[2]]={}
|
dictionary['es'][row[2]]={}
|
||||||
@ -90,7 +90,7 @@ class ANEW(SentimentPlugin):
|
|||||||
dictionary['es'][row[2]]['A']=row[5]
|
dictionary['es'][row[2]]['A']=row[5]
|
||||||
dictionary['es'][row[2]]['D']=row[7]
|
dictionary['es'][row[2]]['D']=row[7]
|
||||||
dictionary['en'] = {}
|
dictionary['en'] = {}
|
||||||
with self.open(self.anew_path_en,'rb') as tabfile:
|
with self.open(self.anew_path_en,'r') as tabfile:
|
||||||
reader = csv.reader(tabfile, delimiter='\t')
|
reader = csv.reader(tabfile, delimiter='\t')
|
||||||
for row in reader:
|
for row in reader:
|
||||||
dictionary['en'][row[0]]={}
|
dictionary['en'][row[0]]={}
|
||||||
|
Binary file not shown.
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
import string
|
import string
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -12,6 +13,18 @@ from nltk.corpus import stopwords
|
|||||||
from senpy import EmotionPlugin, TextBox, models
|
from senpy import EmotionPlugin, TextBox, models
|
||||||
|
|
||||||
|
|
||||||
|
def ignore(dchars):
    """Build a function that removes a fixed set of characters from text.

    :param dchars: iterable of single-character strings to delete.
    :returns: a one-argument callable that takes a string and returns a copy
        with every character in *dchars* removed.

    The translation data is built once here, so the returned callable can be
    reused cheaply on many strings. Works on both Python 2 and Python 3.
    """
    deletechars = "".join(dchars)

    if sys.version_info[0] >= 3:
        table = str.maketrans("", "", deletechars)

        def strip_chars(s):
            return s.translate(table)
    else:
        # unicode.translate() only accepts a code point mapping, whereas
        # byte strings take (table, deletechars); handle both text types.
        unicode_table = dict((ord(c), None) for c in deletechars)

        def strip_chars(s):
            if isinstance(s, str):
                return s.translate(None, deletechars)
            return s.translate(unicode_table)

    return strip_chars
|
||||||
|
|
||||||
|
|
||||||
class DepecheMood(TextBox, EmotionPlugin):
|
class DepecheMood(TextBox, EmotionPlugin):
|
||||||
'''Plugin that uses the DepecheMood++ emotion lexicon.'''
|
'''Plugin that uses the DepecheMood++ emotion lexicon.'''
|
||||||
|
|
||||||
@ -32,19 +45,15 @@ class DepecheMood(TextBox, EmotionPlugin):
|
|||||||
'INSPIRED': 'wna:awe',
|
'INSPIRED': 'wna:awe',
|
||||||
'SAD': 'wna:sadness',
|
'SAD': 'wna:sadness',
|
||||||
}
|
}
|
||||||
self._noise = self.__noise()
|
self._denoise = ignore(set(string.punctuation)|set('«»'))
|
||||||
self._stop_words = stopwords.words('english') + ['']
|
self._stop_words = []
|
||||||
self._lex_vocab = None
|
self._lex_vocab = None
|
||||||
self._lex = None
|
self._lex = None
|
||||||
|
|
||||||
def __noise(self):
|
|
||||||
noise = set(string.punctuation) | set('«»')
|
|
||||||
noise = {ord(c): None for c in noise}
|
|
||||||
return noise
|
|
||||||
|
|
||||||
def activate(self):
|
def activate(self):
|
||||||
self._lex = self.download_lex()
|
self._lex = self.download_lex()
|
||||||
self._lex_vocab = set(list(self._lex.keys()))
|
self._lex_vocab = set(list(self._lex.keys()))
|
||||||
|
self._stop_words = stopwords.words('english') + ['']
|
||||||
|
|
||||||
def clean_str(self, string):
|
def clean_str(self, string):
|
||||||
string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
|
string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
|
||||||
@ -67,7 +76,7 @@ class DepecheMood(TextBox, EmotionPlugin):
|
|||||||
def preprocess(self, text):
|
def preprocess(self, text):
|
||||||
if text is None:
|
if text is None:
|
||||||
return None
|
return None
|
||||||
tokens = self.clean_str(text).translate(self._noise).split(' ')
|
tokens = self._denoise(self.clean_str(text)).split(' ')
|
||||||
tokens = [tok for tok in tokens if tok not in self._stop_words]
|
tokens = [tok for tok in tokens if tok not in self._stop_words]
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import string
|
import string
|
||||||
import nltk
|
import nltk
|
||||||
import pickle
|
import pickle
|
||||||
@ -14,6 +15,9 @@ from os import path
|
|||||||
from senpy.plugins import SentimentPlugin, SenpyPlugin
|
from senpy.plugins import SentimentPlugin, SenpyPlugin
|
||||||
from senpy.models import Results, Entry, Sentiment
|
from senpy.models import Results, Entry, Sentiment
|
||||||
|
|
||||||
|
if sys.version_info[0] >= 3:
|
||||||
|
unicode = str
|
||||||
|
|
||||||
|
|
||||||
class SentimentBasic(SentimentPlugin):
|
class SentimentBasic(SentimentPlugin):
|
||||||
'''
|
'''
|
||||||
@ -43,7 +47,7 @@ class SentimentBasic(SentimentPlugin):
|
|||||||
|
|
||||||
def _load_pos_tagger(self):
|
def _load_pos_tagger(self):
|
||||||
self.pos_path = self.find_file(self.pos_path)
|
self.pos_path = self.find_file(self.pos_path)
|
||||||
with open(self.pos_path, 'r') as f:
|
with open(self.pos_path, 'rb') as f:
|
||||||
tagger = pickle.load(f)
|
tagger = pickle.load(f)
|
||||||
return tagger
|
return tagger
|
||||||
|
|
||||||
|
@ -62,7 +62,7 @@ class SentiWordNet(object):
|
|||||||
senti_scores = []
|
senti_scores = []
|
||||||
synsets = wordnet.synsets(word,pos)
|
synsets = wordnet.synsets(word,pos)
|
||||||
for synset in synsets:
|
for synset in synsets:
|
||||||
if self.pos_synset.has_key((synset.pos(), synset.offset())):
|
if (synset.pos(), synset.offset()) in self.pos_synset:
|
||||||
pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())]
|
pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())]
|
||||||
senti_scores.append({"pos":pos_val,"neg":neg_val,\
|
senti_scores.append({"pos":pos_val,"neg":neg_val,\
|
||||||
"obj": 1.0 - (pos_val - neg_val),'synset':synset})
|
"obj": 1.0 - (pos_val - neg_val),'synset':synset})
|
||||||
|
@ -41,7 +41,6 @@ class TaigerPlugin3cats(SentimentPlugin):
|
|||||||
value = 1
|
value = 1
|
||||||
else:
|
else:
|
||||||
raise ValueError('unknown polarity: {}'.format(value))
|
raise ValueError('unknown polarity: {}'.format(value))
|
||||||
print(value, 'whatsup')
|
|
||||||
return polarity, value
|
return polarity, value
|
||||||
|
|
||||||
def analyse_entry(self, entry, params):
|
def analyse_entry(self, entry, params):
|
||||||
|
@ -17,10 +17,15 @@ For example:
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import os, math, re, sys, fnmatch, string
|
import os, math, re, sys, fnmatch, string
|
||||||
reload(sys)
|
import codecs
|
||||||
|
|
||||||
def make_lex_dict(f):
    """Load a tab-separated lexicon file into a ``{token: valence}`` dict.

    :param f: path of the lexicon file. Each non-blank line holds a token and
        its valence in the first two tab-separated columns; further columns
        are ignored.
    :returns: dict mapping each token (text) to its valence as a ``float``.
    :raises ValueError: if a non-blank line has fewer than two columns or the
        valence column is not numeric.

    The file is decoded as ISO-8859-1 so the result is the same on Python 2
    and Python 3.
    """
    lexicon = {}
    with codecs.open(f, encoding='iso-8859-1') as lex_file:
        for line in lex_file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            word, measure = line.split('\t')[:2]
            # Keep valences numeric, as callers do arithmetic on them.
            lexicon[word] = float(measure)
    return lexicon
|
||||||
|
|
||||||
f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms
|
f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms
|
||||||
try:
|
try:
|
||||||
@ -356,8 +361,8 @@ if __name__ == '__main__':
|
|||||||
]
|
]
|
||||||
sentences.extend(tricky_sentences)
|
sentences.extend(tricky_sentences)
|
||||||
for sentence in sentences:
|
for sentence in sentences:
|
||||||
print sentence,
|
print(sentence)
|
||||||
ss = sentiment(sentence)
|
ss = sentiment(sentence)
|
||||||
print "\t" + str(ss)
|
print("\t" + str(ss))
|
||||||
|
|
||||||
print "\n\n Done!"
|
print("\n\n Done!")
|
||||||
|
Loading…
Reference in New Issue
Block a user