diff --git a/emotion-anew/emotion-anew.py b/emotion-anew/emotion-anew.py index a91e8fa..37421d5 100644 --- a/emotion-anew/emotion-anew.py +++ b/emotion-anew/emotion-anew.py @@ -82,7 +82,7 @@ class ANEW(SentimentPlugin): self._stopwords = stopwords.words('english') dictionary={} dictionary['es'] = {} - with self.open(self.anew_path_es,'rb') as tabfile: + with self.open(self.anew_path_es,'r') as tabfile: reader = csv.reader(tabfile, delimiter='\t') for row in reader: dictionary['es'][row[2]]={} @@ -90,7 +90,7 @@ class ANEW(SentimentPlugin): dictionary['es'][row[2]]['A']=row[5] dictionary['es'][row[2]]['D']=row[7] dictionary['en'] = {} - with self.open(self.anew_path_en,'rb') as tabfile: + with self.open(self.anew_path_en,'r') as tabfile: reader = csv.reader(tabfile, delimiter='\t') for row in reader: dictionary['en'][row[0]]={} diff --git a/emotion-anew/emotion-anew.pyc b/emotion-anew/emotion-anew.pyc deleted file mode 100644 index c5cbdf5..0000000 Binary files a/emotion-anew/emotion-anew.pyc and /dev/null differ diff --git a/emotion-depechemood/depechemood_plugin.py b/emotion-depechemood/depechemood_plugin.py index 25389be..9f7a883 100644 --- a/emotion-depechemood/depechemood_plugin.py +++ b/emotion-depechemood/depechemood_plugin.py @@ -3,6 +3,7 @@ import os import re +import sys import string import numpy as np import pandas as pd @@ -12,6 +13,18 @@ from nltk.corpus import stopwords from senpy import EmotionPlugin, TextBox, models +def ignore(dchars): + deletechars = "".join(dchars) + if sys.version_info[0] >= 3: + tbl = str.maketrans("", "", deletechars) + ignore = lambda s: s.translate(tbl) + else: + from functools import partial + def ignore(s): + return string.translate(s, None, deletechars) + return ignore + + class DepecheMood(TextBox, EmotionPlugin): '''Plugin that uses the DepecheMood++ emotion lexicon.''' @@ -32,19 +45,15 @@ class DepecheMood(TextBox, EmotionPlugin): 'INSPIRED': 'wna:awe', 'SAD': 'wna:sadness', } - self._noise = self.__noise() - self._stop_words = stopwords.words('english') + [''] + self._denoise = ignore(set(string.punctuation)|set('«»')) + self._stop_words = [] self._lex_vocab = None self._lex = None - def __noise(self): - noise = set(string.punctuation) | set('«»') - noise = {ord(c): None for c in noise} - return noise - def activate(self): self._lex = self.download_lex() self._lex_vocab = set(list(self._lex.keys())) + self._stop_words = stopwords.words('english') + [''] def clean_str(self, string): string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string) @@ -67,7 +76,7 @@ class DepecheMood(TextBox, EmotionPlugin): def preprocess(self, text): if text is None: return None - tokens = self.clean_str(text).translate(self._noise).split(' ') + tokens = self._denoise(self.clean_str(text)).split(' ') tokens = [tok for tok in tokens if tok not in self._stop_words] return tokens diff --git a/sentiment-basic/sentiment-basic.py b/sentiment-basic/sentiment-basic.py index 1883ed8..214bd4b 100644 --- a/sentiment-basic/sentiment-basic.py +++ b/sentiment-basic/sentiment-basic.py @@ -1,6 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- import os +import sys import string import nltk import pickle @@ -14,6 +15,9 @@ from os import path from senpy.plugins import SentimentPlugin, SenpyPlugin from senpy.models import Results, Entry, Sentiment +if sys.version_info[0] >= 3: + unicode = str + class SentimentBasic(SentimentPlugin): ''' @@ -43,7 +47,7 @@ class SentimentBasic(SentimentPlugin): def _load_pos_tagger(self): self.pos_path = self.find_file(self.pos_path) - with open(self.pos_path, 'r') as f: + with open(self.pos_path, 'rb') as f: tagger = pickle.load(f) return tagger diff --git a/sentiment-basic/sentiwn.py b/sentiment-basic/sentiwn.py index 0c08c31..6f192b5 100644 --- a/sentiment-basic/sentiwn.py +++ b/sentiment-basic/sentiwn.py @@ -62,7 +62,7 @@ class SentiWordNet(object): senti_scores = [] synsets = wordnet.synsets(word,pos) for synset in synsets: - if self.pos_synset.has_key((synset.pos(), synset.offset())): + if (synset.pos(), synset.offset()) in self.pos_synset: pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())] senti_scores.append({"pos":pos_val,"neg":neg_val,\ "obj": 1.0 - (pos_val - neg_val),'synset':synset}) diff --git a/sentiment-taiger/taiger3c_plugin.py b/sentiment-taiger/taiger3c_plugin.py index 2c0d790..1278be5 100644 --- a/sentiment-taiger/taiger3c_plugin.py +++ b/sentiment-taiger/taiger3c_plugin.py @@ -41,7 +41,6 @@ class TaigerPlugin3cats(SentimentPlugin): value = 1 else: raise ValueError('unknown polarity: {}'.format(value)) - print(value, 'whatsup') return polarity, value def analyse_entry(self, entry, params): diff --git a/sentiment-vader/vaderSentiment.py b/sentiment-vader/vaderSentiment.py index 02658d2..31825a6 100644 --- a/sentiment-vader/vaderSentiment.py +++ b/sentiment-vader/vaderSentiment.py @@ -17,10 +17,15 @@ For example: ''' import os, math, re, sys, fnmatch, string -reload(sys) +import codecs def make_lex_dict(f): - return dict(map(lambda (w, m): (w, float(m)), [wmsr.strip().split('\t')[0:2] for wmsr in open(f) ])) + maps = {} + with codecs.open(f, encoding='iso-8859-1') as f: + for wmsr in f: + w, m = wmsr.strip().split('\t')[:2] + maps[w] = m + return maps f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms try: @@ -356,8 +361,8 @@ if __name__ == '__main__': ] sentences.extend(tricky_sentences) for sentence in sentences: - print sentence, + print(sentence) ss = sentiment(sentence) - print "\t" + str(ss) + print("\t" + str(ss)) - print "\n\n Done!" + print("\n\n Done!")