tweaks for py2/py3 compatibility

2026-06-27 08:01:58 +00:00 · 2019-01-09 19:29:24 +01:00
parent 80acb9307c
commit bb6f9ee367
7 changed files with 35 additions and 18 deletions
--- a/emotion-anew/emotion-anew.py
+++ b/emotion-anew/emotion-anew.py
@@ -82,7 +82,7 @@ class ANEW(SentimentPlugin):
        self._stopwords = stopwords.words('english')
        dictionary={}
        dictionary['es'] = {}
-        with self.open(self.anew_path_es,'rb') as tabfile:
+        with self.open(self.anew_path_es,'r') as tabfile:
            reader = csv.reader(tabfile, delimiter='\t')
            for row in reader:
                dictionary['es'][row[2]]={}
@@ -90,7 +90,7 @@ class ANEW(SentimentPlugin):
                dictionary['es'][row[2]]['A']=row[5]
                dictionary['es'][row[2]]['D']=row[7]
        dictionary['en'] = {}
-        with self.open(self.anew_path_en,'rb') as tabfile:
+        with self.open(self.anew_path_en,'r') as tabfile:
            reader = csv.reader(tabfile, delimiter='\t')
            for row in reader:
                dictionary['en'][row[0]]={}
--- a/emotion-anew/emotion-anew.pyc
+++ b/emotion-anew/emotion-anew.pyc
--- a/emotion-depechemood/depechemood_plugin.py
+++ b/emotion-depechemood/depechemood_plugin.py
@@ -3,6 +3,7 @@

 import os
 import re
+import sys
 import string
 import numpy as np
 import pandas as pd
@@ -12,6 +13,18 @@ from nltk.corpus import stopwords
 from senpy import EmotionPlugin, TextBox, models


+def ignore(dchars):
+    '''Return a function that strips every character in dchars from a string.'''
+    deletechars = "".join(dchars)
+    if sys.version_info[0] >= 3:
+        tbl = str.maketrans("", "", deletechars)  # py3: 3rd arg lists deletions
+        return lambda s: s.translate(tbl)
+
+    def strip_chars(s):  # py2: string.translate takes the deletion set directly
+        return string.translate(s, None, deletechars)
+    return strip_chars
+
+
 class DepecheMood(TextBox, EmotionPlugin):
    '''Plugin that uses the DepecheMood++ emotion lexicon.'''

@@ -32,19 +45,15 @@ class DepecheMood(TextBox, EmotionPlugin):
            'INSPIRED': 'wna:awe',
            'SAD': 'wna:sadness',
        }
-        self._noise = self.__noise() 
-        self._stop_words = stopwords.words('english') + ['']
+        self._denoise = ignore(set(string.punctuation)|set('«»'))
+        self._stop_words = []
        self._lex_vocab = None
        self._lex = None

-    def __noise(self):
-        noise = set(string.punctuation) | set('«»')
-        noise = {ord(c): None for c in noise}
-        return noise
-
    def activate(self):
        self._lex = self.download_lex()
        self._lex_vocab = set(list(self._lex.keys()))
+        self._stop_words = stopwords.words('english') + ['']

    def clean_str(self, string):
        string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string)
@@ -67,7 +76,7 @@ class DepecheMood(TextBox, EmotionPlugin):
    def preprocess(self, text):
        if text is None:
            return None
-        tokens = self.clean_str(text).translate(self._noise).split(' ')
+        tokens = self._denoise(self.clean_str(text)).split(' ')
        tokens = [tok for tok in tokens if tok not in self._stop_words]
        return tokens   

--- a/sentiment-basic/sentiment-basic.py
+++ b/sentiment-basic/sentiment-basic.py
@@ -1,6 +1,7 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 import os
+import sys
 import string
 import nltk
 import pickle
@@ -14,6 +15,9 @@ from os import path
 from senpy.plugins import SentimentPlugin, SenpyPlugin
 from senpy.models import Results, Entry, Sentiment

+if sys.version_info[0] >= 3:
+    unicode = str
+

 class SentimentBasic(SentimentPlugin):
    '''
@@ -43,7 +47,7 @@ class SentimentBasic(SentimentPlugin):

    def _load_pos_tagger(self):
        self.pos_path = self.find_file(self.pos_path)
-        with open(self.pos_path, 'r') as f:
+        with open(self.pos_path, 'rb') as f:
            tagger = pickle.load(f)
        return tagger

--- a/sentiment-basic/sentiwn.py
+++ b/sentiment-basic/sentiwn.py
@@ -62,7 +62,7 @@ class SentiWordNet(object):
        senti_scores = []
        synsets = wordnet.synsets(word,pos)
        for synset in synsets:
-            if self.pos_synset.has_key((synset.pos(), synset.offset())):
+            if (synset.pos(), synset.offset()) in self.pos_synset:
                pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())]
                senti_scores.append({"pos":pos_val,"neg":neg_val,\
                "obj": 1.0 - (pos_val - neg_val),'synset':synset})
--- a/sentiment-taiger/taiger3c_plugin.py
+++ b/sentiment-taiger/taiger3c_plugin.py
@@ -41,7 +41,6 @@ class TaigerPlugin3cats(SentimentPlugin):
            value = 1
        else:
            raise ValueError('unknown polarity: {}'.format(value))
-        print(value, 'whatsup')
        return polarity, value

    def analyse_entry(self, entry, params):
--- a/sentiment-vader/vaderSentiment.py
+++ b/sentiment-vader/vaderSentiment.py
@@ -17,10 +17,15 @@ For example:
 '''

 import os, math, re, sys, fnmatch, string 
-reload(sys)
+import codecs

 def make_lex_dict(f):
-    return dict(map(lambda (w, m): (w, float(m)), [wmsr.strip().split('\t')[0:2] for wmsr in open(f) ]))
+    '''Map each word in the tab-separated lexicon file f to its float rating.'''
+    maps = {}
+    with codecs.open(f, encoding='iso-8859-1') as lexicon:
+        for w, m in (line.strip().split('\t')[:2] for line in lexicon):
+            maps[w] = float(m)  # the replaced one-liner stored floats, not strings
+    return maps
    
 f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms
 try:
@@ -356,8 +361,8 @@ if __name__ == '__main__':
                        ]
    sentences.extend(tricky_sentences)
    for sentence in sentences:
-        print sentence,
+        print(sentence)
        ss = sentiment(sentence)
-        print "\t" + str(ss)
+        print("\t" + str(ss))
    
-    print "\n\n Done!"
+    print("\n\n Done!")