From bb6f9ee36799788aa9c08ae89b7682dc02d29222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Fernando=20S=C3=A1nchez?= Date: Wed, 9 Jan 2019 19:29:24 +0100 Subject: [PATCH] tweaks for py2/py3 compatibility --- emotion-anew/emotion-anew.py | 4 ++-- emotion-anew/emotion-anew.pyc | Bin 8767 -> 0 bytes emotion-depechemood/depechemood_plugin.py | 25 +++++++++++++++------- sentiment-basic/sentiment-basic.py | 6 +++++- sentiment-basic/sentiwn.py | 2 +- sentiment-taiger/taiger3c_plugin.py | 1 - sentiment-vader/vaderSentiment.py | 15 ++++++++----- 7 files changed, 35 insertions(+), 18 deletions(-) delete mode 100644 emotion-anew/emotion-anew.pyc diff --git a/emotion-anew/emotion-anew.py b/emotion-anew/emotion-anew.py index a91e8fa..37421d5 100644 --- a/emotion-anew/emotion-anew.py +++ b/emotion-anew/emotion-anew.py @@ -82,7 +82,7 @@ class ANEW(SentimentPlugin): self._stopwords = stopwords.words('english') dictionary={} dictionary['es'] = {} - with self.open(self.anew_path_es,'rb') as tabfile: + with self.open(self.anew_path_es,'r') as tabfile: reader = csv.reader(tabfile, delimiter='\t') for row in reader: dictionary['es'][row[2]]={} @@ -90,7 +90,7 @@ class ANEW(SentimentPlugin): dictionary['es'][row[2]]['A']=row[5] dictionary['es'][row[2]]['D']=row[7] dictionary['en'] = {} - with self.open(self.anew_path_en,'rb') as tabfile: + with self.open(self.anew_path_en,'r') as tabfile: reader = csv.reader(tabfile, delimiter='\t') for row in reader: dictionary['en'][row[0]]={} diff --git a/emotion-anew/emotion-anew.pyc b/emotion-anew/emotion-anew.pyc deleted file mode 100644 index c5cbdf540c165cf49eeeccb26d7719e255fee1f9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8767 zcmb_hO>7)Tc78oW4mlJlQW7PK`fW@88EMTSCCjv}XuXs~?T@j`wVMLzjFuU+x{JdW zd%A~RH6+I&ksl}71V|ttw_W6vJ>`%E_7>!rV~`-H#h!i6B|wmK^1W9*KN4*Lv_j5Q zSJkVZ*RQ_!Q*Vm@Jux=*@Y<7r^1mXUOZe(L4pE4YLordUS9hr9bo&cbD|G8csujES zA*u~^>%&wV?$%3GD|PE5^lX%#m8mw??KwfU6H+fwJWkCCs!fo?926-&NzF;BP1^Pl z#SS&6s5WKW!_q!QwNtiTqWCm5&rt1*ZI4hqP0h1ZJJ)N^Nc+5Pk5W8K%?nh!K+TI( zyJ&mM6knp|WvX4K<`t@4q2^VpU8SZ=H5VhsNL`>!h~fnO8PS(Szh8TUg|%84hxDTV z#otN=A|z8r@#82_ezv;y&~}|f*K!n?H5HgNi@s2q?H@t^)2Na8O>H|yd4X)OhgZ`k zZSAF5m=798SIf_|suXzmB#Kp)n5c>3)41J;5_7`#wK}$48RCJwo2PU;HrkA#{-~Ln zC{0$CF}&oX#AF>AloJfky1(*hy$2Sa^#EVJi=7owDXoWYiSqkyo;80$KEL{@*0t zJba5n>=V9CA(jl^p%Amf3WYcDxJe<82e!XMwQ&mHr3R!kLE#)Dr-m1*N@bG5d8y#t z7Nl~D!dp_oYu=X184BN%$~1*{q=I*TpOGCwri&DQ0Infv`6vitnr6ZeX%RbyAKAxU zu=INvh4J@}j=%n@@!vf<{_3m7e|&WO_g*!A=@{c1Ff23VtybFxiD=;iI1KeZo283E#+#c8xO&Tp&KfrG9H2Xds|Vk)#Cw7 z263vv0yZ(sGnQ_vs{6iwW|A5mYoR1j$lD` z&j1pLj62AwR%%-q>z=Lf#dBD=+Vq5DrK{OgE#a86pbb_u8@Pq)`_)^T&2_%UoS&#Y z!!|eelSUg18@AxF7(qXdd|sHznc8VbnFm`h0b8o>l z@2N0N!nAUGVc~-t)j;p+3oi~}dEQ>QwQxUADX{mu3nC95s zv>zr)YkeETb96RzR1G6jZMT|LrRUScFgPPr_Y!||6E1N+(bwe_^qu2Q3+>zKyT_fF zsD^K%T{Xuqt{09w9iq_pjyow)ZIi*4OVn`!2!Z8IP|4M2SoJ(MMQ^1E)T?a-$U=SBGaCU7yq(A&;N!FhX@#ZR-%Kh zJO9#g4u@$!p@U&s-q@tWQh$xv131YN&EDhZ2+i7+yS=mU{X!`prHon1iKCQpy0^1N z>+2!WXP@WXPGEG8TLcb8IzG1(*a}>p%((^E%x%B7$L-`%O3v-nQ3`WAY{lA`lz3;u@fA=|-v%Qstf7&qPyM#}I?nes!G zy*$Rom-W5}s4zRSq*n2!RA5cCEU~>ORfb}Jf(XI*PdB4jd2jP%-Ze}2DtM3-`a7u2 z!(FsGbJla|`9Zt=LZjO1h@tjjba&x{Kf_$j>ymTYDLG@#dFQM%f%*mfzj>q`u$)=A zEW(HQ>VH5ns4$0xZnMDp1GUMi03Mpq5umX^2Z*wU=m0uaq(i6MDT?z_C&J>d843Iy znGSUt?95>=YK$$g?5c==MZX80E4k6V@x-6|;{IH1VeX@io7H|}e&c3kpt*7L#`nbZ z$UKc$;y4rE!G=qf{^A=qpWLj^zx~$rr8lwg%|Qah1E~srAG>#5sXbxl?A3(jpD95_ zkTzRDg#bdgpPDlu)u9&%uF{cGCfm`A0jHs-L}-VXaNZK z&1=AGiuq`E>a9$*vNV8`OS4V%>1h;Hm~{C6Z0~>45l}I#yI*mN5Cq&hpaR(Z6;uNC zBK@4?YFVJafGu`t88k+f;AyqO^H)4}Q2|>8mKAsLaqBlw01+EA3^z>uOQP)&%3yXJ zT61=QCpJ7hZNRYLwW}9=bp%S&<%0AvjL-ST8vIooz zYg{x-{KO46X~-RQ;6UXVpCK@n_x1Q>z4uW_5Tq^h{2tytUgDU>!Mnqg_{$9NXV^_O+yaL+~=o z8!;V@(0)J%BWOIK!%;ebTPV^joGlh%wH}tUQ|xKk-wu)i2M5k1?;fIA>9Bqo4cRr_e_B=2p!?alh0Qjv}thX6KHCD~0Z~O?=+-e zNVnq1h+^gRM7ra2Pi5XUjFi*mt3nc57t}vQUWkKi$yT%zo8hPnAKGwwq}SK?R;^Um zjo)Y>?&f3a-W+U5-famBCf!zvg$M>78y&(_9Tp^m|EMRDkLmC_9H&#E1P@Yj#+{3W z^Mwg`ky&TPIptg`2!Qy-dH{L{4L}6w0Y2aha7fD=Q|v7O4V=i;KPb@76#aEyCq*3@ zh+jl4Yo<657M&3l5VsvAt+uHnU@)Q#iImZ6yf6Fw`eu}b`OAVKtFC0N)(cwkk08QK z!5RVgc$NKM3RrQ)*?7)c!l(y~8Nh%spb2j^dma%Y69CKs0z$0w&=w|eY}lVM%7l^J zG6J;tOhaq%nVx~d;4?h~VOphuAh+Qz&asBDXor}$opbzIhtO9Ad)8n!{`R*&DgDQP z{qeFu-3C!&L?nn};d>lJiOCU=b#LGn`cRTB(E+`anLeb&Hz7-4Lphskki(O^r_{7T z6e9FIks$RvnP5Y`ZbCs?9DvE}*Em&<1nYT-RPSA-pQBgvOOn8N3h?Fs>i~2KU=A3- ztbnZdVg)~hSRgJwS$jYNU4+*^LrgQw)($5XA&rGobdX%aqksooP0>D(%j4EC9&#Cw z2Pgi}8pQ)Czp{M{BmmJlZaXLJ<0LKOxRLy`p9VI^m`OSuqy1?IDggHdm;e=`W#k;5 z5Q9C=y$5`aDA0C^vJadCq@1Vd157-S6=vB~%VZuli$ot=(-|fro_~pWx=%cFgg9-9 zmbabXFkT0CM*ACfw!^+*=PvPUckU8j-Wi*(^KO&ksQ%SKZuD@1lEv;TgLXE5!EP0= z!&S>XA2klV%)-RP1AGv*p2IE6ii}&QY>V$aXq~n#UIndbw1%L1d`(%Az+z=Z)MRC4 zRhplCBF#@e@$#qb_8v&ndtkf02ey07cCS5H7V_E;x^27Y+JjGR|AU$+Hu9g)WZv7} zj(UpiUFN?viuce(FJ+~l-6@&%@y zsX9k96KGp^(RF2PS@RxoWZsW?9_v=_&8+t_sqy2k1%Z_bwsL37lRW(Tk~59Ux4aGU z&9djxa-Q|yp;IsNqR{<|&K>+uqBe`~6fQ-V@&9(Ob`ut&TzCf~#+;M#|3btR@LFG? zfHjBVXG07OSz7@NS?ghML=Lo!{Q>A=u316hP0-EPq!LH)h=5>KU`Sx<>%$yLunFOQ zhmAgF!V91i{$!Q|qZ06jCVLUI2C@oh4P+9~!hIY=@Z%E}0k|D(@M5D(&tMrwyF{eU z@kaF%T(0r&QL7a`<`s`ivDJ^P#n(1~jzT%!ymLXorWuN9(%HX@ylzf;2#3%}vyQ-2 ze;)$|vbW!yBjZ)(chkUsisW?0Su?(@bH*%pEQ0v|kM(lrBo0yk@b&rYWz6LlGKuBA zkFc!AF3$TyF3MaSa>1Hq7_+(m()Bz~*b)DW8`rMk8yJhDUhhlQV@Blwx)TT7G^;RY z)H@QGB51|$Y!XoMZLuyKwd=(Tm|8)2gOa~;e z-$HIyg?bdDBG+%aNIB~B-NqII{f@gAnJu^3ahra&tu^1(XmC?)4PSjo(|;cy~QoQ0_l$ZWhK>e!nhFzkbH=Xs`oLE;h25D z8CR9bl&U_$= 3: + tbl = str.maketrans("", "", deletechars) + ignore = lambda s: s.translate(tbl) + else: + from functools import partial + def ignore(s): + return string.translate(s, None, deletechars) + return ignore + + class DepecheMood(TextBox, EmotionPlugin): '''Plugin that uses the DepecheMood++ emotion lexicon.''' @@ -32,19 +45,15 @@ class DepecheMood(TextBox, EmotionPlugin): 'INSPIRED': 'wna:awe', 'SAD': 'wna:sadness', } - self._noise = self.__noise() - self._stop_words = stopwords.words('english') + [''] + self._denoise = ignore(set(string.punctuation)|set('«»')) + self._stop_words = [] self._lex_vocab = None self._lex = None - def __noise(self): - noise = set(string.punctuation) | set('«»') - noise = {ord(c): None for c in noise} - return noise - def activate(self): self._lex = self.download_lex() self._lex_vocab = set(list(self._lex.keys())) + self._stop_words = stopwords.words('english') + [''] def clean_str(self, string): string = re.sub(r"[^A-Za-z0-9().,!?\'\`]", " ", string) @@ -67,7 +76,7 @@ class DepecheMood(TextBox, EmotionPlugin): def preprocess(self, text): if text is None: return None - tokens = self.clean_str(text).translate(self._noise).split(' ') + tokens = self._denoise(self.clean_str(text)).split(' ') tokens = [tok for tok in tokens if tok not in self._stop_words] return tokens diff --git a/sentiment-basic/sentiment-basic.py b/sentiment-basic/sentiment-basic.py index 1883ed8..214bd4b 100644 --- a/sentiment-basic/sentiment-basic.py +++ b/sentiment-basic/sentiment-basic.py @@ -1,6 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- import os +import sys import string import nltk import pickle @@ -14,6 +15,9 @@ from os import path from senpy.plugins import SentimentPlugin, SenpyPlugin from senpy.models import Results, Entry, Sentiment +if sys.version_info[0] >= 3: + unicode = str + class SentimentBasic(SentimentPlugin): ''' @@ -43,7 +47,7 @@ class SentimentBasic(SentimentPlugin): def _load_pos_tagger(self): self.pos_path = self.find_file(self.pos_path) - with open(self.pos_path, 'r') as f: + with open(self.pos_path, 'rb') as f: tagger = pickle.load(f) return tagger diff --git a/sentiment-basic/sentiwn.py b/sentiment-basic/sentiwn.py index 0c08c31..6f192b5 100644 --- a/sentiment-basic/sentiwn.py +++ b/sentiment-basic/sentiwn.py @@ -62,7 +62,7 @@ class SentiWordNet(object): senti_scores = [] synsets = wordnet.synsets(word,pos) for synset in synsets: - if self.pos_synset.has_key((synset.pos(), synset.offset())): + if (synset.pos(), synset.offset()) in self.pos_synset: pos_val, neg_val = self.pos_synset[(synset.pos(), synset.offset())] senti_scores.append({"pos":pos_val,"neg":neg_val,\ "obj": 1.0 - (pos_val - neg_val),'synset':synset}) diff --git a/sentiment-taiger/taiger3c_plugin.py b/sentiment-taiger/taiger3c_plugin.py index 2c0d790..1278be5 100644 --- a/sentiment-taiger/taiger3c_plugin.py +++ b/sentiment-taiger/taiger3c_plugin.py @@ -41,7 +41,6 @@ class TaigerPlugin3cats(SentimentPlugin): value = 1 else: raise ValueError('unknown polarity: {}'.format(value)) - print(value, 'whatsup') return polarity, value def analyse_entry(self, entry, params): diff --git a/sentiment-vader/vaderSentiment.py b/sentiment-vader/vaderSentiment.py index 02658d2..31825a6 100644 --- a/sentiment-vader/vaderSentiment.py +++ b/sentiment-vader/vaderSentiment.py @@ -17,10 +17,15 @@ For example: ''' import os, math, re, sys, fnmatch, string -reload(sys) +import codecs def make_lex_dict(f): - return dict(map(lambda (w, m): (w, float(m)), [wmsr.strip().split('\t')[0:2] for wmsr in open(f) ])) + maps = {} + with codecs.open(f, encoding='iso-8859-1') as f: + for wmsr in f: + w, m = wmsr.strip().split('\t')[:2] + maps[w] = m + return maps f = 'vader_sentiment_lexicon.txt' # empirically derived valence ratings for words, emoticons, slang, swear words, acronyms/initialisms try: @@ -356,8 +361,8 @@ if __name__ == '__main__': ] sentences.extend(tricky_sentences) for sentence in sentences: - print sentence, + print(sentence) ss = sentiment(sentence) - print "\t" + str(ss) + print("\t" + str(ss)) - print "\n\n Done!" + print("\n\n Done!")