From b671ff51f9815861c28e6a90a1fa0ada9a46d91e Mon Sep 17 00:00:00 2001
From: militarpancho <militarpancho_93@msn.com>
Date: Fri, 14 Jul 2017 11:13:59 +0200
Subject: [PATCH] Add support for py3 in emotion-wnaffect; normalize polarity
 values in sentiment-basic and sentiment-140

---
 emotion-wnaffect/emotion-wnaffect.py    | 174 +++++++++++++++---------
 emotion-wnaffect/emotion-wnaffect.senpy |   3 +-
 emotion-wnaffect/emotion.py             |   4 +-
 sentiment-140/sentiment-140.py          |   4 +-
 sentiment-140/sentiment-140.senpy       |   2 +-
 sentiment-basic/sentiment-basic.py      |   6 +-
 sentiment-basic/sentiment-basic.senpy   |   4 +-
 7 files changed, 119 insertions(+), 78 deletions(-)

diff --git a/emotion-wnaffect/emotion-wnaffect.py b/emotion-wnaffect/emotion-wnaffect.py
index 42cc12c..6256883 100644
--- a/emotion-wnaffect/emotion-wnaffect.py
+++ b/emotion-wnaffect/emotion-wnaffect.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 from __future__ import division
 import re
 import nltk
@@ -9,32 +7,34 @@ import string
 import xml.etree.ElementTree as ET
 from nltk.corpus import stopwords
 from nltk.corpus import WordNetCorpusReader
+from nltk.stem import wordnet
 from emotion import Emotion as Emo
-from pattern.en import parse
-from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin
+from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
 from senpy.models import Results, EmotionSet, Entry, Emotion
 
 
 class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
-    
-  
     def _load_synsets(self, synsets_path):
         """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
         tree = ET.parse(synsets_path)
         root = tree.getroot()
-        pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
+        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
 
         synsets = {}
         for pos in ["noun", "adj", "verb", "adv"]:
             tag = pos_map[pos]
             synsets[tag] = {}
-            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
-                offset = int(elem.get("id")[2:])                
+            for elem in root.findall(
+                    ".//{0}-syn-list//{0}-syn".format(pos, pos)):
+                offset = int(elem.get("id")[2:])
                 if not offset: continue
                 if elem.get("categ"):
-                    synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None
+                    synsets[tag][offset] = Emo.emotions[elem.get(
+                        "categ")] if elem.get(
+                            "categ") in Emo.emotions else None
                 elif elem.get("noun-id"):
-                    synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
+                    synsets[tag][offset] = synsets[pos_map["noun"]][int(
+                        elem.get("noun-id")[2:])]
         return synsets
 
     def _load_emotions(self, hierarchy_path):
@@ -50,45 +50,59 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
                 Emo.emotions[name] = Emo(name, elem.get("isa"))
 
     def activate(self, *args, **kwargs):
-        
-        nltk.download('stopwords')
+
+        nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
         self._stopwords = stopwords.words('english')
-        #local_path=os.path.dirname(os.path.abspath(__file__))
-        self._categories = {'anger': ['general-dislike',],
-                            'fear': ['negative-fear',],
-                            'disgust': ['shame',],
-                            'joy': ['gratitude','affective','enthusiasm','love','joy','liking'],
-                            'sadness': ['ingrattitude','daze','humility','compassion','despair','anxiety','sadness']}
+        self._wnlemma = wordnet.WordNetLemmatizer()
+        self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
+        local_path = os.path.dirname(os.path.abspath(__file__))
+        self._categories = {
+            'anger': [
+                'general-dislike',
+            ],
+            'fear': [
+                'negative-fear',
+            ],
+            'disgust': [
+                'shame',
+            ],
+            'joy':
+            ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
+            'sadness': [
+                'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
+                'anxiety', 'sadness'
+            ]
+        }
 
-        self._wnaffect_mappings = {'anger': 'anger',
-                                   'fear': 'negative-fear',
-                                   'disgust': 'disgust',
-                                   'joy': 'joy',
-                                   'sadness': 'sadness'}
+        self._wnaffect_mappings = {
+            'anger': 'anger',
+            'fear': 'negative-fear',
+            'disgust': 'disgust',
+            'joy': 'joy',
+            'sadness': 'sadness'
+        }
 
+        self._load_emotions(local_path + self.hierarchy_path)
 
-        self._load_emotions(self.hierarchy_path)
-                
         if 'total_synsets' not in self.sh:
-            total_synsets = self._load_synsets(self.synsets_path)
+            total_synsets = self._load_synsets(local_path + self.synsets_path)
             self.sh['total_synsets'] = total_synsets
-        
+
         self._total_synsets = self.sh['total_synsets']
-        
-        if 'wn16' not in self.sh:
-            self._wn16_path = self.wn16_path
-            wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(self._wn16_path)), nltk.data.find(self._wn16_path))
-            self.sh['wn16'] = wn16
-        
-        self._wn16 = self.sh['wn16']
+
+        self._wn16_path = self.wn16_path
+        self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
+
 
     def deactivate(self, *args, **kwargs):
         self.save()
 
     def _my_preprocessor(self, text):
 
-        regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
-        regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
+        regHttp = re.compile(
+            '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
+        regHttps = re.compile(
+            '(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
         regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
         text = re.sub(regHttp, '', text)
         text = re.sub(regAt, '', text)
@@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
         unigrams_lemmas = []
         pos_tagged = []
         unigrams_words = []
-        sentences = parse(text,lemmata=True).split()
-        for sentence in sentences:
-            for token in sentence:
-                if token[0].lower() not in self._stopwords:
-                    unigrams_words.append(token[0].lower())
-                    unigrams_lemmas.append(token[4])  
-                    pos_tagged.append(token[1])        
+        tokens = text.split()
+        for token in nltk.pos_tag(tokens):
+            unigrams_words.append(token[0])
+            pos_tagged.append(token[1])
+            if token[1][0] in self._syntactics.keys():
+                unigrams_lemmas.append(
+                    self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
+                                                                       [0]]))
+            else:
+                unigrams_lemmas.append(token[0])
 
-        return unigrams_words,unigrams_lemmas,pos_tagged
+        return unigrams_words, unigrams_lemmas, pos_tagged
 
     def _find_ngrams(self, input_list, n):
         return zip(*[input_list[i:] for i in range(n)])
 
     def _clean_pos(self, pos_tagged):
 
-        pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
-        'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
+        pos_tags = {
+            'NN': 'NN',
+            'NNP': 'NN',
+            'NNP-LOC': 'NN',
+            'NNS': 'NN',
+            'JJ': 'JJ',
+            'JJR': 'JJ',
+            'JJS': 'JJ',
+            'RB': 'RB',
+            'RBR': 'RB',
+            'RBS': 'RB',
+            'VB': 'VB',
+            'VBD': 'VB',
+            'VGB': 'VB',
+            'VBN': 'VB',
+            'VBP': 'VB',
+            'VBZ': 'VB'
+        }
 
         for i in range(len(pos_tagged)):
             if pos_tagged[i] in pos_tags:
-                pos_tagged[i]=pos_tags[pos_tagged[i]]
+                pos_tagged[i] = pos_tags[pos_tagged[i]]
         return pos_tagged
-    
+
     def _extract_features(self, text):
 
-        feature_set={k:0 for k in self._categories}
-        ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text)
-        matches=0
-        pos_tagged=self._clean_pos(pos_tagged)
+        feature_set = {k: 0 for k in self._categories}
+        ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
+        matches = 0
+        pos_tagged = self._clean_pos(pos_tagged)
 
-        tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV}
+        tag_wn = {
+            'NN': self._wn16.NOUN,
+            'JJ': self._wn16.ADJ,
+            'VB': self._wn16.VERB,
+            'RB': self._wn16.ADV
+        }
         for i in range(len(pos_tagged)):
             if pos_tagged[i] in tag_wn:
-                synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]])   
+                synsets = self._wn16.synsets(ngrams_words[i],
+                                             tag_wn[pos_tagged[i]])
                 if synsets:
                     offset = synsets[0].offset()
                     if offset in self._total_synsets[pos_tagged[i]]:
                         if self._total_synsets[pos_tagged[i]][offset] is None:
                             continue
                         else:
-                            emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name
-                            matches+=1
+                            emotion = self._total_synsets[pos_tagged[i]][
+                                offset].get_level(5).name
+                            matches += 1
                             for i in self._categories:
                                 if emotion in self._categories[i]:
-                                    feature_set[i]+=1
+                                    feature_set[i] += 1
         if matches == 0:
-            matches=1                
+            matches = 1
 
         for i in feature_set:
-            feature_set[i] = (feature_set[i]/matches)*100
+            feature_set[i] = (feature_set[i] / matches) * 100
 
         return feature_set
 
@@ -166,19 +206,19 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
 
         text_input = entry.get("text", None)
 
-        text=self._my_preprocessor(text_input)
+        text = self._my_preprocessor(text_input)
 
-        feature_text=self._extract_features(text)
-
-        response = Results()
+        feature_text = self._extract_features(text)
 
         emotionSet = EmotionSet(id="Emotions0")
         emotions = emotionSet.onyx__hasEmotion
 
         for i in feature_text:
-            emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i],
-                                    onyx__hasEmotionIntensity=feature_text[i]))
+            emotions.append(
+                Emotion(
+                    onyx__hasEmotionCategory=self._wnaffect_mappings[i],
+                    onyx__hasEmotionIntensity=feature_text[i]))
 
         entry.emotions = [emotionSet]
 
-        yield entry
\ No newline at end of file
+        yield entry
diff --git a/emotion-wnaffect/emotion-wnaffect.senpy b/emotion-wnaffect/emotion-wnaffect.senpy
index e526f8f..0da2b88 100644
--- a/emotion-wnaffect/emotion-wnaffect.senpy
+++ b/emotion-wnaffect/emotion-wnaffect.senpy
@@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6
 requirements:
 - nltk>=3.0.5
 - lxml>=3.4.2
-- pattern
-async: false
\ No newline at end of file
+async: false
diff --git a/emotion-wnaffect/emotion.py b/emotion-wnaffect/emotion.py
index 31a4534..f1635fd 100644
--- a/emotion-wnaffect/emotion.py
+++ b/emotion-wnaffect/emotion.py
@@ -1,6 +1,4 @@
 
-# coding: utf-8
-
 """
 Clement Michard (c) 2015
 """
@@ -85,7 +83,7 @@ class Emotion:
             end_shape = '┐'
         else:
             end_shape = ''
-        print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)
+        print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
         for leaf in down:
             next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
             next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
diff --git a/sentiment-140/sentiment-140.py b/sentiment-140/sentiment-140.py
index dc345cb..b2d9259 100644
--- a/sentiment-140/sentiment-140.py
+++ b/sentiment-140/sentiment-140.py
@@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin):
         polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
                                                    ["polarity"]) * 0.25
         polarity = "marl:Neutral"
-        neutral_value = self.maxPolarityValue / 2.0
+        neutral_value = 0
         if polarity_value > neutral_value:
             polarity = "marl:Positive"
         elif polarity_value < neutral_value:
@@ -33,4 +33,4 @@ class Sentiment140Plugin(SentimentPlugin):
                             marl__polarityValue=polarity_value)
         entry.sentiments.append(sentiment)
 
-        yield entry
\ No newline at end of file
+        yield entry
diff --git a/sentiment-140/sentiment-140.senpy b/sentiment-140/sentiment-140.senpy
index 3766c98..b8e01be 100644
--- a/sentiment-140/sentiment-140.senpy
+++ b/sentiment-140/sentiment-140.senpy
@@ -14,5 +14,5 @@
      },
      "requirements": {},
      "maxPolarityValue": "1",
-     "minPolarityValue": "0"
+     "minPolarityValue": "-1"
 }
diff --git a/sentiment-basic/sentiment-basic.py b/sentiment-basic/sentiment-basic.py
index ccaea53..9dbe4a9 100644
--- a/sentiment-basic/sentiment-basic.py
+++ b/sentiment-basic/sentiment-basic.py
@@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin):
                 if n_pos == 0 and n_neg == 0:
                     g_score = 0.5
             polarity = 'marl:Neutral'
+            polarity_value = 0
             if g_score > 0.5:
                 polarity = 'marl:Positive'
+                polarity_value = 1
             elif g_score < 0.5:
                 polarity = 'marl:Negative'
-
+                polarity_value = -1
             opinion = Sentiment(id="Opinion0"+'_'+str(i),
                           marl__hasPolarity=polarity,
-                          marL__polarityValue=float("{0:.2f}".format(g_score)))
+                          marl__polarityValue=polarity_value)
 
 
             entry.sentiments.append(opinion)
diff --git a/sentiment-basic/sentiment-basic.senpy b/sentiment-basic/sentiment-basic.senpy
index f6d8a7d..04fd0e9 100644
--- a/sentiment-basic/sentiment-basic.senpy
+++ b/sentiment-basic/sentiment-basic.senpy
@@ -18,5 +18,7 @@
         },
     },
     "sentiword_path": "SentiWordNet_3.0.txt",
-    "pos_path": "unigram_spanish.pickle"
+    "pos_path": "unigram_spanish.pickle",
+    "maxPolarityValue": "1",
+    "minPolarityValue": "-1"
 }