Add support for py3 in emotion-wnaffect (replace the `pattern` parser with NLTK POS tagging and WordNet lemmatization)

Normalize polarity values in sentiment-basic and sentiment-140 (declared range is now -1 to 1, with 0 as the neutral value)
2025-08-23 18:12:20 +00:00 · 2017-07-14 11:13:59 +02:00
parent dee007eacf
commit b671ff51f9
7 changed files with 119 additions and 78 deletions
--- a/emotion-wnaffect/emotion-wnaffect.py
+++ b/emotion-wnaffect/emotion-wnaffect.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 from __future__ import division
 import re
 import nltk
@@ -9,32 +7,34 @@ import string
 import xml.etree.ElementTree as ET
 from nltk.corpus import stopwords
 from nltk.corpus import WordNetCorpusReader
+from nltk.stem import wordnet
 from emotion import Emotion as Emo
-from pattern.en import parse
-from senpy.plugins import EmotionPlugin, SenpyPlugin, ShelfMixin
+from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
 from senpy.models import Results, EmotionSet, Entry, Emotion


 class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
-    
-  
    def _load_synsets(self, synsets_path):
        """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
        tree = ET.parse(synsets_path)
        root = tree.getroot()
-        pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
+        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}

        synsets = {}
        for pos in ["noun", "adj", "verb", "adv"]:
            tag = pos_map[pos]
            synsets[tag] = {}
-            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
-                offset = int(elem.get("id")[2:])                
+            for elem in root.findall(
+                    ".//{0}-syn-list//{0}-syn".format(pos, pos)):
+                offset = int(elem.get("id")[2:])
                if not offset: continue
                if elem.get("categ"):
-                    synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None
+                    synsets[tag][offset] = Emo.emotions[elem.get(
+                        "categ")] if elem.get(
+                            "categ") in Emo.emotions else None
                elif elem.get("noun-id"):
-                    synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
+                    synsets[tag][offset] = synsets[pos_map["noun"]][int(
+                        elem.get("noun-id")[2:])]
        return synsets

    def _load_emotions(self, hierarchy_path):
@@ -50,45 +50,59 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
                Emo.emotions[name] = Emo(name, elem.get("isa"))

    def activate(self, *args, **kwargs):
-        
-        nltk.download('stopwords')
+
+        nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
        self._stopwords = stopwords.words('english')
-        #local_path=os.path.dirname(os.path.abspath(__file__))
-        self._categories = {'anger': ['general-dislike',],
-                            'fear': ['negative-fear',],
-                            'disgust': ['shame',],
-                            'joy': ['gratitude','affective','enthusiasm','love','joy','liking'],
-                            'sadness': ['ingrattitude','daze','humility','compassion','despair','anxiety','sadness']}
+        self._wnlemma = wordnet.WordNetLemmatizer()
+        self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
+        local_path = os.path.dirname(os.path.abspath(__file__))
+        self._categories = {
+            'anger': [
+                'general-dislike',
+            ],
+            'fear': [
+                'negative-fear',
+            ],
+            'disgust': [
+                'shame',
+            ],
+            'joy':
+            ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
+            'sadness': [
+                'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
+                'anxiety', 'sadness'
+            ]
+        }

-        self._wnaffect_mappings = {'anger': 'anger',
-                                   'fear': 'negative-fear',
-                                   'disgust': 'disgust',
-                                   'joy': 'joy',
-                                   'sadness': 'sadness'}
+        self._wnaffect_mappings = {
+            'anger': 'anger',
+            'fear': 'negative-fear',
+            'disgust': 'disgust',
+            'joy': 'joy',
+            'sadness': 'sadness'
+        }

+        self._load_emotions(local_path + self.hierarchy_path)

-        self._load_emotions(self.hierarchy_path)
-                
        if 'total_synsets' not in self.sh:
-            total_synsets = self._load_synsets(self.synsets_path)
+            total_synsets = self._load_synsets(local_path + self.synsets_path)
            self.sh['total_synsets'] = total_synsets
-        
+
        self._total_synsets = self.sh['total_synsets']
-        
-        if 'wn16' not in self.sh:
-            self._wn16_path = self.wn16_path
-            wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(self._wn16_path)), nltk.data.find(self._wn16_path))
-            self.sh['wn16'] = wn16
-        
-        self._wn16 = self.sh['wn16']
+
+        self._wn16_path = self.wn16_path
+        self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
+

    def deactivate(self, *args, **kwargs):
        self.save()

    def _my_preprocessor(self, text):

-        regHttp = re.compile('(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
-        regHttps = re.compile('(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
+        regHttp = re.compile(
+            '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
+        regHttps = re.compile(
+            '(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
        regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
        text = re.sub(regHttp, '', text)
        text = re.sub(regAt, '', text)
@@ -109,56 +123,82 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
        unigrams_lemmas = []
        pos_tagged = []
        unigrams_words = []
-        sentences = parse(text,lemmata=True).split()
-        for sentence in sentences:
-            for token in sentence:
-                if token[0].lower() not in self._stopwords:
-                    unigrams_words.append(token[0].lower())
-                    unigrams_lemmas.append(token[4])  
-                    pos_tagged.append(token[1])        
+        tokens = text.split()
+        for token in nltk.pos_tag(tokens):
+            unigrams_words.append(token[0])
+            pos_tagged.append(token[1])
+            if token[1][0] in self._syntactics.keys():
+                unigrams_lemmas.append(
+                    self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
+                                                                       [0]]))
+            else:
+                unigrams_lemmas.append(token[0])

-        return unigrams_words,unigrams_lemmas,pos_tagged
+        return unigrams_words, unigrams_lemmas, pos_tagged

    def _find_ngrams(self, input_list, n):
        return zip(*[input_list[i:] for i in range(n)])

    def _clean_pos(self, pos_tagged):

-        pos_tags={'NN':'NN', 'NNP':'NN','NNP-LOC':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB',
-        'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
+        pos_tags = {
+            'NN': 'NN',
+            'NNP': 'NN',
+            'NNP-LOC': 'NN',
+            'NNS': 'NN',
+            'JJ': 'JJ',
+            'JJR': 'JJ',
+            'JJS': 'JJ',
+            'RB': 'RB',
+            'RBR': 'RB',
+            'RBS': 'RB',
+            'VB': 'VB',
+            'VBD': 'VB',
+            'VGB': 'VB',
+            'VBN': 'VB',
+            'VBP': 'VB',
+            'VBZ': 'VB'
+        }

        for i in range(len(pos_tagged)):
            if pos_tagged[i] in pos_tags:
-                pos_tagged[i]=pos_tags[pos_tagged[i]]
+                pos_tagged[i] = pos_tags[pos_tagged[i]]
        return pos_tagged
-    
+
    def _extract_features(self, text):

-        feature_set={k:0 for k in self._categories}
-        ngrams_words,ngrams_lemmas,pos_tagged = self._extract_ngrams(text)
-        matches=0
-        pos_tagged=self._clean_pos(pos_tagged)
+        feature_set = {k: 0 for k in self._categories}
+        ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
+        matches = 0
+        pos_tagged = self._clean_pos(pos_tagged)

-        tag_wn={'NN':self._wn16.NOUN,'JJ':self._wn16.ADJ,'VB':self._wn16.VERB,'RB':self._wn16.ADV}
+        tag_wn = {
+            'NN': self._wn16.NOUN,
+            'JJ': self._wn16.ADJ,
+            'VB': self._wn16.VERB,
+            'RB': self._wn16.ADV
+        }
        for i in range(len(pos_tagged)):
            if pos_tagged[i] in tag_wn:
-                synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]])   
+                synsets = self._wn16.synsets(ngrams_words[i],
+                                             tag_wn[pos_tagged[i]])
                if synsets:
                    offset = synsets[0].offset()
                    if offset in self._total_synsets[pos_tagged[i]]:
                        if self._total_synsets[pos_tagged[i]][offset] is None:
                            continue
                        else:
-                            emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name
-                            matches+=1
+                            emotion = self._total_synsets[pos_tagged[i]][
+                                offset].get_level(5).name
+                            matches += 1
                            for i in self._categories:
                                if emotion in self._categories[i]:
-                                    feature_set[i]+=1
+                                    feature_set[i] += 1
        if matches == 0:
-            matches=1                
+            matches = 1

        for i in feature_set:
-            feature_set[i] = (feature_set[i]/matches)*100
+            feature_set[i] = (feature_set[i] / matches) * 100

        return feature_set

@@ -166,19 +206,19 @@ class EmotionTextPlugin(EmotionPlugin, ShelfMixin):

        text_input = entry.get("text", None)

-        text=self._my_preprocessor(text_input)
+        text = self._my_preprocessor(text_input)

-        feature_text=self._extract_features(text)
-
-        response = Results()
+        feature_text = self._extract_features(text)

        emotionSet = EmotionSet(id="Emotions0")
        emotions = emotionSet.onyx__hasEmotion

        for i in feature_text:
-            emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[i],
-                                    onyx__hasEmotionIntensity=feature_text[i]))
+            emotions.append(
+                Emotion(
+                    onyx__hasEmotionCategory=self._wnaffect_mappings[i],
+                    onyx__hasEmotionIntensity=feature_text[i]))

        entry.emotions = [emotionSet]

-        yield entry
+        yield entry
--- a/emotion-wnaffect/emotion-wnaffect.senpy
+++ b/emotion-wnaffect/emotion-wnaffect.senpy
@@ -22,5 +22,4 @@ onyx:usesEmotionModel: emoml:big6
 requirements:
 - nltk>=3.0.5
 - lxml>=3.4.2
- pattern
-async: false
+async: false
--- a/emotion-wnaffect/emotion.py
+++ b/emotion-wnaffect/emotion.py
@@ -1,6 +1,4 @@

-# coding: utf-8
-
 """
 Clement Michard (c) 2015
 """
@@ -85,7 +83,7 @@ class Emotion:
            end_shape = '┐'
        else:
            end_shape = ''
-        print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)
+        print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
        for leaf in down:
            next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
--- a/sentiment-140/sentiment-140.py
+++ b/sentiment-140/sentiment-140.py
@@ -22,7 +22,7 @@ class Sentiment140Plugin(SentimentPlugin):
        polarity_value = self.maxPolarityValue*int(res.json()["data"][0]
                                                   ["polarity"]) * 0.25
        polarity = "marl:Neutral"
-        neutral_value = self.maxPolarityValue / 2.0
+        neutral_value = 0
        if polarity_value > neutral_value:
            polarity = "marl:Positive"
        elif polarity_value < neutral_value:
@@ -33,4 +33,4 @@ class Sentiment140Plugin(SentimentPlugin):
                            marl__polarityValue=polarity_value)
        entry.sentiments.append(sentiment)

-        yield entry
+        yield entry
--- a/sentiment-140/sentiment-140.senpy
+++ b/sentiment-140/sentiment-140.senpy
@@ -14,5 +14,5 @@
     },
     "requirements": {},
     "maxPolarityValue": "1",
-     "minPolarityValue": "0"
+     "minPolarityValue": "-1"
 }
--- a/sentiment-basic/sentiment-basic.py
+++ b/sentiment-basic/sentiment-basic.py
@@ -131,14 +131,16 @@ class SentiTextPlugin(SentimentPlugin):
                if n_pos == 0 and n_neg == 0:
                    g_score = 0.5
            polarity = 'marl:Neutral'
+            polarity_value = 0
            if g_score > 0.5:
                polarity = 'marl:Positive'
+                polarity_value = 1
            elif g_score < 0.5:
                polarity = 'marl:Negative'
-
+                polarity_value = -1
            opinion = Sentiment(id="Opinion0"+'_'+str(i),
                          marl__hasPolarity=polarity,
-                          marL__polarityValue=float("{0:.2f}".format(g_score)))
+                          marl__polarityValue=polarity_value)


            entry.sentiments.append(opinion)
--- a/sentiment-basic/sentiment-basic.senpy
+++ b/sentiment-basic/sentiment-basic.senpy
@@ -18,5 +18,7 @@
        },
    },
    "sentiword_path": "SentiWordNet_3.0.txt",
-    "pos_path": "unigram_spanish.pickle"
+    "pos_path": "unigram_spanish.pickle",
+    "maxPolarityValue": "1",
+    "minPolarityValue": "-1"
 }