Add plugins as submodules

2025-12-04 12:58:17 +00:00 · 2018-05-14 11:32:56 +02:00
parent 582ae8a340
commit fbde8a9462
40 changed files with 28 additions and 222452 deletions
--- a/1
+++ b/1
--- a/emotion-wnaffect/README.md
+++ b/emotion-wnaffect/README.md
@@ -1,41 +0,0 @@
-# WordNet-Affect plugin
-
-This plugin uses WordNet-Affect (http://wndomains.fbk.eu/wnaffect.html) to calculate the percentage of each emotion. The plugin classifies text among five different emotions: anger, fear, disgust, joy and sadness. An emotion mapping is used to widen the set of WordNet-Affect labels that count towards each emotion:
-
- anger : general-dislike
- fear : negative-fear
- disgust : shame
- joy : gratitude, affective, enthusiasm, love, joy, liking
- sadness : ingrattitude, daze, humility, compassion, despair, anxiety, sadness
-
-## Usage
-
-The parameters accepted are:
-
- Language: English (en).
- Input: Text to analyse.
-
-Example request: 
-```
-http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-wnaffect&language=en&input=I%20love%20Madrid
-```
-
-Example response: This plugin follows the standard senpy plugin response format. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
-
-
-The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM for semantic web.
-
-This plugin uses WNAffect labels for emotion analysis.
-
-The emotion-wnaffect.senpy file can be copied and modified to use different versions of wnaffect with the same python code.
-
-
-## Known issues
-
-  This plugin uses the pattern library, which means it will only run on python 2.7
-  Wnaffect and corpora files are not included in the repository, but can be easily added either to the docker image (using a volume) or in a new docker image. 
-  You can download Wordnet 1.6 here: <http://wordnetcode.princeton.edu/1.6/wn16.unix.tar.gz> and extract the dict folder. 
-  The hierarchy and synsets files can be found here: <https://github.com/larsmans/wordnet-domains-sentiwords/tree/master/wn-domains/wn-affect-1.1>
-
-![alt GSI Logo][logoGSI]
-[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
--- a/emotion-wnaffect/emotion-wnaffect.py
+++ b/emotion-wnaffect/emotion-wnaffect.py
@@ -1,224 +0,0 @@
-from __future__ import division
-import re
-import nltk
-import logging
-import os
-import string
-import xml.etree.ElementTree as ET
-from nltk.corpus import stopwords
-from nltk.corpus import WordNetCorpusReader
-from nltk.stem import wordnet
-from emotion import Emotion as Emo
-from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
-from senpy.models import Results, EmotionSet, Entry, Emotion
-
-
-class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
-    """Senpy emotion plugin that scores text against WordNet-Affect.
-
-    Every token is POS-tagged and looked up in WordNet 1.6.  When the first
-    synset of a token is annotated in WordNet-Affect, the token counts
-    towards the categories of ``self._categories`` that list its level-5
-    emotion.  The result is, per category, the percentage of matched tokens.
-    """
-
-    # A URL is removed as a whole.  The former patterns used an unescaped
-    # '.', which could also swallow the whitespace after the URL and the
-    # word that followed it (e.g. "http://t.co/abc hello" lost "hello").
-    _URL_RE = re.compile(r'https?://\S+')
-    _MENTION_RE = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
-    _PUNCTUATION = frozenset(string.punctuation)
-
-    # Fine-grained POS tag -> coarse tag used as key of the synsets table.
-    # 'VBG' (gerund) is the Penn Treebank tag; the historical 'VGB' key is
-    # kept so that nothing relying on it changes.
-    _FLAT_POS = {
-        'NN': 'NN',
-        'NNP': 'NN',
-        'NNP-LOC': 'NN',
-        'NNS': 'NN',
-        'JJ': 'JJ',
-        'JJR': 'JJ',
-        'JJS': 'JJ',
-        'RB': 'RB',
-        'RBR': 'RB',
-        'RBS': 'RB',
-        'VB': 'VB',
-        'VBD': 'VB',
-        'VBG': 'VB',
-        'VGB': 'VB',
-        'VBN': 'VB',
-        'VBP': 'VB',
-        'VBZ': 'VB',
-    }
-
-    def _load_synsets(self, synsets_path):
-        """Return a dictionary POS tag -> synset offset -> emotion.
-
-        The value is the ``Emo`` object registered under the synset's
-        ``categ`` attribute, or ``None`` when that category is unknown.
-        Entries without ``categ`` inherit the value of the noun synset
-        referenced by ``noun-id`` (``None`` if that noun is not loaded).
-
-        synsets_path -- path of the WordNet-Affect synsets xml (str)
-        """
-        root = ET.parse(synsets_path).getroot()
-        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
-
-        synsets = {}
-        # Nouns go first: the other parts of speech refer back to them.
-        for pos in ["noun", "adj", "verb", "adv"]:
-            by_offset = synsets[pos_map[pos]] = {}
-            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
-                # Ids carry a two character prefix before the offset.
-                offset = int(elem.get("id")[2:])
-                if not offset:
-                    continue
-                categ = elem.get("categ")
-                noun_id = elem.get("noun-id")
-                if categ:
-                    by_offset[offset] = Emo.emotions.get(categ)
-                elif noun_id:
-                    by_offset[offset] = synsets[pos_map["noun"]].get(
-                        int(noun_id[2:]))
-        return synsets
-
-    def _load_emotions(self, hierarchy_path):
-        """Load the hierarchy of emotions from the WordNet-Affect xml.
-
-        Every ``categ`` element is registered in ``Emo.emotions`` under its
-        name; all but "root" are linked to the parent named by ``isa``.
-
-        hierarchy_path -- path of the hierarchy xml (str)
-        """
-        root = ET.parse(hierarchy_path).getroot()
-        for elem in root.findall("categ"):
-            name = elem.get("name")
-            if name == "root":
-                Emo.emotions["root"] = Emo("root")
-            else:
-                Emo.emotions[name] = Emo(name, elem.get("isa"))
-
-    def activate(self, *args, **kwargs):
-        """Download the NLTK data, build the lookup tables and open WordNet 1.6.
-
-        ``hierarchy_path``, ``synsets_path`` and ``wn16_path`` are read from
-        the plugin instance (presumably filled in from the .senpy definition;
-        confirm against senpy) and are appended to the directory of this file.
-        """
-        nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
-        self._stopwords = stopwords.words('english')
-        self._wnlemma = wordnet.WordNetLemmatizer()
-        # First letter of a POS tag -> WordNet part of speech.
-        self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
-        local_path = os.path.dirname(os.path.abspath(__file__))
-        # Output category -> level-5 WordNet-Affect labels counted for it.
-        # NOTE(review): 'ingrattitude' looks like a misspelling of
-        # 'ingratitude'; confirm against the names in the hierarchy file
-        # before changing it.
-        self._categories = {
-            'anger': [
-                'general-dislike',
-            ],
-            'fear': [
-                'negative-fear',
-            ],
-            'disgust': [
-                'shame',
-            ],
-            'joy':
-            ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
-            'sadness': [
-                'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
-                'anxiety', 'sadness'
-            ]
-        }
-
-        # Output category -> value reported as onyx:hasEmotionCategory.
-        self._wnaffect_mappings = {
-            'anger': 'anger',
-            'fear': 'negative-fear',
-            'disgust': 'disgust',
-            'joy': 'joy',
-            'sadness': 'sadness'
-        }
-
-        self._load_emotions(local_path + self.hierarchy_path)
-
-        # The parsed synsets are cached in the shelf and only built once.
-        if 'total_synsets' not in self.sh:
-            self.sh['total_synsets'] = self._load_synsets(
-                local_path + self.synsets_path)
-
-        self._total_synsets = self.sh['total_synsets']
-
-        self._wn16_path = self.wn16_path
-        wn16_dir = local_path + self._wn16_path
-        self._wn16 = WordNetCorpusReader(
-            os.path.abspath(wn16_dir), nltk.data.find(wn16_dir))
-
-    def deactivate(self, *args, **kwargs):
-        """Persist the shelf when the plugin is deactivated."""
-        self.save()
-
-    def _my_preprocessor(self, text):
-        """Return text without URLs, @mentions, "RT : ", digits and punctuation.
-
-        text -- raw input text (str)
-        """
-        text = self._URL_RE.sub('', text)
-        text = self._MENTION_RE.sub('', text)
-        # Runs after the mention removal so that "RT @user: " is caught too.
-        text = re.sub('RT : ', '', text)
-        text = re.sub('[0-9]', '', text)
-        return self._delete_punctuation(text)
-
-    def _delete_punctuation(self, text):
-        """Return text with every ``string.punctuation`` character removed."""
-        return ''.join(ch for ch in text if ch not in self._PUNCTUATION)
-
-    def _extract_ngrams(self, text):
-        """Tokenise text on whitespace and POS-tag it.
-
-        Returns three parallel lists: the words, their lemmas (the word
-        itself when the first letter of its tag is not in
-        ``self._syntactics``) and the POS tags.
-        """
-        unigrams_words = []
-        unigrams_lemmas = []
-        pos_tagged = []
-        for word, tag in nltk.pos_tag(text.split()):
-            unigrams_words.append(word)
-            pos_tagged.append(tag)
-            wn_pos = self._syntactics.get(tag[:1])
-            if wn_pos is None:
-                unigrams_lemmas.append(word)
-            else:
-                unigrams_lemmas.append(self._wnlemma.lemmatize(word, wn_pos))
-
-        return unigrams_words, unigrams_lemmas, pos_tagged
-
-    def _find_ngrams(self, input_list, n):
-        """Return the n-grams of input_list as tuples of n consecutive items."""
-        return zip(*[input_list[i:] for i in range(n)])
-
-    def _clean_pos(self, pos_tagged):
-        """Collapse fine-grained POS tags to NN/JJ/RB/VB, in place.
-
-        Tags without a mapping are left untouched.  The list passed in is
-        modified and also returned.
-        """
-        pos_tagged[:] = [self._FLAT_POS.get(tag, tag) for tag in pos_tagged]
-        return pos_tagged
-
-    def _extract_features(self, text):
-        """Return a dictionary category -> percentage of matched words.
-
-        A word matches when its first WordNet 1.6 synset has an emotion in
-        the loaded synsets table.  With no match every percentage is 0.
-        """
-        feature_set = {category: 0 for category in self._categories}
-        words, _lemmas, pos_tagged = self._extract_ngrams(text)
-        pos_tagged = self._clean_pos(pos_tagged)
-
-        tag_wn = {
-            'NN': self._wn16.NOUN,
-            'JJ': self._wn16.ADJ,
-            'VB': self._wn16.VERB,
-            'RB': self._wn16.ADV
-        }
-
-        matches = 0
-        for word, tag in zip(words, pos_tagged):
-            if tag not in tag_wn:
-                continue
-            synsets = self._wn16.synsets(word, tag_wn[tag])
-            if not synsets:
-                continue
-            # Only the first (most frequent) sense is considered.
-            emotion = self._total_synsets[tag].get(synsets[0].offset())
-            if emotion is None:
-                continue
-            label = emotion.get_level(5).name
-            matches += 1
-            for category, labels in self._categories.items():
-                if label in labels:
-                    feature_set[category] += 1
-
-        # Avoid dividing by zero when nothing matched.
-        if matches == 0:
-            matches = 1
-
-        for category in feature_set:
-            feature_set[category] = (feature_set[category] / matches) * 100
-
-        return feature_set
-
-    def analyse_entry(self, entry, params):
-        """Annotate entry with one EmotionSet and yield it.
-
-        The set holds one Emotion per category, with the computed percentage
-        as intensity.  A missing or empty text is analysed as "".
-        """
-        text_input = entry.get("text", None) or ""
-
-        text = self._my_preprocessor(text_input)
-
-        feature_text = self._extract_features(text)
-
-        emotionSet = EmotionSet(id="Emotions0")
-        emotions = emotionSet.onyx__hasEmotion
-
-        for category, intensity in feature_text.items():
-            emotions.append(
-                Emotion(
-                    onyx__hasEmotionCategory=self._wnaffect_mappings[category],
-                    onyx__hasEmotionIntensity=intensity))
-
-        entry.emotions = [emotionSet]
-
-        yield entry
--- a/emotion-wnaffect/emotion-wnaffect.senpy
+++ b/emotion-wnaffect/emotion-wnaffect.senpy
@@ -1,25 +0,0 @@
---
-name: emotion-wnaffect
-module: emotion-wnaffect
-description: 'Emotion classifier using WordNet-Affect to calculate the percentage
-  of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
-  or neutral. The only available language is English (en)'
-author: "@icorcuera @balkian"
-version: '0.2'
-extra_params:
-  language:
-    "@id": lang_wnaffect
-    aliases:
-    - language
-    - l
-    required: false
-    options:
-    - en
-synsets_path: "/data/emotion-wnaffect/a-synsets.xml"
-hierarchy_path: "/data/emotion-wnaffect/a-hierarchy.xml"
-wn16_path: "/data/emotion-wnaffect/wordnet1.6/dict"
-onyx:usesEmotionModel: emoml:big6
-requirements:
- nltk>=3.0.5
- lxml>=3.4.2
-async: false
--- a/emotion-wnaffect/emotion.py
+++ b/emotion-wnaffect/emotion.py
@@ -1,95 +0,0 @@
-
-"""
-Clement Michard (c) 2015
-"""
-
-class Emotion:
-    """A node of the emotion hierarchy.
-
-    Nodes are linked through ``parent`` and ``children``; ``level`` is the
-    depth of the node (0 for a node created without a parent).
-    """
-
-    emotions = {}  # name to emotion (str -> Emotion)
-
-    def __init__(self, name, parent_name=None):
-        """Initializes an Emotion object.
-            name -- name of the emotion (str)
-            parent_name -- name of the parent emotion (str); it must already
-                be registered in Emotion.emotions, otherwise KeyError is raised
-        """
-
-        self.name = name
-        self.parent = None
-        self.level = 0
-        self.children = []
-
-        if parent_name:
-            self.parent = Emotion.emotions[parent_name]
-            self.parent.children.append(self)
-            self.level = self.parent.level + 1
-
-    def get_level(self, level):
-        """Returns the ancestor of self at the given level.
-            level -- level in the hierarchy (int)
-
-        self is returned when it is already at or above that level; the
-        topmost ancestor is returned when level is negative.
-        """
-
-        em = self
-        # Stop at the top instead of dereferencing a missing parent.
-        while em.level > level and em.parent is not None:
-            em = em.parent
-        return em
-
-    def __str__(self):
-        """Returns the emotion string formatted."""
-
-        return self.name
-
-    def nb_children(self):
-        """Returns the size of the subtree rooted at the emotion, itself included."""
-
-        return sum(child.nb_children() for child in self.children) + 1
-
-    @staticmethod
-    def printTree(emotion=None, indent="", last='updown'):
-        """Prints the hierarchy of emotions.
-            emotion -- root emotion (Emotion); defaults to Emotion.emotions["root"]
-            indent -- prefix printed before the node (str)
-            last -- position of the node among its siblings: 'up' (first
-                above), 'down' (last below), 'updown' (top call) or '' (middle)
-        """
-
-        if not emotion:
-            emotion = Emotion.emotions["root"]
-
-        size_branch = {child: child.nb_children() for child in emotion.children}
-        leaves = sorted(emotion.children, key=lambda child: child.nb_children())
-        up, down = [], []
-        if leaves:
-            # The biggest branches go below the node until they outweigh
-            # what is left to be drawn above it.
-            while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves):
-                down.append(leaves.pop())
-            up = leaves
-
-        for position, leaf in enumerate(up):
-            next_last = 'up' if position == 0 else ''
-            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '│', " " * len(emotion.name))
-            Emotion.printTree(leaf, indent=next_indent, last=next_last)
-        if last == 'up':
-            start_shape = '┌'
-        elif last == 'down':
-            start_shape = '└'
-        elif last == 'updown':
-            start_shape = ' '
-        else:
-            start_shape = '├'
-        if up:
-            end_shape = '┤'
-        elif down:
-            end_shape = '┐'
-        else:
-            end_shape = ''
-        print('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
-        for position, leaf in enumerate(down):
-            next_last = 'down' if position == len(down) - 1 else ''
-            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
-            Emotion.printTree(leaf, indent=next_indent, last=next_last)
-
-
-
-
-
--- a/emotion-wnaffect/test_wna.py
+++ b/emotion-wnaffect/test_wna.py
@@ -1,42 +0,0 @@
-import os
-import logging
-logging.basicConfig()
-try:
-    import unittest.mock as mock
-except ImportError:
-    import mock
-from senpy.extensions import Senpy
-from flask import Flask
-import unittest
-
-class emoTextWAFTest(unittest.TestCase):
-    """Checks the dominant emotion the plugin reports for a few sentences."""
-
-    def setUp(self):
-        # Load only the plugins found next to this file into a Flask app.
-        self.app = Flask("test_plugin")
-        self.dir = os.path.dirname(__file__)
-        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
-        self.senpy.init_app(self.app)
-
-    def tearDown(self):
-        # NOTE(review): the plugin is looked up as "EmoTextWAF" here and in
-        # test_analyse; confirm this is the name senpy registers it under.
-        self.senpy.deactivate_plugin("EmoTextWAF", sync=True)
-
-    def test_analyse(self):
-        """The emotion with the highest intensity must be the expected one."""
-        plugin = self.senpy.plugins["EmoTextWAF"]
-        plugin.activate()
-
-        texts = {'I hate you': 'anger',
-                 'i am sad': 'sadness',
-                 'i am happy with my marks': 'joy',
-                 'This movie is scary': 'negative-fear'}
-
-        for text, expected in texts.items():
-            response = plugin.analyse(input=text)
-            emotionSet = response.entries[0].emotions[0]
-            max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity'])
-            # assertEqual survives "python -O" and reports which text failed.
-            self.assertEqual(
-                max_emotion['onyx:hasEmotionCategory'], expected,
-                'unexpected dominant emotion for {0!r}'.format(text))
-
-        plugin.deactivate()
-
-# Run the tests of this module when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/emotion-wnaffect/wnaffect.py
+++ b/emotion-wnaffect/wnaffect.py
@@ -1,92 +0,0 @@
-
-# coding: utf-8
-
-# In[1]:
-
-
-# -*- coding: utf-8 -*-
-"""
-Clement Michard (c) 2015
-"""
-
-import os
-import sys
-import nltk
-from emotion import Emotion
-from nltk.corpus import WordNetCorpusReader
-import xml.etree.ElementTree as ET
-
-class WNAffect:
-    """WordNet-Affect resource: maps a word and its POS tag to an emotion."""
-
-    def __init__(self, wordnet16_dir, wn_domains_dir):
-        """Initializes the WordNet-Affect object.
-            wordnet16_dir -- directory holding the WordNet 1.6 "dict" folder,
-                relative to the current working directory (str)
-            wn_domains_dir -- directory holding a-hierarchy.xml and
-                a-synsets.xml (str)
-        """
-
-        cwd = os.getcwd()
-        # Register the working directory only once, however many instances
-        # are created.
-        if cwd not in nltk.data.path:
-            nltk.data.path.append(cwd)
-        wn16_path = "{0}/dict".format(wordnet16_dir)
-        self.wn16 = WordNetCorpusReader(os.path.abspath("{0}/{1}".format(cwd, wn16_path)), nltk.data.find(wn16_path))
-        # Fine-grained POS tag -> coarse tag.  'VBG' (gerund) is the Penn
-        # Treebank tag; the historical 'VGB' key is kept as it was.
-        self.flat_pos = {'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VBG':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
-        self.wn_pos = {'NN':self.wn16.NOUN, 'JJ':self.wn16.ADJ, 'VB':self.wn16.VERB, 'RB':self.wn16.ADV}
-        # The hierarchy must be loaded first: the synsets refer to it.
-        self._load_emotions(wn_domains_dir)
-        self.synsets = self._load_synsets(wn_domains_dir)
-
-    def _load_synsets(self, wn_domains_dir):
-        """Returns a dictionary POS tag -> synset offset -> emotion.
-
-        The value is the Emotion registered under the synset's "categ"
-        attribute, or None when that category is unknown.  Entries without
-        "categ" inherit the value of the noun synset referenced by "noun-id"
-        (None if that noun is not loaded).
-        """
-
-        tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir))
-        root = tree.getroot()
-        pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
-
-        synsets = {}
-        # Nouns go first: the other parts of speech refer back to them.
-        for pos in ["noun", "adj", "verb", "adv"]:
-            by_offset = synsets[pos_map[pos]] = {}
-            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
-                # Ids carry a two character prefix before the offset.
-                offset = int(elem.get("id")[2:])
-                if not offset:
-                    continue
-                categ = elem.get("categ")
-                noun_id = elem.get("noun-id")
-                if categ:
-                    by_offset[offset] = Emotion.emotions.get(categ)
-                elif noun_id:
-                    by_offset[offset] = synsets[pos_map["noun"]].get(int(noun_id[2:]))
-
-        return synsets
-
-    def _load_emotions(self, wn_domains_dir):
-        """Loads the hierarchy of emotions from the WordNet-Affect xml.
-
-        Every "categ" element is registered in Emotion.emotions under its
-        name; all but "root" are linked to the parent named by "isa".
-        """
-
-        tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir))
-        root = tree.getroot()
-        for elem in root.findall("categ"):
-            name = elem.get("name")
-            if name == "root":
-                Emotion.emotions["root"] = Emotion("root")
-            else:
-                Emotion.emotions[name] = Emotion(name, elem.get("isa"))
-
-    def get_emotion(self, word, pos):
-        """Returns the emotion of the word, or None when there is none.
-            word -- the word (str)
-            pos -- part-of-speech (str)
-
-        Only the first WordNet 1.6 synset of the word is considered.
-        """
-
-        flat = self.flat_pos.get(pos)
-        if flat is None:
-            return None
-        synsets = self.wn16.synsets(word, self.wn_pos[flat])
-        if not synsets:
-            return None
-        return self.synsets[flat].get(synsets[0].offset())
-
-            
-
-            
-if __name__ == "__main__":
-    # Command line use: print the emotion found for one word and POS tag.
-    if len(sys.argv) < 5:
-        sys.exit("usage: {0} WORDNET16_DIR WN_DOMAINS_DIR WORD POS".format(sys.argv[0]))
-    wordnet16, wndomains32, word, pos = sys.argv[1:5]
-    wna = WNAffect(wordnet16, wndomains32)
-    # Function-call form: valid on Python 3 and, with one argument, on Python 2.
-    print(wna.get_emotion(word, pos))
-
-