Add 'community-plugins/' from commit '4c73797246c6aff8d055abfef73d3f0d34b933a8'

git-subtree-dir: community-plugins git-subtree-mainline: 7f712952be git-subtree-split: 4c73797246
2025-08-24 10:32:20 +00:00 · 2023-09-20 13:32:30 +02:00
parent 7f712952be 4c73797246
commit e1d888ebd6
77 changed files with 11412 additions and 0 deletions
--- a/community-plugins/sentiment-basic/README.md
+++ b/community-plugins/sentiment-basic/README.md
@@ -0,0 +1,28 @@
+# Sentiment basic plugin
+
+This plugin is based on the classifier developed for the TASS 2015 competition. It has been developed for Spanish and English. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the fully functional classifier, you can use the service at: http://senpy.cluster.gsi.dit.upm.es
+
+There is more information available in:
+	
+	- Aspect based Sentiment Analysis of Spanish Tweets, Oscar Araque and Ignacio Corcuera-Platas and Constantino Román-Gómez and Carlos A. Iglesias and J. Fernando Sánchez-Rada. http://gsi.dit.upm.es/es/investigacion/publicaciones?view=publication&task=show&id=376
+
+## Usage
+Params accepted:
+
+- Language: Spanish (es).
+- Input: text to analyse.
+
+
+Example request: 
+```
+http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-basic&language=es&input=I%20love%20Madrid
+```
+
+Example response: This plugin follows the standard for the senpy plugin response. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
+
+This plugin only supports **python2**
+
+
+![alt GSI Logo][logoGSI]
+
+[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
--- a/community-plugins/sentiment-basic/sentiment-basic.py
+++ b/community-plugins/sentiment-basic/sentiment-basic.py
@@ -0,0 +1,177 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import os
+import sys
+import string
+import nltk
+import pickle
+
+from sentiwn import SentiWordNet
+from nltk.corpus import wordnet as wn
+from textblob import TextBlob
+from scipy.interpolate import interp1d
+from os import path
+
+from senpy.plugins import SentimentBox, SenpyPlugin
+from senpy.models import Results, Entry, Sentiment, Error
+
+# Python 3 has no separate ``unicode`` type; alias it to ``str`` so that the
+# ``unicode(text)`` calls further down work on both major versions.
+if sys.version_info.major >= 3:
+    unicode = str
+
+
+class SentimentBasic(SentimentBox):
+    '''
+    Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.
+    '''
+    name = "sentiment-basic"
+    author = "github.com/nachtkatze"
+    version = "0.1.1"
+    extra_params = {
+        "language": {
+            "description": "language of the text",
+            "aliases": ["language", "l"],
+            "required": True,
+            "options": ["en","es", "it", "fr"],
+            "default": "en"
+        }
+    }
+    # Data files; both are resolved through ``find_file`` on activation.
+    sentiword_path = "SentiWordNet_3.0.txt"
+    pos_path = "unigram_spanish.pickle"
+    maxPolarityValue = 1
+    minPolarityValue = -1
+    nltk_resources = ['punkt','wordnet', 'omw']
+
+    with_polarity = False
+
+    def _load_swn(self):
+        """Locate the SentiWordNet data file and return it parsed."""
+        self.swn_path = self.find_file(self.sentiword_path)
+        return SentiWordNet(self.swn_path)
+
+    def _load_pos_tagger(self):
+        """Locate the pickled POS tagger and return the unpickled object."""
+        self.pos_path = self.find_file(self.pos_path)
+        # NOTE(review): unpickling executes code embedded in the file, so
+        # ``pos_path`` must never point at data from an untrusted source.
+        with open(self.pos_path, 'rb') as f:
+            return pickle.load(f)
+
+    def activate(self, *args, **kwargs):
+        """Load the SentiWordNet scores and the POS tagger."""
+        self._swn = self._load_swn()
+        self._pos_tagger = self._load_pos_tagger()
+
+    def _remove_punctuation(self, tokens):
+        """
+        Return ``tokens`` without the entries made up only of punctuation.
+
+        A plain ``token in string.punctuation`` test is a substring check and
+        lets multi-character tokens such as ``...`` or ``''`` through, so each
+        character is checked instead. Empty tokens are dropped as well.
+        """
+        punctuation = set(string.punctuation)
+        return [t for t in tokens
+                if not all(ch in punctuation for ch in t)]
+
+    def _tokenize(self, text):
+        """
+        Split ``text`` into lower-cased word tokens without punctuation.
+
+        Returns a dict with the original text under ``'sentence'`` and the
+        token list under ``'tokens'``.
+        """
+        lowered = [w.lower() for w in nltk.word_tokenize(text)]
+        return {
+            'sentence': text,
+            'tokens': self._remove_punctuation(lowered),
+        }
+
+    def _pos(self, tokens):
+        """
+        Replace ``tokens['tokens']`` with the output of the POS tagger.
+
+        The dict is modified in place and also returned.
+        """
+        tokens['tokens'] = self._pos_tagger.tag(tokens['tokens'])
+        return tokens
+
+    def _compare_synsets(self, synsets, tokens):
+        """
+        Return the first synset in ``synsets`` that is also the synset of one
+        of the lemmas stored in ``tokens['lemmas']``, or ``None`` if there is
+        no such synset.
+        """
+        for synset in synsets:
+            for lemmas in tokens['lemmas'].values():
+                if any(lemma.synset() == synset for lemma in lemmas):
+                    return synset
+        return None
+
+    def predict_one(self, features, activity):
+        """
+        Classify the polarity of a single text.
+
+        ``features[0]`` is the text to analyse and the ``language`` parameter
+        of ``activity`` is its language code. The text is translated to
+        English (unless it already is English), the synsets of the translated
+        words are matched against the synsets of the original words, and the
+        matching synsets are scored with SentiWordNet.
+
+        Returns a one-hot list: ``[1, 0, 0]`` for positive, ``[0, 1, 0]`` for
+        neutral and ``[0, 0, 1]`` for negative.
+
+        Raises ``Error`` when the translation fails.
+        """
+        language = activity.param("language")
+        text = features[0]
+        tokens = self._pos(self._tokenize(text))
+        # Language codes in the form expected by ``wn.lemmas``.
+        sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
+        wn_lang = sufixes[language]
+        tokens['lemmas'] = {}
+        # Each tagged token is indexed with [0] to obtain the word itself.
+        for w in tokens['tokens']:
+            lemmas = wn.lemmas(w[0], lang=wn_lang)
+            if lemmas:
+                tokens['lemmas'][w[0]] = lemmas
+        if language == "en":
+            trans = TextBlob(unicode(text))
+        else:
+            try:
+                trans = TextBlob(unicode(text)).translate(from_lang=language,to='en')
+            except Exception as ex:
+                raise Error('Could not translate the text from "{}" to "{}": {}'.format(language,
+                                                                                    'en',
+                                                                                    str(ex)))
+        sentences = trans.sentences
+        if not sentences:
+            # Nothing to analyse (e.g. empty text): report neutral instead of
+            # failing with an IndexError below.
+            return [0, 1, 0]
+
+        # Only the first sentence of the (translated) text is analysed.
+        useful_synsets = {}
+        for t_w in sentences[0].words:
+            synsets = wn.synsets(t_w)
+            if not synsets:
+                continue
+            useful_synsets[t_w] = self._compare_synsets(synsets, tokens)
+
+        # Count the words whose matched synset is positive / negative.
+        n_pos = 0
+        n_neg = 0
+        for synset in useful_synsets.values():
+            if synset is None:
+                continue
+            for score in self._swn.get_score(synset.name().split('.')[0]):
+                if score['synset'] == synset:
+                    t_score = score['pos'] - score['neg']
+                    if t_score > 0:
+                        n_pos += 1
+                    elif t_score < 0:
+                        n_neg += 1
+                    break
+
+        # Comparing the two counts gives the same class as rescaling
+        # (n_pos - n_neg) / (n_pos + n_neg) from [-1, 1] to [0, 1] and
+        # comparing it with 0.5, without the division by zero when no word
+        # is polar.
+        if n_pos > n_neg:  # Positive
+            return [1, 0, 0]
+        elif n_pos < n_neg:  # Negative
+            return [0, 0, 1]
+        else:
+            return [0, 1, 0]
+
+
+    test_cases = [
+        {
+            'input': 'Odio ir al cine',
+            'params': {'language': 'es'},
+            'polarity': 'marl:Negative'
+
+        },
+        {
+            'input': 'El cielo está nublado',
+            'params': {'language': 'es'},
+            'polarity': 'marl:Neutral'
+
+        },
+        {
+            'input': 'Esta tarta está muy buena',
+            'params': {'language': 'es'},
+            'polarity': 'marl:Negative' # SURPRISINGLY!
+
+        }
+    ]
--- a/community-plugins/sentiment-basic/sentiment-basic.senpy
+++ b/community-plugins/sentiment-basic/sentiment-basic.senpy
@@ -0,0 +1,7 @@
+---
+module: sentiment-basic
+requirements:
+- nltk>=3.0.5
+- scipy>=0.14.0
+- textblob
+
--- a/community-plugins/sentiment-basic/sentiwn.py
+++ b/community-plugins/sentiment-basic/sentiwn.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+"""
+Author : Jaganadh Gopinadhan <jaganadhg@gmail.com>
+Copywright (C) : Jaganadh Gopinadhan
+
+ Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+ 
+      http://www.apache.org/licenses/LICENSE-2.0
+ 
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+"""
+
+import sys,os
+import re
+
+from nltk.corpus import wordnet
+
+class SentiWordNet(object):
+    """
+    Interface to SentiWordNet
+
+    Loads the scores of a SentiWordNet data file into memory and looks up
+    the positive / negative scores of the WordNet synsets of a word.
+    """
+    def __init__(self,swn_file):
+        """
+        Parse ``swn_file`` (path of the SentiWordNet data file) and keep the
+        result in ``self.pos_synset``.
+        """
+        self.swn_file = swn_file
+        self.pos_synset = self.__parse_swn_file()
+
+    def __parse_swn_file(self):
+        """
+        Parse the SentiWordNet file and populate the POS and SynsetID hash
+
+        Returns a dict mapping ``(pos, synset_id)`` to
+        ``(positive_score, negative_score)``. Lines starting with ``#`` are
+        ignored; lines that do not have exactly six tab-separated fields are
+        reported and skipped.
+        """
+        pos_synset_hash = {}
+        # The context manager closes the file even if parsing fails.
+        with open(self.swn_file,'r') as swn_data:
+            for line in swn_data:
+                if line.lstrip().startswith("#"):
+                    continue
+
+                fields = line.strip().split("\t")
+                try:
+                    pos,syn_set_id,pos_score,neg_score,syn_set_score,\
+                    gloss = fields
+                except ValueError:
+                    print("Found data without all details")
+                    # Skip the line: falling through would reuse the fields
+                    # of the previous line (or fail on the very first one).
+                    continue
+
+                if pos and syn_set_score:
+                    pos_synset_hash[(pos,int(syn_set_id))] = (
+                        float(pos_score), float(neg_score))
+
+        return pos_synset_hash
+
+    def get_score(self,word,pos=None):
+        """
+        Get score for a given word/word pos combination
+
+        Returns a list with one dict per WordNet synset of ``word`` that is
+        present in the loaded data. Each dict has the keys ``pos``, ``neg``,
+        ``obj`` and ``synset``.
+        """
+        senti_scores = []
+        for synset in wordnet.synsets(word,pos):
+            key = (synset.pos(), synset.offset())
+            if key not in self.pos_synset:
+                continue
+            pos_val, neg_val = self.pos_synset[key]
+            senti_scores.append({
+                "pos": pos_val,
+                "neg": neg_val,
+                # Objectivity is what is left after both polar scores.
+                "obj": 1.0 - (pos_val + neg_val),
+                'synset': synset,
+            })
+
+        return senti_scores