mirror of
https://github.com/gsi-upm/senpy
synced 2025-08-24 10:32:20 +00:00
Add 'community-plugins/' from commit '4c73797246c6aff8d055abfef73d3f0d34b933a8'
git-subtree-dir: community-plugins git-subtree-mainline:7f712952be
git-subtree-split:4c73797246
This commit is contained in:
28
community-plugins/sentiment-basic/README.md
Executable file
28
community-plugins/sentiment-basic/README.md
Executable file
@@ -0,0 +1,28 @@
|
||||
# Sentiment basic plugin
|
||||
|
||||
This plugin is based on the classifier developed for the TASS 2015 competition. It has been developed for Spanish and English. This is a demo plugin that uses only some of the features of the TASS 2015 classifier. To use the fully functional classifier, use the service at: http://senpy.cluster.gsi.dit.upm.es
|
||||
|
||||
More information is available in:
|
||||
|
||||
- Aspect based Sentiment Analysis of Spanish Tweets, Oscar Araque and Ignacio Corcuera-Platas and Constantino Román-Gómez and Carlos A. Iglesias and J. Fernando Sánchez-Rada. http://gsi.dit.upm.es/es/investigacion/publicaciones?view=publication&task=show&id=376
|
||||
|
||||
## Usage
|
||||
Params accepted:
|
||||
|
||||
- Language: Spanish (es).
|
||||
- Input: text to analyse.
|
||||
|
||||
|
||||
Example request:
|
||||
```
|
||||
http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-basic&language=es&input=I%20love%20Madrid
|
||||
```
|
||||
|
||||
Example response: This plugin follows the standard senpy plugin response format. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
|
||||
|
||||
This plugin only supports **python2**
|
||||
|
||||
|
||||
![alt GSI Logo][logoGSI]
|
||||
|
||||
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
|
177
community-plugins/sentiment-basic/sentiment-basic.py
Normal file
177
community-plugins/sentiment-basic/sentiment-basic.py
Normal file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import sys
|
||||
import string
|
||||
import nltk
|
||||
import pickle
|
||||
|
||||
from sentiwn import SentiWordNet
|
||||
from nltk.corpus import wordnet as wn
|
||||
from textblob import TextBlob
|
||||
from scipy.interpolate import interp1d
|
||||
from os import path
|
||||
|
||||
from senpy.plugins import SentimentBox, SenpyPlugin
|
||||
from senpy.models import Results, Entry, Sentiment, Error
|
||||
|
||||
# Python 3 has no separate `unicode` builtin; alias it to `str` so the
# `unicode(text)` calls further down work on both major versions.
if sys.version_info >= (3,):
    unicode = str
|
||||
|
||||
|
||||
class SentimentBasic(SentimentBox):
    '''
    Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.
    '''
    # Plugin metadata.
    name = "sentiment-basic"
    author = "github.com/nachtkatze"
    version = "0.1.1"
    # Request parameters; predict_one() reads "language" through
    # activity.param("language").
    extra_params = {
        "language": {
            "description": "language of the text",
            "aliases": ["language", "l"],
            "required": True,
            "options": ["en","es", "it", "fr"],
            "default": "en"
        }
    }
    # Resource file names, resolved with self.find_file() in the loaders below.
    sentiword_path = "SentiWordNet_3.0.txt"
    pos_path = "unigram_spanish.pickle"
    maxPolarityValue = 1
    minPolarityValue = -1
    # NOTE(review): presumably the NLTK data packages the framework fetches
    # for this plugin -- confirm against the senpy plugin loader.
    nltk_resources = ['punkt','wordnet', 'omw']

    with_polarity = False

    def _load_swn(self):
        """Locate the SentiWordNet data file and return a parsed SentiWordNet."""
        self.swn_path = self.find_file(self.sentiword_path)
        return SentiWordNet(self.swn_path)

    def _load_pos_tagger(self):
        """Locate the pickled POS tagger and return the unpickled object."""
        self.pos_path = self.find_file(self.pos_path)
        # NOTE: pickle.load executes arbitrary code from the file; only ever
        # point pos_path at a trusted, bundled tagger.
        with open(self.pos_path, 'rb') as f:
            return pickle.load(f)

    def activate(self, *args, **kwargs):
        """Load the SentiWordNet lexicon and the POS tagger into the instance."""
        self._swn = self._load_swn()
        self._pos_tagger = self._load_pos_tagger()

    def _remove_punctuation(self, tokens):
        """Return the tokens that are not made up solely of punctuation.

        A token is dropped when every character is in string.punctuation
        (the empty string is dropped too). The previous substring test
        (`t not in string.punctuation`) kept tokens such as '...' and
        dropped only those that happened to be contiguous in that constant.
        """
        punctuation = set(string.punctuation)
        return [t for t in tokens
                if not all(ch in punctuation for ch in t)]

    def _tokenize(self, text):
        """Tokenize `text` into lower-cased, punctuation-free tokens.

        Returns a dict with the original text under 'sentence' and the token
        list under 'tokens'.
        """
        lowered = [w.lower() for w in nltk.word_tokenize(text)]
        return {
            'sentence': text,
            'tokens': self._remove_punctuation(lowered),
        }

    def _pos(self, tokens):
        """Replace tokens['tokens'] in place with the tagger output; return the dict."""
        tokens['tokens'] = self._pos_tagger.tag(tokens['tokens'])
        return tokens

    def _compare_synsets(self, synsets, tokens):
        """Return the first of `synsets` shared with any lemma in tokens['lemmas'].

        `tokens['lemmas']` maps a word to its list of WordNet lemmas. Returns
        None when no candidate synset equals any lemma's synset.
        """
        # Resolve each lemma's synset once instead of once per candidate.
        lemma_synsets = [lemma.synset()
                         for lemmas in tokens['lemmas'].values()
                         for lemma in lemmas]
        for synset in synsets:
            if synset in lemma_synsets:
                return synset
        return None

    def _translate_to_english(self, text, language):
        """Return a TextBlob of `text` in English, translating if needed.

        Raises:
            Error: when the translation call fails for any reason.
        """
        blob = TextBlob(unicode(text))
        if language == "en":
            return blob
        try:
            return blob.translate(from_lang=language,to='en')
        except Exception as ex:
            raise Error('Could not translate the text from "{}" to "{}": {}'.format(language,
                                                                                   'en',
                                                                                   str(ex)))

    def _synset_polarity(self, synset):
        """Return 'pos', 'neg' or 'neu' for `synset`, or None if it has no score.

        The lexicon is queried by the synset's head word (the part of its
        name before the first '.'), and only the entry for this exact synset
        is used.
        """
        head_word = synset.name().split('.')[0]
        for score in self._swn.get_score(head_word):
            if score['synset'] == synset:
                balance = score['pos'] - score['neg']
                if balance > 0:
                    return 'pos'
                if balance < 0:
                    return 'neg'
                return 'neu'
        return None

    def predict_one(self, features, activity):
        """Classify the polarity of one text.

        Args:
            features: sequence whose first element is the text to analyse.
            activity: request object; `activity.param("language")` must be
                one of the keys of the language map below.

        Returns:
            A one-hot list: [1, 0, 0] for positive, [0, 1, 0] for neutral and
            [0, 0, 1] for negative.

        Raises:
            Error: when the text cannot be translated to English.
        """
        language = activity.param("language")
        text = features[0]
        tokens = self._pos(self._tokenize(text))

        # Request language code -> WordNet language code.
        sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
        wn_lang = sufixes[language]
        tokens['lemmas'] = {}
        for tagged in tokens['tokens']:
            lemmas = wn.lemmas(tagged[0], lang=wn_lang)
            if lemmas:
                tokens['lemmas'][tagged[0]] = lemmas

        trans = self._translate_to_english(text, language)
        # Only the first sentence of the English text is scored (unchanged
        # behaviour); empty input has no sentences and yields neutral instead
        # of an IndexError.
        sentences = trans.sentences
        words = sentences[0].words if sentences else []

        # English word -> synset it shares with the source-language lemmas
        # (None when there is no shared synset). Keyed by word, so a repeated
        # word is counted once.
        useful_synsets = {}
        for word in words:
            synsets = wn.synsets(word)
            if not synsets:
                continue
            useful_synsets[word] = self._compare_synsets(synsets, tokens)

        n_pos = 0
        n_neg = 0
        for synset in useful_synsets.values():
            if synset is None:
                continue
            label = self._synset_polarity(synset)
            if label == 'pos':
                n_pos += 1
            elif label == 'neg':
                n_neg += 1

        # The sign of (n_pos - n_neg) decides the class; this is what the
        # former normalise-and-interpolate step compared against 0.5, without
        # the 0/0 division it guarded with a bare except.
        if n_pos > n_neg:  # Positive
            return [1, 0, 0]
        elif n_pos < n_neg:  # Negative
            return [0, 0, 1]
        else:
            return [0, 1, 0]

    test_cases = [
        {
            'input': 'Odio ir al cine',
            'params': {'language': 'es'},
            'polarity': 'marl:Negative'

        },
        {
            'input': 'El cielo está nublado',
            'params': {'language': 'es'},
            'polarity': 'marl:Neutral'

        },
        {
            'input': 'Esta tarta está muy buena',
            'params': {'language': 'es'},
            'polarity': 'marl:Negative'  # SURPRISINGLY!

        }
    ]
|
7
community-plugins/sentiment-basic/sentiment-basic.senpy
Normal file
7
community-plugins/sentiment-basic/sentiment-basic.senpy
Normal file
@@ -0,0 +1,7 @@
|
||||
---
|
||||
module: sentiment-basic
|
||||
requirements:
|
||||
- nltk>=3.0.5
|
||||
- scipy>=0.14.0
|
||||
- textblob
|
||||
|
70
community-plugins/sentiment-basic/sentiwn.py
Normal file
70
community-plugins/sentiment-basic/sentiwn.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Author : Jaganadh Gopinadhan <jaganadhg@gmail.com>
|
||||
Copywright (C) : Jaganadh Gopinadhan
|
||||
|
||||
Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import sys,os
|
||||
import re
|
||||
|
||||
from nltk.corpus import wordnet
|
||||
|
||||
class SentiWordNet(object):
    """
    Interface to SentiWordNet
    """
    def __init__(self,swn_file):
        """
        Parse `swn_file` (path to a SentiWordNet text file) into
        `self.pos_synset`.
        """
        self.swn_file = swn_file
        self.pos_synset = self.__parse_swn_file()

    def __parse_swn_file(self):
        """
        Parse the SentiWordNet file and populate the POS and SynsetID hash

        Returns a dict mapping (pos, synset_id as int) to a
        (positive_score, negative_score) tuple of floats. Lines starting with
        '#' are ignored. Rows that do not have exactly six tab-separated
        fields are reported and skipped.
        """
        pos_synset_hash = {}
        # Stream the file and close it deterministically.
        with open(self.swn_file,'r') as swn_data:
            for line in swn_data:
                if re.search(r"^\s*#", line):
                    continue

                fields = line.strip().split("\t")
                if len(fields) != 6:
                    # Skip the row: carrying on would reuse the previous
                    # row's values, or fail if this is the first data row.
                    print("Found data without all details")
                    continue

                pos, syn_set_id, pos_score, neg_score, syn_set_terms, _gloss = fields
                if pos and syn_set_terms:
                    pos_synset_hash[(pos,int(syn_set_id))] = (float(pos_score),
                                                              float(neg_score))

        return pos_synset_hash

    def get_score(self,word,pos=None):
        """
        Get score for a given word/word pos combination

        Returns a list with one dict per WordNet synset of `word` that is
        present in the parsed file. Each dict has the keys "pos", "neg",
        "obj" and 'synset'.
        """
        senti_scores = []
        for synset in wordnet.synsets(word,pos):
            # NOTE(review): WordNet reports satellite adjectives with pos 's'
            # while SentiWordNet files appear to list them as 'a'; confirm
            # whether those lookups are meant to miss.
            key = (synset.pos(), synset.offset())
            if key not in self.pos_synset:
                continue
            pos_val, neg_val = self.pos_synset[key]
            # Objectivity is what remains after both polarities:
            # 1 - (pos + neg), not 1 - (pos - neg).
            senti_scores.append({"pos":pos_val,"neg":neg_val,
                                 "obj": 1.0 - (pos_val + neg_val),'synset':synset})

        return senti_scores
|
Reference in New Issue
Block a user