1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-08-24 02:22:20 +00:00

Merged into monorepo

This commit is contained in:
J. Fernando Sánchez
2018-06-14 19:38:08 +02:00
parent e51b659030
commit c52a894017
29 changed files with 406 additions and 493 deletions

View File

@@ -1,3 +0,0 @@
[submodule "data"]
path = data
url = ../data/sentiment-basic

View File

@@ -1,5 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import logging
import string
import nltk
import pickle
@@ -13,24 +14,40 @@ from os import path
from senpy.plugins import SentimentPlugin, SenpyPlugin
from senpy.models import Results, Entry, Sentiment
logger = logging.getLogger(__name__)
class SentiTextPlugin(SentimentPlugin):
class SentimentBasic(SentimentPlugin):
'''
Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.
'''
name = "sentiment-basic"
author = "github.com/nachtkatze"
version = "0.1.1"
extra_params = {
"language": {
"aliases": ["language", "l"],
"required": True,
"options": ["en","es", "it", "fr", "auto"],
"default": "auto"
}
}
sentiword_path = "SentiWordNet_3.0.txt"
pos_path = "unigram_spanish.pickle"
maxPolarityValue = 1
minPolarityValue = -1
nltk_resources = ['punkt','wordnet']
def _load_swn(self):
    """Locate the SentiWordNet data file and load it.

    The file name configured in ``self.sentiword_path`` is resolved with
    ``self.find_file``; the resolved location is kept in ``self.swn_path``.

    Returns:
        The ``SentiWordNet`` object built from the resolved path.
    """
    # A manual join against this module's directory used to precede this
    # line, but its result was overwritten immediately (dead store), so
    # only the find_file lookup is kept.
    self.swn_path = self.find_file(self.sentiword_path)
    return SentiWordNet(self.swn_path)
def _load_pos_tagger(self):
self.pos_path = path.join(path.abspath(path.dirname(__file__)), self.pos_path)
self.pos_path = self.find_file(self.pos_path)
with open(self.pos_path, 'r') as f:
tagger = pickle.load(f)
return tagger
def activate(self, *args, **kwargs):
nltk.download(['punkt','wordnet'])
self._swn = self._load_swn()
self._pos_tagger = self._load_pos_tagger()
@@ -54,11 +71,6 @@ class SentiTextPlugin(SentimentPlugin):
tokens[i]['tokens'] = self._pos_tagger.tag(tokens[i]['tokens'])
return tokens
# def _stopwords(sentences, lang='english'):
# for i in sentences:
# sentences[i]['tokens'] = [t for t in sentences[i]['tokens'] if t not in nltk.corpus.stopwords.words(lang)]
# return sentences
def _compare_synsets(self, synsets, tokens, i):
for synset in synsets:
for word in tokens[i]['lemmas']:
@@ -71,7 +83,7 @@ class SentiTextPlugin(SentimentPlugin):
def analyse_entry(self, entry, params):
language = params.get("language")
text = entry.get("text", None)
text = entry.text
tokens = self._tokenize(text)
tokens = self._pos(tokens)
sufixes = {'es':'spa','en':'eng','it':'ita','fr':'fra'}
@@ -130,19 +142,41 @@ class SentiTextPlugin(SentimentPlugin):
except:
if n_pos == 0 and n_neg == 0:
g_score = 0.5
polarity = 'marl:Neutral'
polarity_value = 0
if g_score > 0.5:
if g_score >= 0.5:
polarity = 'marl:Positive'
polarity_value = 1
elif g_score < 0.5:
polarity = 'marl:Negative'
polarity_value = -1
else:
polarity = 'marl:Neutral'
polarity_value = 0
opinion = Sentiment(id="Opinion0"+'_'+str(i),
marl__hasPolarity=polarity,
marl__polarityValue=polarity_value)
opinion.prov(self)
entry.sentiments.append(opinion)
yield entry
# Declarative test cases for this plugin. Each entry gives the text to
# analyse ('input'), the request parameters ('params') and the expected
# marl polarity label ('polarity').
# Presumably consumed by the senpy plugin test machinery; the consumer is
# not visible here -- TODO confirm.
# NOTE(review): the expected labels look like they pin the classifier's
# current output rather than the intuitive sentiment of each sentence
# (e.g. the last case) -- confirm before "correcting" them.
test_cases = [
{
'input': u'Odio ir al cine',
'params': {'language': 'es'},
'polarity': 'marl:Negative'
},
{
'input': u'El cielo está nublado',
'params': {'language': 'es'},
'polarity': 'marl:Positive'
},
{
'input': u'Esta tarta está muy buena',
'params': {'language': 'es'},
'polarity': 'marl:Negative'
}
]

View File

@@ -1,24 +1,7 @@
{
"name": "sentiment-basic",
"module": "sentiment-basic",
"description": "Sentiment classifier using rule-based classification for Spanish. Based on english to spanish translation and SentiWordNet sentiment knowledge. This is a demo plugin that uses only some features from the TASS 2015 classifier. To use the entirely functional classifier you can use the service in: http://senpy.cluster.gsi.dit.upm.es.",
"author": "github.com/nachtkatze",
"version": "0.1",
"requirements": [
"nltk>=3.0.5",
"scipy>=0.14.0",
"textblob"
],
"extra_params": {
"language": {
"aliases": ["language", "l"],
"required": true,
"options": ["en","es", "it", "fr", "auto"],
"default": "auto"
},
},
"sentiword_path": "data/SentiWordNet_3.0.txt",
"pos_path": "data/unigram_spanish.pickle",
"maxPolarityValue": "1",
"minPolarityValue": "-1"
}
---
module: sentiment-basic
requirements:
- nltk>=3.0.5
- scipy>=0.14.0
- textblob

View File

@@ -46,7 +46,7 @@ class SentiWordNet(object):
pos,syn_set_id,pos_score,neg_score,syn_set_score,\
gloss = fields
except:
print "Found data without all details"
print("Found data without all details")
pass
if pos and syn_set_score:
@@ -67,4 +67,4 @@ class SentiWordNet(object):
senti_scores.append({"pos":pos_val,"neg":neg_val,\
"obj": 1.0 - (pos_val - neg_val),'synset':synset})
return senti_scores
return senti_scores

View File

@@ -1,42 +0,0 @@
import os
import logging
logging.basicConfig()
try:
import unittest.mock as mock
except ImportError:
import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest
class SentiTextTest(unittest.TestCase):
    """Integration test: load the ``SentiText`` plugin through Senpy and
    check the polarity it assigns to a few Spanish sentences."""

    def setUp(self):
        """Build a Flask app and a Senpy instance whose plugin folder is
        the directory containing this test file."""
        self.app = Flask("test_plugin")
        self.dir = os.path.dirname(__file__)
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        """Deactivate the plugin synchronously after every test."""
        self.senpy.deactivate_plugin("SentiText", sync=True)

    def test_analyse(self):
        """Analyse each sample text and compare the first sentiment's
        ``marl:hasPolarity`` with the expected label."""
        plugin = self.senpy.plugins["SentiText"]
        plugin.activate()
        texts = {'Odio ir al cine': 'marl:Neutral',
                 'El cielo esta nublado': 'marl:Positive',
                 'Esta tarta esta muy buena': 'marl:Neutral'}
        try:
            for text, expected in texts.items():
                response = plugin.analyse(input=text)
                sentiment_set = response.entries[0].sentiments[0]
                # print() with one argument behaves the same on Python 2
                # and 3; the old print statement was a SyntaxError on 3.
                print(sentiment_set)
                # assertEqual survives `python -O` and reports both values.
                self.assertEqual(sentiment_set['marl:hasPolarity'], expected)
        finally:
            # Deactivate even when an assertion fails mid-loop.
            plugin.deactivate()
if __name__ == '__main__':
unittest.main()