1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-12-22 04:58:12 +00:00

Merge branch 'master' into 44-add-basic-evaluation-with-gsitk

This commit is contained in:
J. Fernando Sánchez 2018-04-23 15:28:51 +02:00
commit 5e2ada1654
11 changed files with 103 additions and 32 deletions

View File

@ -1,8 +1,11 @@
What is Senpy?
--------------
Web services can get really complex: data validation, user interaction, formatting, logging, etc.
The figure below summarizes the typical features in an analysis service.
Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recognition. Annotations and Services are compliant with NIF (NLP Interchange Format).
Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing a core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc.
The figure below summarizes the typical features in a text analysis service.
Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.
.. image:: senpy-framework.png

View File

@ -1,8 +1,24 @@
Vocabularies and model
======================
The model used in Senpy is based on the following vocabularies:
The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services.
* Marl, a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
* Onyx, which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
* NIF 2.0, which defines a semantic format and API for improving interoperability among natural language processing services.
Senpy has been applied to sentiment and emotion analysis services using the following vocabularies:
* Marl [2,6], a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
* Onyx [3,5], which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
An overview of the vocabularies and their use can be found in [4].
[1] Guidelines for developing NIF-based NLP services, Final Community Group Report, 22 December 2015. Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/
[2] Marl Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/marl/
[3] Onyx Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/onyx/
[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69).
[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114.
[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked opinions: Describing sentiments on the structured web of data.

View File

@ -22,6 +22,7 @@ the server.
from flask import Flask
from senpy.extensions import Senpy
from senpy.utils import easy_test
import logging
import os
@ -39,7 +40,7 @@ def main():
'-l',
metavar='logging_level',
type=str,
default="ERROR",
default="WARN",
help='Logging level')
parser.add_argument(
'--debug',
@ -75,6 +76,12 @@ def main():
action='store_true',
default=False,
help='Do not run a server, only install plugin dependencies')
parser.add_argument(
'--only-test',
'-t',
action='store_true',
default=False,
help='Do not run a server, just test all plugins')
parser.add_argument(
'--only-list',
'--list',
@ -122,6 +129,9 @@ def main():
if args.only_install:
return
sp.activate_all()
if args.only_test:
easy_test(sp.plugins())
return
print('Senpy version {}'.format(senpy.__version__))
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
args.port))

View File

@ -16,7 +16,6 @@ import os
import copy
import errno
import logging
import traceback
#Correct this import for managing the datasets
from gsitk.datasets.datasets import DatasetManager
@ -176,22 +175,14 @@ class Senpy(object):
by api.parse_call().
"""
logger.debug("analysing request: {}".format(request))
try:
entries = request.entries
request.entries = []
plugins = self._get_plugins(request)
results = request
for i in self._process_entries(entries, results, plugins):
results.entries.append(i)
self.convert_emotions(results)
logger.debug("Returning analysis result: {}".format(results))
except (Error, Exception) as ex:
if not isinstance(ex, Error):
msg = "Error during analysis: {} \n\t{}".format(ex,
traceback.format_exc())
ex = Error(message=msg, status=500)
logger.exception('Error returning analysis result')
raise ex
entries = request.entries
request.entries = []
plugins = self._get_plugins(request)
results = request
for i in self._process_entries(entries, results, plugins):
results.entries.append(i)
self.convert_emotions(results)
logger.debug("Returning analysis result: {}".format(results))
results.analysis = [i['plugin'].id for i in results.analysis]
return results

View File

@ -327,6 +327,7 @@ for i in [
'emotionModel',
'emotionPlugin',
'emotionSet',
'entity',
'help',
'plugin',
'plugins',

View File

@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin):
from requests.
'''
from senpy.test import patch_requests
expected = {"data": [{"polarity": 10}]}
expected = {"data": [{"polarity": 4}]}
with patch_requests(expected) as (request, response):
super(Sentiment140Plugin, self).test(*args, **kwargs)
assert request.called

View File

@ -3,6 +3,8 @@ try:
except ImportError:
from mock import patch, MagicMock
from past.builtins import basestring
import json
from contextlib import contextmanager
@ -15,13 +17,17 @@ def patch_requests(value, code=200):
success = MagicMock()
if isinstance(value, BaseModel):
value = value.jsonld()
data = json.dumps(value)
if not isinstance(value, basestring):
data = json.dumps(value)
else:
data = value
success.json.return_value = value
success.data.return_value = data
success.status_code = code
success.content = json.dumps(value)
success.status_code = code
success.content = data
success.text = data
method_mocker = MagicMock()
method_mocker.return_value = success
with patch.multiple('requests', request=method_mocker,

View File

@ -71,8 +71,9 @@ def easy_test(plugin_list=None):
logger.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.INFO)
if not plugin_list:
from . import plugins
import __main__
logger.info('Loading classes from {}'.format(__main__))
from . import plugins
plugin_list = plugins.from_module(__main__)
for plug in plugin_list:
plug.test()

View File

@ -182,8 +182,7 @@ class ExtensionsTest(TestCase):
analyse(self.senpy, input='nothing', algorithm='MOCK')
assert False
except Exception as ex:
assert 'generic exception on analysis' in ex['message']
assert ex['status'] == 500
assert 'generic exception on analysis' in str(ex)
def test_filtering(self):
""" Filtering plugins """

View File

@ -9,6 +9,7 @@ from senpy.models import (Emotion,
EmotionAnalysis,
EmotionSet,
Entry,
Entity,
Error,
Results,
Sentiment,
@ -207,3 +208,14 @@ class ModelsTest(TestCase):
recovered = from_string(string)
assert isinstance(recovered, Results)
assert isinstance(recovered.entries[0], Entry)
def test_serializable(self):
    """Serializing Results that hold an entry with an entity keeps both levels.

    Builds a Results object containing one Entry, which in turn contains
    one Entity, and checks that the output of ``serializable()`` is
    truthy and exposes non-empty ``entries`` and nested ``entities``.
    """
    results = Results()
    entry = Entry()
    entity = Entity()
    entry.entities.append(entity)
    results.entries.append(entry)

    serialized = results.serializable()
    assert serialized

    serialized_entries = serialized['entries']
    assert serialized_entries
    assert serialized_entries[0]['entities']

32
tests/test_test.py Normal file
View File

@ -0,0 +1,32 @@
from unittest import TestCase
import requests
import json
from senpy.test import patch_requests
from senpy.models import Results
class TestTest(TestCase):
    """Exercise ``patch_requests`` with string, model and dict payloads."""

    def test_patch_text(self):
        """A plain string payload is exposed as both ``text`` and ``content``."""
        with patch_requests('hello'):
            response = requests.get('http://example.com')
            assert response.text == 'hello'
            assert response.content == 'hello'

    def test_patch_json(self):
        """A Results payload is served as the JSON dump of its JSON-LD form."""
        model = Results()
        with patch_requests(model):
            response = requests.get('http://example.com')
            assert response.content == json.dumps(model.jsonld())
            decoded = response.json()
            assert decoded
            assert decoded['@type'] == model['@type']

    def test_patch_dict(self):
        """A dict payload is served as its JSON dump and returned by ``json()``."""
        payload = {'nothing': 'new'}
        with patch_requests(payload):
            response = requests.get('http://example.com')
            assert response.content == json.dumps(payload)
            decoded = response.json()
            assert decoded
            assert decoded['nothing'] == 'new'