mirror of https://github.com/gsi-upm/senpy synced 2024-11-21 15:52:28 +00:00

Merge branch 'master' into 44-add-basic-evaluation-with-gsitk

commit 5e2ada1654 by J. Fernando Sánchez, 2018-04-23 15:28:51 +02:00
11 changed files with 103 additions and 32 deletions

View File

@@ -1,8 +1,11 @@
 What is Senpy?
 --------------
-Web services can get really complex: data validation, user interaction, formatting, logging, etc.
-The figure below summarizes the typical features in an analysis service.
+Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recognition. Annotations and services are compliant with NIF (NLP Interchange Format).
+
+Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc.
+
+The figure below summarizes the typical features in a text analysis service.
 Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.

 .. image:: senpy-framework.png
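
For context on the plugin-oriented design described above, here is a minimal, hypothetical sketch of what such an analysis plugin could look like with senpy's class-based API. The class name, metadata values and the exact analyse_entry signature are illustrative assumptions, not part of this commit:

from senpy.plugins import SentimentPlugin
from senpy.models import Sentiment


class ExamplePolarityPlugin(SentimentPlugin):
    '''Toy plugin that marks every entry as positive (illustrative only).'''
    author = '@example'
    version = '0.1'

    def analyse_entry(self, entry, params):
        # Attach a Marl-style sentiment annotation to the entry and yield it back;
        # senpy handles validation, formatting and linked-data serialization.
        sentiment = Sentiment()
        sentiment['marl:hasPolarity'] = 'marl:Positive'
        entry.sentiments.append(sentiment)
        yield entry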

View File

@@ -1,8 +1,24 @@
 Vocabularies and model
 ======================
-The model used in Senpy is based on the following vocabularies:
-* Marl, a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
-* Onyx, which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
-* NIF 2.0, which defines a semantic format and API for improving interoperability among natural language processing services.
+The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services.
+
+Senpy has been applied to sentiment and emotion analysis services using the following vocabularies:
+
+* Marl [2,6], a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
+* Onyx [3,5], which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
+
+An overview of the vocabularies and their use can be found in [4].
+
+[1] Guidelines for Developing NIF-based NLP Services. Final Community Group Report, 22 December 2015. Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/
+[2] Marl Ontology Specification. Available at: http://www.gsi.dit.upm.es/ontologies/marl/
+[3] Onyx Ontology Specification. Available at: http://www.gsi.dit.upm.es/ontologies/onyx/
+[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69).
+[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114.
+[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked Opinions: Describing Sentiments on the Structured Web of Data.
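
As a concrete illustration of how these vocabularies surface in senpy's data model, here is a small, hypothetical Python sketch that builds a NIF/Marl-annotated result and serializes it. The property names (nif:isString, marl:hasPolarity) come from the vocabularies above, but their exact use here is an assumption rather than content from this diff; jsonld() is the serialization call referenced elsewhere in this commit:

import json

from senpy.models import Results, Entry, Sentiment

results = Results()

entry = Entry()
entry['nif:isString'] = 'Senpy is a framework for text analysis'  # the analysed text, in NIF terms

sentiment = Sentiment()
sentiment['marl:hasPolarity'] = 'marl:Positive'  # polarity class from the Marl vocabulary
entry.sentiments.append(sentiment)

results.entries.append(entry)
print(json.dumps(results.jsonld(), indent=2))  # JSON-LD document using the NIF/Marl terms above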

View File

@@ -22,6 +22,7 @@ the server.
 from flask import Flask
 from senpy.extensions import Senpy
+from senpy.utils import easy_test
 import logging
 import os
@@ -39,7 +40,7 @@ def main():
         '-l',
         metavar='logging_level',
         type=str,
-        default="ERROR",
+        default="WARN",
         help='Logging level')
     parser.add_argument(
         '--debug',
@@ -75,6 +76,12 @@ def main():
         action='store_true',
         default=False,
         help='Do not run a server, only install plugin dependencies')
+    parser.add_argument(
+        '--only-test',
+        '-t',
+        action='store_true',
+        default=False,
+        help='Do not run a server, just test all plugins')
     parser.add_argument(
         '--only-list',
         '--list',
@@ -122,6 +129,9 @@ def main():
     if args.only_install:
         return
     sp.activate_all()
+    if args.only_test:
+        easy_test(sp.plugins())
+        return
     print('Senpy version {}'.format(senpy.__version__))
     print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
                                                             args.port))
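
The new --only-test flag wires the existing plugin test machinery into the CLI. Below is a rough, hypothetical sketch of the same flow outside the command line; the Senpy constructor arguments are assumptions, while activate_all(), plugins() and easy_test() are the calls used in this diff:

from flask import Flask

from senpy.extensions import Senpy
from senpy.utils import easy_test

app = Flask(__name__)
sp = Senpy(app, plugin_folder='plugins')  # assumed signature: a Flask app plus a plugin folder
sp.activate_all()                         # discover and activate every plugin
easy_test(sp.plugins())                   # run each plugin's self-test, as --only-test now does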

View File

@@ -16,7 +16,6 @@ import os
 import copy
 import errno
 import logging
-import traceback
 #Correct this import for managing the datasets
 from gsitk.datasets.datasets import DatasetManager
@@ -176,22 +175,14 @@ class Senpy(object):
         by api.parse_call().
         """
         logger.debug("analysing request: {}".format(request))
-        try:
-            entries = request.entries
-            request.entries = []
-            plugins = self._get_plugins(request)
-            results = request
-            for i in self._process_entries(entries, results, plugins):
-                results.entries.append(i)
-            self.convert_emotions(results)
-            logger.debug("Returning analysis result: {}".format(results))
-        except (Error, Exception) as ex:
-            if not isinstance(ex, Error):
-                msg = "Error during analysis: {} \n\t{}".format(ex,
-                                                                traceback.format_exc())
-                ex = Error(message=msg, status=500)
-            logger.exception('Error returning analysis result')
-            raise ex
+        entries = request.entries
+        request.entries = []
+        plugins = self._get_plugins(request)
+        results = request
+        for i in self._process_entries(entries, results, plugins):
+            results.entries.append(i)
+        self.convert_emotions(results)
+        logger.debug("Returning analysis result: {}".format(results))
         results.analysis = [i['plugin'].id for i in results.analysis]
         return results
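
With the try/except removed, this method no longer wraps arbitrary exceptions into an Error with status 500; failures propagate to the caller as-is, which is what the updated assertion in tests/test_extensions.py below checks. A hypothetical caller-side pattern, where sp and request are placeholders for an initialised Senpy instance and a parsed request:

try:
    results = sp.analyse(request)
except Exception as ex:
    # Plugin failures now arrive as ordinary exceptions rather than senpy Error
    # objects carrying a status field, so inspect the message via str(ex).
    print('analysis failed: {}'.format(ex))
    raise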

View File

@@ -327,6 +327,7 @@ for i in [
     'emotionModel',
     'emotionPlugin',
     'emotionSet',
+    'entity',
     'help',
     'plugin',
     'plugins',

View File

@@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin):
         from requests.
         '''
         from senpy.test import patch_requests
-        expected = {"data": [{"polarity": 10}]}
+        expected = {"data": [{"polarity": 4}]}
         with patch_requests(expected) as (request, response):
             super(Sentiment140Plugin, self).test(*args, **kwargs)
             assert request.called
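
The Sentiment140 API reports polarity on a 0/2/4 scale (0 negative, 2 neutral, 4 positive), so 4 is a realistic mocked value. A small, hypothetical standalone use of the same mock helper, with a placeholder URL:

import requests

from senpy.test import patch_requests

expected = {"data": [{"polarity": 4}]}
with patch_requests(expected) as (request, response):
    # Any URL works here because the requests module is patched by the helper.
    res = requests.get('http://example.com')
    assert request.called
    assert res.json()['data'][0]['polarity'] == 4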

View File

@@ -3,6 +3,8 @@ try:
 except ImportError:
     from mock import patch, MagicMock
+from past.builtins import basestring
 import json
 from contextlib import contextmanager
@@ -15,13 +17,17 @@ def patch_requests(value, code=200):
     success = MagicMock()
     if isinstance(value, BaseModel):
         value = value.jsonld()
-    data = json.dumps(value)
+    if not isinstance(value, basestring):
+        data = json.dumps(value)
+    else:
+        data = value
     success.json.return_value = value
+    success.data.return_value = data
     success.status_code = code
-    success.content = json.dumps(value)
+    success.content = data
+    success.text = data
     method_mocker = MagicMock()
     method_mocker.return_value = success
     with patch.multiple('requests', request=method_mocker,

View File

@@ -71,8 +71,9 @@ def easy_test(plugin_list=None):
     logger.setLevel(logging.DEBUG)
     logging.getLogger().setLevel(logging.INFO)
     if not plugin_list:
-        from . import plugins
         import __main__
+        logger.info('Loading classes from {}'.format(__main__))
+        from . import plugins
         plugin_list = plugins.from_module(__main__)
     for plug in plugin_list:
         plug.test()

View File

@@ -182,8 +182,7 @@ class ExtensionsTest(TestCase):
             analyse(self.senpy, input='nothing', algorithm='MOCK')
             assert False
         except Exception as ex:
-            assert 'generic exception on analysis' in ex['message']
-            assert ex['status'] == 500
+            assert 'generic exception on analysis' in str(ex)

     def test_filtering(self):
         """ Filtering plugins """

View File

@@ -9,6 +9,7 @@ from senpy.models import (Emotion,
                           EmotionAnalysis,
                           EmotionSet,
                           Entry,
+                          Entity,
                           Error,
                           Results,
                           Sentiment,
@@ -207,3 +208,14 @@ class ModelsTest(TestCase):
         recovered = from_string(string)
         assert isinstance(recovered, Results)
         assert isinstance(recovered.entries[0], Entry)
+
+    def test_serializable(self):
+        r = Results()
+        e = Entry()
+        ent = Entity()
+        e.entities.append(ent)
+        r.entries.append(e)
+        d = r.serializable()
+        assert d
+        assert d['entries']
+        assert d['entries'][0]['entities']

tests/test_test.py (new file, 32 lines)
View File

@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+import requests
+import json
+
+from senpy.test import patch_requests
+from senpy.models import Results
+
+
+class TestTest(TestCase):
+    def test_patch_text(self):
+        with patch_requests('hello'):
+            r = requests.get('http://example.com')
+            assert r.text == 'hello'
+            assert r.content == 'hello'
+
+    def test_patch_json(self):
+        r = Results()
+        with patch_requests(r):
+            res = requests.get('http://example.com')
+            assert res.content == json.dumps(r.jsonld())
+            js = res.json()
+            assert js
+            assert js['@type'] == r['@type']
+
+    def test_patch_dict(self):
+        r = {'nothing': 'new'}
+        with patch_requests(r):
+            res = requests.get('http://example.com')
+            assert res.content == json.dumps(r)
+            js = res.json()
+            assert js
+            assert js['nothing'] == 'new'