mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-21 15:52:28 +00:00
Merge branch 'master' into 44-add-basic-evaluation-with-gsitk
This commit is contained in:
commit
5e2ada1654
@ -1,8 +1,11 @@
|
||||
What is Senpy?
|
||||
--------------
|
||||
|
||||
Web services can get really complex: data validation, user interaction, formatting, logging., etc.
|
||||
The figure below summarizes the typical features in an analysis service.
|
||||
Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recoginition. Annotations and Services are compliant with NIF (NLP Interchange Format).
|
||||
|
||||
Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing a core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc.
|
||||
|
||||
The figure below summarizes the typical features in a text analysis service.
|
||||
Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.
|
||||
|
||||
.. image:: senpy-framework.png
|
||||
|
@ -1,8 +1,24 @@
|
||||
Vocabularies and model
|
||||
======================
|
||||
|
||||
The model used in Senpy is based on the following vocabularies:
|
||||
The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services.
|
||||
|
||||
* Marl, a vocabulary designed to annotate and describe subjetive opinions expressed on the web or in information systems.
|
||||
* Onyx, which is built one the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
|
||||
* NIF 2.0, which defines a semantic format and APO for improving interoperability among natural language processing services
|
||||
Senpy has been applied to sentiment and emotion analysis services using the following vocabularies:
|
||||
|
||||
* Marl [2,6], a vocabulary designed to annotate and describe subjetive opinions expressed on the web or in information systems.
|
||||
* Onyx [3,5], which is built one the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
|
||||
|
||||
An overview of the vocabularies and their use can be found in [4].
|
||||
|
||||
|
||||
[1] Guidelines for developing NIF-based NLP services, Final Community Group Report 22 December 2015 Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/
|
||||
|
||||
[2] Marl Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/marl/
|
||||
|
||||
[3] Onyx Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/onyx/
|
||||
|
||||
[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69).
|
||||
|
||||
[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114.
|
||||
|
||||
[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked opinions: Describing sentiments on the structured web of data.
|
||||
|
@ -22,6 +22,7 @@ the server.
|
||||
|
||||
from flask import Flask
|
||||
from senpy.extensions import Senpy
|
||||
from senpy.utils import easy_test
|
||||
|
||||
import logging
|
||||
import os
|
||||
@ -39,7 +40,7 @@ def main():
|
||||
'-l',
|
||||
metavar='logging_level',
|
||||
type=str,
|
||||
default="ERROR",
|
||||
default="WARN",
|
||||
help='Logging level')
|
||||
parser.add_argument(
|
||||
'--debug',
|
||||
@ -75,6 +76,12 @@ def main():
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Do not run a server, only install plugin dependencies')
|
||||
parser.add_argument(
|
||||
'--only-test',
|
||||
'-t',
|
||||
action='store_true',
|
||||
default=False,
|
||||
help='Do not run a server, just test all plugins')
|
||||
parser.add_argument(
|
||||
'--only-list',
|
||||
'--list',
|
||||
@ -122,6 +129,9 @@ def main():
|
||||
if args.only_install:
|
||||
return
|
||||
sp.activate_all()
|
||||
if args.only_test:
|
||||
easy_test(sp.plugins())
|
||||
return
|
||||
print('Senpy version {}'.format(senpy.__version__))
|
||||
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
|
||||
args.port))
|
||||
|
@ -16,7 +16,6 @@ import os
|
||||
import copy
|
||||
import errno
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
#Correct this import for managing the datasets
|
||||
from gsitk.datasets.datasets import DatasetManager
|
||||
@ -176,22 +175,14 @@ class Senpy(object):
|
||||
by api.parse_call().
|
||||
"""
|
||||
logger.debug("analysing request: {}".format(request))
|
||||
try:
|
||||
entries = request.entries
|
||||
request.entries = []
|
||||
plugins = self._get_plugins(request)
|
||||
results = request
|
||||
for i in self._process_entries(entries, results, plugins):
|
||||
results.entries.append(i)
|
||||
self.convert_emotions(results)
|
||||
logger.debug("Returning analysis result: {}".format(results))
|
||||
except (Error, Exception) as ex:
|
||||
if not isinstance(ex, Error):
|
||||
msg = "Error during analysis: {} \n\t{}".format(ex,
|
||||
traceback.format_exc())
|
||||
ex = Error(message=msg, status=500)
|
||||
logger.exception('Error returning analysis result')
|
||||
raise ex
|
||||
entries = request.entries
|
||||
request.entries = []
|
||||
plugins = self._get_plugins(request)
|
||||
results = request
|
||||
for i in self._process_entries(entries, results, plugins):
|
||||
results.entries.append(i)
|
||||
self.convert_emotions(results)
|
||||
logger.debug("Returning analysis result: {}".format(results))
|
||||
results.analysis = [i['plugin'].id for i in results.analysis]
|
||||
return results
|
||||
|
||||
|
@ -327,6 +327,7 @@ for i in [
|
||||
'emotionModel',
|
||||
'emotionPlugin',
|
||||
'emotionSet',
|
||||
'entity',
|
||||
'help',
|
||||
'plugin',
|
||||
'plugins',
|
||||
|
@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin):
|
||||
from requests.
|
||||
'''
|
||||
from senpy.test import patch_requests
|
||||
expected = {"data": [{"polarity": 10}]}
|
||||
expected = {"data": [{"polarity": 4}]}
|
||||
with patch_requests(expected) as (request, response):
|
||||
super(Sentiment140Plugin, self).test(*args, **kwargs)
|
||||
assert request.called
|
||||
|
@ -3,6 +3,8 @@ try:
|
||||
except ImportError:
|
||||
from mock import patch, MagicMock
|
||||
|
||||
from past.builtins import basestring
|
||||
|
||||
|
||||
import json
|
||||
from contextlib import contextmanager
|
||||
@ -15,13 +17,17 @@ def patch_requests(value, code=200):
|
||||
success = MagicMock()
|
||||
if isinstance(value, BaseModel):
|
||||
value = value.jsonld()
|
||||
data = json.dumps(value)
|
||||
if not isinstance(value, basestring):
|
||||
data = json.dumps(value)
|
||||
else:
|
||||
data = value
|
||||
|
||||
success.json.return_value = value
|
||||
success.data.return_value = data
|
||||
success.status_code = code
|
||||
|
||||
success.content = json.dumps(value)
|
||||
success.status_code = code
|
||||
success.content = data
|
||||
success.text = data
|
||||
|
||||
method_mocker = MagicMock()
|
||||
method_mocker.return_value = success
|
||||
with patch.multiple('requests', request=method_mocker,
|
||||
|
@ -71,8 +71,9 @@ def easy_test(plugin_list=None):
|
||||
logger.setLevel(logging.DEBUG)
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
if not plugin_list:
|
||||
from . import plugins
|
||||
import __main__
|
||||
logger.info('Loading classes from {}'.format(__main__))
|
||||
from . import plugins
|
||||
plugin_list = plugins.from_module(__main__)
|
||||
for plug in plugin_list:
|
||||
plug.test()
|
||||
|
@ -182,8 +182,7 @@ class ExtensionsTest(TestCase):
|
||||
analyse(self.senpy, input='nothing', algorithm='MOCK')
|
||||
assert False
|
||||
except Exception as ex:
|
||||
assert 'generic exception on analysis' in ex['message']
|
||||
assert ex['status'] == 500
|
||||
assert 'generic exception on analysis' in str(ex)
|
||||
|
||||
def test_filtering(self):
|
||||
""" Filtering plugins """
|
||||
|
@ -9,6 +9,7 @@ from senpy.models import (Emotion,
|
||||
EmotionAnalysis,
|
||||
EmotionSet,
|
||||
Entry,
|
||||
Entity,
|
||||
Error,
|
||||
Results,
|
||||
Sentiment,
|
||||
@ -207,3 +208,14 @@ class ModelsTest(TestCase):
|
||||
recovered = from_string(string)
|
||||
assert isinstance(recovered, Results)
|
||||
assert isinstance(recovered.entries[0], Entry)
|
||||
|
||||
def test_serializable(self):
|
||||
r = Results()
|
||||
e = Entry()
|
||||
ent = Entity()
|
||||
e.entities.append(ent)
|
||||
r.entries.append(e)
|
||||
d = r.serializable()
|
||||
assert d
|
||||
assert d['entries']
|
||||
assert d['entries'][0]['entities']
|
||||
|
32
tests/test_test.py
Normal file
32
tests/test_test.py
Normal file
@ -0,0 +1,32 @@
|
||||
from unittest import TestCase
|
||||
|
||||
import requests
|
||||
import json
|
||||
from senpy.test import patch_requests
|
||||
from senpy.models import Results
|
||||
|
||||
|
||||
class TestTest(TestCase):
|
||||
def test_patch_text(self):
|
||||
with patch_requests('hello'):
|
||||
r = requests.get('http://example.com')
|
||||
assert r.text == 'hello'
|
||||
assert r.content == 'hello'
|
||||
|
||||
def test_patch_json(self):
|
||||
r = Results()
|
||||
with patch_requests(r):
|
||||
res = requests.get('http://example.com')
|
||||
assert res.content == json.dumps(r.jsonld())
|
||||
js = res.json()
|
||||
assert js
|
||||
assert js['@type'] == r['@type']
|
||||
|
||||
def test_patch_dict(self):
|
||||
r = {'nothing': 'new'}
|
||||
with patch_requests(r):
|
||||
res = requests.get('http://example.com')
|
||||
assert res.content == json.dumps(r)
|
||||
js = res.json()
|
||||
assert js
|
||||
assert js['nothing'] == 'new'
|
Loading…
Reference in New Issue
Block a user