mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 00:02:28 +00:00
Merge branch 'master' into 44-add-basic-evaluation-with-gsitk
This commit is contained in:
commit
5e2ada1654
@ -1,8 +1,11 @@
|
|||||||
What is Senpy?
|
What is Senpy?
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
Web services can get really complex: data validation, user interaction, formatting, logging., etc.
|
Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recognition. Annotations and Services are compliant with NIF (NLP Interchange Format).
|
||||||
The figure below summarizes the typical features in an analysis service.
|
|
||||||
|
Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing a core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc.
|
||||||
|
|
||||||
|
The figure below summarizes the typical features in a text analysis service.
|
||||||
Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.
|
Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.
|
||||||
|
|
||||||
.. image:: senpy-framework.png
|
.. image:: senpy-framework.png
|
||||||
|
@ -1,8 +1,24 @@
|
|||||||
Vocabularies and model
|
Vocabularies and model
|
||||||
======================
|
======================
|
||||||
|
|
||||||
The model used in Senpy is based on the following vocabularies:
|
The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services.
|
||||||
|
|
||||||
* Marl, a vocabulary designed to annotate and describe subjetive opinions expressed on the web or in information systems.
|
Senpy has been applied to sentiment and emotion analysis services using the following vocabularies:
|
||||||
* Onyx, which is built one the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
|
|
||||||
* NIF 2.0, which defines a semantic format and APO for improving interoperability among natural language processing services
|
* Marl [2,6], a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
|
||||||
|
* Onyx [3,5], which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
|
||||||
|
|
||||||
|
An overview of the vocabularies and their use can be found in [4].
|
||||||
|
|
||||||
|
|
||||||
|
[1] Guidelines for developing NIF-based NLP services, Final Community Group Report, 22 December 2015. Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/
|
||||||
|
|
||||||
|
[2] Marl Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/marl/
|
||||||
|
|
||||||
|
[3] Onyx Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/onyx/
|
||||||
|
|
||||||
|
[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69).
|
||||||
|
|
||||||
|
[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114.
|
||||||
|
|
||||||
|
[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked opinions: Describing sentiments on the structured web of data.
|
||||||
|
@ -22,6 +22,7 @@ the server.
|
|||||||
|
|
||||||
from flask import Flask
|
from flask import Flask
|
||||||
from senpy.extensions import Senpy
|
from senpy.extensions import Senpy
|
||||||
|
from senpy.utils import easy_test
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@ -39,7 +40,7 @@ def main():
|
|||||||
'-l',
|
'-l',
|
||||||
metavar='logging_level',
|
metavar='logging_level',
|
||||||
type=str,
|
type=str,
|
||||||
default="ERROR",
|
default="WARN",
|
||||||
help='Logging level')
|
help='Logging level')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--debug',
|
'--debug',
|
||||||
@ -75,6 +76,12 @@ def main():
|
|||||||
action='store_true',
|
action='store_true',
|
||||||
default=False,
|
default=False,
|
||||||
help='Do not run a server, only install plugin dependencies')
|
help='Do not run a server, only install plugin dependencies')
|
||||||
|
parser.add_argument(
|
||||||
|
'--only-test',
|
||||||
|
'-t',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='Do not run a server, just test all plugins')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--only-list',
|
'--only-list',
|
||||||
'--list',
|
'--list',
|
||||||
@ -122,6 +129,9 @@ def main():
|
|||||||
if args.only_install:
|
if args.only_install:
|
||||||
return
|
return
|
||||||
sp.activate_all()
|
sp.activate_all()
|
||||||
|
if args.only_test:
|
||||||
|
easy_test(sp.plugins())
|
||||||
|
return
|
||||||
print('Senpy version {}'.format(senpy.__version__))
|
print('Senpy version {}'.format(senpy.__version__))
|
||||||
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
|
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
|
||||||
args.port))
|
args.port))
|
||||||
|
@ -16,7 +16,6 @@ import os
|
|||||||
import copy
|
import copy
|
||||||
import errno
|
import errno
|
||||||
import logging
|
import logging
|
||||||
import traceback
|
|
||||||
|
|
||||||
#Correct this import for managing the datasets
|
#Correct this import for managing the datasets
|
||||||
from gsitk.datasets.datasets import DatasetManager
|
from gsitk.datasets.datasets import DatasetManager
|
||||||
@ -176,22 +175,14 @@ class Senpy(object):
|
|||||||
by api.parse_call().
|
by api.parse_call().
|
||||||
"""
|
"""
|
||||||
logger.debug("analysing request: {}".format(request))
|
logger.debug("analysing request: {}".format(request))
|
||||||
try:
|
entries = request.entries
|
||||||
entries = request.entries
|
request.entries = []
|
||||||
request.entries = []
|
plugins = self._get_plugins(request)
|
||||||
plugins = self._get_plugins(request)
|
results = request
|
||||||
results = request
|
for i in self._process_entries(entries, results, plugins):
|
||||||
for i in self._process_entries(entries, results, plugins):
|
results.entries.append(i)
|
||||||
results.entries.append(i)
|
self.convert_emotions(results)
|
||||||
self.convert_emotions(results)
|
logger.debug("Returning analysis result: {}".format(results))
|
||||||
logger.debug("Returning analysis result: {}".format(results))
|
|
||||||
except (Error, Exception) as ex:
|
|
||||||
if not isinstance(ex, Error):
|
|
||||||
msg = "Error during analysis: {} \n\t{}".format(ex,
|
|
||||||
traceback.format_exc())
|
|
||||||
ex = Error(message=msg, status=500)
|
|
||||||
logger.exception('Error returning analysis result')
|
|
||||||
raise ex
|
|
||||||
results.analysis = [i['plugin'].id for i in results.analysis]
|
results.analysis = [i['plugin'].id for i in results.analysis]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@ -327,6 +327,7 @@ for i in [
|
|||||||
'emotionModel',
|
'emotionModel',
|
||||||
'emotionPlugin',
|
'emotionPlugin',
|
||||||
'emotionSet',
|
'emotionSet',
|
||||||
|
'entity',
|
||||||
'help',
|
'help',
|
||||||
'plugin',
|
'plugin',
|
||||||
'plugins',
|
'plugins',
|
||||||
|
@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin):
|
|||||||
from requests.
|
from requests.
|
||||||
'''
|
'''
|
||||||
from senpy.test import patch_requests
|
from senpy.test import patch_requests
|
||||||
expected = {"data": [{"polarity": 10}]}
|
expected = {"data": [{"polarity": 4}]}
|
||||||
with patch_requests(expected) as (request, response):
|
with patch_requests(expected) as (request, response):
|
||||||
super(Sentiment140Plugin, self).test(*args, **kwargs)
|
super(Sentiment140Plugin, self).test(*args, **kwargs)
|
||||||
assert request.called
|
assert request.called
|
||||||
|
@ -3,6 +3,8 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
from mock import patch, MagicMock
|
from mock import patch, MagicMock
|
||||||
|
|
||||||
|
from past.builtins import basestring
|
||||||
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
@ -15,13 +17,17 @@ def patch_requests(value, code=200):
|
|||||||
success = MagicMock()
|
success = MagicMock()
|
||||||
if isinstance(value, BaseModel):
|
if isinstance(value, BaseModel):
|
||||||
value = value.jsonld()
|
value = value.jsonld()
|
||||||
data = json.dumps(value)
|
if not isinstance(value, basestring):
|
||||||
|
data = json.dumps(value)
|
||||||
|
else:
|
||||||
|
data = value
|
||||||
|
|
||||||
success.json.return_value = value
|
success.json.return_value = value
|
||||||
success.data.return_value = data
|
|
||||||
success.status_code = code
|
|
||||||
|
|
||||||
success.content = json.dumps(value)
|
success.status_code = code
|
||||||
|
success.content = data
|
||||||
|
success.text = data
|
||||||
|
|
||||||
method_mocker = MagicMock()
|
method_mocker = MagicMock()
|
||||||
method_mocker.return_value = success
|
method_mocker.return_value = success
|
||||||
with patch.multiple('requests', request=method_mocker,
|
with patch.multiple('requests', request=method_mocker,
|
||||||
|
@ -71,8 +71,9 @@ def easy_test(plugin_list=None):
|
|||||||
logger.setLevel(logging.DEBUG)
|
logger.setLevel(logging.DEBUG)
|
||||||
logging.getLogger().setLevel(logging.INFO)
|
logging.getLogger().setLevel(logging.INFO)
|
||||||
if not plugin_list:
|
if not plugin_list:
|
||||||
from . import plugins
|
|
||||||
import __main__
|
import __main__
|
||||||
|
logger.info('Loading classes from {}'.format(__main__))
|
||||||
|
from . import plugins
|
||||||
plugin_list = plugins.from_module(__main__)
|
plugin_list = plugins.from_module(__main__)
|
||||||
for plug in plugin_list:
|
for plug in plugin_list:
|
||||||
plug.test()
|
plug.test()
|
||||||
|
@ -182,8 +182,7 @@ class ExtensionsTest(TestCase):
|
|||||||
analyse(self.senpy, input='nothing', algorithm='MOCK')
|
analyse(self.senpy, input='nothing', algorithm='MOCK')
|
||||||
assert False
|
assert False
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
assert 'generic exception on analysis' in ex['message']
|
assert 'generic exception on analysis' in str(ex)
|
||||||
assert ex['status'] == 500
|
|
||||||
|
|
||||||
def test_filtering(self):
|
def test_filtering(self):
|
||||||
""" Filtering plugins """
|
""" Filtering plugins """
|
||||||
|
@ -9,6 +9,7 @@ from senpy.models import (Emotion,
|
|||||||
EmotionAnalysis,
|
EmotionAnalysis,
|
||||||
EmotionSet,
|
EmotionSet,
|
||||||
Entry,
|
Entry,
|
||||||
|
Entity,
|
||||||
Error,
|
Error,
|
||||||
Results,
|
Results,
|
||||||
Sentiment,
|
Sentiment,
|
||||||
@ -207,3 +208,14 @@ class ModelsTest(TestCase):
|
|||||||
recovered = from_string(string)
|
recovered = from_string(string)
|
||||||
assert isinstance(recovered, Results)
|
assert isinstance(recovered, Results)
|
||||||
assert isinstance(recovered.entries[0], Entry)
|
assert isinstance(recovered.entries[0], Entry)
|
||||||
|
|
||||||
|
def test_serializable(self):
|
||||||
|
r = Results()
|
||||||
|
e = Entry()
|
||||||
|
ent = Entity()
|
||||||
|
e.entities.append(ent)
|
||||||
|
r.entries.append(e)
|
||||||
|
d = r.serializable()
|
||||||
|
assert d
|
||||||
|
assert d['entries']
|
||||||
|
assert d['entries'][0]['entities']
|
||||||
|
32
tests/test_test.py
Normal file
32
tests/test_test.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from senpy.test import patch_requests
|
||||||
|
from senpy.models import Results
|
||||||
|
|
||||||
|
|
||||||
|
class TestTest(TestCase):
|
||||||
|
def test_patch_text(self):
|
||||||
|
with patch_requests('hello'):
|
||||||
|
r = requests.get('http://example.com')
|
||||||
|
assert r.text == 'hello'
|
||||||
|
assert r.content == 'hello'
|
||||||
|
|
||||||
|
def test_patch_json(self):
|
||||||
|
r = Results()
|
||||||
|
with patch_requests(r):
|
||||||
|
res = requests.get('http://example.com')
|
||||||
|
assert res.content == json.dumps(r.jsonld())
|
||||||
|
js = res.json()
|
||||||
|
assert js
|
||||||
|
assert js['@type'] == r['@type']
|
||||||
|
|
||||||
|
def test_patch_dict(self):
|
||||||
|
r = {'nothing': 'new'}
|
||||||
|
with patch_requests(r):
|
||||||
|
res = requests.get('http://example.com')
|
||||||
|
assert res.content == json.dumps(r)
|
||||||
|
js = res.json()
|
||||||
|
assert js
|
||||||
|
assert js['nothing'] == 'new'
|
Loading…
Reference in New Issue
Block a user