diff --git a/docs/senpy.rst b/docs/senpy.rst index 67cdba7..bbdfb0d 100644 --- a/docs/senpy.rst +++ b/docs/senpy.rst @@ -1,8 +1,11 @@ What is Senpy? -------------- -Web services can get really complex: data validation, user interaction, formatting, logging., etc. -The figure below summarizes the typical features in an analysis service. +Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recognition. Annotations and Services are compliant with NIF (NLP Interchange Format). + +Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing a core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc. + +The figure below summarizes the typical features in a text analysis service. Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems. .. image:: senpy-framework.png diff --git a/docs/vocabularies.rst b/docs/vocabularies.rst index c47c69a..e55ad36 100644 --- a/docs/vocabularies.rst +++ b/docs/vocabularies.rst @@ -1,8 +1,24 @@ Vocabularies and model ====================== -The model used in Senpy is based on the following vocabularies: +The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services. -* Marl, a vocabulary designed to annotate and describe subjetive opinions expressed on the web or in information systems. -* Onyx, which is built one the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language. 
-* NIF 2.0, which defines a semantic format and APO for improving interoperability among natural language processing services +Senpy has been applied to sentiment and emotion analysis services using the following vocabularies: + +* Marl [2,6], a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems. +* Onyx [3,5], which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language. + +An overview of the vocabularies and their use can be found in [4]. + + +[1] Guidelines for developing NIF-based NLP services, Final Community Group Report 22 December 2015. Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/ + +[2] Marl Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/marl/ + +[3] Onyx Ontology Specification, available at http://www.gsi.dit.upm.es/ontologies/onyx/ + +[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69). + +[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114. + +[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked opinions: Describing sentiments on the structured web of data. diff --git a/senpy/__main__.py b/senpy/__main__.py index be5b31d..886675c 100644 --- a/senpy/__main__.py +++ b/senpy/__main__.py @@ -22,6 +22,7 @@ the server. 
from flask import Flask from senpy.extensions import Senpy +from senpy.utils import easy_test import logging import os @@ -39,7 +40,7 @@ def main(): '-l', metavar='logging_level', type=str, - default="ERROR", + default="WARN", help='Logging level') parser.add_argument( '--debug', @@ -75,6 +76,12 @@ def main(): action='store_true', default=False, help='Do not run a server, only install plugin dependencies') + parser.add_argument( + '--only-test', + '-t', + action='store_true', + default=False, + help='Do not run a server, just test all plugins') parser.add_argument( '--only-list', '--list', @@ -122,6 +129,9 @@ def main(): if args.only_install: return sp.activate_all() + if args.only_test: + easy_test(sp.plugins()) + return print('Senpy version {}'.format(senpy.__version__)) print('Server running on port %s:%d. Ctrl+C to quit' % (args.host, args.port)) diff --git a/senpy/extensions.py b/senpy/extensions.py index 735f8a3..8ed6e58 100644 --- a/senpy/extensions.py +++ b/senpy/extensions.py @@ -16,7 +16,6 @@ import os import copy import errno import logging -import traceback #Correct this import for managing the datasets from gsitk.datasets.datasets import DatasetManager @@ -176,22 +175,14 @@ class Senpy(object): by api.parse_call(). 
""" logger.debug("analysing request: {}".format(request)) - try: - entries = request.entries - request.entries = [] - plugins = self._get_plugins(request) - results = request - for i in self._process_entries(entries, results, plugins): - results.entries.append(i) - self.convert_emotions(results) - logger.debug("Returning analysis result: {}".format(results)) - except (Error, Exception) as ex: - if not isinstance(ex, Error): - msg = "Error during analysis: {} \n\t{}".format(ex, - traceback.format_exc()) - ex = Error(message=msg, status=500) - logger.exception('Error returning analysis result') - raise ex + entries = request.entries + request.entries = [] + plugins = self._get_plugins(request) + results = request + for i in self._process_entries(entries, results, plugins): + results.entries.append(i) + self.convert_emotions(results) + logger.debug("Returning analysis result: {}".format(results)) results.analysis = [i['plugin'].id for i in results.analysis] return results diff --git a/senpy/models.py b/senpy/models.py index 8a36633..25636a0 100644 --- a/senpy/models.py +++ b/senpy/models.py @@ -327,6 +327,7 @@ for i in [ 'emotionModel', 'emotionPlugin', 'emotionSet', + 'entity', 'help', 'plugin', 'plugins', diff --git a/senpy/plugins/sentiment/sentiment140/sentiment140.py b/senpy/plugins/sentiment/sentiment140/sentiment140.py index 782e06d..d10e010 100644 --- a/senpy/plugins/sentiment/sentiment140/sentiment140.py +++ b/senpy/plugins/sentiment/sentiment140/sentiment140.py @@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin): from requests. 
''' from senpy.test import patch_requests - expected = {"data": [{"polarity": 10}]} + expected = {"data": [{"polarity": 4}]} with patch_requests(expected) as (request, response): super(Sentiment140Plugin, self).test(*args, **kwargs) assert request.called diff --git a/senpy/test.py b/senpy/test.py index 11b36fd..9190637 100644 --- a/senpy/test.py +++ b/senpy/test.py @@ -3,6 +3,8 @@ try: except ImportError: from mock import patch, MagicMock +from past.builtins import basestring + import json from contextlib import contextmanager @@ -15,13 +17,17 @@ def patch_requests(value, code=200): success = MagicMock() if isinstance(value, BaseModel): value = value.jsonld() - data = json.dumps(value) + if not isinstance(value, basestring): + data = json.dumps(value) + else: + data = value success.json.return_value = value - success.data.return_value = data + success.status_code = code + success.content = data + success.text = data - success.content = json.dumps(value) method_mocker = MagicMock() method_mocker.return_value = success with patch.multiple('requests', request=method_mocker, diff --git a/senpy/utils.py b/senpy/utils.py index 85fb5c3..1f14de3 100644 --- a/senpy/utils.py +++ b/senpy/utils.py @@ -71,8 +71,9 @@ def easy_test(plugin_list=None): logger.setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.INFO) if not plugin_list: - from . import plugins import __main__ + logger.info('Loading classes from {}'.format(__main__)) + from . 
import plugins plugin_list = plugins.from_module(__main__) for plug in plugin_list: plug.test() diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 849f5d9..af8b11b 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -182,8 +182,7 @@ class ExtensionsTest(TestCase): analyse(self.senpy, input='nothing', algorithm='MOCK') assert False except Exception as ex: - assert 'generic exception on analysis' in ex['message'] - assert ex['status'] == 500 + assert 'generic exception on analysis' in str(ex) def test_filtering(self): """ Filtering plugins """ diff --git a/tests/test_models.py b/tests/test_models.py index abdc8f0..0f8d170 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -9,6 +9,7 @@ from senpy.models import (Emotion, EmotionAnalysis, EmotionSet, Entry, + Entity, Error, Results, Sentiment, @@ -207,3 +208,14 @@ class ModelsTest(TestCase): recovered = from_string(string) assert isinstance(recovered, Results) assert isinstance(recovered.entries[0], Entry) + + def test_serializable(self): + r = Results() + e = Entry() + ent = Entity() + e.entities.append(ent) + r.entries.append(e) + d = r.serializable() + assert d + assert d['entries'] + assert d['entries'][0]['entities'] diff --git a/tests/test_test.py b/tests/test_test.py new file mode 100644 index 0000000..d5beea8 --- /dev/null +++ b/tests/test_test.py @@ -0,0 +1,32 @@ +from unittest import TestCase + +import requests +import json +from senpy.test import patch_requests +from senpy.models import Results + + +class TestTest(TestCase): + def test_patch_text(self): + with patch_requests('hello'): + r = requests.get('http://example.com') + assert r.text == 'hello' + assert r.content == 'hello' + + def test_patch_json(self): + r = Results() + with patch_requests(r): + res = requests.get('http://example.com') + assert res.content == json.dumps(r.jsonld()) + js = res.json() + assert js + assert js['@type'] == r['@type'] + + def test_patch_dict(self): + r = {'nothing': 
'new'} + with patch_requests(r): + res = requests.get('http://example.com') + assert res.content == json.dumps(r) + js = res.json() + assert js + assert js['nothing'] == 'new'