mirror of https://github.com/gsi-upm/senpy synced 2024-11-21 15:52:28 +00:00

Merge branch 'master' into 44-add-basic-evaluation-with-gsitk

commit 5e2ada1654 by J. Fernando Sánchez, 2018-04-23 15:28:51 +02:00
11 changed files with 103 additions and 32 deletions

View File

@@ -1,8 +1,11 @@
 What is Senpy?
 --------------
-Web services can get really complex: data validation, user interaction, formatting, logging, etc.
-The figure below summarizes the typical features in an analysis service.
+Senpy is a framework for text analysis using Linked Data. There are three main applications of Senpy so far: sentiment and emotion analysis, user profiling and entity recognition. Annotations and services are compliant with NIF (NLP Interchange Format).
+
+Senpy aims at providing a framework where analysis modules can be integrated easily as plugins, and providing core functionality for managing tasks such as data validation, user interaction, formatting, logging, translation to linked data, etc.
+
+The figure below summarizes the typical features in a text analysis service.
 Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.

 .. image:: senpy-framework.png
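
For context on the plugin-oriented design described above, here is a minimal, hypothetical sketch of what such an analysis plugin could look like with senpy's class-based API. The class name, metadata values and the exact analyse_entry signature are illustrative assumptions, not part of this commit:

from senpy.plugins import SentimentPlugin
from senpy.models import Sentiment


class ExamplePolarityPlugin(SentimentPlugin):
    '''Toy plugin that marks every entry as positive (illustrative only).'''
    author = '@example'
    version = '0.1'

    def analyse_entry(self, entry, params):
        # Attach a Marl-style sentiment annotation to the entry and yield it back;
        # senpy handles validation, formatting and linked-data serialization.
        sentiment = Sentiment()
        sentiment['marl:hasPolarity'] = 'marl:Positive'
        entry.sentiments.append(sentiment)
        yield entry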

View File

@@ -1,8 +1,24 @@
 Vocabularies and model
 ======================
-The model used in Senpy is based on the following vocabularies:
-* Marl, a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
-* Onyx, which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
-* NIF 2.0, which defines a semantic format and API for improving interoperability among natural language processing services.
+The model used in Senpy is based on NIF 2.0 [1], which defines a semantic format and API for improving interoperability among natural language processing services.
+
+Senpy has been applied to sentiment and emotion analysis services using the following vocabularies:
+
+* Marl [2,6], a vocabulary designed to annotate and describe subjective opinions expressed on the web or in information systems.
+* Onyx [3,5], which is built on the same principles as Marl to annotate and describe emotions, and provides interoperability with Emotion Markup Language.
+
+An overview of the vocabularies and their use can be found in [4].
+
+[1] Guidelines for Developing NIF-based NLP Services. Final Community Group Report, 22 December 2015. Available at: https://www.w3.org/2015/09/bpmlod-reports/nif-based-nlp-webservices/
+[2] Marl Ontology Specification. Available at: http://www.gsi.dit.upm.es/ontologies/marl/
+[3] Onyx Ontology Specification. Available at: http://www.gsi.dit.upm.es/ontologies/onyx/
+[4] Iglesias, C. A., Sánchez-Rada, J. F., Vulcu, G., & Buitelaar, P. (2017). Linked Data Models for Sentiment and Emotion Analysis in Social Networks. In Sentiment Analysis in Social Networks (pp. 49-69).
+[5] Sánchez-Rada, J. F., & Iglesias, C. A. (2016). Onyx: A linked data approach to emotion representation. Information Processing & Management, 52(1), 99-114.
+[6] Westerski, A., Iglesias Fernandez, C. A., & Tapia Rico, F. (2011). Linked Opinions: Describing Sentiments on the Structured Web of Data.
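
As a concrete illustration of how these vocabularies surface in senpy's data model, here is a small, hypothetical Python sketch that builds a NIF/Marl-annotated result and serializes it. The property names (nif:isString, marl:hasPolarity) come from the vocabularies above, but their exact use here is an assumption rather than content from this diff; jsonld() is the serialization call referenced elsewhere in this commit:

import json

from senpy.models import Results, Entry, Sentiment

results = Results()

entry = Entry()
entry['nif:isString'] = 'Senpy is a framework for text analysis'  # the analysed text, in NIF terms

sentiment = Sentiment()
sentiment['marl:hasPolarity'] = 'marl:Positive'  # polarity class from the Marl vocabulary
entry.sentiments.append(sentiment)

results.entries.append(entry)
print(json.dumps(results.jsonld(), indent=2))  # JSON-LD document using the NIF/Marl terms above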

View File

@@ -22,6 +22,7 @@ the server.
 from flask import Flask
 from senpy.extensions import Senpy
+from senpy.utils import easy_test
 import logging
 import os
@@ -39,7 +40,7 @@ def main():
         '-l',
         metavar='logging_level',
         type=str,
-        default="ERROR",
+        default="WARN",
         help='Logging level')
     parser.add_argument(
         '--debug',
@@ -75,6 +76,12 @@ def main():
         action='store_true',
         default=False,
         help='Do not run a server, only install plugin dependencies')
+    parser.add_argument(
+        '--only-test',
+        '-t',
+        action='store_true',
+        default=False,
+        help='Do not run a server, just test all plugins')
     parser.add_argument(
         '--only-list',
         '--list',
@@ -122,6 +129,9 @@ def main():
     if args.only_install:
         return
     sp.activate_all()
+    if args.only_test:
+        easy_test(sp.plugins())
+        return
     print('Senpy version {}'.format(senpy.__version__))
     print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
                                                             args.port))
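
The new --only-test flag wires the existing plugin test machinery into the CLI. Below is a rough, hypothetical sketch of the same flow outside the command line; the Senpy constructor arguments are assumptions, while activate_all(), plugins() and easy_test() are the calls used in this diff:

from flask import Flask

from senpy.extensions import Senpy
from senpy.utils import easy_test

app = Flask(__name__)
sp = Senpy(app, plugin_folder='plugins')  # assumed signature: a Flask app plus a plugin folder
sp.activate_all()                         # discover and activate every plugin
easy_test(sp.plugins())                   # run each plugin's self-test, as --only-test now does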

View File

@@ -16,7 +16,6 @@ import os
 import copy
 import errno
 import logging
-import traceback
 #Correct this import for managing the datasets
 from gsitk.datasets.datasets import DatasetManager
@@ -176,22 +175,14 @@ class Senpy(object):
         by api.parse_call().
         """
         logger.debug("analysing request: {}".format(request))
-        try:
-            entries = request.entries
-            request.entries = []
-            plugins = self._get_plugins(request)
-            results = request
-            for i in self._process_entries(entries, results, plugins):
-                results.entries.append(i)
-            self.convert_emotions(results)
-            logger.debug("Returning analysis result: {}".format(results))
-        except (Error, Exception) as ex:
-            if not isinstance(ex, Error):
-                msg = "Error during analysis: {} \n\t{}".format(ex,
-                                                                traceback.format_exc())
-                ex = Error(message=msg, status=500)
-            logger.exception('Error returning analysis result')
-            raise ex
+        entries = request.entries
+        request.entries = []
+        plugins = self._get_plugins(request)
+        results = request
+        for i in self._process_entries(entries, results, plugins):
+            results.entries.append(i)
+        self.convert_emotions(results)
+        logger.debug("Returning analysis result: {}".format(results))
         results.analysis = [i['plugin'].id for i in results.analysis]
         return results
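
With the try/except removed, this method no longer wraps arbitrary exceptions into an Error with status 500; failures propagate to the caller as-is, which is what the updated assertion in tests/test_extensions.py below checks. A hypothetical caller-side pattern, where sp and request are placeholders for an initialised Senpy instance and a parsed request:

try:
    results = sp.analyse(request)
except Exception as ex:
    # Plugin failures now arrive as ordinary exceptions rather than senpy Error
    # objects carrying a status field, so inspect the message via str(ex).
    print('analysis failed: {}'.format(ex))
    raise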

View File

@@ -327,6 +327,7 @@ for i in [
     'emotionModel',
     'emotionPlugin',
     'emotionSet',
+    'entity',
     'help',
     'plugin',
     'plugins',

View File

@@ -42,7 +42,7 @@ class Sentiment140Plugin(SentimentPlugin):
         from requests.
         '''
         from senpy.test import patch_requests
-        expected = {"data": [{"polarity": 10}]}
+        expected = {"data": [{"polarity": 4}]}
         with patch_requests(expected) as (request, response):
             super(Sentiment140Plugin, self).test(*args, **kwargs)
             assert request.called
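
The Sentiment140 API reports polarity on a 0/2/4 scale (0 negative, 2 neutral, 4 positive), so 4 is a realistic mocked value. A small, hypothetical standalone use of the same mock helper, with a placeholder URL:

import requests

from senpy.test import patch_requests

expected = {"data": [{"polarity": 4}]}
with patch_requests(expected) as (request, response):
    # Any URL works here because the requests module is patched by the helper.
    res = requests.get('http://example.com')
    assert request.called
    assert res.json()['data'][0]['polarity'] == 4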

View File

@@ -3,6 +3,8 @@ try:
 except ImportError:
     from mock import patch, MagicMock
+from past.builtins import basestring
 import json
 from contextlib import contextmanager
@@ -15,13 +17,17 @@ def patch_requests(value, code=200):
     success = MagicMock()
     if isinstance(value, BaseModel):
         value = value.jsonld()
-    data = json.dumps(value)
+    if not isinstance(value, basestring):
+        data = json.dumps(value)
+    else:
+        data = value
     success.json.return_value = value
+    success.data.return_value = data
     success.status_code = code
-    success.content = json.dumps(value)
+    success.content = data
+    success.text = data
     method_mocker = MagicMock()
     method_mocker.return_value = success
     with patch.multiple('requests', request=method_mocker,

View File

@@ -71,8 +71,9 @@ def easy_test(plugin_list=None):
     logger.setLevel(logging.DEBUG)
     logging.getLogger().setLevel(logging.INFO)
     if not plugin_list:
-        from . import plugins
         import __main__
+        logger.info('Loading classes from {}'.format(__main__))
+        from . import plugins
         plugin_list = plugins.from_module(__main__)
     for plug in plugin_list:
         plug.test()

View File

@@ -182,8 +182,7 @@ class ExtensionsTest(TestCase):
             analyse(self.senpy, input='nothing', algorithm='MOCK')
             assert False
         except Exception as ex:
-            assert 'generic exception on analysis' in ex['message']
-            assert ex['status'] == 500
+            assert 'generic exception on analysis' in str(ex)

     def test_filtering(self):
         """ Filtering plugins """

View File

@@ -9,6 +9,7 @@ from senpy.models import (Emotion,
                           EmotionAnalysis,
                           EmotionSet,
                           Entry,
+                          Entity,
                           Error,
                           Results,
                           Sentiment,
@@ -207,3 +208,14 @@ class ModelsTest(TestCase):
         recovered = from_string(string)
         assert isinstance(recovered, Results)
         assert isinstance(recovered.entries[0], Entry)
+
+    def test_serializable(self):
+        r = Results()
+        e = Entry()
+        ent = Entity()
+        e.entities.append(ent)
+        r.entries.append(e)
+        d = r.serializable()
+        assert d
+        assert d['entries']
+        assert d['entries'][0]['entities']

tests/test_test.py (new file, 32 lines)
View File

@@ -0,0 +1,32 @@
+from unittest import TestCase
+
+import requests
+import json
+
+from senpy.test import patch_requests
+from senpy.models import Results
+
+
+class TestTest(TestCase):
+    def test_patch_text(self):
+        with patch_requests('hello'):
+            r = requests.get('http://example.com')
+            assert r.text == 'hello'
+            assert r.content == 'hello'
+
+    def test_patch_json(self):
+        r = Results()
+        with patch_requests(r):
+            res = requests.get('http://example.com')
+            assert res.content == json.dumps(r.jsonld())
+            js = res.json()
+            assert js
+            assert js['@type'] == r['@type']
+
+    def test_patch_dict(self):
+        r = {'nothing': 'new'}
+        with patch_requests(r):
+            res = requests.get('http://example.com')
+            assert res.content == json.dumps(r)
+            js = res.json()
+            assert js
+            assert js['nothing'] == 'new'