1
0
mirror of https://github.com/gsi-upm/senpy synced 2025-09-17 12:02:21 +00:00

Compare commits

..

30 Commits

Author SHA1 Message Date
J. Fernando Sánchez
1104e816cb Push pip for tags without a preceding v 2017-06-12 21:06:34 +02:00
J. Fernando Sánchez
c19d03b41d Added SSH access to github fetch 2017-06-12 20:47:46 +02:00
J. Fernando Sánchez
42c9068991 Add pull policy to k8s deployment
* Add git fetch to (try to) fix github push from gitlab
2017-06-12 20:43:39 +02:00
J. Fernando Sánchez
96843827bd Removed __main__ from test coverage reports 2017-06-12 20:29:29 +02:00
J. Fernando Sánchez
d76e4618fe Removed python 3.4 from travis versions 2017-06-12 20:18:56 +02:00
J. Fernando Sánchez
c9bc485535 Merge branch '36-estimate-vad' 2017-06-12 20:10:21 +02:00
J. Fernando Sánchez
6d7575bbcd Merge branch '35-timeout-and-blocking-requests' 2017-06-12 19:57:28 +02:00
J. Fernando Sánchez
852bcc72ba Better centroid conversion
Also added **simple** tests for backward and forward conversion.
In future versions we should add thorough tests.

Should close gsi-upm/senpy#31
2017-06-12 19:52:00 +02:00
J. Fernando Sánchez
bf5ed1bd7d Merge remote-tracking branch 'drevicko/patch-6' 2017-06-12 18:14:15 +02:00
J. Fernando Sánchez
00da75153a Change conversion to Euclidean distance
* Added neutral point (if present)

Closes !gsi-upm/senpy#37 (Ian's)
2017-06-12 18:09:58 +02:00
J. Fernando Sánchez
fa082e11e7 Use flask's server by default
Using this server in production is discouraged, but to implement a
proper asynchronous server with tornado/gevent every blocking call would
have to be converted to a non-blocking call.

Failing to do so causes deadlocks like senpy/senpy#35

For now, it is easier to just use the default server.
2017-06-12 17:29:01 +02:00
J. Fernando Sánchez
6331d31b18 Merge branch '34-document-plugin-repo-creation' into 24-improve-docs
Closes #34
Closes #24
2017-06-12 12:53:24 +02:00
J. Fernando Sánchez
8ee324f566 Clearer docs 2017-06-12 09:31:42 +02:00
J. Fernando Sánchez
188c33332a Removed nbsphinx
It requires pandoc, which cannot be installed with pip.

We can either link to the nbfile or convert the file
manually/automatically:

```
nbconvert SenpyClientUse.ipynb --to rst
```
2017-06-12 09:31:42 +02:00
militarpancho
955e17eb2a Added travis, readthedocs and pypi badges 2017-06-12 09:31:42 +02:00
militarpancho
3e0f55dcff Improve docs. (Badges missing) 2017-06-12 09:31:38 +02:00
militarpancho
5bab9a6a02 #34. Fixed some errors from plugins examples 2017-06-02 17:43:18 +02:00
militarpancho
69ac95bb08 Added example plugin in docs. #34 2017-06-02 17:39:27 +02:00
drevicko
6b843a4384 fixes typo in code 2017-05-29 12:15:35 +01:00
drevicko
65d6e47513 Implements Fernando's suggestion in #31
I've added a neutral point definition (in the converters senpy file) as used in pull request #29
2017-05-29 12:13:21 +01:00
drevicko
8d56a0b630 fixes #31
I've used euclidean metric instead of taxicab as I feel it makes more sense (taxicab has bizzare unintuitive effects for points far from the centroids).
2017-05-29 12:06:44 +01:00
drevicko
e7ac6e66b0 update _forward_conversion docstring + minor edits 2017-05-29 11:50:14 +01:00
J. Fernando Sánchez
8c70433312 Added push to github 2017-05-19 18:54:57 +02:00
drevicko
dcc965ea63 removed superfluous 'neutral' centroid
Neutral is included as an 'origin' field. This is partly because emoml has no vocab for "Neutral" in dimensional models.
2017-05-08 14:34:28 +01:00
drevicko
400f647b7b removed unneccessary defaultdict import 2017-05-08 14:32:53 +01:00
Ian Wood
ec1a2ff5f9 added 'origin' to VAD representation, incorporated into weighed sum for Cat->VAD conversion 2017-05-08 14:28:51 +01:00
Ian Wood
1a9dd07f7e Merge branch 'master' 0.8.7 into patch-6 2017-05-05 15:02:15 +01:00
Ian Wood
b80b0c7947 used more specific exception specifier (KeyError) 2017-04-11 11:25:50 +01:00
Ian Wood
1ca6ec52fd fixed weighted average, no explicit treatment of 'neutral' 2017-04-11 11:12:02 +01:00
drevicko
53138e6942 Estimate VAD by weighted average
Does a weighted average of centroids.

If intensity sums to zero for a category, a 'neutral' category is used or 0 if it's not present. I'm not 100% sure this is the best approach, and the name of the "neutral" category perhaps should use some convention?

Note that if there are no categories present, then no VAD (or other dimensional) estimate is returned. It may be better to use the neutral centroid if it's present in this case also.
2017-04-04 15:37:07 +01:00
30 changed files with 726 additions and 191 deletions

View File

@@ -72,7 +72,7 @@ deploy_pypi:
- make pip_upload - make pip_upload
- echo "" > ~/.pypirc && rm ~/.pypirc # If the above fails, this won't run. - echo "" > ~/.pypirc && rm ~/.pypirc # If the above fails, this won't run.
only: only:
- /^v\d+\.\d+\.\d+([abc]\d*)?$/ # PEP-440 compliant version (tags) - /^v?\d+\.\d+\.\d+([abc]\d*)?$/ # PEP-440 compliant version (tags)
except: except:
- branches - branches
@@ -84,7 +84,15 @@ deploy:
only: only:
- master - master
clean_docker : push-github:
stage: deploy
script:
- make -e push-github
only:
- master
- triggers
clean :
stage: clean stage: clean
script: script:
- make -e clean - make -e clean

View File

@@ -7,7 +7,6 @@ language: python
env: env:
- PYV=2.7 - PYV=2.7
- PYV=3.4
- PYV=3.5 - PYV=3.5
# run nosetests - Tests # run nosetests - Tests
script: make test-$PYV script: make test-$PYV

View File

@@ -12,6 +12,7 @@ DEVPORT=5000
TARNAME=$(NAME)-$(VERSION).tar.gz TARNAME=$(NAME)-$(VERSION).tar.gz
action="test-${PYMAIN}" action="test-${PYMAIN}"
GITHUB_REPO=git@github.com:gsi-upm/senpy.git
KUBE_CA_PEM_FILE="" KUBE_CA_PEM_FILE=""
KUBE_URL="" KUBE_URL=""
@@ -132,6 +133,7 @@ push-github:
@echo "$$GITHUB_DEPLOY_KEY" > $(KEY_FILE) @echo "$$GITHUB_DEPLOY_KEY" > $(KEY_FILE)
@git remote rm github-deploy || true @git remote rm github-deploy || true
git remote add github-deploy $(GITHUB_REPO) git remote add github-deploy $(GITHUB_REPO)
@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git fetch github-deploy $(CI_COMMIT_REF_NAME) || true
@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git push github-deploy $(CI_COMMIT_REF_NAME) @GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git push github-deploy $(CI_COMMIT_REF_NAME)
rm $(KEY_FILE) rm $(KEY_FILE)

317
docs/SenpyClientUse.ipynb Normal file
View File

@@ -0,0 +1,317 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:05:31.465571Z",
"start_time": "2017-04-10T19:05:31.458282+02:00"
},
"deletable": true,
"editable": true
},
"source": [
"# Client"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"source": [
"The built-in senpy client allows you to query any Senpy endpoint. We will illustrate how to use it with the public demo endpoint, and then show you how to spin up your own endpoint using docker."
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Demo Endpoint\n",
"-------------"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"To start using senpy, simply create a new Client and point it to your endpoint. In this case, the latest version of Senpy at GSI."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:12.827640Z",
"start_time": "2017-04-10T19:29:12.818617+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"from senpy.client import Client\n",
"\n",
"c = Client('http://latest.senpy.cluster.gsi.dit.upm.es/api')\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Now, let's use that client analyse some queries:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:14.011657Z",
"start_time": "2017-04-10T19:29:13.701808+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"r = c.analyse('I like sugar!!', algorithm='sentiment140')\n",
"r"
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:08:19.616754Z",
"start_time": "2017-04-10T19:08:19.610767+02:00"
},
"deletable": true,
"editable": true
},
"source": [
"As you can see, that gave us the full JSON result. A more concise way to print it would be:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:14.854213Z",
"start_time": "2017-04-10T19:29:14.842068+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"for entry in r.entries:\n",
" print('{} -> {}'.format(entry['text'], entry['sentiments'][0]['marl:hasPolarity']))"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"We can also obtain a list of available plugins with the client:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:16.245198Z",
"start_time": "2017-04-10T19:29:16.056545+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"c.plugins()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Or, more concisely:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:17.663275Z",
"start_time": "2017-04-10T19:29:17.484623+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"c.plugins().keys()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"Local Endpoint\n",
"--------------"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"To run your own instance of senpy, just create a docker container with the latest Senpy image. Using `--default-plugins` you will get some extra plugins to start playing with the API."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:20.637539Z",
"start_time": "2017-04-10T19:29:19.938322+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"!docker run -ti --name 'SenpyEndpoint' -d -p 6000:5000 gsiupm/senpy:0.8.6 --host 0.0.0.0 --default-plugins"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"To use this endpoint:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:21.263976Z",
"start_time": "2017-04-10T19:29:21.260595+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"c_local = Client('http://127.0.0.1:6000/api')"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"That's all! After you are done with your analysis, stop the docker container:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-04-10T17:29:33.226686Z",
"start_time": "2017-04-10T19:29:22.392121+02:00"
},
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"!docker stop SenpyEndpoint\n",
"!docker rm SenpyEndpoint"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "68px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
}

View File

@@ -1,4 +1,4 @@
Research About
-------- --------
If you use Senpy in your research, please cite `Senpy: A Pragmatic Linked Sentiment Analysis Framework <http://gsi.dit.upm.es/index.php/es/investigacion/publicaciones?view=publication&task=show&id=417>`__ (`BibTex <http://gsi.dit.upm.es/index.php/es/investigacion/publicaciones?controller=publications&task=export&format=bibtex&id=417>`__): If you use Senpy in your research, please cite `Senpy: A Pragmatic Linked Sentiment Analysis Framework <http://gsi.dit.upm.es/index.php/es/investigacion/publicaciones?view=publication&task=show&id=417>`__ (`BibTex <http://gsi.dit.upm.es/index.php/es/investigacion/publicaciones?controller=publications&task=export&format=bibtex&id=417>`__):

View File

@@ -1,7 +1,7 @@
API and Schema API and Examples
############## ################
.. toctree:: .. toctree::
vocabularies.rst vocabularies.rst
api.rst api.rst
schema.rst examples.rst

View File

@@ -1,15 +0,0 @@
Architecture
============
The main component of a sentiment analysis service is the algorithm itself. However, for the algorithm to work, it needs to get the appropriate parameters from the user, format the results according to the defined API, interact with the user whn errors occur or more information is needed, etc.
Senpy proposes a modular and dynamic architecture that allows:
* Implementing different algorithms in a extensible way, yet offering a common interface.
* Offering common services that facilitate development, so developers can focus on implementing new and better algorithms.
The framework consists of two main modules: Senpy core, which is the building block of the service, and Senpy plugins, which consist of the analysis algorithm. The next figure depicts a simplified version of the processes involved in an analysis with the Senpy framework.
.. image:: senpy-architecture.png
:width: 100%
:align: center

9
docs/commandline.rst Normal file
View File

@@ -0,0 +1,9 @@
Command line
============
This video shows how to analyse text directly on the command line using the senpy tool.
.. image:: https://asciinema.org/a/9uwef1ghkjk062cw2t4mhzpyk.png
:width: 100%
:target: https://asciinema.org/a/9uwef1ghkjk062cw2t4mhzpyk
:alt: CLI demo

View File

@@ -37,7 +37,7 @@ extensions = [
'sphinx.ext.todo', 'sphinx.ext.todo',
'sphinxcontrib.httpdomain', 'sphinxcontrib.httpdomain',
'sphinx.ext.coverage', 'sphinx.ext.coverage',
'sphinx.ext.autosectionlabel' 'sphinx.ext.autosectionlabel',
] ]
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.

View File

@@ -1,4 +1,4 @@
Schema Examples
------ --------
All the examples in this page use the :download:`the main schema <_static/schemas/definitions.json>`. All the examples in this page use the :download:`the main schema <_static/schemas/definitions.json>`.
@@ -17,6 +17,8 @@ Sentiment Analysis
..................... .....................
Description Description
,,,,,,,,,,, ,,,,,,,,,,,
This annotation corresponds to the sentiment analysis of an input. The example shows the sentiment represented according to Marl format.
The sentiments detected are contained in the Sentiments array with their related part of the text.
Representation Representation
,,,,,,,,,,,,,, ,,,,,,,,,,,,,,
@@ -29,6 +31,7 @@ Suggestion Mining
................. .................
Description Description
,,,,,,,,,,, ,,,,,,,,,,,
The suggestions schema represented below shows the suggestions detected in the text. Within it, we can find the highlighted NIF fields that correspond to the text of the detected suggestion.
Representation Representation
,,,,,,,,,,,,,, ,,,,,,,,,,,,,,
@@ -41,6 +44,7 @@ Emotion Analysis
................ ................
Description Description
,,,,,,,,,,, ,,,,,,,,,,,
This annotation represents the emotion analysis of an input to Senpy. The emotions are contained in the emotions section, together with the text they refer to, following the Onyx format and the emotion model defined beforehand.
Representation Representation
,,,,,,,,,,,,,, ,,,,,,,,,,,,,,
@@ -53,7 +57,7 @@ Named Entity Recognition
........................ ........................
Description Description
,,,,,,,,,,, ,,,,,,,,,,,
The Named Entity Recognition is represented as follows. In this particular case, the recognised entities can be seen within the entities array. For the example input, Microsoft and Windows Phone are the ones detected.
Representation Representation
,,,,,,,,,,,,,, ,,,,,,,,,,,,,,

View File

@@ -1,28 +1,35 @@
Welcome to Senpy's documentation! Welcome to Senpy's documentation!
================================= =================================
.. image:: https://readthedocs.org/projects/senpy/badge/?version=latest
With Senpy, you can easily turn your sentiment or emotion analysis algorithm into a full blown semantic service. :target: http://senpy.readthedocs.io/en/latest/
Sharing your sentiment analysis with the world has never been easier. .. image:: https://badge.fury.io/py/senpy.svg
Senpy provides: :target: https://badge.fury.io/py/senpy
.. image:: https://lab.cluster.gsi.dit.upm.es/senpy/senpy/badges/master/build.svg
* Parameter validation, error handling :target: https://lab.cluster.gsi.dit.upm.es/senpy/senpy/commits/master
* Formatting: JSON-LD, Turtle/n-triples input and output, or simple text input .. image:: https://lab.cluster.gsi.dit.upm.es/senpy/senpy/badges/master/coverage.svg
* Linked Data. Results are semantically annotated, using a series of well established vocabularies, and sane default URIs. :target: https://lab.cluster.gsi.dit.upm.es/senpy/senpy/commits/master
* A web UI where users can explore your service and test different settings .. image:: https://img.shields.io/pypi/l/requests.svg
* A client to interact with any senpy service :target: https://lab.cluster.gsi.dit.upm.es/senpy/senpy/
* A command line tool
Senpy is a framework for sentiment and emotion analysis services.
Services built with senpy are interchangeable and easy to use because they share a common :doc:`apischema`.
It also simplifies service development.
.. image:: senpy-architecture.png
:width: 100%
:align: center
.. toctree:: .. toctree::
:caption: Learn more about senpy :caption: Learn more about senpy:
:maxdepth: 2 :maxdepth: 2
senpy senpy
installation installation
demo
usage usage
apischema apischema
plugins plugins
conversion conversion
demo about
research.rst

View File

@@ -1,6 +1,16 @@
Installation Installation
------------ ------------
The stable version can be installed in three ways. The stable version can be used in two ways: as a system/user library through pip, or as a docker image.
The docker image is the recommended way because it is self-contained and isolated from the system, which means:
* Downloading and using it is just one command
* All dependencies are included
* It is OS-independent (MacOS, Windows, GNU/Linux)
* Several versions may coexist in the same machine without additional virtual environments
Additionally, you may create your own docker image with your custom plugins, ready to be used by others.
Through PIP Through PIP
*********** ***********
@@ -33,15 +43,6 @@ To add custom plugins, use a docker volume:
.. code:: bash .. code:: bash
docker run -ti -p 5000:5000 -v <PATH OF PLUGINS>:/plugins gsiupm/senpy --host 0.0.0.0 --default-plugins -f /plugins docker run -ti -p 5000:5000 -v <PATH OF PLUGINS>:/plugins gsiupm/senpy --host 0.0.0.0 --default-plugins -f /plugins
Alias
.....
If you are using the docker approach regularly, it is advisable to use a script or an alias to simplify your executions:
.. code:: bash
alias senpy='docker run --rm -ti -p 5000:5000 -v $PWD:/senpy-plugins gsiupm/senpy --default-plugins'
Python 2 Python 2
@@ -54,3 +55,18 @@ There is a Senpy version for python2 too:
docker run -ti -p 5000:5000 gsiupm/senpy:python2.7 --host 0.0.0.0 --default-plugins docker run -ti -p 5000:5000 gsiupm/senpy:python2.7 --host 0.0.0.0 --default-plugins
Alias
.....
If you are using the docker approach regularly, it is advisable to use a script or an alias to simplify your executions:
.. code:: bash
alias senpy='docker run --rm -ti -p 5000:5000 -v $PWD:/senpy-plugins gsiupm/senpy --default-plugins'
Now, you may run senpy from any folder in your computer like so:
.. code:: bash
senpy --version

View File

@@ -4,6 +4,8 @@ This document describes how to develop a new analysis plugin. For an example of
A more step-by-step tutorial with slides is available `here <https://lab.cluster.gsi.dit.upm.es/senpy/senpy-tutorial>`__ A more step-by-step tutorial with slides is available `here <https://lab.cluster.gsi.dit.upm.es/senpy/senpy-tutorial>`__
.. contents:: :local:
What is a plugin? What is a plugin?
================= =================
@@ -113,7 +115,7 @@ The definition file would look like this:
module: helloworld module: helloworld
version: 0.0 version: 0.0
threshold: 10 threshold: 10
description: Hello World
Now, in a file named ``helloworld.py``: Now, in a file named ``helloworld.py``:
@@ -122,11 +124,11 @@ Now, in a file named ``helloworld.py``:
#!/bin/env python #!/bin/env python
#helloworld.py #helloworld.py
from senpy.plugins import SenpyPlugin from senpy.plugins import AnalysisPlugin
from senpy.models import Sentiment from senpy.models import Sentiment
class HelloWorld(SenpyPlugin): class HelloWorld(AnalysisPlugin):
def analyse_entry(entry, params): def analyse_entry(entry, params):
'''Basically do nothing with each entry''' '''Basically do nothing with each entry'''
@@ -139,6 +141,96 @@ Now, in a file named ``helloworld.py``:
entry.sentiments.append(sentiment) entry.sentiments.append(sentiment)
yield entry yield entry
The complete code of the example plugin is available `here <https://lab.cluster.gsi.dit.upm.es/senpy/plugin-prueba>`__.
Loading data and files
======================
Most plugins will need access to files (dictionaries, lexicons, etc.).
It is good practice to specify the paths of these files in the plugin configuration, so the same code can be reused with different resources.
.. code:: yaml
name: dictworld
module: dictworld
dictionary_path: <PATH OF THE FILE>
The path can be either absolute, or relative.
From absolute paths
???????????????????
Absolute paths (such as ``/data/dictionary.csv``) are straightforward:
.. code:: python
with open(os.path.join(self.dictionary_path)) as f:
...
From relative paths
???????????????????
Since plugins are loaded dynamically, relative paths will refer to the current working directory.
Instead, what you usually want is to load files *relative to the plugin source folder*, like so:
::
.
..
plugin.senpy
plugin.py
dictionary.csv
For this, we first need to get the path of your source folder, like so:
.. code:: python
import os
root = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(root, self.dictionary_path)) as f:
...
Docker image
============
Add the following dockerfile to your project to generate a docker image with your plugin:
.. code:: dockerfile
FROM gsiupm/senpy:0.8.8
This will copy your source folder to the image, and install all dependencies.
Now, to build an image:
.. code:: shell
docker build . -t gsiupm/exampleplugin
And you can run it with:
.. code:: shell
docker run -p 5000:5000 gsiupm/exampleplugin
If the plugin uses non-source files (see :ref:`loading data and files`), the recommended way is to use absolute paths.
Data can then be mounted in the container or added to the image.
The former is recommended for open source plugins with licensed resources, whereas the latter is the most convenient and can be used for private images.
Mounting data:
.. code:: bash
docker run -v $PWD/data:/data gsiupm/exampleplugin
Adding data to the image:
.. code:: dockerfile
FROM gsiupm/senpy:0.8.8
COPY data /
F.A.Q. F.A.Q.
====== ======
@@ -146,7 +238,7 @@ What annotations can I use?
??????????????????????????? ???????????????????????????
You can add almost any annotation to an entry. You can add almost any annotation to an entry.
The most common use cases are covered in the :doc:`schema`. The most common use cases are covered in the :doc:`apischema`.
Why does the analyse function yield instead of return? Why does the analyse function yield instead of return?
@@ -154,7 +246,7 @@ Why does the analyse function yield instead of return?
This is so that plugins may add new entries to the response or filter some of them. This is so that plugins may add new entries to the response or filter some of them.
For instance, a `context detection` plugin may add a new entry for each context in the original entry. For instance, a `context detection` plugin may add a new entry for each context in the original entry.
On the other hand, a conveersion plugin may leave out those entries that do not contain relevant information. On the other hand, a conversion plugin may leave out those entries that do not contain relevant information.
If I'm using a classifier, where should I train it? If I'm using a classifier, where should I train it?
@@ -164,9 +256,9 @@ Training a classifier can be time time consuming. To avoid running the training
.. code:: python .. code:: python
from senpy.plugins import ShelfMixin, SenpyPlugin from senpy.plugins import ShelfMixin, AnalysisPlugin
class MyPlugin(ShelfMixin, SenpyPlugin): class MyPlugin(ShelfMixin, AnalysisPlugin):
def train(self): def train(self):
''' Code to train the classifier ''' Code to train the classifier
''' '''
@@ -189,10 +281,10 @@ Shelves may get corrupted if the plugin exists unexpectedly.
A corrupt shelf prevents the plugin from loading. A corrupt shelf prevents the plugin from loading.
If you do not care about the pickle, you can force your plugin to remove the corrupted file and load anyway, set the 'force_shelf' to True in your .senpy file. If you do not care about the pickle, you can force your plugin to remove the corrupted file and load anyway, set the 'force_shelf' to True in your .senpy file.
I want to implement my service as a plugin, How i can do it? How can I turn an external service into a plugin?
???????????????????????????????????????????????????????????? ?????????????????????????????????????????????????
This example ilustrate how to implement the Sentiment140 service as a plugin in senpy This example illustrates how to implement a plugin that accesses the Sentiment140 service.
.. code:: python .. code:: python
@@ -226,26 +318,30 @@ This example ilustrate how to implement the Sentiment140 service as a plugin in
yield entry yield entry
Where can I define extra parameters to be introduced in the request to my plugin? Can my plugin require additional parameters from the user?
????????????????????????????????????????????????????????????????????????????????? ??????????????????????????????????????????????????????????
You can add these parameters in the definition file under the attribute "extra_params" : "{param_name}". The name of the parameter has new attributes-value pairs. The basic attributes are: You can add extra parameters in the definition file under the attribute ``extra_params``.
It takes a dictionary, where the keys are the name of the argument/parameter, and the value has the following fields:
* aliases: the different names which can be used in the request to use the parameter. * aliases: the different names which can be used in the request to use the parameter.
* required: this option is a boolean and indicates if the parameters is binding in operation plugin. * required: if set to true, users need to provide this parameter unless a default is set.
* options: the different values of the paremeter. * options: the different acceptable values of the parameter (i.e. an enum). If set, the value provided must match one of the options.
* default: the default value of the parameter, this is useful in case the paremeter is required and you want to have a default value. * default: the default value of the parameter, if none is provided in the request.
.. code:: python .. code:: python
"extra_params": { extra_params
"language": { language:
"aliases": ["language", "l"], aliases:
"required": true, - language
"options": ["es","en"], - lang
"default": "es" - l
} required: true
} options:
- es
- en
default: es
This example shows how to introduce a parameter associated with language. This example shows how to introduce a parameter associated with language.
The extraction of this paremeter is used in the analyse method of the Plugin interface. The extraction of this paremeter is used in the analyse method of the Plugin interface.
@@ -277,7 +373,6 @@ Additionally, with the ``--pdb`` option you will be dropped into a pdb post mort
senpy --pdb senpy --pdb
Where can I find more code examples? Where can I find more code examples?
???????????????????????????????????? ????????????????????????????????????

View File

@@ -1 +1,2 @@
sphinxcontrib-httpdomain>=1.4 sphinxcontrib-httpdomain>=1.4
nbsphinx

Binary file not shown.

Before

Width:  |  Height:  |  Size: 65 KiB

After

Width:  |  Height:  |  Size: 122 KiB

BIN
docs/senpy-framework.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

After

Width:  |  Height:  |  Size: 49 KiB

View File

@@ -1,6 +1,26 @@
What is Senpy? What is Senpy?
-------------- --------------
Web services can get really complex: data validation, user interaction, formatting, logging, etc.
The figure below summarizes the typical features in an analysis service.
Senpy implements all the common blocks, so developers can focus on what really matters: great analysis algorithms that solve real problems.
.. image:: senpy-framework.png
:width: 60%
:align: center
Senpy for end users
===================
All services built using senpy share a common interface.
This allows users to use them (almost) interchangeably.
Senpy comes with a :ref:`built-in client`.
Senpy for service developers
============================
Senpy is a framework that turns your sentiment or emotion analysis algorithm into a full blown semantic service. Senpy is a framework that turns your sentiment or emotion analysis algorithm into a full blown semantic service.
Senpy takes care of: Senpy takes care of:
@@ -12,21 +32,20 @@ Senpy takes care of:
Sharing your sentiment analysis with the world has never been easier! Sharing your sentiment analysis with the world has never been easier!
Senpy for service developers
============================
Check out the :doc:`plugins` if you have developed an analysis algorithm (e.g. sentiment analysis) and you want to publish it as a service. Check out the :doc:`plugins` if you have developed an analysis algorithm (e.g. sentiment analysis) and you want to publish it as a service.
Senpy for end users Architecture
=================== ============
All services built using senpy share a common interface. The main component of a sentiment analysis service is the algorithm itself. However, for the algorithm to work, it needs to get the appropriate parameters from the user, format the results according to the defined API, interact with the user when errors occur or more information is needed, etc.
This allows users to use them (almost) interchangeably.
Senpy comes with a :ref:`built-in client`.
Senpy proposes a modular and dynamic architecture that allows:
.. toctree:: * Implementing different algorithms in an extensible way, yet offering a common interface.
:caption: Interested? Check out senpy's: * Offering common services that facilitate development, so developers can focus on implementing new and better algorithms.
architecture
The framework consists of two main modules: Senpy core, which is the building block of the service, and Senpy plugins, which consist of the analysis algorithm. The next figure depicts a simplified version of the processes involved in an analysis with the Senpy framework.
.. image:: senpy-architecture.png
:width: 100%
:align: center

58
docs/server.rst Normal file
View File

@@ -0,0 +1,58 @@
Server
======
The senpy server is launched via the `senpy` command:
.. code:: text
usage: senpy [-h] [--level logging_level] [--debug] [--default-plugins]
[--host HOST] [--port PORT] [--plugins-folder PLUGINS_FOLDER]
[--only-install]
Run a Senpy server
optional arguments:
-h, --help show this help message and exit
--level logging_level, -l logging_level
Logging level
--debug, -d Run the application in debug mode
--default-plugins Load the default plugins
--host HOST Use 0.0.0.0 to accept requests from any host.
--port PORT, -p PORT Port to listen on.
--plugins-folder PLUGINS_FOLDER, -f PLUGINS_FOLDER
Where to look for plugins.
--only-install, -i Do not run a server, only install plugin dependencies
When launched, the server will recursively look for plugins in the specified plugins folder (the current working directory by default).
For every plugin found, it will download its dependencies, and try to activate it.
The default server includes a playground and an endpoint with all plugins found.
Let's run senpy with the default plugins:
.. code:: bash
senpy -f . --default-plugins
Now go to `http://localhost:5000 <http://localhost:5000>`_; you should be greeted by the senpy playground:
.. image:: senpy-playground.png
:width: 100%
:alt: Playground
The playground is a user-friendly way to test your plugins, but you can always use the service directly: `http://localhost:5000/api?input=hello <http://localhost:5000/api?input=hello>`_.
By default, senpy will listen only on the `127.0.0.1` address.
That means you can only access the API from your own machine (localhost).
You can listen on a different address using the `--host` flag (e.g., 0.0.0.0).
The default port is 5000.
You can change it with the `--port` flag.
For instance, to accept connections on port 6000 on any interface:
.. code:: bash
senpy --host 0.0.0.0 --port 6000
For more options, see the `--help` page.

View File

@@ -2,72 +2,14 @@ Usage
----- -----
First of all, you need to install the package. First of all, you need to install the package.
See :doc:`installation` for installation instructions. See :doc:`installation` for instructions.
Once installed, the `senpy` command should be available. Once installed, the `senpy` command should be available.
Useful command-line options .. toctree::
=========================== :maxdepth: 1
In case you want to load modules, which are located in different folders under the root folder, use the next option. server
SenpyClientUse
.. code:: bash commandline
senpy -f .
The default port used by senpy is 5000, but you can change it using the `--port` flag.
.. code:: bash
senpy --port 8080
Also, the host can be changed where senpy is deployed. The default value is `127.0.0.1`.
.. code:: bash
senpy --host 0.0.0.0
For more options, see the `--help` page.
Alternatively, you can use the modules included in senpy to build your own application.
Senpy server
============
Once the server is launched, there is a basic endpoint in the server, which provides a playground to use the plugins that have been loaded.
In case you want to know the different endpoints of the server, there is more information available in the NIF API section_.
CLI demo
========
This video shows how to use senpy through the command-line tool.
.. image:: https://asciinema.org/a/9uwef1ghkjk062cw2t4mhzpyk.png
:width: 100%
:target: https://asciinema.org/a/9uwef1ghkjk062cw2t4mhzpyk
:alt: CLI demo
Built-in client
===============
This example shows how to make a request to the default plugin:
.. code:: python
from senpy.client import Client
c = Client('http://127.0.0.1:5000/api/')
r = c.analyse('hello world')
for entry in r.entries:
print('{} -> {}'.format(entry.text, entry.emotions))
.. _section: http://senpy.readthedocs.org/en/latest/api.html
Conversion
==========
See :doc:`conversion`

View File

@@ -14,6 +14,7 @@ spec:
containers: containers:
- name: senpy-latest - name: senpy-latest
image: gsiupm/senpy:latest image: gsiupm/senpy:latest
imagePullPolicy: Always
args: args:
- "--default-plugins" - "--default-plugins"
resources: resources:

View File

@@ -22,10 +22,6 @@ the server.
from flask import Flask from flask import Flask
from senpy.extensions import Senpy from senpy.extensions import Senpy
from tornado.wsgi import WSGIContainer
from tornado.httpserver import HTTPServer
from tornado.ioloop import IOLoop
import logging import logging
import os import os
@@ -78,6 +74,11 @@ def main():
action='store_true', action='store_true',
default=False, default=False,
help='Do not run a server, only install plugin dependencies') help='Do not run a server, only install plugin dependencies')
parser.add_argument(
'--threaded',
action='store_false',
default=True,
help='Run a threaded server')
parser.add_argument( parser.add_argument(
'--version', '--version',
'-v', '-v',
@@ -101,18 +102,10 @@ def main():
print('Senpy version {}'.format(senpy.__version__)) print('Senpy version {}'.format(senpy.__version__))
print('Server running on port %s:%d. Ctrl+C to quit' % (args.host, print('Server running on port %s:%d. Ctrl+C to quit' % (args.host,
args.port)) args.port))
if not app.debug: app.run(args.host,
http_server = HTTPServer(WSGIContainer(app)) args.port,
http_server.listen(args.port, address=args.host) threaded=args.threaded,
try: debug=app.debug)
IOLoop.instance().start()
except KeyboardInterrupt:
print('Bye!')
http_server.stop()
else:
app.run(args.host,
args.port,
debug=True)
sp.deactivate_all() sp.deactivate_all()

View File

@@ -22,7 +22,6 @@ class Client(object):
response = requests.request(method=method, url=url, params=params) response = requests.request(method=method, url=url, params=params)
try: try:
resp = models.from_dict(response.json()) resp = models.from_dict(response.json())
resp.validate(resp)
except Exception as ex: except Exception as ex:
logger.error(('There seems to be a problem with the response:\n' logger.error(('There seems to be a problem with the response:\n'
'\tURL: {url}\n' '\tURL: {url}\n'

View File

@@ -32,36 +32,58 @@ class CentroidConversion(EmotionConversionPlugin):
nv1[aliases.get(k2, k2)] = v2 nv1[aliases.get(k2, k2)] = v2
ncentroids[aliases.get(k1, k1)] = nv1 ncentroids[aliases.get(k1, k1)] = nv1
info['centroids'] = ncentroids info['centroids'] = ncentroids
super(CentroidConversion, self).__init__(info) super(CentroidConversion, self).__init__(info)
self.dimensions = set()
for c in self.centroids.values():
self.dimensions.update(c.keys())
self.neutralPoints = self.get("neutralPoints", dict())
if not self.neutralPoints:
for i in self.dimensions:
self.neutralPoints[i] = self.get("neutralValue", 0)
def _forward_conversion(self, original): def _forward_conversion(self, original):
"""Sum the VAD value of all categories found.""" """Sum the VAD value of all categories found weighted by intensity.
Intensities are scaled by onyx:maxIntensityValue if it is present, else maxIntensityValue
is assumed to be one. Emotion entries that do not have onyx:hasEmotionIntensity specified
are assumed to have maxIntensityValue. Emotion entries that do not have
onyx:hasEmotionCategory specified are ignored."""
res = Emotion() res = Emotion()
maxIntensity = float(original.get("onyx:maxIntensityValue", 1))
for e in original.onyx__hasEmotion: for e in original.onyx__hasEmotion:
category = e.onyx__hasEmotionCategory category = e.get("onyx:hasEmotionCategory", None)
if category in self.centroids: if not category:
for dim, value in self.centroids[category].items(): continue
try: intensity = e.get("onyx:hasEmotionIntensity", maxIntensity) / maxIntensity
res[dim] += value if not intensity:
except Exception: continue
res[dim] = value centroid = self.centroids.get(category, None)
if centroid:
for dim, value in centroid.items():
neutral = self.neutralPoints[dim]
if dim not in res:
res[dim] = 0
res[dim] += (value - neutral) * intensity + neutral
return res return res
def _backwards_conversion(self, original): def _backwards_conversion(self, original):
"""Find the closest category""" """Find the closest category"""
dimensions = list(self.centroids.values())[0] centroids = self.centroids
neutralPoints = self.neutralPoints
dimensions = self.dimensions
def distance(e1, e2): def distance_k(centroid, original, k):
return sum((e1[k] - e2.get(k, 0)) for k in dimensions) # k component of the distance between the value and a given centroid
return (centroid.get(k, neutralPoints[k]) - original.get(k, neutralPoints[k]))**2
def distance(centroid):
return sum(distance_k(centroid, original, k) for k in dimensions)
emotion = min(centroids, key=lambda x: distance(centroids[x]))
emotion = ''
mindistance = 10000000000000000000000.0
for state in self.centroids:
d = distance(self.centroids[state], original)
if d < mindistance:
mindistance = d
emotion = state
result = Emotion(onyx__hasEmotionCategory=emotion) result = Emotion(onyx__hasEmotionCategory=emotion)
result.onyx__algorithmConfidence = distance(centroids[emotion])
return result return result
def convert(self, emotionSet, fromModel, toModel, params): def convert(self, emotionSet, fromModel, toModel, params):

View File

@@ -1,6 +1,6 @@
--- ---
name: Ekman2FSRE name: Ekman2FSRE
module: senpy.plugins.conversion.centroids module: senpy.plugins.conversion.emotion.centroids
description: Plugin to convert emotion sets from Ekman to VAD description: Plugin to convert emotion sets from Ekman to VAD
version: 0.1 version: 0.1
# No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction # No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction

View File

@@ -1,9 +1,14 @@
--- ---
name: Ekman2PAD name: Ekman2PAD
module: senpy.plugins.conversion.centroids module: senpy.plugins.conversion.emotion.centroids
description: Plugin to convert emotion sets from Ekman to VAD description: Plugin to convert emotion sets from Ekman to VAD
version: 0.1 version: 0.1
# No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction # No need to specify onyx:doesConversion because centroids.py adds it automatically from centroids_direction
origin:
# Point in VAD space with no emotion (aka Neutral)
A: 5.0
D: 5.0
V: 5.0
centroids: centroids:
anger: anger:
A: 6.95 A: 6.95
@@ -36,4 +41,4 @@ aliases: # These are aliases for any key in the centroid, to avoid repeating a l
disgust: emoml:big6disgust disgust: emoml:big6disgust
fear: emoml:big6fear fear: emoml:big6fear
happiness: emoml:big6happiness happiness: emoml:big6happiness
sadness: emoml:big6sadness sadness: emoml:big6sadness

View File

@@ -11,7 +11,7 @@ def read_version(versionfile=DEFAULT_FILE):
try: try:
with open(versionfile) as f: with open(versionfile) as f:
return f.read().strip() return f.read().strip()
except IOError: except IOError: # pragma: no cover
logger.error('Running an unknown version of senpy. Be careful!.') logger.error('Running an unknown version of senpy. Be careful!.')
return '0.0' return '0.0'

View File

@@ -11,4 +11,7 @@ max-line-length = 100
[bdist_wheel] [bdist_wheel]
universal=1 universal=1
[tool:pytest] [tool:pytest]
addopts = --cov=senpy --cov-report term-missing addopts = --cov=senpy --cov-report term-missing
[coverage:report]
omit = senpy/__main__.py

View File

@@ -6,8 +6,9 @@ import shutil
import tempfile import tempfile
from unittest import TestCase from unittest import TestCase
from senpy.models import Results, Entry from senpy.models import Results, Entry, EmotionSet, Emotion
from senpy.plugins import SentimentPlugin, ShelfMixin from senpy.plugins import SentimentPlugin, ShelfMixin
from senpy.plugins.conversion.emotion.centroids import CentroidConversion
class ShelfDummyPlugin(SentimentPlugin, ShelfMixin): class ShelfDummyPlugin(SentimentPlugin, ShelfMixin):
@@ -152,3 +153,52 @@ class PluginsTest(TestCase):
} }
}) })
assert 'example' in a.extra_params assert 'example' in a.extra_params
def test_conversion_centroids(self):
info = {
"name": "CentroidTest",
"description": "Centroid test",
"version": 0,
"centroids": {
"c1": {"V1": 0.5,
"V2": 0.5},
"c2": {"V1": -0.5,
"V2": 0.5},
"c3": {"V1": -0.5,
"V2": -0.5},
"c4": {"V1": 0.5,
"V2": -0.5}},
"aliases": {
"V1": "X-dimension",
"V2": "Y-dimension"
},
"centroids_direction": ["emoml:big6", "emoml:fsre-dimensions"]
}
c = CentroidConversion(info)
es1 = EmotionSet()
e1 = Emotion()
e1.onyx__hasEmotionCategory = "c1"
es1.onyx__hasEmotion.append(e1)
res = c._forward_conversion(es1)
assert res["X-dimension"] == 0.5
assert res["Y-dimension"] == 0.5
e2 = Emotion()
e2.onyx__hasEmotionCategory = "c2"
es1.onyx__hasEmotion.append(e2)
res = c._forward_conversion(es1)
assert res["X-dimension"] == 0
assert res["Y-dimension"] == 1
e = Emotion()
e["X-dimension"] = -0.2
e["Y-dimension"] = -0.3
res = c._backwards_conversion(e)
assert res["onyx:hasEmotionCategory"] == "c3"
e = Emotion()
e["X-dimension"] = -0.2
e["Y-dimension"] = 0.3
res = c._backwards_conversion(e)
assert res["onyx:hasEmotionCategory"] == "c2"