1
0
mirror of https://github.com/gsi-upm/senpy synced 2024-11-22 08:12:27 +00:00

Merge commit '23c6cdd58dd3071fe5f707d904afacde6bd1a870' as 'emotion-wnaffect'

This commit is contained in:
J. Fernando Sánchez 2018-06-12 10:01:44 +02:00
commit 08c1b4ce79
21 changed files with 921 additions and 0 deletions

2
emotion-wnaffect/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
__pycache__
*.pyc

View File

@ -0,0 +1,67 @@
# Uncomment if you want to use docker-in-docker
# image: gsiupm/dockermake:latest
# services:
# - docker:dind
# When using dind, it's wise to use the overlayfs driver for
# improved performance.
stages:
- test
- push
- deploy
- clean
before_script:
- make -e login
.test: &test_definition
stage: test
script:
- make -e test-$PYTHON_VERSION
test-3.5:
<<: *test_definition
variables:
PYTHON_VERSION: "3.5"
.image: &image_definition
stage: push
script:
- make -e push-$PYTHON_VERSION
only:
- tags
- triggers
push-3.5:
<<: *image_definition
variables:
PYTHON_VERSION: "3.5"
push-latest:
<<: *image_definition
variables:
PYTHON_VERSION: latest
only:
- tags
- triggers
deploy:
stage: deploy
environment: production
script:
- make -e deploy
only:
- tags
- triggers
clean :
stage: clean
script:
- make -e clean
when: manual
cleanup_py:
stage: clean
when: always # this is important; run even if preceding stages failed.
script:
- docker logout

3
emotion-wnaffect/.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "data"]
path = data
url = ../data/emotion-wnaffect

View File

@ -0,0 +1,27 @@
These makefiles are recipes for several common tasks in different types of projects.
To add them to your project, simply do:
```
git remote add makefiles ssh://git@lab.cluster.gsi.dit.upm.es:2200/docs/templates/makefiles.git
git subtree add --prefix=.makefiles/ makefiles master
touch Makefile
echo "include .makefiles/base.mk" >> Makefile
```
Now you can take advantage of the recipes.
For instance, to add useful targets for a python project, just add this to your Makefile:
```
include .makefiles/python.mk
```
You may need to set special variables like the name of your project or the python versions you're targetting.
Take a look at each specific `.mk` file for more information, and the `Makefile` in the [senpy](https://lab.cluster.gsi.dit.upm.es/senpy/senpy) project for a real use case.
If you update the makefiles from your repository, make sure to push the changes for review in upstream (this repository):
```
make makefiles-push
```
It will automatically commit all unstaged changes in the .makefiles folder.

View File

@ -0,0 +1,36 @@
export
NAME ?= $(shell basename $(CURDIR))
VERSION ?= $(shell git describe --tags --dirty 2>/dev/null)
ifeq ($(VERSION),)
VERSION:=unknown
endif
# Get the location of this makefile.
MK_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
-include .env
-include ../.env
help: ## Show this help.
@fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/\(.*:\)[^#]*##\s*\(.*\)/\1\t\2/' | column -t -s " "
config: ## Load config from the environment. You should run it once in every session before other tasks. Run: eval $(make config)
@awk '{ print "export " $$0}' ../.env
@awk '{ print "export " $$0}' .env
@echo "# Please, run: "
@echo "# eval \$$(make config)"
# If you need to run a command on the key/value pairs, use this:
# @awk '{ split($$0, a, "="); "echo " a[2] " | base64 -w 0" |& getline b64; print "export " a[1] "=" a[2]; print "export " a[1] "_BASE64=" b64}' .env
ci: ## Run a task using gitlab-runner. Only use to debug problems in the CI pipeline
gitlab-runner exec shell --builds-dir '.builds' --env CI_PROJECT_NAME=$(NAME) ${action}
include $(MK_DIR)/makefiles.mk
include $(MK_DIR)/docker.mk
include $(MK_DIR)/git.mk
info:: ## List all variables
env
.PHONY:: config help ci

View File

@ -0,0 +1,29 @@
IMAGENAME?=$(NAME)
IMAGEWTAG?=$(IMAGENAME):$(VERSION)
docker-login: ## Log in to the registry. It will only be used in the server, or when running a CI task locally (if CI_BUILD_TOKEN is set).
ifeq ($(CI_BUILD_TOKEN),)
@echo "Not logging in to the docker registry" "$(CI_REGISTRY)"
else
@docker login -u gitlab-ci-token -p $(CI_BUILD_TOKEN) $(CI_REGISTRY)
endif
ifeq ($(HUB_USER),)
@echo "Not logging in to global the docker registry"
else
@docker login -u $(HUB_USER) -p $(HUB_PASSWORD)
endif
docker-clean: ## Remove docker credentials
ifeq ($(HUB_USER),)
else
@docker logout
endif
login:: docker-login
clean:: docker-clean
docker-info:
@echo IMAGEWTAG=${IMAGEWTAG}
.PHONY:: docker-login docker-clean login clean

View File

@ -0,0 +1,28 @@
commit:
git commit -a
tag:
git tag ${VERSION}
git-push::
git push --tags -u origin HEAD
git-pull:
git pull --all
push-github: ## Push the code to github. You need to set up GITHUB_DEPLOY_KEY
ifeq ($(GITHUB_DEPLOY_KEY),)
else
$(eval KEY_FILE := "$(shell mktemp)")
@echo "$(GITHUB_DEPLOY_KEY)" > $(KEY_FILE)
@git remote rm github-deploy || true
git remote add github-deploy $(GITHUB_REPO)
-@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git fetch github-deploy $(CI_COMMIT_REF_NAME)
@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git push github-deploy HEAD:$(CI_COMMIT_REF_NAME)
rm $(KEY_FILE)
endif
push:: git-push
pull:: git-pull
.PHONY:: commit tag push git-push git-pull push-github

View File

@ -0,0 +1,51 @@
# Deployment with Kubernetes
# KUBE_CA_PEM_FILE is the path of a certificate file. It automatically set by GitLab
# if you enable Kubernetes integration in a project.
#
# As of this writing, Kubernetes integration can not be set on a group level, so it has to
# be manually set in every project.
# Alternatively, we use a custom KUBE_CA_BUNDLE environment variable, which can be set at
# the group level. In this case, the variable contains the whole content of the certificate,
# which we dump to a temporary file
#
# Check if the KUBE_CA_PEM_FILE exists. Otherwise, create it from KUBE_CA_BUNDLE
KUBE_CA_TEMP=false
ifndef KUBE_CA_PEM_FILE
KUBE_CA_PEM_FILE:=$$PWD/.ca.crt
CREATED:=$(shell echo -e "$(KUBE_CA_BUNDLE)" > $(KUBE_CA_PEM_FILE))
endif
KUBE_TOKEN?=""
KUBE_NAMESPACE?=$(NAME)
KUBECTL=docker run --rm -v $(KUBE_CA_PEM_FILE):/tmp/ca.pem -i lachlanevenson/k8s-kubectl --server="$(KUBE_URL)" --token="$(KUBE_TOKEN)" --certificate-authority="/tmp/ca.pem" -n $(KUBE_NAMESPACE)
CI_COMMIT_REF_NAME?=master
info:: ## Print variables. Useful for debugging.
@echo "#KUBERNETES"
@echo KUBE_URL=$(KUBE_URL)
@echo KUBE_CA_PEM_FILE=$(KUBE_CA_PEM_FILE)
@echo KUBE_CA_BUNDLE=$$KUBE_CA_BUNDLE
@echo KUBE_TOKEN=$(KUBE_TOKEN)
@echo KUBE_NAMESPACE=$(KUBE_NAMESPACE)
@echo KUBECTL=$(KUBECTL)
@echo "#CI"
@echo CI_PROJECT_NAME=$(CI_PROJECT_NAME)
@echo CI_REGISTRY=$(CI_REGISTRY)
@echo CI_REGISTRY_USER=$(CI_REGISTRY_USER)
@echo CI_COMMIT_REF_NAME=$(CI_COMMIT_REF_NAME)
@echo "CREATED=$(CREATED)"
#
# Deployment and advanced features
#
deploy: ## Deploy to kubernetes using the credentials in KUBE_CA_PEM_FILE (or KUBE_CA_BUNDLE ) and TOKEN
@ls k8s/*.yaml k8s/*.yml k8s/*.tmpl 2>/dev/null || true
@cat k8s/*.yaml k8s/*.yml k8s/*.tmpl 2>/dev/null | envsubst | $(KUBECTL) apply -f -
deploy-check: ## Get the deployed configuration.
@$(KUBECTL) get deploy,pods,svc,ingress
.PHONY:: info deploy deploy-check

View File

@ -0,0 +1,17 @@
makefiles-remote:
@git remote add makefiles ssh://git@lab.cluster.gsi.dit.upm.es:2200/docs/templates/makefiles.git 2>/dev/null || true
makefiles-commit: makefiles-remote
git add -f .makefiles
git commit -em "Updated makefiles from ${NAME}"
makefiles-push:
git subtree push --prefix=.makefiles/ makefiles $(NAME)
makefiles-pull: makefiles-remote
git subtree pull --prefix=.makefiles/ makefiles master --squash
pull:: makefiles-pull
push:: makefiles-push
.PHONY:: makefiles-remote makefiles-commit makefiles-push makefiles-pull pull push

View File

@ -0,0 +1,5 @@
init: ## Init pre-commit hooks (i.e. enforcing format checking before allowing a commit)
pip install --user pre-commit
pre-commit install
.PHONY:: init

View File

@ -0,0 +1,100 @@
PYVERSIONS ?= 3.5
PYMAIN ?= $(firstword $(PYVERSIONS))
TARNAME ?= $(NAME)-$(VERSION).tar.gz
VERSIONFILE ?= $(NAME)/VERSION
DEVPORT ?= 6000
.FORCE:
version: .FORCE
@echo $(VERSION) > $(VERSIONFILE)
@echo $(VERSION)
yapf: ## Format python code
yapf -i -r $(NAME)
yapf -i -r tests
dockerfiles: $(addprefix Dockerfile-,$(PYVERSIONS)) ## Generate dockerfiles for each python version
@unlink Dockerfile >/dev/null
ln -s Dockerfile-$(PYMAIN) Dockerfile
Dockerfile-%: Dockerfile.template ## Generate a specific dockerfile (e.g. Dockerfile-2.7)
sed "s/{{PYVERSION}}/$*/" Dockerfile.template > Dockerfile-$*
quick_build: $(addprefix build-, $(PYMAIN))
build: $(addprefix build-, $(PYVERSIONS)) ## Build all images / python versions
build-%: version Dockerfile-% ## Build a specific version (e.g. build-2.7)
docker build -t '$(IMAGEWTAG)-python$*' --cache-from $(IMAGENAME):python$* -f Dockerfile-$* .;
dev-%: ## Launch a specific development environment using docker (e.g. dev-2.7)
@docker start $(NAME)-dev$* || (\
$(MAKE) build-$*; \
docker run -d -w /usr/src/app/ -p $(DEVPORT):5000 -v $$PWD:/usr/src/app --entrypoint=/bin/bash -ti --name $(NAME)-dev$* '$(IMAGEWTAG)-python$*'; \
)\
docker exec -ti $(NAME)-dev$* bash
dev: dev-$(PYMAIN) ## Launch a development environment using docker, using the default python version
quick_test: test-$(PYMAIN)
test-%: ## Run setup.py from in an isolated container, built from the base image. (e.g. test-2.7)
# This speeds tests up because the image has most (if not all) of the dependencies already.
docker rm $(NAME)-test-$* || true
docker create -ti --name $(NAME)-test-$* --entrypoint="" -w /usr/src/app/ $(IMAGENAME):python$* python setup.py test
docker cp . $(NAME)-test-$*:/usr/src/app
docker start -a $(NAME)-test-$*
test: $(addprefix test-,$(PYVERSIONS)) ## Run the tests with the main python version
run-%: build-%
docker run --rm -p $(DEVPORT):5000 -ti '$(IMAGEWTAG)-python$(PYMAIN)' --default-plugins
run: run-$(PYMAIN)
# Pypy - Upload a package
dist/$(TARNAME): version
python setup.py sdist;
sdist: dist/$(TARNAME) ## Generate the distribution file (wheel)
pip_test-%: sdist ## Test the distribution file using pip install and a specific python version (e.g. pip_test-2.7)
docker run --rm -v $$PWD/dist:/dist/ python:$* pip install /dist/$(TARNAME);
pip_test: $(addprefix pip_test-,$(PYVERSIONS)) ## Test pip installation with the main python version
pip_upload: pip_test ## Upload package to pip
python setup.py sdist upload ;
# Pushing to docker
push-latest: $(addprefix push-latest-,$(PYVERSIONS)) ## Push the "latest" tag to dockerhub
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)'
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGENAME)'
docker push '$(IMAGENAME):latest'
docker push '$(IMAGEWTAG)'
push-latest-%: build-% ## Push the latest image for a specific python version
docker tag $(IMAGENAME):$(VERSION)-python$* $(IMAGENAME):python$*
docker push $(IMAGENAME):$(VERSION)-python$*
docker push $(IMAGENAME):python$*
push-%: build-% ## Push the image of the current version (tagged). e.g. push-2.7
docker push $(IMAGENAME):$(VERSION)-python$*
push:: $(addprefix push-,$(PYVERSIONS)) ## Push an image with the current version for every python version
docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)'
docker push $(IMAGENAME):$(VERSION)
clean:: ## Clean older docker images and containers related to this project and dev environments
@docker stop $(addprefix $(NAME)-dev,$(PYVERSIONS)) 2>/dev/null || true
@docker rm $(addprefix $(NAME)-dev,$(PYVERSIONS)) 2>/dev/null || true
@docker ps -a | grep $(IMAGENAME) | awk '{ split($$2, vers, "-"); if(vers[0] != "${VERSION}"){ print $$1;}}' | xargs docker rm -v 2>/dev/null|| true
@docker images | grep $(IMAGENAME) | awk '{ split($$2, vers, "-"); if(vers[0] != "${VERSION}"){ print $$1":"$$2;}}' | xargs docker rmi 2>/dev/null|| true
.PHONY:: yapf dockerfiles Dockerfile-% quick_build build build-% dev-% quick-dev test quick_test push-latest push-latest-% push-% push version .FORCE

View File

@ -0,0 +1,5 @@
FROM gsiupm/senpy:python2.7
MAINTAINER manuel.garcia-amado.sancho@alumnos.upm.es
COPY data /data

View File

@ -0,0 +1,4 @@
FROM gsiupm/senpy:python3.5
MAINTAINER manuel.garcia-amado.sancho@alumnos.upm.es
COPY data /data

View File

@ -0,0 +1,3 @@
FROM gsiupm/senpy:python{{PYVERSION}}
MAINTAINER manuel.garcia-amado.sancho@alumnos.upm.es

View File

@ -0,0 +1,9 @@
NAME:=wnaffect
VERSIONFILE:=VERSION
IMAGENAME:=registry.cluster.gsi.dit.upm.es/senpy/emotion-wnaffect
PYVERSIONS:=2.7 3.5
DEVPORT:=5000
include .makefiles/base.mk
include .makefiles/k8s.mk
include .makefiles/python.mk

View File

@ -0,0 +1,62 @@
# WordNet-Affect plugin
This plugin uses WordNet-Affect (http://wndomains.fbk.eu/wnaffect.html) to calculate the percentage of each emotion. The plugin classifies among five diferent emotions: anger, fear, disgust, joy and sadness. It is has been used a emotion mapping enlarge the emotions:
- anger : general-dislike
- fear : negative-fear
- disgust : shame
- joy : gratitude, affective, enthusiasm, love, joy, liking
- sadness : ingrattitude, daze, humlity, compassion, despair, anxiety, sadness
## Installation
* Download
```
git clone https://lab.cluster.gsi.dit.upm.es/senpy/emotion-wnaffect.git
```
* Get data
```
cd emotion-wnaffect
git submodule update --init --recursive
```
* Run
```
docker run -p 5000:5000 -v $PWD:/plugins gsiupm/senpy -f /plugins
```
## Data format
`data/a-hierarchy.xml` is a xml file
`data/a-synsets.xml` is a xml file
## Usage
The parameters accepted are:
- Language: English (en).
- Input: Text to analyse.
Example request:
```
http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-wnaffect&language=en&input=I%20love%20Madrid
```
Example respond: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, NIF API section.
The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM for semantic web.
This plugin uses WNAffect labels for emotion analysis.
The emotion-wnaffect.senpy file can be copied and modified to use different versions of wnaffect with the same python code.
## Known issues
- This plugin run on **Python2.7** and **Python3.5**
- Wnaffect and corpora files are not included in the repository, but can be easily added either to the docker image (using a volume) or in a new docker image.
- You can download Wordnet 1.6 here: <http://wordnetcode.princeton.edu/1.6/wn16.unix.tar.gz> and extract the dict folder.
- The hierarchy and synsets files can be found here: <https://github.com/larsmans/wordnet-domains-sentiwords/tree/master/wn-domains/wn-affect-1.1>
![alt GSI Logo][logoGSI]
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"

1
emotion-wnaffect/data Submodule

@ -0,0 +1 @@
Subproject commit 0dc601a8713984812666ae6ef5f2d25c065dba17

View File

@ -0,0 +1,260 @@
# -*- coding: utf-8 -*-
from __future__ import division
import re
import nltk
import logging
import os
import string
import xml.etree.ElementTree as ET
from nltk.corpus import stopwords
from nltk.corpus import WordNetCorpusReader
from nltk.stem import wordnet
from emotion import Emotion as Emo
from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin
from senpy.models import Results, EmotionSet, Entry, Emotion
class EmotionTextPlugin(EmotionPlugin, ShelfMixin):
'''Emotion classifier using WordNet-Affect to calculate the percentage
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
or neutral. The only available language is English (en)
'''
def _load_synsets(self, synsets_path):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse(synsets_path)
root = tree.getroot()
pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}
synsets = {}
for pos in ["noun", "adj", "verb", "adv"]:
tag = pos_map[pos]
synsets[tag] = {}
for elem in root.findall(
".//{0}-syn-list//{0}-syn".format(pos, pos)):
offset = int(elem.get("id")[2:])
if not offset: continue
if elem.get("categ"):
synsets[tag][offset] = Emo.emotions[elem.get(
"categ")] if elem.get(
"categ") in Emo.emotions else None
elif elem.get("noun-id"):
synsets[tag][offset] = synsets[pos_map["noun"]][int(
elem.get("noun-id")[2:])]
return synsets
def _load_emotions(self, hierarchy_path):
"""Loads the hierarchy of emotions from the WordNet-Affect xml."""
tree = ET.parse(hierarchy_path)
root = tree.getroot()
for elem in root.findall("categ"):
name = elem.get("name")
if name == "root":
Emo.emotions["root"] = Emo("root")
else:
Emo.emotions[name] = Emo(name, elem.get("isa"))
def activate(self, *args, **kwargs):
nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet'])
self._stopwords = stopwords.words('english')
self._wnlemma = wordnet.WordNetLemmatizer()
self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'}
local_path = os.environ.get("SENPY_DATA")
self._categories = {
'anger': [
'general-dislike',
],
'fear': [
'negative-fear',
],
'disgust': [
'shame',
],
'joy':
['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
'sadness': [
'ingrattitude', 'daze', 'humility', 'compassion', 'despair',
'anxiety', 'sadness'
]
}
self._wnaffect_mappings = {
'anger': 'anger',
'fear': 'negative-fear',
'disgust': 'disgust',
'joy': 'joy',
'sadness': 'sadness'
}
self._load_emotions(local_path + self.hierarchy_path)
if 'total_synsets' not in self.sh:
total_synsets = self._load_synsets(local_path + self.synsets_path)
self.sh['total_synsets'] = total_synsets
self._total_synsets = self.sh['total_synsets']
self._wn16_path = self.wn16_path
self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path))
def deactivate(self, *args, **kwargs):
self.save()
def _my_preprocessor(self, text):
regHttp = re.compile(
'(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regHttps = re.compile(
'(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?')
regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
text = re.sub(regHttp, '', text)
text = re.sub(regAt, '', text)
text = re.sub('RT : ', '', text)
text = re.sub(regHttps, '', text)
text = re.sub('[0-9]', '', text)
text = self._delete_punctuation(text)
return text
def _delete_punctuation(self, text):
exclude = set(string.punctuation)
s = ''.join(ch for ch in text if ch not in exclude)
return s
def _extract_ngrams(self, text):
unigrams_lemmas = []
pos_tagged = []
unigrams_words = []
tokens = text.split()
for token in nltk.pos_tag(tokens):
unigrams_words.append(token[0])
pos_tagged.append(token[1])
if token[1][0] in self._syntactics.keys():
unigrams_lemmas.append(
self._wnlemma.lemmatize(token[0], self._syntactics[token[1]
[0]]))
else:
unigrams_lemmas.append(token[0])
return unigrams_words, unigrams_lemmas, pos_tagged
def _find_ngrams(self, input_list, n):
return zip(*[input_list[i:] for i in range(n)])
def _clean_pos(self, pos_tagged):
pos_tags = {
'NN': 'NN',
'NNP': 'NN',
'NNP-LOC': 'NN',
'NNS': 'NN',
'JJ': 'JJ',
'JJR': 'JJ',
'JJS': 'JJ',
'RB': 'RB',
'RBR': 'RB',
'RBS': 'RB',
'VB': 'VB',
'VBD': 'VB',
'VGB': 'VB',
'VBN': 'VB',
'VBP': 'VB',
'VBZ': 'VB'
}
for i in range(len(pos_tagged)):
if pos_tagged[i] in pos_tags:
pos_tagged[i] = pos_tags[pos_tagged[i]]
return pos_tagged
def _extract_features(self, text):
feature_set = {k: 0 for k in self._categories}
ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
matches = 0
pos_tagged = self._clean_pos(pos_tagged)
tag_wn = {
'NN': self._wn16.NOUN,
'JJ': self._wn16.ADJ,
'VB': self._wn16.VERB,
'RB': self._wn16.ADV
}
for i in range(len(pos_tagged)):
if pos_tagged[i] in tag_wn:
synsets = self._wn16.synsets(ngrams_words[i],
tag_wn[pos_tagged[i]])
if synsets:
offset = synsets[0].offset()
if offset in self._total_synsets[pos_tagged[i]]:
if self._total_synsets[pos_tagged[i]][offset] is None:
continue
else:
emotion = self._total_synsets[pos_tagged[i]][
offset].get_level(5).name
matches += 1
for i in self._categories:
if emotion in self._categories[i]:
feature_set[i] += 1
if matches == 0:
matches = 1
for i in feature_set:
feature_set[i] = (feature_set[i] / matches) * 100
return feature_set
def analyse_entry(self, entry, params):
text_input = entry['nif:isString']
text = self._my_preprocessor(text_input)
feature_text = self._extract_features(text)
emotionSet = EmotionSet(id="Emotions0")
emotions = emotionSet.onyx__hasEmotion
for i in feature_text:
emotions.append(
Emotion(
onyx__hasEmotionCategory=self._wnaffect_mappings[i],
onyx__hasEmotionIntensity=feature_text[i]))
entry.emotions = [emotionSet]
yield entry
def test(self, *args, **kwargs):
results = list()
params = {'algo': 'emotion-wnaffect',
'intype': 'direct',
'expanded-jsonld': 0,
'informat': 'text',
'prefix': '',
'plugin_type': 'analysisPlugin',
'urischeme': 'RFC5147String',
'outformat': 'json-ld',
'i': 'Hello World',
'input': 'Hello World',
'conversion': 'full',
'language': 'en',
'algorithm': 'emotion-wnaffect'}
self.activate()
res = next(self.analyse_entry(Entry(nif__isString="This text make me sad"), params))
texts = {'I hate you': 'anger',
'i am sad': 'sadness',
'i am happy with my marks': 'joy',
'This movie is scary': 'negative-fear'}
for text in texts:
response = next(self.analyse_entry(Entry(nif__isString=text), params))
expected = texts[text]
emotionSet = response.emotions[0]
max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity'])
assert max_emotion['onyx:hasEmotionCategory'] == expected

View File

@ -0,0 +1,25 @@
---
name: emotion-wnaffect
module: emotion-wnaffect
description: 'Emotion classifier using WordNet-Affect to calculate the percentage
of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness
or neutral. The only available language is English (en)'
author: "@icorcuera @balkian"
version: '0.2'
extra_params:
language:
"@id": lang_wnaffect
aliases:
- language
- l
required: false
options:
- en
synsets_path: "/a-synsets.xml"
hierarchy_path: "/a-hierarchy.xml"
wn16_path: "/wordnet1.6/dict"
onyx:usesEmotionModel: emoml:big6
requirements:
- nltk>=3.0.5
- lxml>=3.4.2
async: false

View File

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
"""
Clement Michard (c) 2015
"""
class Emotion:
"""Defines an emotion."""
emotions = {} # name to emotion (str -> Emotion)
def __init__(self, name, parent_name=None):
"""Initializes an Emotion object.
name -- name of the emotion (str)
parent_name -- name of the parent emotion (str)
"""
self.name = name
self.parent = None
self.level = 0
self.children = []
if parent_name:
self.parent = Emotion.emotions[parent_name] if parent_name else None
self.parent.children.append(self)
self.level = self.parent.level + 1
def get_level(self, level):
"""Returns the parent of self at the given level.
level -- level in the hierarchy (int)
"""
em = self
while em.level > level and em.level >= 0:
em = em.parent
return em
def __str__(self):
"""Returns the emotion string formatted."""
return self.name
def nb_children(self):
"""Returns the number of children of the emotion."""
return sum(child.nb_children() for child in self.children) + 1
@staticmethod
def printTree(emotion=None, indent="", last='updown'):
"""Prints the hierarchy of emotions.
emotion -- root emotion (Emotion)
"""
if not emotion:
emotion = Emotion.emotions["root"]
size_branch = {child: child.nb_children() for child in emotion.children}
leaves = sorted(emotion.children, key=lambda emotion: emotion.nb_children())
up, down = [], []
if leaves:
while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves):
down.append(leaves.pop())
up = leaves
for leaf in up:
next_last = 'up' if up.index(leaf) is 0 else ''
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '', " " * len(emotion.name))
Emotion.printTree(leaf, indent=next_indent, last=next_last)
if last == 'up':
start_shape = ''
elif last == 'down':
start_shape = ''
elif last == 'updown':
start_shape = ' '
else:
start_shape = ''
if up:
end_shape = ''
elif down:
end_shape = ''
else:
end_shape = ''
print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape))
for leaf in down:
next_last = 'down' if down.index(leaf) is len(down) - 1 else ''
next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '', " " * len(emotion.name))
Emotion.printTree(leaf, indent=next_indent, last=next_last)

View File

@ -0,0 +1,92 @@
# coding: utf-8
# In[1]:
# -*- coding: utf-8 -*-
"""
Clement Michard (c) 2015
"""
import os
import sys
import nltk
from emotion import Emotion
from nltk.corpus import WordNetCorpusReader
import xml.etree.ElementTree as ET
class WNAffect:
"""WordNet-Affect ressource."""
def __init__(self, wordnet16_dir, wn_domains_dir):
"""Initializes the WordNet-Affect object."""
cwd = os.getcwd()
nltk.data.path.append(cwd)
wn16_path = "{0}/dict".format(wordnet16_dir)
self.wn16 = WordNetCorpusReader(os.path.abspath("{0}/{1}".format(cwd, wn16_path)), nltk.data.find(wn16_path))
self.flat_pos = {'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'}
self.wn_pos = {'NN':self.wn16.NOUN, 'JJ':self.wn16.ADJ, 'VB':self.wn16.VERB, 'RB':self.wn16.ADV}
self._load_emotions(wn_domains_dir)
self.synsets = self._load_synsets(wn_domains_dir)
def _load_synsets(self, wn_domains_dir):
"""Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir))
root = tree.getroot()
pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" }
synsets = {}
for pos in ["noun", "adj", "verb", "adv"]:
tag = pos_map[pos]
synsets[tag] = {}
for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)):
offset = int(elem.get("id")[2:])
if not offset: continue
if elem.get("categ"):
synsets[tag][offset] = Emotion.emotions[elem.get("categ")] if elem.get("categ") in Emotion.emotions else None
elif elem.get("noun-id"):
synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
return synsets
def _load_emotions(self, wn_domains_dir):
"""Loads the hierarchy of emotions from the WordNet-Affect xml."""
tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir))
root = tree.getroot()
for elem in root.findall("categ"):
name = elem.get("name")
if name == "root":
Emotion.emotions["root"] = Emotion("root")
else:
Emotion.emotions[name] = Emotion(name, elem.get("isa"))
def get_emotion(self, word, pos):
"""Returns the emotion of the word.
word -- the word (str)
pos -- part-of-speech (str)
"""
if pos in self.flat_pos:
pos = self.flat_pos[pos]
synsets = self.wn16.synsets(word, self.wn_pos[pos])
if synsets:
offset = synsets[0].offset()
if offset in self.synsets[pos]:
return self.synsets[pos][offset]
return None
if __name__ == "__main__":
wordnet16, wndomains32, word, pos = sys.argv[1:5]
wna = WNAffect(wordnet16, wndomains32)
print wna.get_emotion(word, pos)