commit 23c6cdd58dd3071fe5f707d904afacde6bd1a870 Author: J. Fernando Sánchez Date: Tue Jun 12 10:01:44 2018 +0200 Squashed 'emotion-wnaffect/' content from commit 74c40d7 git-subtree-dir: emotion-wnaffect git-subtree-split: 74c40d7e97d54d3c3e30739a85cf9322c92d5a87 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8d35cb3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +*.pyc diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..4806968 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,67 @@ +# Uncomment if you want to use docker-in-docker +# image: gsiupm/dockermake:latest +# services: +# - docker:dind +# When using dind, it's wise to use the overlayfs driver for +# improved performance. + +stages: + - test + - push + - deploy + - clean + +before_script: + - make -e login + +.test: &test_definition + stage: test + script: + - make -e test-$PYTHON_VERSION + +test-3.5: + <<: *test_definition + variables: + PYTHON_VERSION: "3.5" + +.image: &image_definition + stage: push + script: + - make -e push-$PYTHON_VERSION + only: + - tags + - triggers + +push-3.5: + <<: *image_definition + variables: + PYTHON_VERSION: "3.5" + +push-latest: + <<: *image_definition + variables: + PYTHON_VERSION: latest + only: + - tags + - triggers + +deploy: + stage: deploy + environment: production + script: + - make -e deploy + only: + - tags + - triggers + +clean : + stage: clean + script: + - make -e clean + when: manual + +cleanup_py: + stage: clean + when: always # this is important; run even if preceding stages failed. 
+ script: + - docker logout diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..309bd76 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "data"] + path = data + url = ../data/emotion-wnaffect diff --git a/.makefiles/README.md b/.makefiles/README.md new file mode 100644 index 0000000..2ab487e --- /dev/null +++ b/.makefiles/README.md @@ -0,0 +1,27 @@ +These makefiles are recipes for several common tasks in different types of projects. +To add them to your project, simply do: + +``` +git remote add makefiles ssh://git@lab.cluster.gsi.dit.upm.es:2200/docs/templates/makefiles.git +git subtree add --prefix=.makefiles/ makefiles master +touch Makefile +echo "include .makefiles/base.mk" >> Makefile +``` + +Now you can take advantage of the recipes. +For instance, to add useful targets for a python project, just add this to your Makefile: + +``` +include .makefiles/python.mk +``` + +You may need to set special variables like the name of your project or the python versions you're targetting. +Take a look at each specific `.mk` file for more information, and the `Makefile` in the [senpy](https://lab.cluster.gsi.dit.upm.es/senpy/senpy) project for a real use case. + +If you update the makefiles from your repository, make sure to push the changes for review in upstream (this repository): + +``` +make makefiles-push +``` + +It will automatically commit all unstaged changes in the .makefiles folder. diff --git a/.makefiles/base.mk b/.makefiles/base.mk new file mode 100644 index 0000000..4371ebe --- /dev/null +++ b/.makefiles/base.mk @@ -0,0 +1,36 @@ +export +NAME ?= $(shell basename $(CURDIR)) +VERSION ?= $(shell git describe --tags --dirty 2>/dev/null) + +ifeq ($(VERSION),) + VERSION:=unknown +endif + +# Get the location of this makefile. +MK_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + +-include .env +-include ../.env + +help: ## Show this help. 
+ @fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/\(.*:\)[^#]*##\s*\(.*\)/\1\t\2/' | column -t -s " " + +config: ## Load config from the environment. You should run it once in every session before other tasks. Run: eval $(make config) + @awk '{ print "export " $$0}' ../.env + @awk '{ print "export " $$0}' .env + @echo "# Please, run: " + @echo "# eval \$$(make config)" +# If you need to run a command on the key/value pairs, use this: +# @awk '{ split($$0, a, "="); "echo " a[2] " | base64 -w 0" |& getline b64; print "export " a[1] "=" a[2]; print "export " a[1] "_BASE64=" b64}' .env + +ci: ## Run a task using gitlab-runner. Only use to debug problems in the CI pipeline + gitlab-runner exec shell --builds-dir '.builds' --env CI_PROJECT_NAME=$(NAME) ${action} + +include $(MK_DIR)/makefiles.mk +include $(MK_DIR)/docker.mk +include $(MK_DIR)/git.mk + +info:: ## List all variables + env + +.PHONY:: config help ci diff --git a/.makefiles/docker.mk b/.makefiles/docker.mk new file mode 100644 index 0000000..f74d93f --- /dev/null +++ b/.makefiles/docker.mk @@ -0,0 +1,29 @@ +IMAGENAME?=$(NAME) +IMAGEWTAG?=$(IMAGENAME):$(VERSION) + +docker-login: ## Log in to the registry. It will only be used in the server, or when running a CI task locally (if CI_BUILD_TOKEN is set). 
+ifeq ($(CI_BUILD_TOKEN),) + @echo "Not logging in to the docker registry" "$(CI_REGISTRY)" +else + @docker login -u gitlab-ci-token -p $(CI_BUILD_TOKEN) $(CI_REGISTRY) +endif +ifeq ($(HUB_USER),) + @echo "Not logging in to global the docker registry" +else + @docker login -u $(HUB_USER) -p $(HUB_PASSWORD) +endif + +docker-clean: ## Remove docker credentials +ifeq ($(HUB_USER),) +else + @docker logout +endif + +login:: docker-login + +clean:: docker-clean + +docker-info: + @echo IMAGEWTAG=${IMAGEWTAG} + +.PHONY:: docker-login docker-clean login clean diff --git a/.makefiles/git.mk b/.makefiles/git.mk new file mode 100644 index 0000000..836eb14 --- /dev/null +++ b/.makefiles/git.mk @@ -0,0 +1,28 @@ +commit: + git commit -a + +tag: + git tag ${VERSION} + +git-push:: + git push --tags -u origin HEAD + +git-pull: + git pull --all + +push-github: ## Push the code to github. You need to set up GITHUB_DEPLOY_KEY +ifeq ($(GITHUB_DEPLOY_KEY),) +else + $(eval KEY_FILE := "$(shell mktemp)") + @echo "$(GITHUB_DEPLOY_KEY)" > $(KEY_FILE) + @git remote rm github-deploy || true + git remote add github-deploy $(GITHUB_REPO) + -@GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git fetch github-deploy $(CI_COMMIT_REF_NAME) + @GIT_SSH_COMMAND="ssh -i $(KEY_FILE)" git push github-deploy HEAD:$(CI_COMMIT_REF_NAME) + rm $(KEY_FILE) +endif + +push:: git-push +pull:: git-pull + +.PHONY:: commit tag push git-push git-pull push-github diff --git a/.makefiles/k8s.mk b/.makefiles/k8s.mk new file mode 100644 index 0000000..a493b4a --- /dev/null +++ b/.makefiles/k8s.mk @@ -0,0 +1,51 @@ +# Deployment with Kubernetes + +# KUBE_CA_PEM_FILE is the path of a certificate file. It automatically set by GitLab +# if you enable Kubernetes integration in a project. +# +# As of this writing, Kubernetes integration can not be set on a group level, so it has to +# be manually set in every project. +# Alternatively, we use a custom KUBE_CA_BUNDLE environment variable, which can be set at +# the group level. 
In this case, the variable contains the whole content of the certificate, +# which we dump to a temporary file +# +# Check if the KUBE_CA_PEM_FILE exists. Otherwise, create it from KUBE_CA_BUNDLE +KUBE_CA_TEMP=false +ifndef KUBE_CA_PEM_FILE +KUBE_CA_PEM_FILE:=$$PWD/.ca.crt +CREATED:=$(shell echo -e "$(KUBE_CA_BUNDLE)" > $(KUBE_CA_PEM_FILE)) +endif +KUBE_TOKEN?="" +KUBE_NAMESPACE?=$(NAME) +KUBECTL=docker run --rm -v $(KUBE_CA_PEM_FILE):/tmp/ca.pem -i lachlanevenson/k8s-kubectl --server="$(KUBE_URL)" --token="$(KUBE_TOKEN)" --certificate-authority="/tmp/ca.pem" -n $(KUBE_NAMESPACE) +CI_COMMIT_REF_NAME?=master + +info:: ## Print variables. Useful for debugging. + @echo "#KUBERNETES" + @echo KUBE_URL=$(KUBE_URL) + @echo KUBE_CA_PEM_FILE=$(KUBE_CA_PEM_FILE) + @echo KUBE_CA_BUNDLE=$$KUBE_CA_BUNDLE + @echo KUBE_TOKEN=$(KUBE_TOKEN) + @echo KUBE_NAMESPACE=$(KUBE_NAMESPACE) + @echo KUBECTL=$(KUBECTL) + + @echo "#CI" + @echo CI_PROJECT_NAME=$(CI_PROJECT_NAME) + @echo CI_REGISTRY=$(CI_REGISTRY) + @echo CI_REGISTRY_USER=$(CI_REGISTRY_USER) + @echo CI_COMMIT_REF_NAME=$(CI_COMMIT_REF_NAME) + @echo "CREATED=$(CREATED)" + +# +# Deployment and advanced features +# + + +deploy: ## Deploy to kubernetes using the credentials in KUBE_CA_PEM_FILE (or KUBE_CA_BUNDLE ) and TOKEN + @ls k8s/*.yaml k8s/*.yml k8s/*.tmpl 2>/dev/null || true + @cat k8s/*.yaml k8s/*.yml k8s/*.tmpl 2>/dev/null | envsubst | $(KUBECTL) apply -f - + +deploy-check: ## Get the deployed configuration. 
+ @$(KUBECTL) get deploy,pods,svc,ingress + +.PHONY:: info deploy deploy-check diff --git a/.makefiles/makefiles.mk b/.makefiles/makefiles.mk new file mode 100644 index 0000000..03dcc17 --- /dev/null +++ b/.makefiles/makefiles.mk @@ -0,0 +1,17 @@ +makefiles-remote: + @git remote add makefiles ssh://git@lab.cluster.gsi.dit.upm.es:2200/docs/templates/makefiles.git 2>/dev/null || true + +makefiles-commit: makefiles-remote + git add -f .makefiles + git commit -em "Updated makefiles from ${NAME}" + +makefiles-push: + git subtree push --prefix=.makefiles/ makefiles $(NAME) + +makefiles-pull: makefiles-remote + git subtree pull --prefix=.makefiles/ makefiles master --squash + +pull:: makefiles-pull +push:: makefiles-push + +.PHONY:: makefiles-remote makefiles-commit makefiles-push makefiles-pull pull push diff --git a/.makefiles/precommit.mk b/.makefiles/precommit.mk new file mode 100644 index 0000000..82fe75f --- /dev/null +++ b/.makefiles/precommit.mk @@ -0,0 +1,5 @@ +init: ## Init pre-commit hooks (i.e. enforcing format checking before allowing a commit) + pip install --user pre-commit + pre-commit install + +.PHONY:: init diff --git a/.makefiles/python.mk b/.makefiles/python.mk new file mode 100644 index 0000000..2ad9559 --- /dev/null +++ b/.makefiles/python.mk @@ -0,0 +1,100 @@ +PYVERSIONS ?= 3.5 +PYMAIN ?= $(firstword $(PYVERSIONS)) +TARNAME ?= $(NAME)-$(VERSION).tar.gz +VERSIONFILE ?= $(NAME)/VERSION + +DEVPORT ?= 6000 + + +.FORCE: + +version: .FORCE + @echo $(VERSION) > $(VERSIONFILE) + @echo $(VERSION) + +yapf: ## Format python code + yapf -i -r $(NAME) + yapf -i -r tests + +dockerfiles: $(addprefix Dockerfile-,$(PYVERSIONS)) ## Generate dockerfiles for each python version + @unlink Dockerfile >/dev/null + ln -s Dockerfile-$(PYMAIN) Dockerfile + +Dockerfile-%: Dockerfile.template ## Generate a specific dockerfile (e.g. 
Dockerfile-2.7) + sed "s/{{PYVERSION}}/$*/" Dockerfile.template > Dockerfile-$* + +quick_build: $(addprefix build-, $(PYMAIN)) + +build: $(addprefix build-, $(PYVERSIONS)) ## Build all images / python versions + +build-%: version Dockerfile-% ## Build a specific version (e.g. build-2.7) + docker build -t '$(IMAGEWTAG)-python$*' --cache-from $(IMAGENAME):python$* -f Dockerfile-$* .; + +dev-%: ## Launch a specific development environment using docker (e.g. dev-2.7) + @docker start $(NAME)-dev$* || (\ + $(MAKE) build-$*; \ + docker run -d -w /usr/src/app/ -p $(DEVPORT):5000 -v $$PWD:/usr/src/app --entrypoint=/bin/bash -ti --name $(NAME)-dev$* '$(IMAGEWTAG)-python$*'; \ + )\ + + docker exec -ti $(NAME)-dev$* bash + +dev: dev-$(PYMAIN) ## Launch a development environment using docker, using the default python version + +quick_test: test-$(PYMAIN) + +test-%: ## Run setup.py from in an isolated container, built from the base image. (e.g. test-2.7) +# This speeds tests up because the image has most (if not all) of the dependencies already. + docker rm $(NAME)-test-$* || true + docker create -ti --name $(NAME)-test-$* --entrypoint="" -w /usr/src/app/ $(IMAGENAME):python$* python setup.py test + docker cp . $(NAME)-test-$*:/usr/src/app + docker start -a $(NAME)-test-$* + +test: $(addprefix test-,$(PYVERSIONS)) ## Run the tests with the main python version + +run-%: build-% + docker run --rm -p $(DEVPORT):5000 -ti '$(IMAGEWTAG)-python$(PYMAIN)' --default-plugins + +run: run-$(PYMAIN) + +# Pypy - Upload a package + +dist/$(TARNAME): version + python setup.py sdist; + +sdist: dist/$(TARNAME) ## Generate the distribution file (wheel) + +pip_test-%: sdist ## Test the distribution file using pip install and a specific python version (e.g. 
pip_test-2.7) + docker run --rm -v $$PWD/dist:/dist/ python:$* pip install /dist/$(TARNAME); + +pip_test: $(addprefix pip_test-,$(PYVERSIONS)) ## Test pip installation with the main python version + +pip_upload: pip_test ## Upload package to pip + python setup.py sdist upload ; + +# Pushing to docker + +push-latest: $(addprefix push-latest-,$(PYVERSIONS)) ## Push the "latest" tag to dockerhub + docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)' + docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGENAME)' + docker push '$(IMAGENAME):latest' + docker push '$(IMAGEWTAG)' + +push-latest-%: build-% ## Push the latest image for a specific python version + docker tag $(IMAGENAME):$(VERSION)-python$* $(IMAGENAME):python$* + docker push $(IMAGENAME):$(VERSION)-python$* + docker push $(IMAGENAME):python$* + +push-%: build-% ## Push the image of the current version (tagged). e.g. push-2.7 + docker push $(IMAGENAME):$(VERSION)-python$* + +push:: $(addprefix push-,$(PYVERSIONS)) ## Push an image with the current version for every python version + docker tag '$(IMAGEWTAG)-python$(PYMAIN)' '$(IMAGEWTAG)' + docker push $(IMAGENAME):$(VERSION) + +clean:: ## Clean older docker images and containers related to this project and dev environments + @docker stop $(addprefix $(NAME)-dev,$(PYVERSIONS)) 2>/dev/null || true + @docker rm $(addprefix $(NAME)-dev,$(PYVERSIONS)) 2>/dev/null || true + @docker ps -a | grep $(IMAGENAME) | awk '{ split($$2, vers, "-"); if(vers[0] != "${VERSION}"){ print $$1;}}' | xargs docker rm -v 2>/dev/null|| true + @docker images | grep $(IMAGENAME) | awk '{ split($$2, vers, "-"); if(vers[0] != "${VERSION}"){ print $$1":"$$2;}}' | xargs docker rmi 2>/dev/null|| true + +.PHONY:: yapf dockerfiles Dockerfile-% quick_build build build-% dev-% quick-dev test quick_test push-latest push-latest-% push-% push version .FORCE diff --git a/Dockerfile-2.7 b/Dockerfile-2.7 new file mode 100644 index 0000000..4dce575 --- /dev/null +++ b/Dockerfile-2.7 @@ -0,0 +1,5 @@ 
+FROM gsiupm/senpy:python2.7 + +MAINTAINER manuel.garcia-amado.sancho@alumnos.upm.es + +COPY data /data diff --git a/Dockerfile-3.5 b/Dockerfile-3.5 new file mode 100644 index 0000000..cdc7061 --- /dev/null +++ b/Dockerfile-3.5 @@ -0,0 +1,4 @@ +FROM gsiupm/senpy:python3.5 + +MAINTAINER manuel.garcia-amado.sancho@alumnos.upm.es +COPY data /data diff --git a/Dockerfile.template b/Dockerfile.template new file mode 100644 index 0000000..27c30a5 --- /dev/null +++ b/Dockerfile.template @@ -0,0 +1,3 @@ +FROM gsiupm/senpy:python{{PYVERSION}} + +MAINTAINER manuel.garcia-amado.sancho@alumnos.upm.es diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..6308ecc --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +NAME:=wnaffect +VERSIONFILE:=VERSION +IMAGENAME:=registry.cluster.gsi.dit.upm.es/senpy/emotion-wnaffect +PYVERSIONS:=2.7 3.5 +DEVPORT:=5000 + +include .makefiles/base.mk +include .makefiles/k8s.mk +include .makefiles/python.mk diff --git a/README.md b/README.md new file mode 100644 index 0000000..f4f5cfe --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +# WordNet-Affect plugin + +This plugin uses WordNet-Affect (http://wndomains.fbk.eu/wnaffect.html) to calculate the percentage of each emotion. The plugin classifies among five different emotions: anger, fear, disgust, joy and sadness. 
An emotion mapping has been used to enlarge the set of emotions: + +- anger : general-dislike +- fear : negative-fear +- disgust : shame +- joy : gratitude, affective, enthusiasm, love, joy, liking +- sadness : ingrattitude, daze, humility, compassion, despair, anxiety, sadness + +## Installation + +* Download +``` +git clone https://lab.cluster.gsi.dit.upm.es/senpy/emotion-wnaffect.git +``` +* Get data +``` +cd emotion-wnaffect +git submodule update --init --recursive +``` +* Run +``` +docker run -p 5000:5000 -v $PWD:/plugins gsiupm/senpy -f /plugins +``` + +## Data format + +`data/a-hierarchy.xml` is an XML file +`data/a-synsets.xml` is an XML file + +## Usage + +The parameters accepted are: + +- Language: English (en). +- Input: Text to analyse. + +Example request: +``` +http://senpy.cluster.gsi.dit.upm.es/api/?algo=emotion-wnaffect&language=en&input=I%20love%20Madrid +``` + +Example response: This plugin follows the standard for the senpy plugin response. For more information, please visit [senpy documentation](http://senpy.readthedocs.io). Specifically, see the NIF API section. + + +The response of this plugin uses [Onyx ontology](https://www.gsi.dit.upm.es/ontologies/onyx/) developed at GSI UPM for semantic web. + +This plugin uses WNAffect labels for emotion analysis. + +The emotion-wnaffect.senpy file can be copied and modified to use different versions of wnaffect with the same python code. + + +## Known issues + +- This plugin runs on **Python2.7** and **Python3.5** +- Wnaffect and corpora files are not included in the repository, but can be easily added either to the docker image (using a volume) or in a new docker image. +- You can download Wordnet 1.6 here: and extract the dict folder. 
+- The hierarchy and synsets files can be found here: + +![alt GSI Logo][logoGSI] +[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo" diff --git a/data b/data new file mode 160000 index 0000000..0dc601a --- /dev/null +++ b/data @@ -0,0 +1 @@ +Subproject commit 0dc601a8713984812666ae6ef5f2d25c065dba17 diff --git a/emotion-wnaffect.py b/emotion-wnaffect.py new file mode 100644 index 0000000..638640e --- /dev/null +++ b/emotion-wnaffect.py @@ -0,0 +1,260 @@ +# -*- coding: utf-8 -*- +from __future__ import division +import re +import nltk +import logging +import os +import string +import xml.etree.ElementTree as ET +from nltk.corpus import stopwords +from nltk.corpus import WordNetCorpusReader +from nltk.stem import wordnet +from emotion import Emotion as Emo +from senpy.plugins import EmotionPlugin, AnalysisPlugin, ShelfMixin +from senpy.models import Results, EmotionSet, Entry, Emotion + + +class EmotionTextPlugin(EmotionPlugin, ShelfMixin): + '''Emotion classifier using WordNet-Affect to calculate the percentage + of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness + or neutral. 
The only available language is English (en) + ''' + def _load_synsets(self, synsets_path): + """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" + tree = ET.parse(synsets_path) + root = tree.getroot() + pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"} + + synsets = {} + for pos in ["noun", "adj", "verb", "adv"]: + tag = pos_map[pos] + synsets[tag] = {} + for elem in root.findall( + ".//{0}-syn-list//{0}-syn".format(pos, pos)): + offset = int(elem.get("id")[2:]) + if not offset: continue + if elem.get("categ"): + synsets[tag][offset] = Emo.emotions[elem.get( + "categ")] if elem.get( + "categ") in Emo.emotions else None + elif elem.get("noun-id"): + synsets[tag][offset] = synsets[pos_map["noun"]][int( + elem.get("noun-id")[2:])] + return synsets + + def _load_emotions(self, hierarchy_path): + """Loads the hierarchy of emotions from the WordNet-Affect xml.""" + + tree = ET.parse(hierarchy_path) + root = tree.getroot() + for elem in root.findall("categ"): + name = elem.get("name") + if name == "root": + Emo.emotions["root"] = Emo("root") + else: + Emo.emotions[name] = Emo(name, elem.get("isa")) + + def activate(self, *args, **kwargs): + + nltk.download(['stopwords', 'averaged_perceptron_tagger', 'wordnet']) + self._stopwords = stopwords.words('english') + self._wnlemma = wordnet.WordNetLemmatizer() + self._syntactics = {'N': 'n', 'V': 'v', 'J': 'a', 'S': 's', 'R': 'r'} + local_path = os.environ.get("SENPY_DATA") + self._categories = { + 'anger': [ + 'general-dislike', + ], + 'fear': [ + 'negative-fear', + ], + 'disgust': [ + 'shame', + ], + 'joy': + ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'], + 'sadness': [ + 'ingrattitude', 'daze', 'humility', 'compassion', 'despair', + 'anxiety', 'sadness' + ] + } + + self._wnaffect_mappings = { + 'anger': 'anger', + 'fear': 'negative-fear', + 'disgust': 'disgust', + 'joy': 'joy', + 'sadness': 'sadness' + } + + self._load_emotions(local_path + self.hierarchy_path) 
+ + if 'total_synsets' not in self.sh: + total_synsets = self._load_synsets(local_path + self.synsets_path) + self.sh['total_synsets'] = total_synsets + + self._total_synsets = self.sh['total_synsets'] + + self._wn16_path = self.wn16_path + self._wn16 = WordNetCorpusReader(os.path.abspath("{0}".format(local_path + self._wn16_path)), nltk.data.find(local_path + self._wn16_path)) + + + def deactivate(self, *args, **kwargs): + self.save() + + def _my_preprocessor(self, text): + + regHttp = re.compile( + '(http://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') + regHttps = re.compile( + '(https://)[a-zA-Z0-9]*.[a-zA-Z0-9/]*(.[a-zA-Z0-9]*)?') + regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*') + text = re.sub(regHttp, '', text) + text = re.sub(regAt, '', text) + text = re.sub('RT : ', '', text) + text = re.sub(regHttps, '', text) + text = re.sub('[0-9]', '', text) + text = self._delete_punctuation(text) + return text + + def _delete_punctuation(self, text): + + exclude = set(string.punctuation) + s = ''.join(ch for ch in text if ch not in exclude) + return s + + def _extract_ngrams(self, text): + + unigrams_lemmas = [] + pos_tagged = [] + unigrams_words = [] + tokens = text.split() + for token in nltk.pos_tag(tokens): + unigrams_words.append(token[0]) + pos_tagged.append(token[1]) + if token[1][0] in self._syntactics.keys(): + unigrams_lemmas.append( + self._wnlemma.lemmatize(token[0], self._syntactics[token[1] + [0]])) + else: + unigrams_lemmas.append(token[0]) + + return unigrams_words, unigrams_lemmas, pos_tagged + + def _find_ngrams(self, input_list, n): + return zip(*[input_list[i:] for i in range(n)]) + + def _clean_pos(self, pos_tagged): + + pos_tags = { + 'NN': 'NN', + 'NNP': 'NN', + 'NNP-LOC': 'NN', + 'NNS': 'NN', + 'JJ': 'JJ', + 'JJR': 'JJ', + 'JJS': 'JJ', + 'RB': 'RB', + 'RBR': 'RB', + 'RBS': 'RB', + 'VB': 'VB', + 'VBD': 'VB', + 'VGB': 'VB', + 'VBN': 'VB', + 'VBP': 'VB', + 'VBZ': 'VB' + } + + for i in range(len(pos_tagged)): + if pos_tagged[i] in 
pos_tags: + pos_tagged[i] = pos_tags[pos_tagged[i]] + return pos_tagged + + def _extract_features(self, text): + + feature_set = {k: 0 for k in self._categories} + ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text) + matches = 0 + pos_tagged = self._clean_pos(pos_tagged) + + tag_wn = { + 'NN': self._wn16.NOUN, + 'JJ': self._wn16.ADJ, + 'VB': self._wn16.VERB, + 'RB': self._wn16.ADV + } + for i in range(len(pos_tagged)): + if pos_tagged[i] in tag_wn: + synsets = self._wn16.synsets(ngrams_words[i], + tag_wn[pos_tagged[i]]) + if synsets: + offset = synsets[0].offset() + if offset in self._total_synsets[pos_tagged[i]]: + if self._total_synsets[pos_tagged[i]][offset] is None: + continue + else: + emotion = self._total_synsets[pos_tagged[i]][ + offset].get_level(5).name + matches += 1 + for i in self._categories: + if emotion in self._categories[i]: + feature_set[i] += 1 + if matches == 0: + matches = 1 + + for i in feature_set: + feature_set[i] = (feature_set[i] / matches) * 100 + + return feature_set + + def analyse_entry(self, entry, params): + + text_input = entry['nif:isString'] + + text = self._my_preprocessor(text_input) + + feature_text = self._extract_features(text) + + emotionSet = EmotionSet(id="Emotions0") + emotions = emotionSet.onyx__hasEmotion + + for i in feature_text: + emotions.append( + Emotion( + onyx__hasEmotionCategory=self._wnaffect_mappings[i], + onyx__hasEmotionIntensity=feature_text[i])) + + entry.emotions = [emotionSet] + + yield entry + + + def test(self, *args, **kwargs): + results = list() + params = {'algo': 'emotion-wnaffect', + 'intype': 'direct', + 'expanded-jsonld': 0, + 'informat': 'text', + 'prefix': '', + 'plugin_type': 'analysisPlugin', + 'urischeme': 'RFC5147String', + 'outformat': 'json-ld', + 'i': 'Hello World', + 'input': 'Hello World', + 'conversion': 'full', + 'language': 'en', + 'algorithm': 'emotion-wnaffect'} + + self.activate() + res = next(self.analyse_entry(Entry(nif__isString="This text make me sad"), 
params)) + texts = {'I hate you': 'anger', + 'i am sad': 'sadness', + 'i am happy with my marks': 'joy', + 'This movie is scary': 'negative-fear'} + + for text in texts: + response = next(self.analyse_entry(Entry(nif__isString=text), params)) + expected = texts[text] + emotionSet = response.emotions[0] + max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity']) + assert max_emotion['onyx:hasEmotionCategory'] == expected diff --git a/emotion-wnaffect.senpy b/emotion-wnaffect.senpy new file mode 100644 index 0000000..e769205 --- /dev/null +++ b/emotion-wnaffect.senpy @@ -0,0 +1,25 @@ +--- +name: emotion-wnaffect +module: emotion-wnaffect +description: 'Emotion classifier using WordNet-Affect to calculate the percentage + of each emotion. This plugin classifies among 6 emotions: anger,fear,disgust,joy,sadness + or neutral. The only available language is English (en)' +author: "@icorcuera @balkian" +version: '0.2' +extra_params: + language: + "@id": lang_wnaffect + aliases: + - language + - l + required: false + options: + - en +synsets_path: "/a-synsets.xml" +hierarchy_path: "/a-hierarchy.xml" +wn16_path: "/wordnet1.6/dict" +onyx:usesEmotionModel: emoml:big6 +requirements: +- nltk>=3.0.5 +- lxml>=3.4.2 +async: false diff --git a/emotion.py b/emotion.py new file mode 100644 index 0000000..d30e35d --- /dev/null +++ b/emotion.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +""" +Clement Michard (c) 2015 +""" + +class Emotion: + """Defines an emotion.""" + + emotions = {} # name to emotion (str -> Emotion) + + def __init__(self, name, parent_name=None): + """Initializes an Emotion object. 
+ name -- name of the emotion (str) + parent_name -- name of the parent emotion (str) + """ + + self.name = name + self.parent = None + self.level = 0 + self.children = [] + + if parent_name: + self.parent = Emotion.emotions[parent_name] if parent_name else None + self.parent.children.append(self) + self.level = self.parent.level + 1 + + + def get_level(self, level): + """Returns the parent of self at the given level. + level -- level in the hierarchy (int) + """ + + em = self + while em.level > level and em.level >= 0: + em = em.parent + return em + + + def __str__(self): + """Returns the emotion string formatted.""" + + return self.name + + + def nb_children(self): + """Returns the number of children of the emotion.""" + + return sum(child.nb_children() for child in self.children) + 1 + + + @staticmethod + def printTree(emotion=None, indent="", last='updown'): + """Prints the hierarchy of emotions. + emotion -- root emotion (Emotion) + """ + + if not emotion: + emotion = Emotion.emotions["root"] + + size_branch = {child: child.nb_children() for child in emotion.children} + leaves = sorted(emotion.children, key=lambda emotion: emotion.nb_children()) + up, down = [], [] + if leaves: + while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves): + down.append(leaves.pop()) + up = leaves + + for leaf in up: + next_last = 'up' if up.index(leaf) is 0 else '' + next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '│', " " * len(emotion.name)) + Emotion.printTree(leaf, indent=next_indent, last=next_last) + if last == 'up': + start_shape = '┌' + elif last == 'down': + start_shape = '└' + elif last == 'updown': + start_shape = ' ' + else: + start_shape = '├' + if up: + end_shape = '┤' + elif down: + end_shape = '┐' + else: + end_shape = '' + print ('{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)) + for leaf in down: + next_last = 'down' if down.index(leaf) is len(down) - 1 else '' + next_indent = '{0}{1}{2}'.format(indent, 
' ' if 'down' in last else '│', " " * len(emotion.name)) + Emotion.printTree(leaf, indent=next_indent, last=next_last) + + + + + diff --git a/wnaffect.py b/wnaffect.py new file mode 100644 index 0000000..29cf64d --- /dev/null +++ b/wnaffect.py @@ -0,0 +1,92 @@ + +# coding: utf-8 + +# In[1]: + + +# -*- coding: utf-8 -*- +""" +Clement Michard (c) 2015 +""" + +import os +import sys +import nltk +from emotion import Emotion +from nltk.corpus import WordNetCorpusReader +import xml.etree.ElementTree as ET + +class WNAffect: + """WordNet-Affect ressource.""" + + def __init__(self, wordnet16_dir, wn_domains_dir): + """Initializes the WordNet-Affect object.""" + + cwd = os.getcwd() + nltk.data.path.append(cwd) + wn16_path = "{0}/dict".format(wordnet16_dir) + self.wn16 = WordNetCorpusReader(os.path.abspath("{0}/{1}".format(cwd, wn16_path)), nltk.data.find(wn16_path)) + self.flat_pos = {'NN':'NN', 'NNS':'NN', 'JJ':'JJ', 'JJR':'JJ', 'JJS':'JJ', 'RB':'RB', 'RBR':'RB', 'RBS':'RB', 'VB':'VB', 'VBD':'VB', 'VGB':'VB', 'VBN':'VB', 'VBP':'VB', 'VBZ':'VB'} + self.wn_pos = {'NN':self.wn16.NOUN, 'JJ':self.wn16.ADJ, 'VB':self.wn16.VERB, 'RB':self.wn16.ADV} + self._load_emotions(wn_domains_dir) + self.synsets = self._load_synsets(wn_domains_dir) + + + + def _load_synsets(self, wn_domains_dir): + """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str).""" + + tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir)) + root = tree.getroot() + pos_map = { "noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB" } + + synsets = {} + for pos in ["noun", "adj", "verb", "adv"]: + tag = pos_map[pos] + synsets[tag] = {} + for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos, pos)): + offset = int(elem.get("id")[2:]) + if not offset: continue + if elem.get("categ"): + synsets[tag][offset] = Emotion.emotions[elem.get("categ")] if elem.get("categ") in Emotion.emotions else None + elif elem.get("noun-id"): + synsets[tag][offset] = 
synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])] + + return synsets + + def _load_emotions(self, wn_domains_dir): + """Loads the hierarchy of emotions from the WordNet-Affect xml.""" + + tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir)) + root = tree.getroot() + for elem in root.findall("categ"): + name = elem.get("name") + if name == "root": + Emotion.emotions["root"] = Emotion("root") + else: + Emotion.emotions[name] = Emotion(name, elem.get("isa")) + + def get_emotion(self, word, pos): + """Returns the emotion of the word. + word -- the word (str) + pos -- part-of-speech (str) + """ + + if pos in self.flat_pos: + pos = self.flat_pos[pos] + synsets = self.wn16.synsets(word, self.wn_pos[pos]) + if synsets: + offset = synsets[0].offset() + if offset in self.synsets[pos]: + return self.synsets[pos][offset] + return None + + + + +if __name__ == "__main__": + wordnet16, wndomains32, word, pos = sys.argv[1:5] + wna = WNAffect(wordnet16, wndomains32) + print wna.get_emotion(word, pos) + +