mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-22 08:12:27 +00:00
Added WordNet-Affect plugin and Makefile
This commit is contained in:
parent
0e9db7081c
commit
5e8bc717a8
8  .gitignore  vendored
@@ -55,3 +55,11 @@ docs/_build/
 # PyBuilder
 target/
+.*
+*.pyc
+**/__pycache__
+*/wordnet1.6
+*/Corpus
+*/a-hierarchy.xml
+*/a-synsets.xml
+*/wn16.txt
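The new ignore rules keep the bulky WordNet 1.6 and WordNet-Affect data files (wordnet1.6, Corpus, a-hierarchy.xml, a-synsets.xml, wn16.txt) out of version control; the README below explains how to provide them at deployment time.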
15  Dockerfile  Normal file
@@ -0,0 +1,15 @@
FROM gsiupm/senpy:0.6.1-python2.7

RUN mkdir -p /senpy-plugins
RUN pip install nltk
RUN python -m nltk.downloader stopwords
RUN python -m nltk.downloader punkt
RUN python -m nltk.downloader maxent_treebank_pos_tagger
RUN python -m nltk.downloader wordnet

RUN pip install pytest
RUN pip install mock
ADD . /senpy-plugins
RUN senpy -f /senpy-plugins --only-install

WORKDIR /senpy-plugins/
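The image builds on the senpy 0.6.1 base for Python 2.7 and pre-downloads the NLTK resources the plugin needs (stopword lists, the punkt sentence tokenizer, the maxent treebank POS tagger and the WordNet data), so containers can run without fetching them at analysis time.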
25  Makefile  Normal file
@@ -0,0 +1,25 @@
PYVERSION=2.7
NAME=senpycommunity
REPO=gsiupm
VERSION=test
PLUGINS= $(filter %/, $(wildcard */))


all: build run

build: clean Dockerfile
	docker build -t '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)' -f Dockerfile .;

test-%:
	docker run -v $$PWD/$*:/senpy-plugins/ --rm --entrypoint=/usr/local/bin/py.test -ti '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)' test.py

test: $(addprefix test-,$(PLUGINS))

clean:
	@docker ps -a | awk '/$(REPO)\/$(NAME)/{ split($$2, vers, "-"); if(vers[1] != "${VERSION}"){ print $$1;}}' | xargs docker rm 2>/dev/null || true
	@docker images | awk '/$(REPO)\/$(NAME)/{ split($$2, vers, "-"); if(vers[1] != "${VERSION}"){ print $$1":"$$2;}}' | xargs docker rmi 2>/dev/null || true

run: build
	docker run --rm -p 5000:5000 -ti '$(REPO)/$(NAME):$(VERSION)-python$(PYVERSION)'

.PHONY: test test-% build run clean
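Because PLUGINS is built from $(wildcard */), each plugin directory (trailing slash included) becomes a test-% target: for example, make test-emoTextWNA/ mounts that directory into the container and runs py.test on its test.py, and make test does the same for every plugin. make clean only prunes containers and images whose tag does not match the current VERSION.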
10  emoTextWNA/README.rst  Normal file
@@ -0,0 +1,10 @@
This plugin uses WordNet-Affect (WNA) labels for emotion analysis.

The emotextWAF.senpy file can be copied and modified to use different versions of WordNet-Affect with the same Python code.

Known issues
============

* This plugin depends on the pattern library, which means it only runs on Python 2.7.
* The WordNet-Affect and corpora files are not included in the repository, but they can easily be added, either to a running container (using a volume) or baked into a new Docker image.
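For a concrete picture of the plugin's API, here is a minimal sketch of how it can be driven from Python; it mirrors test_wna.py further down and assumes the WordNet-Affect resources listed in .gitignore are already in the plugin folder:

from flask import Flask
from senpy.extensions import Senpy

app = Flask("wna-demo")
senpy = Senpy(plugin_folder="emoTextWNA", default_plugins=False)  # load only this plugin
senpy.init_app(app)

plugin = senpy.plugins["EmoTextWAF"]
plugin.activate()
response = plugin.analyse(input="i am happy with my marks")
emotions = response.entries[0].emotions[0]['onyx:hasEmotion']
print max(emotions, key=lambda e: e['onyx:hasEmotionIntensity'])  # the dominant emotion
plugin.deactivate()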
185  emoTextWNA/emotextWAF.py  Normal file
@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-

from __future__ import division
import re
import nltk
import logging
import os
import string
import xml.etree.ElementTree as ET
from nltk.corpus import stopwords
from nltk.corpus import WordNetCorpusReader

from emotion import Emotion as Emo
from pattern.en import parse
from senpy.plugins import EmotionPlugin, SenpyPlugin
from senpy.models import Results, EmotionSet, Entry, Emotion

logger = logging.getLogger(__name__)


class EmotionTextPlugin(EmotionPlugin):

    def __init__(self, info, *args, **kwargs):
        super(EmotionTextPlugin, self).__init__(info, *args, **kwargs)
        self.id = info['module']
        self.info = info
        self._stopwords = stopwords.words('english')
        local_path = os.path.dirname(os.path.abspath(__file__))
        # Map each output category to the WNA hierarchy labels that feed it.
        # ('ingrattitude' is spelled as in the WordNet-Affect resource itself.)
        self._categories = {'anger': ['general-dislike'],
                            'fear': ['negative-fear'],
                            'disgust': ['shame'],
                            'joy': ['gratitude', 'affective', 'enthusiasm', 'love', 'joy', 'liking'],
                            'sadness': ['ingrattitude', 'daze', 'humility', 'compassion', 'despair', 'anxiety', 'sadness']}

        self._wnaffect_mappings = {'anger': 'anger',
                                   'fear': 'negative-fear',
                                   'disgust': 'disgust',
                                   'joy': 'joy',
                                   'sadness': 'sadness'}

        self._load_emotions(local_path + self.info['hierarchy_path'])
        self._total_synsets = self._load_synsets(local_path + self.info['synsets_path'])
        self._wn16_path = local_path + self.info['wn16_path']
        self._wn16 = WordNetCorpusReader(os.path.abspath(self._wn16_path), nltk.data.find(self._wn16_path))

    def _load_synsets(self, synsets_path):
        """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
        tree = ET.parse(synsets_path)
        root = tree.getroot()
        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}

        synsets = {}
        for pos in ["noun", "adj", "verb", "adv"]:
            tag = pos_map[pos]
            synsets[tag] = {}
            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
                offset = int(elem.get("id")[2:])
                if not offset:
                    continue
                if elem.get("categ"):
                    synsets[tag][offset] = Emo.emotions[elem.get("categ")] if elem.get("categ") in Emo.emotions else None
                elif elem.get("noun-id"):
                    synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
        return synsets

    def _load_emotions(self, hierarchy_path):
        """Loads the hierarchy of emotions from the WordNet-Affect xml."""
        tree = ET.parse(hierarchy_path)
        root = tree.getroot()
        for elem in root.findall("categ"):
            name = elem.get("name")
            if name == "root":
                Emo.emotions["root"] = Emo("root")
            else:
                Emo.emotions[name] = Emo(name, elem.get("isa"))

    def activate(self, *args, **kwargs):
        logger.info("EmoText plugin is ready to go!")

    def deactivate(self, *args, **kwargs):
        logger.info("EmoText plugin is being deactivated...")

    def _my_preprocessor(self, text):
        # Strip URLs, @-mentions, retweet markers and digits before analysis.
        regHttp = re.compile('(http://)[a-zA-Z0-9]*\.[a-zA-Z0-9/]*(\.[a-zA-Z0-9]*)?')
        regHttps = re.compile('(https://)[a-zA-Z0-9]*\.[a-zA-Z0-9/]*(\.[a-zA-Z0-9]*)?')
        regAt = re.compile('@([a-zA-Z0-9]*[*_/&%#@$]*)*[a-zA-Z0-9]*')
        text = re.sub(regHttp, '', text)
        text = re.sub(regAt, '', text)
        text = re.sub('RT : ', '', text)
        text = re.sub(regHttps, '', text)
        text = re.sub('[0-9]', '', text)
        text = self._delete_punctuation(text)
        return text

    def _delete_punctuation(self, text):
        exclude = set(string.punctuation)
        return ''.join(ch for ch in text if ch not in exclude)

    def _extract_ngrams(self, text):
        unigrams_lemmas = []
        pos_tagged = []
        unigrams_words = []
        sentences = parse(text, lemmata=True).split()
        for sentence in sentences:
            for token in sentence:
                if token[0].lower() not in self._stopwords:
                    unigrams_words.append(token[0].lower())
                    unigrams_lemmas.append(token[4])
                    pos_tagged.append(token[1])
        return unigrams_words, unigrams_lemmas, pos_tagged

    def _find_ngrams(self, input_list, n):
        return zip(*[input_list[i:] for i in range(n)])

    def _clean_pos(self, pos_tagged):
        # Collapse fine-grained Penn Treebank tags into NN/JJ/RB/VB.
        pos_tags = {'NN': 'NN', 'NNP': 'NN', 'NNP-LOC': 'NN', 'NNS': 'NN',
                    'JJ': 'JJ', 'JJR': 'JJ', 'JJS': 'JJ',
                    'RB': 'RB', 'RBR': 'RB', 'RBS': 'RB',
                    'VB': 'VB', 'VBD': 'VB', 'VGB': 'VB', 'VBN': 'VB', 'VBP': 'VB', 'VBZ': 'VB'}
        for i in range(len(pos_tagged)):
            if pos_tagged[i] in pos_tags:
                pos_tagged[i] = pos_tags[pos_tagged[i]]
        return pos_tagged

    def _extract_features(self, text):
        feature_set = {k: 0 for k in self._categories}
        ngrams_words, ngrams_lemmas, pos_tagged = self._extract_ngrams(text)
        matches = 0
        pos_tagged = self._clean_pos(pos_tagged)

        tag_wn = {'NN': self._wn16.NOUN, 'JJ': self._wn16.ADJ, 'VB': self._wn16.VERB, 'RB': self._wn16.ADV}
        for i in range(len(pos_tagged)):
            if pos_tagged[i] in tag_wn:
                synsets = self._wn16.synsets(ngrams_words[i], tag_wn[pos_tagged[i]])
                if synsets:
                    offset = synsets[0].offset()
                    if offset in self._total_synsets[pos_tagged[i]]:
                        if self._total_synsets[pos_tagged[i]][offset] is None:
                            continue
                        else:
                            emotion = self._total_synsets[pos_tagged[i]][offset].get_level(5).name
                            matches += 1
                            for category in self._categories:
                                if emotion in self._categories[category]:
                                    feature_set[category] += 1
        if matches == 0:
            matches = 1

        for category in feature_set:
            feature_set[category] = (feature_set[category] / matches) * 100

        return feature_set

    def analyse(self, **params):
        logger.debug("Analysing with params {}".format(params))

        text_input = params.get("input", None)
        text = self._my_preprocessor(text_input)
        feature_text = self._extract_features(text)

        response = Results()
        entry = Entry(id="Entry",
                      text=text_input)
        emotionSet = EmotionSet(id="Emotions0")
        emotions = emotionSet.onyx__hasEmotion

        for category in feature_text:
            emotions.append(Emotion(onyx__hasEmotionCategory=self._wnaffect_mappings[category],
                                    onyx__hasEmotionIntensity=feature_text[category]))

        entry.emotions = [emotionSet]
        response.entries.append(entry)
        return response
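The intensity numbers are simple percentages: if _extract_features matches four emotion-bearing synsets and two of them resolve (at level 5 of the hierarchy) to a label mapped to joy, the joy intensity is (2/4)*100 = 50.0, while categories with no matches stay at 0.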
29  emoTextWNA/emotextWAF.senpy  Normal file
@@ -0,0 +1,29 @@
{
    "name": "EmoTextWAF",
    "module": "emotextWAF",
    "description": "Emotion classifier using rule-based classification.",
    "author": "@icorcuera @balkian",
    "version": "0.2",
    "extra_params": {
        "language": {
            "aliases": ["language", "l"],
            "required": true,
            "options": ["en"],
            "default": "en"
        }
    },
    "synsets_path": "/a-synsets.xml",
    "hierarchy_path": "/a-hierarchy.xml",
    "wn16_path": "/wordnet1.6/dict",
    "requirements": [
        "nltk>=3.0.5",
        "numpy>=1.8.2",
        "scipy>=0.14.0",
        "scikit-learn>=0.14.1",
        "lxml>=3.4.2",
        "pandas",
        "senpy",
        "pattern"
    ]
}
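Note that synsets_path, hierarchy_path and wn16_path are appended to the plugin's own directory by EmotionTextPlugin.__init__, so a-synsets.xml, a-hierarchy.xml and wordnet1.6/dict must sit next to the plugin code; these are exactly the files excluded by the .gitignore entries above.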
97  emoTextWNA/emotion.py  Normal file
@@ -0,0 +1,97 @@
# coding: utf-8
"""
Clement Michard (c) 2015
"""


class Emotion:
    """Defines an emotion in the WordNet-Affect hierarchy."""

    emotions = {}  # name to emotion (str -> Emotion)

    def __init__(self, name, parent_name=None):
        """Initializes an Emotion object.
        name -- name of the emotion (str)
        parent_name -- name of the parent emotion (str)
        """
        self.name = name
        self.parent = None
        self.level = 0
        self.children = []

        if parent_name:
            self.parent = Emotion.emotions[parent_name]
            self.parent.children.append(self)
            self.level = self.parent.level + 1

    def get_level(self, level):
        """Returns the ancestor of self at the given level, or self if already at or below it.
        level -- level in the hierarchy (int)
        """
        em = self
        while em.level > level and em.level >= 0:
            em = em.parent
        return em

    def __str__(self):
        """Returns the emotion as a string."""
        return self.name

    def nb_children(self):
        """Returns the size of the subtree rooted at this emotion, including itself."""
        return sum(child.nb_children() for child in self.children) + 1

    @staticmethod
    def printTree(emotion=None, indent="", last='updown'):
        """Prints the hierarchy of emotions as a tree.
        emotion -- root emotion (Emotion)
        """
        if not emotion:
            emotion = Emotion.emotions["root"]

        # Split the children into an "up" and a "down" half of roughly equal
        # subtree size, so the root label ends up vertically centred.
        size_branch = {child: child.nb_children() for child in emotion.children}
        leaves = sorted(emotion.children, key=lambda emotion: emotion.nb_children())
        up, down = [], []
        if leaves:
            while sum(size_branch[e] for e in down) < sum(size_branch[e] for e in leaves):
                down.append(leaves.pop())
            up = leaves

        for leaf in up:
            next_last = 'up' if up.index(leaf) == 0 else ''
            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'up' in last else '│', " " * len(emotion.name))
            Emotion.printTree(leaf, indent=next_indent, last=next_last)
        if last == 'up':
            start_shape = '┌'
        elif last == 'down':
            start_shape = '└'
        elif last == 'updown':
            start_shape = ' '
        else:
            start_shape = '├'
        if up:
            end_shape = '┤'
        elif down:
            end_shape = '┐'
        else:
            end_shape = ''
        print '{0}{1}{2}{3}'.format(indent, start_shape, emotion.name, end_shape)
        for leaf in down:
            next_last = 'down' if down.index(leaf) == len(down) - 1 else ''
            next_indent = '{0}{1}{2}'.format(indent, ' ' if 'down' in last else '│', " " * len(emotion.name))
            Emotion.printTree(leaf, indent=next_indent, last=next_last)
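A quick illustration of the hierarchy API, using a hypothetical three-node tree (the real tree is loaded from a-hierarchy.xml):

# Hypothetical mini-hierarchy; the names are illustrative, not the full WNA set.
Emotion.emotions["root"] = Emotion("root")
Emotion.emotions["negative-emotion"] = Emotion("negative-emotion", "root")
Emotion.emotions["sadness"] = Emotion("sadness", "negative-emotion")

sad = Emotion.emotions["sadness"]
print sad.level                               # 2
print sad.get_level(1).name                   # negative-emotion
print Emotion.emotions["root"].nb_children()  # 3: the node itself plus its subtree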
42  emoTextWNA/test_wna.py  Normal file
@@ -0,0 +1,42 @@
import os
import logging
logging.basicConfig()
try:
    import unittest.mock as mock
except ImportError:
    import mock
from senpy.extensions import Senpy
from flask import Flask
import unittest


class emoTextWAFTest(unittest.TestCase):

    def setUp(self):
        self.app = Flask("test_plugin")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        self.senpy.deactivate_plugin("EmoTextWAF", sync=True)

    def test_analyse(self):
        plugin = self.senpy.plugins["EmoTextWAF"]
        plugin.activate()

        texts = {'I hate you': 'anger',
                 'i am sad': 'sadness',
                 'i am happy with my marks': 'joy',
                 'This movie is scary': 'negative-fear'}

        for text in texts:
            response = plugin.analyse(input=text)
            expected = texts[text]
            emotionSet = response.entries[0].emotions[0]
            max_emotion = max(emotionSet['onyx:hasEmotion'], key=lambda x: x['onyx:hasEmotionIntensity'])
            assert max_emotion['onyx:hasEmotionCategory'] == expected

        plugin.deactivate()


if __name__ == '__main__':
    unittest.main()
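The test reads the emotion list as emotionSet['onyx:hasEmotion'] while the plugin appends through emotionSet.onyx__hasEmotion: senpy models are JSON-LD objects that expose each property both as a Python attribute (with __ standing in for the prefix colon) and as a dictionary key.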
92  emoTextWNA/wnaffect.py  Normal file
@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
"""
Clement Michard (c) 2015
"""

import os
import sys
import nltk
from emotion import Emotion
from nltk.corpus import WordNetCorpusReader
import xml.etree.ElementTree as ET


class WNAffect:
    """WordNet-Affect resource."""

    def __init__(self, wordnet16_dir, wn_domains_dir):
        """Initializes the WordNet-Affect object."""
        cwd = os.getcwd()
        nltk.data.path.append(cwd)
        wn16_path = "{0}/dict".format(wordnet16_dir)
        self.wn16 = WordNetCorpusReader(os.path.abspath("{0}/{1}".format(cwd, wn16_path)), nltk.data.find(wn16_path))
        self.flat_pos = {'NN': 'NN', 'NNS': 'NN', 'JJ': 'JJ', 'JJR': 'JJ', 'JJS': 'JJ',
                         'RB': 'RB', 'RBR': 'RB', 'RBS': 'RB',
                         'VB': 'VB', 'VBD': 'VB', 'VGB': 'VB', 'VBN': 'VB', 'VBP': 'VB', 'VBZ': 'VB'}
        self.wn_pos = {'NN': self.wn16.NOUN, 'JJ': self.wn16.ADJ, 'VB': self.wn16.VERB, 'RB': self.wn16.ADV}
        self._load_emotions(wn_domains_dir)
        self.synsets = self._load_synsets(wn_domains_dir)

    def _load_synsets(self, wn_domains_dir):
        """Returns a dictionary POS tag -> synset offset -> emotion (str -> int -> str)."""
        tree = ET.parse("{0}/a-synsets.xml".format(wn_domains_dir))
        root = tree.getroot()
        pos_map = {"noun": "NN", "adj": "JJ", "verb": "VB", "adv": "RB"}

        synsets = {}
        for pos in ["noun", "adj", "verb", "adv"]:
            tag = pos_map[pos]
            synsets[tag] = {}
            for elem in root.findall(".//{0}-syn-list//{0}-syn".format(pos)):
                offset = int(elem.get("id")[2:])
                if not offset:
                    continue
                if elem.get("categ"):
                    synsets[tag][offset] = Emotion.emotions[elem.get("categ")] if elem.get("categ") in Emotion.emotions else None
                elif elem.get("noun-id"):
                    synsets[tag][offset] = synsets[pos_map["noun"]][int(elem.get("noun-id")[2:])]
        return synsets

    def _load_emotions(self, wn_domains_dir):
        """Loads the hierarchy of emotions from the WordNet-Affect xml."""
        tree = ET.parse("{0}/a-hierarchy.xml".format(wn_domains_dir))
        root = tree.getroot()
        for elem in root.findall("categ"):
            name = elem.get("name")
            if name == "root":
                Emotion.emotions["root"] = Emotion("root")
            else:
                Emotion.emotions[name] = Emotion(name, elem.get("isa"))

    def get_emotion(self, word, pos):
        """Returns the emotion of the word.
        word -- the word (str)
        pos -- part-of-speech (str)
        """
        if pos in self.flat_pos:
            pos = self.flat_pos[pos]
            synsets = self.wn16.synsets(word, self.wn_pos[pos])
            if synsets:
                offset = synsets[0].offset()
                if offset in self.synsets[pos]:
                    return self.synsets[pos][offset]
        return None


if __name__ == "__main__":
    wordnet16, wndomains32, word, pos = sys.argv[1:5]
    wna = WNAffect(wordnet16, wndomains32)
    print wna.get_emotion(word, pos)
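Standalone usage follows the __main__ block, e.g. python wnaffect.py wordnet-1.6 wn-domains happy JJ, where the first two arguments are illustrative paths to a WordNet 1.6 installation and to the directory holding the WordNet-Affect XML files; it prints the Emotion found for the word, or None.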
@@ -12,5 +12,6 @@
       "default": 42
     }
   },
+  "requirements": ["noop"],
   "custom_attribute": "42"
 }
23  example-plugin/test_example.py  Normal file
@@ -0,0 +1,23 @@
import unittest
from flask import Flask
import os

from senpy.extensions import Senpy


class emoTextWAFTest(unittest.TestCase):

    def setUp(self):
        self.app = Flask("Example")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        self.senpy.deactivate_plugin("ExamplePlugin", sync=True)

    def test_analyse(self):
        assert len(self.senpy.plugins.keys()) == 1
        assert True


if __name__ == '__main__':
    unittest.main()