mirror of
https://github.com/gsi-upm/senpy
synced 2024-11-13 04:02:29 +00:00
Squashed 'sentiment-vader/' content from commit ddb7432
git-subtree-dir: sentiment-vader git-subtree-split: ddb7432d260fd2d8fca719f1b3ee46117019f475
This commit is contained in:
commit
4a0b6c1bf4
40
README.md
Normal file
40
README.md
Normal file
@ -0,0 +1,40 @@
|
||||
# Sentiment-vader plugin
|
||||
|
||||
=========
|
||||
|
||||
Vader is a plugin developed at GSI UPM for sentiment analysis.
|
||||
|
||||
This plugin was developed using the vaderSentiment module, which is described in the paper:
|
||||
VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text
|
||||
C.J. Hutto and Eric Gilbert
|
||||
Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
|
||||
|
||||
If you use this plugin in your research, please cite the above paper.
|
||||
|
||||
For more information about the functionality, check the official repository
|
||||
|
||||
https://github.com/cjhutto/vaderSentiment
|
||||
|
||||
The response of this plugin uses [Marl ontology](https://www.gsi.dit.upm.es/ontologies/marl/) developed at GSI UPM for semantic web.
|
||||
|
||||
## Usage
|
||||
|
||||
Params accepted:
|
||||
- Language: es (Spanish), en (English).
|
||||
- Input: Text to analyse.
|
||||
|
||||
|
||||
Example request:
|
||||
```
|
||||
http://senpy.cluster.gsi.dit.upm.es/api/?algo=sentiment-vader&language=en&input=I%20love%20Madrid
|
||||
```
|
||||
|
||||
Example response: This plugin follows the standard senpy plugin response format. For more information, please visit the [senpy documentation](http://senpy.readthedocs.io), specifically the NIF API section.
|
||||
|
||||
This plugin supports **python3**
|
||||
|
||||
![alt GSI Logo][logoGSI]
|
||||
|
||||
[logoGSI]: http://www.gsi.dit.upm.es/images/stories/logos/gsi.png "GSI Logo"
|
||||
|
||||
========
|
16
README.txt
Normal file
16
README.txt
Normal file
@ -0,0 +1,16 @@
|
||||
==========
|
||||
|
||||
This README file describes the plugin vaderSentiment.
|
||||
|
||||
This plugin was developed using the vaderSentiment module, which is described in the paper:
|
||||
VADER: A Parsimonious Rule-based Model for Sentiment Analysis of Social Media Text
|
||||
C.J. Hutto and Eric Gilbert
|
||||
Eighth International Conference on Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
|
||||
|
||||
If you use this plugin in your research, please cite the above paper.
|
||||
|
||||
For more information about the functionality, check the official repository
|
||||
|
||||
https://github.com/cjhutto/vaderSentiment
|
||||
|
||||
========
|
49
sentiment-vader.py
Normal file
49
sentiment-vader.py
Normal file
@ -0,0 +1,49 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from vaderSentiment import sentiment
|
||||
from senpy.plugins import SentimentPlugin, SenpyPlugin
|
||||
from senpy.models import Results, Sentiment, Entry
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class vaderSentimentPlugin(SentimentPlugin):
    """Senpy sentiment plugin that scores an entry's text with sentiment()."""

    def analyse_entry(self, entry, params):
        """
        Attach Marl sentiment annotations to ``entry`` and yield it.

        One Sentiment is built per polarity (positive, negative, neutral),
        each carrying the matching score as its algorithm confidence.
        When params['aggregate'] equals the string 'true', only the first
        opinion (in positive, negative, neutral order) whose confidence
        equals the highest score is appended; otherwise all three are.
        """
        logger.debug("Analysing with params {}".format(params))

        text_input = entry.get("text", None)
        aggregate = params['aggregate']

        score = sentiment(text_input)

        # (sentiment id, Marl polarity, key into the score dict), in the
        # order the opinions are created and appended.
        polarity_table = (
            ("Opinion_positive", "marl:Positive", 'pos'),
            ("Opinion_negative", "marl:Negative", 'neg'),
            ("Opinion_neutral", "marl:Neutral", 'neu'),
        )
        opinions = [
            Sentiment(id=opinion_id,
                      marl__hasPolarity=polarity,
                      marl__algorithmConfidence=score[score_key])
            for opinion_id, polarity, score_key in polarity_table
        ]

        if aggregate == 'true':
            confident = max(score['neg'], score['neu'], score['pos'])
            # First opinion matching the top score wins; None if none match.
            res = next(
                (opinion for opinion in opinions
                 if opinion.marl__algorithmConfidence == confident),
                None)
            entry.sentiments.append(res)
        else:
            for opinion in opinions:
                entry.sentiments.append(opinion)

        yield entry
|
25
sentiment-vader.senpy
Normal file
25
sentiment-vader.senpy
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
"name": "sentiment-vader",
|
||||
"module": "sentiment-vader",
|
||||
"description": "Sentiment classifier using vaderSentiment module. Params accepted: Language: {en, es}. The output uses Marl ontology developed at GSI UPM for semantic web.",
|
||||
"author": "@icorcuera",
|
||||
"version": "0.1",
|
||||
"extra_params": {
|
||||
"language": {
|
||||
"@id": "lang_rand",
|
||||
"aliases": ["language", "l"],
|
||||
"required": false,
|
||||
"options": ["es", "en", "auto"]
|
||||
},
|
||||
|
||||
"aggregate": {
|
||||
"aliases": ["aggregate","agg"],
|
||||
"options": ["true", "false"],
|
||||
"required": false,
|
||||
"default": false
|
||||
|
||||
}
|
||||
|
||||
},
|
||||
"requirements": {}
|
||||
}
|
44
test.py
Normal file
44
test.py
Normal file
@ -0,0 +1,44 @@
|
||||
import os
|
||||
import logging
|
||||
logging.basicConfig()
|
||||
try:
|
||||
import unittest.mock as mock
|
||||
except ImportError:
|
||||
import mock
|
||||
from senpy.extensions import Senpy
|
||||
from flask import Flask
|
||||
from flask.ext.testing import TestCase
|
||||
import unittest
|
||||
|
||||
class vaderTest(unittest.TestCase):
    """Integration test: load the plugin through senpy and check polarities."""

    # Name under which the plugin is registered (the "name" field of
    # sentiment-vader.senpy).
    plugin_name = "sentiment-vader"

    def setUp(self):
        # Load only the plugins that live next to this test file.
        self.app = Flask("test_plugin")
        self.dir = os.path.join(os.path.dirname(__file__))
        self.senpy = Senpy(plugin_folder=self.dir, default_plugins=False)
        self.senpy.init_app(self.app)

    def tearDown(self):
        self.senpy.deactivate_plugin(self.plugin_name, sync=True)

    def test_analyse(self):
        """The most confident sentiment must carry the expected polarity."""
        plugin = self.senpy.plugins[self.plugin_name]
        plugin.activate()

        texts = {'I am tired :(': 'marl:Negative',
                 'I love pizza': 'marl:Positive',
                 'I like going to the cinema :)': 'marl:Positive',
                 'This cake is disgusting': 'marl:Negative'}

        for text, expected in texts.items():
            response = plugin.analyse(input=text)
            sentimentSet = response.entries[0].sentiments

            # The plugin stores its scores as marl:algorithmConfidence; it
            # never sets marl:polarityValue.
            max_sentiment = max(sentimentSet,
                                key=lambda x: x['marl:algorithmConfidence'])
            assert max_sentiment['marl:hasPolarity'] == expected

        plugin.deactivate()
|
||||
|
||||
# Run the test case when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
|
363
vaderSentiment.py
Normal file
363
vaderSentiment.py
Normal file
@ -0,0 +1,363 @@
|
||||
#!/usr/bin/python
|
||||
# coding: utf-8
|
||||
'''
|
||||
Created on July 04, 2013
|
||||
@author: C.J. Hutto
|
||||
|
||||
Citation Information
|
||||
|
||||
If you use any of the VADER sentiment analysis tools
|
||||
(VADER sentiment lexicon or Python code for rule-based sentiment
|
||||
analysis engine) in your work or research, please cite the paper.
|
||||
For example:
|
||||
|
||||
Hutto, C.J. & Gilbert, E.E. (2014). VADER: A Parsimonious Rule-based Model for
|
||||
Sentiment Analysis of Social Media Text. Eighth International Conference on
|
||||
Weblogs and Social Media (ICWSM-14). Ann Arbor, MI, June 2014.
|
||||
'''
|
||||
|
||||
import os, math, re, sys, fnmatch, string
|
||||
# reload() is a builtin on Python 2 only. Keep the original call there, but do
# not let its absence break the import of this module on Python 3.
try:
    reload(sys)
except NameError:
    pass
|
||||
|
||||
def make_lex_dict(f):
    """
    Load a lexicon file into a ``{token: valence}`` dictionary.

    ``f`` is the path of a text file with one entry per line, fields
    separated by tabs: the first field is the token, the second its valence
    rating (converted with float()). Further fields are ignored and blank
    lines are skipped.

    Raises IOError/OSError if the file cannot be opened and ValueError if a
    non-blank line has no valid second field.
    """
    lexicon = {}
    # NOTE(review): opened with the platform default encoding, as before;
    # confirm the lexicon's encoding if non-ASCII tokens matter.
    with open(f) as lex_file:
        for line in lex_file:
            fields = line.strip().split('\t')
            if fields == ['']:
                continue
            word, measure = fields[0:2]
            lexicon[word] = float(measure)
    return lexicon
|
||||
|
||||
# Empirically derived valence ratings for words, emoticons, slang, swear
# words, acronyms/initialisms.
f = 'vader_sentiment_lexicon.txt'
try:
    # First try the lexicon relative to the current working directory ...
    word_valence_dict = make_lex_dict(f)
except (IOError, OSError):
    # ... and fall back to the copy that sits next to this module. Only I/O
    # failures trigger the fallback, so a malformed lexicon is reported
    # instead of being silently masked.
    f = os.path.join(os.path.dirname(__file__), 'vader_sentiment_lexicon.txt')
    word_valence_dict = make_lex_dict(f)

# for removing punctuation
regex_remove_punctuation = re.compile('[%s]' % re.escape(string.punctuation))
|
||||
|
||||
def sentiment(text):
    """
    Score the sentiment of the input text.

    Returns a dict of rounded floats:
      "pos", "neg", "neu" -- shares of the text rated positive, negative and
                             neutral (3 decimals);
      "compound"          -- the summed valence of all tokens, adjusted for
                             '!' and '?' emphasis and squashed into (-1, 1)
                             by normalize() (4 decimals). Positive values are
                             positive valence, negative values are negative
                             valence.
    All four values are 0.0 when no tokens remain after clean-up.
    """
    # Split on whitespace only: does not separate words from adjacent
    # punctuation, which keeps emoticons and contractions intact.
    wordsAndEmoticons = str(text).split()
    # Same text without punctuation (loses emoticons and contractions).
    text_mod = regex_remove_punctuation.sub('', text)
    # Drop empty items and single letter "words" like 'a' and 'I'. A new list
    # is built: removing from a list while iterating it skips elements.
    wordsOnly = [word for word in str(text_mod).split() if len(word) > 1]

    # Remove adjacent and redundant punctuation from wordsAndEmoticons while
    # keeping emoticons and contractions: any token that is a known word with
    # one of these marks glued to its front or back is replaced by the word.
    puncList = [".", "!", "?", ",", ";", ":", "-", "'", "\"",
                "!!", "!!!", "??", "???", "?!?", "!?!", "?!?!", "!?!?"]
    for word in wordsOnly:
        for p in puncList:
            for decorated in (p + word, word + p):
                while decorated in wordsAndEmoticons:
                    wordsAndEmoticons[wordsAndEmoticons.index(decorated)] = word
    # Drop residual empty items and single letter "words".
    wordsAndEmoticons = [word for word in wordsAndEmoticons if len(word) > 1]

    # check for negation
    negate = ["aint", "arent", "cannot", "cant", "couldnt", "darent", "didnt", "doesnt",
              "ain't", "aren't", "can't", "couldn't", "daren't", "didn't", "doesn't",
              "dont", "hadnt", "hasnt", "havent", "isnt", "mightnt", "mustnt", "neither",
              "don't", "hadn't", "hasn't", "haven't", "isn't", "mightn't", "mustn't",
              "neednt", "needn't", "never", "none", "nope", "nor", "not", "nothing", "nowhere",
              "oughtnt", "shant", "shouldnt", "uhuh", "wasnt", "werent",
              "oughtn't", "shan't", "shouldn't", "uh-uh", "wasn't", "weren't",
              "without", "wont", "wouldnt", "won't", "wouldn't", "rarely", "seldom", "despite"]

    def negated(words, nWords=None, includeNT=True):
        """Return True if ``words`` contains a negation word.

        ``nWords`` are extra negation words checked before the built-in
        list. A fresh list is built on every call; the former mutable
        default kept growing by the whole negation list per call.
        """
        candidates = list(nWords) if nWords else []
        candidates.extend(negate)
        for word in candidates:
            if word in words:
                return True
        if includeNT:
            for word in words:
                if "n't" in word:
                    return True
        if "least" in words:
            least_index = words.index("least")
            if least_index > 0 and words[least_index-1] != "at":
                return True
        return False

    def normalize(score, alpha=15):
        # normalize the score to be between -1 and 1 using an alpha that
        # approximates the max expected value
        normScore = score/math.sqrt(((score*score) + alpha))
        return normScore

    def wildCardMatch(patternWithWildcard, listOfStringsToMatchAgainst):
        # Not called within this function; kept from the original.
        listOfMatches = fnmatch.filter(listOfStringsToMatchAgainst, patternWithWildcard)
        return listOfMatches

    def isALLCAP_differential(wordList):
        # True when some, but not all, of the words are written in ALLCAPS.
        countALLCAPS = sum(1 for w in wordList if str(w).isupper())
        cap_differential = len(wordList) - countALLCAPS
        return 0 < cap_differential < len(wordList)

    isCap_diff = isALLCAP_differential(wordsAndEmoticons)

    b_incr = 0.293  # (empirically derived mean sentiment intensity rating increase for booster words)
    b_decr = -0.293
    # booster/dampener 'intensifiers' or 'degree adverbs' http://en.wiktionary.org/wiki/Category:English_degree_adverbs
    booster_dict = {"absolutely": b_incr, "amazingly": b_incr, "awfully": b_incr, "completely": b_incr, "considerably": b_incr,
                    "decidedly": b_incr, "deeply": b_incr, "effing": b_incr, "enormously": b_incr,
                    "entirely": b_incr, "especially": b_incr, "exceptionally": b_incr, "extremely": b_incr,
                    "fabulously": b_incr, "flipping": b_incr, "flippin": b_incr,
                    "fricking": b_incr, "frickin": b_incr, "frigging": b_incr, "friggin": b_incr, "fully": b_incr, "fucking": b_incr,
                    "greatly": b_incr, "hella": b_incr, "highly": b_incr, "hugely": b_incr, "incredibly": b_incr,
                    "intensely": b_incr, "majorly": b_incr, "more": b_incr, "most": b_incr, "particularly": b_incr,
                    "purely": b_incr, "quite": b_incr, "really": b_incr, "remarkably": b_incr,
                    "so": b_incr, "substantially": b_incr,
                    "thoroughly": b_incr, "totally": b_incr, "tremendously": b_incr,
                    "uber": b_incr, "unbelievably": b_incr, "unusually": b_incr, "utterly": b_incr,
                    "very": b_incr,

                    "almost": b_decr, "barely": b_decr, "hardly": b_decr, "just enough": b_decr,
                    "kind of": b_decr, "kinda": b_decr, "kindof": b_decr, "kind-of": b_decr,
                    "less": b_decr, "little": b_decr, "marginally": b_decr, "occasionally": b_decr, "partly": b_decr,
                    "scarcely": b_decr, "slightly": b_decr, "somewhat": b_decr,
                    "sort of": b_decr, "sorta": b_decr, "sortof": b_decr, "sort-of": b_decr}

    # The definitions below do not depend on the current token, so they are
    # created once here instead of once per loop iteration.
    c_incr = 0.733  # (empirically derived mean sentiment intensity rating increase for using ALLCAPs to emphasize a word)
    n_scalar = -0.74  # factor applied to the valence of a negated word

    # special case idioms using a sentiment-laden keyword known to SAGE
    special_case_idioms = {"the shit": 3, "the bomb": 3, "bad ass": 1.5, "yeah right": -2,
                           "cut the mustard": 2, "kiss of death": -1.5, "hand to mouth": -2}
    # future work: consider other sentiment-laden idioms
    #other_idioms = {"back handed": -2, "blow smoke": -2, "blowing smoke": -2, "upper hand": 1, "break a leg": 2,
    #                "cooking with gas": 2, "in the black": 2, "in the red": -2, "on the ball": 2,"under the weather": -2}

    def scalar_inc_dec(word, valence):
        # Amount by which a preceding booster/dampener ``word`` changes
        # ``valence``; 0.0 when ``word`` is not in booster_dict.
        scalar = 0.0
        word_lower = str(word).lower()
        if word_lower in booster_dict:
            scalar = booster_dict[word_lower]
            if valence < 0:
                scalar *= -1
            # check if booster/dampener word is in ALLCAPS (while others aren't)
            if str(word).isupper() and isCap_diff:
                if valence > 0:
                    scalar += c_incr
                else:
                    scalar -= c_incr
        return scalar

    sentiments = []
    # enumerate() gives the true position of every token; list.index() would
    # return the first occurrence and mis-handle repeated words.
    for i, item in enumerate(wordsAndEmoticons):
        v = 0
        item_lowercase = str(item).lower()
        # Boosters, and the "kind" of "kind of", carry no valence themselves.
        if (i < len(wordsAndEmoticons)-1 and item_lowercase == "kind" and
                str(wordsAndEmoticons[i+1]).lower() == "of") or item_lowercase in booster_dict:
            sentiments.append(v)
            continue
        if item_lowercase in word_valence_dict:
            # get the sentiment valence
            v = float(word_valence_dict[item_lowercase])

            # check if sentiment laden word is in ALLCAPS (while others aren't)
            if str(item).isupper() and isCap_diff:
                if v > 0:
                    v += c_incr
                else:
                    v -= c_incr

            # check if the preceding words increase, decrease, or negate/nullify the valence
            if i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict:
                s1 = scalar_inc_dec(wordsAndEmoticons[i-1], v)
                v = v+s1
                if negated([wordsAndEmoticons[i-1]]):
                    v = v*n_scalar
            if i > 1 and str(wordsAndEmoticons[i-2]).lower() not in word_valence_dict:
                s2 = scalar_inc_dec(wordsAndEmoticons[i-2], v)
                if s2 != 0:
                    s2 = s2*0.95
                v = v+s2
                # check for special use of 'never' as valence modifier instead of negation
                if wordsAndEmoticons[i-2] == "never" and (wordsAndEmoticons[i-1] == "so" or wordsAndEmoticons[i-1] == "this"):
                    v = v*1.5
                # otherwise, check for negation/nullification
                elif negated([wordsAndEmoticons[i-2]]):
                    v = v*n_scalar
            if i > 2 and str(wordsAndEmoticons[i-3]).lower() not in word_valence_dict:
                s3 = scalar_inc_dec(wordsAndEmoticons[i-3], v)
                if s3 != 0:
                    s3 = s3*0.9
                v = v+s3
                # check for special use of 'never' as valence modifier instead of negation
                # NOTE(review): 'and' binds tighter than 'or', so the last
                # clause applies even without "never". Kept as-is because
                # changing it alters the scores; confirm the intent.
                if wordsAndEmoticons[i-3] == "never" and \
                   (wordsAndEmoticons[i-2] == "so" or wordsAndEmoticons[i-2] == "this") or \
                   (wordsAndEmoticons[i-1] == "so" or wordsAndEmoticons[i-1] == "this"):
                    v = v*1.25
                # otherwise, check for negation/nullification
                elif negated([wordsAndEmoticons[i-3]]):
                    v = v*n_scalar

                # check for special case idioms; i > 2 here, so the three
                # preceding tokens are guaranteed to exist
                onezero = "{} {}".format(str(wordsAndEmoticons[i-1]), str(wordsAndEmoticons[i]))
                twoonezero = "{} {} {}".format(str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]), str(wordsAndEmoticons[i]))
                twoone = "{} {}".format(str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]))
                threetwoone = "{} {} {}".format(str(wordsAndEmoticons[i-3]), str(wordsAndEmoticons[i-2]), str(wordsAndEmoticons[i-1]))
                threetwo = "{} {}".format(str(wordsAndEmoticons[i-3]), str(wordsAndEmoticons[i-2]))
                if onezero in special_case_idioms:
                    v = special_case_idioms[onezero]
                elif twoonezero in special_case_idioms:
                    v = special_case_idioms[twoonezero]
                elif twoone in special_case_idioms:
                    v = special_case_idioms[twoone]
                elif threetwoone in special_case_idioms:
                    v = special_case_idioms[threetwoone]
                elif threetwo in special_case_idioms:
                    v = special_case_idioms[threetwo]
                if len(wordsAndEmoticons)-1 > i:
                    zeroone = "{} {}".format(str(wordsAndEmoticons[i]), str(wordsAndEmoticons[i+1]))
                    if zeroone in special_case_idioms:
                        v = special_case_idioms[zeroone]
                if len(wordsAndEmoticons)-1 > i+1:
                    # Three placeholders for three words: with only two the
                    # third word was dropped and no trigram could match.
                    zeroonetwo = "{} {} {}".format(str(wordsAndEmoticons[i]), str(wordsAndEmoticons[i+1]), str(wordsAndEmoticons[i+2]))
                    if zeroonetwo in special_case_idioms:
                        v = special_case_idioms[zeroonetwo]

                # check for booster/dampener bi-grams such as 'sort of' or 'kind of'
                if threetwo in booster_dict or twoone in booster_dict:
                    v = v+b_decr

            # check for negation case using "least"
            if i > 1 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict \
               and str(wordsAndEmoticons[i-1]).lower() == "least":
                if (str(wordsAndEmoticons[i-2]).lower() != "at" and str(wordsAndEmoticons[i-2]).lower() != "very"):
                    v = v*n_scalar
            elif i > 0 and str(wordsAndEmoticons[i-1]).lower() not in word_valence_dict \
                    and str(wordsAndEmoticons[i-1]).lower() == "least":
                v = v*n_scalar
        sentiments.append(v)

    # check for modification in sentiment due to contrastive conjunction 'but':
    # scores before it are halved, scores after it are multiplied by 1.5
    if 'but' in wordsAndEmoticons or 'BUT' in wordsAndEmoticons:
        if 'but' in wordsAndEmoticons:
            bi = wordsAndEmoticons.index('but')
        else:
            bi = wordsAndEmoticons.index('BUT')
        # sentiments holds one score per token, so positions line up with bi.
        for si, s in enumerate(sentiments):
            if si < bi:
                sentiments[si] = s*0.5
            elif si > bi:
                sentiments[si] = s*1.5

    if sentiments:
        sum_s = float(sum(sentiments))

        # check for added emphasis resulting from exclamation points (up to 4 of them)
        ep_count = str(text).count("!")
        if ep_count > 4:
            ep_count = 4
        ep_amplifier = ep_count*0.292  # (empirically derived mean sentiment intensity rating increase for exclamation points)
        if sum_s > 0:
            sum_s += ep_amplifier
        elif sum_s < 0:
            sum_s -= ep_amplifier

        # check for added emphasis resulting from question marks (2 or 3+)
        qm_count = str(text).count("?")
        qm_amplifier = 0
        if qm_count > 1:
            if qm_count <= 3:
                qm_amplifier = qm_count*0.18
            else:
                qm_amplifier = 0.96
            if sum_s > 0:
                sum_s += qm_amplifier
            elif sum_s < 0:
                sum_s -= qm_amplifier

        compound = normalize(sum_s)

        # want separate positive versus negative sentiment scores
        pos_sum = 0.0
        neg_sum = 0.0
        neu_count = 0
        for sentiment_score in sentiments:
            if sentiment_score > 0:
                pos_sum += (float(sentiment_score) + 1)  # compensates for neutral words that are counted as 1
            elif sentiment_score < 0:
                neg_sum += (float(sentiment_score) - 1)  # when used with math.fabs(), compensates for neutrals
            else:
                neu_count += 1

        # The punctuation emphasis goes to whichever side dominates.
        if pos_sum > math.fabs(neg_sum):
            pos_sum += (ep_amplifier+qm_amplifier)
        elif pos_sum < math.fabs(neg_sum):
            neg_sum -= (ep_amplifier+qm_amplifier)

        total = pos_sum + math.fabs(neg_sum) + neu_count
        pos = math.fabs(pos_sum / total)
        neg = math.fabs(neg_sum / total)
        neu = math.fabs(neu_count / total)

    else:
        compound = 0.0
        pos = 0.0
        neg = 0.0
        neu = 0.0

    s = {"neg": round(neg, 3),
         "neu": round(neu, 3),
         "pos": round(pos, 3),
         "compound": round(compound, 4)}
    return s
|
||||
|
||||
|
||||
# Demo: score a set of example sentences and print one result per line.
if __name__ == '__main__':
    # --- examples -------
    sentences = [
        "VADER is smart, handsome, and funny.",       # positive sentence example
        "VADER is smart, handsome, and funny!",       # punctuation emphasis handled correctly (sentiment intensity adjusted)
        "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
        "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
        "VADER is VERY SMART, handsome, and FUNNY!!!",  # combination of signals - VADER appropriately adjusts intensity
        "VADER is VERY SMART, really handsome, and INCREDIBLY FUNNY!!!",  # booster words & punctuation make this close to ceiling for score
        "The book was good.",          # positive sentence
        "The book was kind of good.",  # qualified positive sentence is handled correctly (intensity adjusted)
        "The plot was good, but the characters are uncompelling and the dialog is not great.",  # mixed negation sentence
        "A really bad, horrible book.",        # negative sentence with booster words
        "At least it isn't a horrible book.",  # negated negative sentence with contraction
        ":) and :D",   # emoticons handled
        "",            # an empty string is correctly handled
        "Today sux",   # negative slang handled
        "Today sux!",  # negative slang with punctuation emphasis handled
        "Today SUX!",  # negative slang with capitalization emphasis
        "Today kinda sux! But I'll get by, lol"  # mixed sentiment example with slang and constrastive conjunction "but"
    ]
    paragraph = ("It was one of the worst movies I've seen, despite good reviews. "
                 "Unbelievably bad acting!! Poor direction. VERY poor production. "
                 "The movie was bad. Very bad movie. VERY bad movie. VERY BAD movie. VERY BAD movie!")

    # nltk is only needed for this demo, to split the paragraph into sentences.
    from nltk import tokenize
    lines_list = tokenize.sent_tokenize(paragraph)
    sentences.extend(lines_list)

    tricky_sentences = [
        "Most automated sentiment analysis tools are shit.",
        "VADER sentiment analysis is the shit.",
        "Sentiment analysis has never been good.",
        "Sentiment analysis with VADER has never been this good.",
        "Warren Beatty has never been so entertaining.",
        "I won't say that the movie is astounding and I wouldn't claim that the movie is too banal either.",
        "I like to hate Michael Bay films, but I couldn't fault this one",
        "It's one thing to watch an Uwe Boll film, but another thing entirely to pay for it",
        "The movie was too good",
        "This movie was actually neither that funny, nor super witty.",
        "This movie doesn't care about cleverness, wit or any other kind of intelligent humor.",
        "Those who find ugly meanings in beautiful things are corrupt without being charming.",
        "There are slow and repetitive parts, BUT it has just enough spice to keep it interesting.",
        "The script is not fantastic, but the acting is decent and the cinematography is EXCELLENT!",
        "Roger Dodger is one of the most compelling variations on this theme.",
        "Roger Dodger is one of the least compelling variations on this theme.",
        "Roger Dodger is at least compelling as a variation on the theme.",
        "they fall in love with the product",
        "but then it breaks",
        "usually around the time the 90 day warranty expires",
        "the twin towers collapsed today",
        "However, Mr. Carter solemnly argues, his client carried out the kidnapping under orders and in the ''least offensive way possible.''"
    ]
    sentences.extend(tricky_sentences)
    for sentence in sentences:
        ss = sentiment(sentence)
        # One print call with a single string behaves the same on Python 2
        # and 3 and yields the former "<sentence> \t<scores>" line.
        print("{} \t{}".format(sentence, ss))

    print("\n\n Done!")
|
7517
vader_sentiment_lexicon.txt
Normal file
7517
vader_sentiment_lexicon.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user