1
0
mirror of https://github.com/gsi-upm/sitc synced 2024-11-24 15:32:29 +00:00

Merge pull request #5 from gsi-upm/dveni-patch-2

Update 4_1_Lexical_Processing.ipynb
This commit is contained in:
Carlos A. Iglesias 2019-11-27 10:19:12 +01:00 committed by GitHub
commit 75f08ea170
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -326,7 +326,7 @@
"def preprocess(words, type='doc'):\n", "def preprocess(words, type='doc'):\n",
" if (type == 'tweet'):\n", " if (type == 'tweet'):\n",
" tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)\n", " tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)\n",
" tokens = tknzr.tokenize(tweet)\n", " tokens = tknzr.tokenize(words)\n",
" else:\n", " else:\n",
" tokens = nltk.word_tokenize(words.lower())\n", " tokens = nltk.word_tokenize(words.lower())\n",
" porter = nltk.PorterStemmer()\n", " porter = nltk.PorterStemmer()\n",