mirror of
https://github.com/gsi-upm/sitc
synced 2024-11-25 07:52:27 +00:00
Update 4_1_Lexical_Processing.ipynb
This commit is contained in:
parent
e70689072f
commit
19ea5dff09
@@ -326,7 +326,7 @@
|
|||||||
"def preprocess(words, type='doc'):\n",
|
"def preprocess(words, type='doc'):\n",
|
||||||
" if (type == 'tweet'):\n",
|
" if (type == 'tweet'):\n",
|
||||||
" tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)\n",
|
" tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)\n",
|
||||||
" tokens = tknzr.tokenize(tweet)\n",
|
" tokens = tknzr.tokenize(words)\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" tokens = nltk.word_tokenize(words.lower())\n",
|
" tokens = nltk.word_tokenize(words.lower())\n",
|
||||||
" porter = nltk.PorterStemmer()\n",
|
" porter = nltk.PorterStemmer()\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user