mirror of
https://github.com/gsi-upm/sitc
synced 2025-09-18 12:52:20 +00:00
Compare commits
3 Commits
2c8238f1f2
...
dveni-patc
Author | SHA1 | Date | |
---|---|---|---|
|
19ea5dff09 | ||
|
e70689072f | ||
|
344e054ba4 |
@@ -437,7 +437,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"#Show mean Age, mean SibSp, and number of passengers older than 25 that survived, grouped by Passenger Class and Sex\n",
|
"#Show mean Age, mean SibSp, and number of passengers older than 25 that survived, grouped by Passenger Class and Sex\n",
|
||||||
"df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].agg({'Age': np.mean, \n",
|
"df[(df.Age > 25 & (df.Survived == 1))].groupby(['Pclass', 'Sex'])['Age','SibSp','Survived'].agg({'Age': np.mean, \n",
|
||||||
" 'SibSp': np.mean, 'Survived': np.size})"
|
" 'SibSp': np.mean, 'Survived': np.sum})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@@ -326,7 +326,7 @@
|
|||||||
"def preprocess(words, type='doc'):\n",
|
"def preprocess(words, type='doc'):\n",
|
||||||
" if (type == 'tweet'):\n",
|
" if (type == 'tweet'):\n",
|
||||||
" tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)\n",
|
" tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)\n",
|
||||||
" tokens = tknzr.tokenize(tweet)\n",
|
" tokens = tknzr.tokenize(words)\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
" tokens = nltk.word_tokenize(words.lower())\n",
|
" tokens = nltk.word_tokenize(words.lower())\n",
|
||||||
" porter = nltk.PorterStemmer()\n",
|
" porter = nltk.PorterStemmer()\n",
|
||||||
|
Reference in New Issue
Block a user