mirror of
https://github.com/gsi-upm/sitc
synced 2025-01-07 03:31:28 +00:00
Compare commits
5 Commits
3d6d96dd8a
...
ae8d3d3ba2
Author | SHA1 | Date | |
---|---|---|---|
|
ae8d3d3ba2 | ||
|
2ba0e2f3d9 | ||
|
c9114cc796 | ||
|
b80c097362 | ||
|
161cd8492b |
@ -46,7 +46,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -209,12 +209,315 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>PassengerId</th>\n",
|
||||
" <th>Survived</th>\n",
|
||||
" <th>Pclass</th>\n",
|
||||
" <th>Name</th>\n",
|
||||
" <th>Sex</th>\n",
|
||||
" <th>Age</th>\n",
|
||||
" <th>SibSp</th>\n",
|
||||
" <th>Parch</th>\n",
|
||||
" <th>Ticket</th>\n",
|
||||
" <th>Fare</th>\n",
|
||||
" <th>Cabin</th>\n",
|
||||
" <th>Embarked</th>\n",
|
||||
" <th>FamilySize</th>\n",
|
||||
" <th>AgeGroup</th>\n",
|
||||
" <th>Deck</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Braund, Mr. Owen Harris</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>A/5 21171</td>\n",
|
||||
" <td>7.2500</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>X</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>38.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>PC 17599</td>\n",
|
||||
" <td>71.2833</td>\n",
|
||||
" <td>C85</td>\n",
|
||||
" <td>C</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>C</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Heikkinen, Miss. Laina</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>26.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>STON/O2. 3101282</td>\n",
|
||||
" <td>7.9250</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>X</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>35.0</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>113803</td>\n",
|
||||
" <td>53.1000</td>\n",
|
||||
" <td>C123</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>C</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Allen, Mr. William Henry</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>35.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>373450</td>\n",
|
||||
" <td>8.0500</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>X</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>886</th>\n",
|
||||
" <td>887</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Montvila, Rev. Juozas</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>27.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>211536</td>\n",
|
||||
" <td>13.0000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>X</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>887</th>\n",
|
||||
" <td>888</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Graham, Miss. Margaret Edith</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>19.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>112053</td>\n",
|
||||
" <td>30.0000</td>\n",
|
||||
" <td>B42</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>B</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>888</th>\n",
|
||||
" <td>889</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
|
||||
" <td>female</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>W./C. 6607</td>\n",
|
||||
" <td>23.4500</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>X</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>889</th>\n",
|
||||
" <td>890</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Behr, Mr. Karl Howell</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>26.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>111369</td>\n",
|
||||
" <td>30.0000</td>\n",
|
||||
" <td>C148</td>\n",
|
||||
" <td>C</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>C</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>890</th>\n",
|
||||
" <td>891</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Dooley, Mr. Patrick</td>\n",
|
||||
" <td>male</td>\n",
|
||||
" <td>32.0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>370376</td>\n",
|
||||
" <td>7.7500</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Q</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>X</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>891 rows × 15 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" PassengerId Survived Pclass \\\n",
|
||||
"0 1 0 3 \n",
|
||||
"1 2 1 1 \n",
|
||||
"2 3 1 3 \n",
|
||||
"3 4 1 1 \n",
|
||||
"4 5 0 3 \n",
|
||||
".. ... ... ... \n",
|
||||
"886 887 0 2 \n",
|
||||
"887 888 1 1 \n",
|
||||
"888 889 0 3 \n",
|
||||
"889 890 1 1 \n",
|
||||
"890 891 0 3 \n",
|
||||
"\n",
|
||||
" Name Sex Age SibSp \\\n",
|
||||
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
|
||||
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
|
||||
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
|
||||
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
|
||||
"4 Allen, Mr. William Henry male 35.0 0 \n",
|
||||
".. ... ... ... ... \n",
|
||||
"886 Montvila, Rev. Juozas male 27.0 0 \n",
|
||||
"887 Graham, Miss. Margaret Edith female 19.0 0 \n",
|
||||
"888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n",
|
||||
"889 Behr, Mr. Karl Howell male 26.0 0 \n",
|
||||
"890 Dooley, Mr. Patrick male 32.0 0 \n",
|
||||
"\n",
|
||||
" Parch Ticket Fare Cabin Embarked FamilySize AgeGroup \\\n",
|
||||
"0 0 A/5 21171 7.2500 NaN S 1 3.0 \n",
|
||||
"1 0 PC 17599 71.2833 C85 C 1 3.0 \n",
|
||||
"2 0 STON/O2. 3101282 7.9250 NaN S 0 3.0 \n",
|
||||
"3 0 113803 53.1000 C123 S 1 3.0 \n",
|
||||
"4 0 373450 8.0500 NaN S 0 3.0 \n",
|
||||
".. ... ... ... ... ... ... ... \n",
|
||||
"886 0 211536 13.0000 NaN S 0 3.0 \n",
|
||||
"887 0 112053 30.0000 B42 S 0 3.0 \n",
|
||||
"888 2 W./C. 6607 23.4500 NaN S 3 NaN \n",
|
||||
"889 0 111369 30.0000 C148 C 0 3.0 \n",
|
||||
"890 0 370376 7.7500 NaN Q 0 3.0 \n",
|
||||
"\n",
|
||||
" Deck \n",
|
||||
"0 X \n",
|
||||
"1 C \n",
|
||||
"2 X \n",
|
||||
"3 C \n",
|
||||
"4 X \n",
|
||||
".. ... \n",
|
||||
"886 X \n",
|
||||
"887 B \n",
|
||||
"888 X \n",
|
||||
"889 C \n",
|
||||
"890 X \n",
|
||||
"\n",
|
||||
"[891 rows x 15 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df['FamilySize'] = df['SibSp'] + df['Parch']\n",
|
||||
"df.head()"
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -303,9 +606,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyError",
|
||||
"evalue": "'Salutation'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3079\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3080\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3081\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 'Salutation'",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-8-515fd9f54fd1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Others'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup_salutation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3022\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3023\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3024\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3025\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3026\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3080\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3081\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3082\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3083\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3084\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mKeyError\u001b[0m: 'Salutation'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def group_salutation(old_salutation):\n",
|
||||
" if old_salutation == 'Mr':\n",
|
||||
@ -372,13 +697,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n",
|
||||
"df['AgeGroup'] = 0\n",
|
||||
"df.loc[(.Age<6),'AgeGroup'] = 0\n",
|
||||
"df['AgeGroup'] = np.nan\n",
|
||||
"df.loc[(df.Age<6),'AgeGroup'] = 0\n",
|
||||
"df.loc[(df.Age>=6) & (df.Age < 11),'AgeGroup'] = 1\n",
|
||||
"df.loc[(df.Age>=11) & (df.Age < 16),'AgeGroup'] = 2\n",
|
||||
"df.loc[(df.Age>=16) & (df.Age < 60),'AgeGroup'] = 3\n",
|
||||
@ -395,7 +720,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -404,8 +729,8 @@
|
||||
" if np.isnan(big_string):\n",
|
||||
" return 'X'\n",
|
||||
" for substring in substrings:\n",
|
||||
" if big_string.find(substring) != 1:\n",
|
||||
" return substring\n",
|
||||
" if substring in big_string:\n",
|
||||
" return substring[0::]\n",
|
||||
" print(big_string)\n",
|
||||
" return 'X'\n",
|
||||
" \n",
|
||||
@ -478,6 +803,15 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"datacleaner": {
|
||||
"position": {
|
||||
"top": "50px"
|
||||
},
|
||||
"python": {
|
||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
||||
},
|
||||
"window_display": false
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
@ -493,7 +827,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
|
@ -56,7 +56,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Genetic Algorithms\n",
|
||||
"In this section we are going to use the library DEAP [References](#References) for implementing a genetic algorithms.\n",
|
||||
"In this section we are going to use the library DEAP [[References](#References)] for implementing a genetic algorithms.\n",
|
||||
"\n",
|
||||
"We are going to implement the OneMax problem as seen in class.\n",
|
||||
"\n",
|
||||
@ -200,11 +200,13 @@
|
||||
"source": [
|
||||
"## Optimizing ML hyperparameters\n",
|
||||
"\n",
|
||||
"One of the applications of Genetic Algorithms is the optimization of ML hyperparameters. Previously we have used GridSearch from Scikit. Using (sklearn-deap)[#References], optimize the Titatic hyperparameters using both GridSearch and Genetic Algorithms. \n",
|
||||
"One of the applications of Genetic Algorithms is the optimization of ML hyperparameters. Previously we have used GridSearch from Scikit. Using (sklearn-deap)[[References](#References)], optimize the Titatic hyperparameters using both GridSearch and Genetic Algorithms. \n",
|
||||
"\n",
|
||||
"The same exercise (using the digits dataset) can be found in this [notebook](https://github.com/rsteca/sklearn-deap/blob/master/test.ipynb).\n",
|
||||
"\n",
|
||||
"Submit a notebook where you include well-crafted conclusions about the exercises, discussing the pros and cons of using genetic algorithms for this purpose.\n"
|
||||
"Submit a notebook where you include well-crafted conclusions about the exercises, discussing the pros and cons of using genetic algorithms for this purpose.\n",
|
||||
"\n",
|
||||
"Note: There is a problem with the version 0.24 of scikit. Just comment the different approaches."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -261,6 +263,15 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"datacleaner": {
|
||||
"position": {
|
||||
"top": "50px"
|
||||
},
|
||||
"python": {
|
||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
||||
},
|
||||
"window_display": false
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
@ -276,7 +287,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
|
@ -97,16 +97,19 @@
|
||||
"source": [
|
||||
"import gym\n",
|
||||
"\n",
|
||||
"env = gym.make('CartPole-v0')\n",
|
||||
"env = gym.make(\"CartPole-v1\")\n",
|
||||
"#env = gym.make('MountainCar-v0')\n",
|
||||
"#env = gym.make('Taxi-v2')\n",
|
||||
"\n",
|
||||
"#env = gym.make('Jamesbond-ram-v0')\n",
|
||||
"\n",
|
||||
"env.reset()\n",
|
||||
"observation = env.reset()\n",
|
||||
"for _ in range(1000):\n",
|
||||
" env.render()\n",
|
||||
" env.step(env.action_space.sample()) # take a random action"
|
||||
" env.render()\n",
|
||||
" action = env.action_space.sample() # your agent here (this takes random actions)\n",
|
||||
" observation, reward, done, info = env.step(action)\n",
|
||||
"\n",
|
||||
" if done:\n",
|
||||
" observation = env.reset()\n",
|
||||
"env.close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -403,6 +406,15 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"datacleaner": {
|
||||
"position": {
|
||||
"top": "50px"
|
||||
},
|
||||
"python": {
|
||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
||||
},
|
||||
"window_display": false
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
@ -418,7 +430,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.5"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
|
@ -76,7 +76,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -85,7 +85,7 @@
|
||||
"(2034, 2807)"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -126,12 +126,15 @@
|
||||
"source": [
|
||||
"Although scikit-learn provides an LDA implementation, it is more popular the package *gensim*, which also provides an LSI implementation, as well as other functionalities. Fortunately, scikit-learn sparse matrices can be used in Gensim using the function *matutils.Sparse2Corpus()*. Anyway, if you are using intensively LDA,it can be convenient to create the corpus with their functions.\n",
|
||||
"\n",
|
||||
"You should install first *gensim*. Run 'conda install -c anaconda gensim=0.12.4' in a terminal."
|
||||
"You should install first:\n",
|
||||
"\n",
|
||||
"* *gensim*. Run 'conda install gensim' in a terminal.\n",
|
||||
"* *python-Levenshtein*. Run 'conda install python-Levenshtein' in a terminal"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -159,7 +162,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -173,23 +176,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(0,\n",
|
||||
" '0.007*\"car\" + 0.006*\"increased\" + 0.006*\"closely\" + 0.006*\"groups\" + 0.006*\"center\" + 0.006*\"88\" + 0.006*\"offer\" + 0.005*\"archie\" + 0.005*\"beginning\" + 0.005*\"comets\"'),\n",
|
||||
" '0.011*\"baptist\" + 0.010*\"koresh\" + 0.009*\"bible\" + 0.006*\"reality\" + 0.006*\"virtual\" + 0.005*\"scarlet\" + 0.005*\"shag\" + 0.004*\"tootsie\" + 0.004*\"kinda\" + 0.004*\"captain\"'),\n",
|
||||
" (1,\n",
|
||||
" '0.005*\"allow\" + 0.005*\"discuss\" + 0.005*\"condition\" + 0.004*\"certain\" + 0.004*\"member\" + 0.004*\"manipulation\" + 0.004*\"little\" + 0.003*\"proposal\" + 0.003*\"heavily\" + 0.003*\"obvious\"'),\n",
|
||||
" '0.010*\"targa\" + 0.008*\"thanks\" + 0.008*\"moon\" + 0.007*\"craig\" + 0.007*\"zoroastrians\" + 0.006*\"yayayay\" + 0.005*\"unfortunately\" + 0.005*\"windows\" + 0.005*\"rayshade\" + 0.004*\"tdb\"'),\n",
|
||||
" (2,\n",
|
||||
" '0.002*\"led\" + 0.002*\"mechanism\" + 0.002*\"frank\" + 0.002*\"platform\" + 0.002*\"mormons\" + 0.002*\"concepts\" + 0.002*\"proton\" + 0.002*\"aeronautics\" + 0.002*\"header\" + 0.002*\"foreign\"'),\n",
|
||||
" '0.009*\"mary\" + 0.007*\"whatever\" + 0.006*\"god\" + 0.005*\"ns\" + 0.005*\"lucky\" + 0.005*\"joseph\" + 0.005*\"ssrt\" + 0.005*\"samaritan\" + 0.005*\"crusades\" + 0.004*\"phobos\"'),\n",
|
||||
" (3,\n",
|
||||
" '0.004*\"objects\" + 0.003*\"activity\" + 0.003*\"manhattan\" + 0.003*\"obtained\" + 0.003*\"eyes\" + 0.003*\"education\" + 0.003*\"netters\" + 0.003*\"complex\" + 0.003*\"europe\" + 0.002*\"missions\"')]"
|
||||
" '0.009*\"islam\" + 0.008*\"western\" + 0.008*\"plane\" + 0.008*\"jeff\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.007*\"joy\" + 0.007*\"khomeini\" + 0.007*\"davidian\" + 0.006*\"basically\"')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -208,7 +211,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 62,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -240,7 +243,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 63,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -253,14 +256,14 @@
|
||||
],
|
||||
"source": [
|
||||
"# You can save the dictionary\n",
|
||||
"dictionary.save('newsgroup.dict')\n",
|
||||
"dictionary.save('newsgroup.dict.texts')\n",
|
||||
"\n",
|
||||
"print(dictionary)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 64,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -271,7 +274,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 65,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -283,28 +286,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING:root:random_state not set so using default value\n",
|
||||
"WARNING:root:failed to load state from newsgroups.dict.state: [Errno 2] No such file or directory: 'newsgroups.dict.state'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# You can optionally save the dictionary \n",
|
||||
"\n",
|
||||
"dictionary.save('newsgroups.dict')\n",
|
||||
"lda = LdaModel.load('newsgroups.dict')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 71,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -323,7 +305,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 72,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -333,7 +315,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 73,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -346,7 +328,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 74,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -364,7 +346,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 75,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -377,23 +359,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(0,\n",
|
||||
" '0.011*\"thanks\" + 0.010*\"targa\" + 0.008*\"mary\" + 0.008*\"western\" + 0.007*\"craig\" + 0.007*\"jeff\" + 0.006*\"yayayay\" + 0.006*\"phobos\" + 0.005*\"unfortunately\" + 0.005*\"martian\"'),\n",
|
||||
" '0.009*\"whatever\" + 0.007*\"plane\" + 0.007*\"ns\" + 0.007*\"joy\" + 0.006*\"happy\" + 0.005*\"bob\" + 0.004*\"phil\" + 0.004*\"nasa\" + 0.003*\"purdue\" + 0.003*\"neie\"'),\n",
|
||||
" (1,\n",
|
||||
" '0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"'),\n",
|
||||
" '0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"'),\n",
|
||||
" (2,\n",
|
||||
" '0.009*\"whatever\" + 0.009*\"baptist\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.006*\"khomeini\" + 0.006*\"davidian\" + 0.005*\"gerald\" + 0.005*\"bull\" + 0.005*\"sorry\" + 0.005*\"jesus\"'),\n",
|
||||
" '0.010*\"moon\" + 0.007*\"phobos\" + 0.006*\"unfortunately\" + 0.006*\"martian\" + 0.006*\"russian\" + 0.005*\"rayshade\" + 0.005*\"anybody\" + 0.005*\"perturbations\" + 0.005*\"thanks\" + 0.004*\"apollo\"'),\n",
|
||||
" (3,\n",
|
||||
" '0.005*\"pd\" + 0.004*\"baltimore\" + 0.004*\"also\" + 0.003*\"ipx\" + 0.003*\"dam\" + 0.003*\"feiner\" + 0.003*\"foley\" + 0.003*\"ideally\" + 0.003*\"srgp\" + 0.003*\"thank\"')]"
|
||||
" '0.008*\"islam\" + 0.008*\"western\" + 0.007*\"jeff\" + 0.007*\"zoroastrians\" + 0.006*\"davidian\" + 0.006*\"basically\" + 0.005*\"bull\" + 0.005*\"gerald\" + 0.005*\"sorry\" + 0.004*\"kent\"')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"execution_count": 76,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -405,14 +387,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 77,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(0, 0.09401487), (1, 0.08991001), (2, 0.08514047), (3, 0.7309346)]\n"
|
||||
"[(0, 0.7154438), (1, 0.10569019), (2, 0.09522807), (3, 0.08363795)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -424,7 +406,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 78,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -445,14 +427,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(0, 0.06678458), (1, 0.8006135), (2, 0.06974816), (3, 0.062853776)]\n"
|
||||
"[(0, 0.06320839), (1, 0.80878526), (2, 0.06274223), (3, 0.065264106)]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -464,14 +446,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 80,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n"
|
||||
"0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -482,15 +464,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[(0, 0.110989906), (1, 0.670005), (2, 0.11422917), (3, 0.10477593)]\n",
|
||||
"0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n"
|
||||
"[(0, 0.10564032), (1, 0.67894983), (2, 0.104482815), (3, 0.11092702)]\n",
|
||||
"0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -510,7 +492,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -526,23 +508,23 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(0,\n",
|
||||
" '0.769*\"god\" + 0.345*\"jesus\" + 0.235*\"bible\" + 0.203*\"christian\" + 0.149*\"christians\" + 0.108*\"christ\" + 0.089*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
|
||||
" '0.769*\"god\" + 0.346*\"jesus\" + 0.235*\"bible\" + 0.204*\"christian\" + 0.148*\"christians\" + 0.107*\"christ\" + 0.090*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
|
||||
" (1,\n",
|
||||
" '-0.863*\"thanks\" + -0.255*\"please\" + -0.160*\"hello\" + -0.153*\"hi\" + 0.123*\"god\" + -0.112*\"sorry\" + -0.088*\"could\" + -0.075*\"windows\" + -0.068*\"jpeg\" + -0.062*\"gif\"'),\n",
|
||||
" '-0.863*\"thanks\" + -0.255*\"please\" + -0.159*\"hello\" + -0.152*\"hi\" + 0.124*\"god\" + -0.111*\"sorry\" + -0.088*\"could\" + -0.074*\"windows\" + -0.067*\"jpeg\" + -0.063*\"gif\"'),\n",
|
||||
" (2,\n",
|
||||
" '-0.779*\"well\" + 0.229*\"god\" + -0.164*\"yes\" + 0.153*\"thanks\" + -0.135*\"ico\" + -0.135*\"tek\" + -0.132*\"beauchaine\" + -0.132*\"queens\" + -0.132*\"bronx\" + -0.131*\"manhattan\"'),\n",
|
||||
" '-0.780*\"well\" + 0.229*\"god\" + -0.165*\"yes\" + 0.154*\"thanks\" + -0.133*\"ico\" + -0.133*\"tek\" + -0.130*\"queens\" + -0.130*\"bronx\" + -0.130*\"beauchaine\" + -0.130*\"manhattan\"'),\n",
|
||||
" (3,\n",
|
||||
" '0.343*\"well\" + -0.335*\"ico\" + -0.334*\"tek\" + -0.328*\"bronx\" + -0.328*\"beauchaine\" + -0.328*\"queens\" + -0.325*\"manhattan\" + -0.305*\"com\" + -0.303*\"bob\" + -0.073*\"god\"')]"
|
||||
" '-0.338*\"well\" + 0.336*\"ico\" + 0.334*\"tek\" + 0.328*\"bronx\" + 0.328*\"beauchaine\" + 0.328*\"queens\" + 0.326*\"manhattan\" + 0.305*\"com\" + 0.305*\"bob\" + 0.072*\"god\"')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 83,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -554,7 +536,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 84,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -603,6 +585,15 @@
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"datacleaner": {
|
||||
"position": {
|
||||
"top": "50px"
|
||||
},
|
||||
"python": {
|
||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
||||
},
|
||||
"window_display": false
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
@ -618,7 +609,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
"version": "3.8.8"
|
||||
},
|
||||
"latex_envs": {
|
||||
"LaTeX_envs_menu_present": true,
|
||||
|
Loading…
Reference in New Issue
Block a user