mirror of
https://github.com/gsi-upm/sitc
synced 2025-01-09 20:41:27 +00:00
Compare commits
No commits in common. "ae8d3d3ba24e88f5fb7b54a894cea3b45d475c5d" and "3d6d96dd8ae76fea8724f60630190ac56d50901a" have entirely different histories.
ae8d3d3ba2
...
3d6d96dd8a
@ -46,7 +46,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -209,315 +209,12 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>PassengerId</th>\n",
|
|
||||||
" <th>Survived</th>\n",
|
|
||||||
" <th>Pclass</th>\n",
|
|
||||||
" <th>Name</th>\n",
|
|
||||||
" <th>Sex</th>\n",
|
|
||||||
" <th>Age</th>\n",
|
|
||||||
" <th>SibSp</th>\n",
|
|
||||||
" <th>Parch</th>\n",
|
|
||||||
" <th>Ticket</th>\n",
|
|
||||||
" <th>Fare</th>\n",
|
|
||||||
" <th>Cabin</th>\n",
|
|
||||||
" <th>Embarked</th>\n",
|
|
||||||
" <th>FamilySize</th>\n",
|
|
||||||
" <th>AgeGroup</th>\n",
|
|
||||||
" <th>Deck</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>Braund, Mr. Owen Harris</td>\n",
|
|
||||||
" <td>male</td>\n",
|
|
||||||
" <td>22.0</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>A/5 21171</td>\n",
|
|
||||||
" <td>7.2500</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>X</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>2</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
|
|
||||||
" <td>female</td>\n",
|
|
||||||
" <td>38.0</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>PC 17599</td>\n",
|
|
||||||
" <td>71.2833</td>\n",
|
|
||||||
" <td>C85</td>\n",
|
|
||||||
" <td>C</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>C</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>2</th>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>Heikkinen, Miss. Laina</td>\n",
|
|
||||||
" <td>female</td>\n",
|
|
||||||
" <td>26.0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>STON/O2. 3101282</td>\n",
|
|
||||||
" <td>7.9250</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>X</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>3</th>\n",
|
|
||||||
" <td>4</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
|
|
||||||
" <td>female</td>\n",
|
|
||||||
" <td>35.0</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>113803</td>\n",
|
|
||||||
" <td>53.1000</td>\n",
|
|
||||||
" <td>C123</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>C</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>4</th>\n",
|
|
||||||
" <td>5</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>Allen, Mr. William Henry</td>\n",
|
|
||||||
" <td>male</td>\n",
|
|
||||||
" <td>35.0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>373450</td>\n",
|
|
||||||
" <td>8.0500</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>X</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>...</th>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" <td>...</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>886</th>\n",
|
|
||||||
" <td>887</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>2</td>\n",
|
|
||||||
" <td>Montvila, Rev. Juozas</td>\n",
|
|
||||||
" <td>male</td>\n",
|
|
||||||
" <td>27.0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>211536</td>\n",
|
|
||||||
" <td>13.0000</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>X</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>887</th>\n",
|
|
||||||
" <td>888</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>Graham, Miss. Margaret Edith</td>\n",
|
|
||||||
" <td>female</td>\n",
|
|
||||||
" <td>19.0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>112053</td>\n",
|
|
||||||
" <td>30.0000</td>\n",
|
|
||||||
" <td>B42</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>B</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>888</th>\n",
|
|
||||||
" <td>889</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
|
|
||||||
" <td>female</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>2</td>\n",
|
|
||||||
" <td>W./C. 6607</td>\n",
|
|
||||||
" <td>23.4500</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>S</td>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>X</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>889</th>\n",
|
|
||||||
" <td>890</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>Behr, Mr. Karl Howell</td>\n",
|
|
||||||
" <td>male</td>\n",
|
|
||||||
" <td>26.0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>111369</td>\n",
|
|
||||||
" <td>30.0000</td>\n",
|
|
||||||
" <td>C148</td>\n",
|
|
||||||
" <td>C</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>C</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>890</th>\n",
|
|
||||||
" <td>891</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3</td>\n",
|
|
||||||
" <td>Dooley, Mr. Patrick</td>\n",
|
|
||||||
" <td>male</td>\n",
|
|
||||||
" <td>32.0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>370376</td>\n",
|
|
||||||
" <td>7.7500</td>\n",
|
|
||||||
" <td>NaN</td>\n",
|
|
||||||
" <td>Q</td>\n",
|
|
||||||
" <td>0</td>\n",
|
|
||||||
" <td>3.0</td>\n",
|
|
||||||
" <td>X</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"<p>891 rows × 15 columns</p>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" PassengerId Survived Pclass \\\n",
|
|
||||||
"0 1 0 3 \n",
|
|
||||||
"1 2 1 1 \n",
|
|
||||||
"2 3 1 3 \n",
|
|
||||||
"3 4 1 1 \n",
|
|
||||||
"4 5 0 3 \n",
|
|
||||||
".. ... ... ... \n",
|
|
||||||
"886 887 0 2 \n",
|
|
||||||
"887 888 1 1 \n",
|
|
||||||
"888 889 0 3 \n",
|
|
||||||
"889 890 1 1 \n",
|
|
||||||
"890 891 0 3 \n",
|
|
||||||
"\n",
|
|
||||||
" Name Sex Age SibSp \\\n",
|
|
||||||
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
|
|
||||||
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
|
|
||||||
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
|
|
||||||
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
|
|
||||||
"4 Allen, Mr. William Henry male 35.0 0 \n",
|
|
||||||
".. ... ... ... ... \n",
|
|
||||||
"886 Montvila, Rev. Juozas male 27.0 0 \n",
|
|
||||||
"887 Graham, Miss. Margaret Edith female 19.0 0 \n",
|
|
||||||
"888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n",
|
|
||||||
"889 Behr, Mr. Karl Howell male 26.0 0 \n",
|
|
||||||
"890 Dooley, Mr. Patrick male 32.0 0 \n",
|
|
||||||
"\n",
|
|
||||||
" Parch Ticket Fare Cabin Embarked FamilySize AgeGroup \\\n",
|
|
||||||
"0 0 A/5 21171 7.2500 NaN S 1 3.0 \n",
|
|
||||||
"1 0 PC 17599 71.2833 C85 C 1 3.0 \n",
|
|
||||||
"2 0 STON/O2. 3101282 7.9250 NaN S 0 3.0 \n",
|
|
||||||
"3 0 113803 53.1000 C123 S 1 3.0 \n",
|
|
||||||
"4 0 373450 8.0500 NaN S 0 3.0 \n",
|
|
||||||
".. ... ... ... ... ... ... ... \n",
|
|
||||||
"886 0 211536 13.0000 NaN S 0 3.0 \n",
|
|
||||||
"887 0 112053 30.0000 B42 S 0 3.0 \n",
|
|
||||||
"888 2 W./C. 6607 23.4500 NaN S 3 NaN \n",
|
|
||||||
"889 0 111369 30.0000 C148 C 0 3.0 \n",
|
|
||||||
"890 0 370376 7.7500 NaN Q 0 3.0 \n",
|
|
||||||
"\n",
|
|
||||||
" Deck \n",
|
|
||||||
"0 X \n",
|
|
||||||
"1 C \n",
|
|
||||||
"2 X \n",
|
|
||||||
"3 C \n",
|
|
||||||
"4 X \n",
|
|
||||||
".. ... \n",
|
|
||||||
"886 X \n",
|
|
||||||
"887 B \n",
|
|
||||||
"888 X \n",
|
|
||||||
"889 C \n",
|
|
||||||
"890 X \n",
|
|
||||||
"\n",
|
|
||||||
"[891 rows x 15 columns]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 20,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"df['FamilySize'] = df['SibSp'] + df['Parch']\n",
|
"df['FamilySize'] = df['SibSp'] + df['Parch']\n",
|
||||||
"df"
|
"df.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -606,31 +303,9 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"ename": "KeyError",
|
|
||||||
"evalue": "'Salutation'",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
||||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
||||||
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3079\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3080\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3081\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
|
||||||
"\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
|
|
||||||
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
|
||||||
"\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
|
|
||||||
"\u001b[0;31mKeyError\u001b[0m: 'Salutation'",
|
|
||||||
"\nThe above exception was the direct cause of the following exception:\n",
|
|
||||||
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
||||||
"\u001b[0;32m<ipython-input-8-515fd9f54fd1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Others'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup_salutation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 16\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3022\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3023\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3024\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3025\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3026\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m 3080\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3081\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3082\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3083\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3084\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
|
||||||
"\u001b[0;31mKeyError\u001b[0m: 'Salutation'"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"def group_salutation(old_salutation):\n",
|
"def group_salutation(old_salutation):\n",
|
||||||
" if old_salutation == 'Mr':\n",
|
" if old_salutation == 'Mr':\n",
|
||||||
@ -697,13 +372,13 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n",
|
"# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n",
|
||||||
"df['AgeGroup'] = np.nan\n",
|
"df['AgeGroup'] = 0\n",
|
||||||
"df.loc[(df.Age<6),'AgeGroup'] = 0\n",
|
"df.loc[(.Age<6),'AgeGroup'] = 0\n",
|
||||||
"df.loc[(df.Age>=6) & (df.Age < 11),'AgeGroup'] = 1\n",
|
"df.loc[(df.Age>=6) & (df.Age < 11),'AgeGroup'] = 1\n",
|
||||||
"df.loc[(df.Age>=11) & (df.Age < 16),'AgeGroup'] = 2\n",
|
"df.loc[(df.Age>=11) & (df.Age < 16),'AgeGroup'] = 2\n",
|
||||||
"df.loc[(df.Age>=16) & (df.Age < 60),'AgeGroup'] = 3\n",
|
"df.loc[(df.Age>=16) & (df.Age < 60),'AgeGroup'] = 3\n",
|
||||||
@ -720,7 +395,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -729,8 +404,8 @@
|
|||||||
" if np.isnan(big_string):\n",
|
" if np.isnan(big_string):\n",
|
||||||
" return 'X'\n",
|
" return 'X'\n",
|
||||||
" for substring in substrings:\n",
|
" for substring in substrings:\n",
|
||||||
" if substring in big_string:\n",
|
" if big_string.find(substring) != 1:\n",
|
||||||
" return substring[0::]\n",
|
" return substring\n",
|
||||||
" print(big_string)\n",
|
" print(big_string)\n",
|
||||||
" return 'X'\n",
|
" return 'X'\n",
|
||||||
" \n",
|
" \n",
|
||||||
@ -803,15 +478,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"datacleaner": {
|
|
||||||
"position": {
|
|
||||||
"top": "50px"
|
|
||||||
},
|
|
||||||
"python": {
|
|
||||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
|
||||||
},
|
|
||||||
"window_display": false
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
@ -827,7 +493,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.9"
|
"version": "3.7.1"
|
||||||
},
|
},
|
||||||
"latex_envs": {
|
"latex_envs": {
|
||||||
"LaTeX_envs_menu_present": true,
|
"LaTeX_envs_menu_present": true,
|
||||||
|
@ -56,7 +56,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Genetic Algorithms\n",
|
"# Genetic Algorithms\n",
|
||||||
"In this section we are going to use the library DEAP [[References](#References)] for implementing genetic algorithms.\n",
|
"In this section we are going to use the library DEAP [References](#References) for implementing genetic algorithms.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"We are going to implement the OneMax problem as seen in class.\n",
|
"We are going to implement the OneMax problem as seen in class.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -200,13 +200,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Optimizing ML hyperparameters\n",
|
"## Optimizing ML hyperparameters\n",
|
||||||
"\n",
|
"\n",
|
||||||
"One of the applications of Genetic Algorithms is the optimization of ML hyperparameters. Previously we have used GridSearch from Scikit. Using (sklearn-deap)[[References](#References)], optimize the Titanic hyperparameters using both GridSearch and Genetic Algorithms. \n",
|
"One of the applications of Genetic Algorithms is the optimization of ML hyperparameters. Previously we have used GridSearch from Scikit. Using [sklearn-deap](#References), optimize the Titanic hyperparameters using both GridSearch and Genetic Algorithms. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"The same exercise (using the digits dataset) can be found in this [notebook](https://github.com/rsteca/sklearn-deap/blob/master/test.ipynb).\n",
|
"The same exercise (using the digits dataset) can be found in this [notebook](https://github.com/rsteca/sklearn-deap/blob/master/test.ipynb).\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Submit a notebook where you include well-crafted conclusions about the exercises, discussing the pros and cons of using genetic algorithms for this purpose.\n",
|
"Submit a notebook where you include well-crafted conclusions about the exercises, discussing the pros and cons of using genetic algorithms for this purpose.\n"
|
||||||
"\n",
|
|
||||||
"Note: There is a problem with the version 0.24 of scikit. Just comment the different approaches."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -263,15 +261,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"datacleaner": {
|
|
||||||
"position": {
|
|
||||||
"top": "50px"
|
|
||||||
},
|
|
||||||
"python": {
|
|
||||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
|
||||||
},
|
|
||||||
"window_display": false
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
@ -287,7 +276,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.9"
|
"version": "3.7.1"
|
||||||
},
|
},
|
||||||
"latex_envs": {
|
"latex_envs": {
|
||||||
"LaTeX_envs_menu_present": true,
|
"LaTeX_envs_menu_present": true,
|
||||||
|
@ -97,19 +97,16 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import gym\n",
|
"import gym\n",
|
||||||
"\n",
|
"\n",
|
||||||
"env = gym.make(\"CartPole-v1\")\n",
|
"env = gym.make('CartPole-v0')\n",
|
||||||
"#env = gym.make('MountainCar-v0')\n",
|
"#env = gym.make('MountainCar-v0')\n",
|
||||||
"#env = gym.make('Taxi-v2')\n",
|
"#env = gym.make('Taxi-v2')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"observation = env.reset()\n",
|
"#env = gym.make('Jamesbond-ram-v0')\n",
|
||||||
"for _ in range(1000):\n",
|
|
||||||
" env.render()\n",
|
|
||||||
" action = env.action_space.sample() # your agent here (this takes random actions)\n",
|
|
||||||
" observation, reward, done, info = env.step(action)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" if done:\n",
|
"env.reset()\n",
|
||||||
" observation = env.reset()\n",
|
"for _ in range(1000):\n",
|
||||||
"env.close()"
|
" env.render()\n",
|
||||||
|
" env.step(env.action_space.sample()) # take a random action"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -406,15 +403,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"datacleaner": {
|
|
||||||
"position": {
|
|
||||||
"top": "50px"
|
|
||||||
},
|
|
||||||
"python": {
|
|
||||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
|
||||||
},
|
|
||||||
"window_display": false
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
@ -430,7 +418,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.7.9"
|
"version": "3.5.5"
|
||||||
},
|
},
|
||||||
"latex_envs": {
|
"latex_envs": {
|
||||||
"LaTeX_envs_menu_present": true,
|
"LaTeX_envs_menu_present": true,
|
||||||
|
@ -76,7 +76,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -85,7 +85,7 @@
|
|||||||
"(2034, 2807)"
|
"(2034, 2807)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 33,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -126,15 +126,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"Although scikit-learn provides an LDA implementation, the package *gensim* is more popular; it also provides an LSI implementation, as well as other functionalities. Fortunately, scikit-learn sparse matrices can be used in Gensim using the function *matutils.Sparse2Corpus()*. Anyway, if you are using LDA intensively, it can be convenient to create the corpus with Gensim's own functions.\n",
|
"Although scikit-learn provides an LDA implementation, the package *gensim* is more popular; it also provides an LSI implementation, as well as other functionalities. Fortunately, scikit-learn sparse matrices can be used in Gensim using the function *matutils.Sparse2Corpus()*. Anyway, if you are using LDA intensively, it can be convenient to create the corpus with Gensim's own functions.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"You should install first:\n",
|
"You should first install *gensim*. Run 'conda install -c anaconda gensim=0.12.4' in a terminal."
|
||||||
"\n",
|
|
||||||
"* *gensim*. Run 'conda install gensim' in a terminal.\n",
|
|
||||||
"* *python-Levenshtein*. Run 'conda install python-Levenshtein' in a terminal"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 34,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -162,7 +159,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 60,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -176,23 +173,23 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 61,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"[(0,\n",
|
"[(0,\n",
|
||||||
" '0.011*\"baptist\" + 0.010*\"koresh\" + 0.009*\"bible\" + 0.006*\"reality\" + 0.006*\"virtual\" + 0.005*\"scarlet\" + 0.005*\"shag\" + 0.004*\"tootsie\" + 0.004*\"kinda\" + 0.004*\"captain\"'),\n",
|
" '0.007*\"car\" + 0.006*\"increased\" + 0.006*\"closely\" + 0.006*\"groups\" + 0.006*\"center\" + 0.006*\"88\" + 0.006*\"offer\" + 0.005*\"archie\" + 0.005*\"beginning\" + 0.005*\"comets\"'),\n",
|
||||||
" (1,\n",
|
" (1,\n",
|
||||||
" '0.010*\"targa\" + 0.008*\"thanks\" + 0.008*\"moon\" + 0.007*\"craig\" + 0.007*\"zoroastrians\" + 0.006*\"yayayay\" + 0.005*\"unfortunately\" + 0.005*\"windows\" + 0.005*\"rayshade\" + 0.004*\"tdb\"'),\n",
|
" '0.005*\"allow\" + 0.005*\"discuss\" + 0.005*\"condition\" + 0.004*\"certain\" + 0.004*\"member\" + 0.004*\"manipulation\" + 0.004*\"little\" + 0.003*\"proposal\" + 0.003*\"heavily\" + 0.003*\"obvious\"'),\n",
|
||||||
" (2,\n",
|
" (2,\n",
|
||||||
" '0.009*\"mary\" + 0.007*\"whatever\" + 0.006*\"god\" + 0.005*\"ns\" + 0.005*\"lucky\" + 0.005*\"joseph\" + 0.005*\"ssrt\" + 0.005*\"samaritan\" + 0.005*\"crusades\" + 0.004*\"phobos\"'),\n",
|
" '0.002*\"led\" + 0.002*\"mechanism\" + 0.002*\"frank\" + 0.002*\"platform\" + 0.002*\"mormons\" + 0.002*\"concepts\" + 0.002*\"proton\" + 0.002*\"aeronautics\" + 0.002*\"header\" + 0.002*\"foreign\"'),\n",
|
||||||
" (3,\n",
|
" (3,\n",
|
||||||
" '0.009*\"islam\" + 0.008*\"western\" + 0.008*\"plane\" + 0.008*\"jeff\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.007*\"joy\" + 0.007*\"khomeini\" + 0.007*\"davidian\" + 0.006*\"basically\"')]"
|
" '0.004*\"objects\" + 0.003*\"activity\" + 0.003*\"manhattan\" + 0.003*\"obtained\" + 0.003*\"eyes\" + 0.003*\"education\" + 0.003*\"netters\" + 0.003*\"complex\" + 0.003*\"europe\" + 0.002*\"missions\"')]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 61,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -211,7 +208,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 62,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -243,7 +240,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 63,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -256,14 +253,14 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# You can save the dictionary\n",
|
"# You can save the dictionary\n",
|
||||||
"dictionary.save('newsgroup.dict.texts')\n",
|
"dictionary.save('newsgroup.dict')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(dictionary)"
|
"print(dictionary)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 64,
|
"execution_count": 7,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -274,7 +271,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 65,
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -286,7 +283,28 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 71,
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"WARNING:root:random_state not set so using default value\n",
|
||||||
|
"WARNING:root:failed to load state from newsgroups.dict.state: [Errno 2] No such file or directory: 'newsgroups.dict.state'\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# You can optionally save the dictionary \n",
|
||||||
|
"\n",
|
||||||
|
"dictionary.save('newsgroups.dict')\n",
|
||||||
|
"lda = LdaModel.load('newsgroups.dict')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -305,7 +323,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 72,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -315,7 +333,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 73,
|
"execution_count": 18,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -328,7 +346,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 74,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -346,7 +364,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 75,
|
"execution_count": 20,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -359,23 +377,23 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 76,
|
"execution_count": 21,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"[(0,\n",
|
"[(0,\n",
|
||||||
" '0.009*\"whatever\" + 0.007*\"plane\" + 0.007*\"ns\" + 0.007*\"joy\" + 0.006*\"happy\" + 0.005*\"bob\" + 0.004*\"phil\" + 0.004*\"nasa\" + 0.003*\"purdue\" + 0.003*\"neie\"'),\n",
|
" '0.011*\"thanks\" + 0.010*\"targa\" + 0.008*\"mary\" + 0.008*\"western\" + 0.007*\"craig\" + 0.007*\"jeff\" + 0.006*\"yayayay\" + 0.006*\"phobos\" + 0.005*\"unfortunately\" + 0.005*\"martian\"'),\n",
|
||||||
" (1,\n",
|
" (1,\n",
|
||||||
" '0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"'),\n",
|
" '0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"'),\n",
|
||||||
" (2,\n",
|
" (2,\n",
|
||||||
" '0.010*\"moon\" + 0.007*\"phobos\" + 0.006*\"unfortunately\" + 0.006*\"martian\" + 0.006*\"russian\" + 0.005*\"rayshade\" + 0.005*\"anybody\" + 0.005*\"perturbations\" + 0.005*\"thanks\" + 0.004*\"apollo\"'),\n",
|
" '0.009*\"whatever\" + 0.009*\"baptist\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.006*\"khomeini\" + 0.006*\"davidian\" + 0.005*\"gerald\" + 0.005*\"bull\" + 0.005*\"sorry\" + 0.005*\"jesus\"'),\n",
|
||||||
" (3,\n",
|
" (3,\n",
|
||||||
" '0.008*\"islam\" + 0.008*\"western\" + 0.007*\"jeff\" + 0.007*\"zoroastrians\" + 0.006*\"davidian\" + 0.006*\"basically\" + 0.005*\"bull\" + 0.005*\"gerald\" + 0.005*\"sorry\" + 0.004*\"kent\"')]"
|
" '0.005*\"pd\" + 0.004*\"baltimore\" + 0.004*\"also\" + 0.003*\"ipx\" + 0.003*\"dam\" + 0.003*\"feiner\" + 0.003*\"foley\" + 0.003*\"ideally\" + 0.003*\"srgp\" + 0.003*\"thank\"')]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 76,
|
"execution_count": 21,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -387,14 +405,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 77,
|
"execution_count": 22,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"[(0, 0.7154438), (1, 0.10569019), (2, 0.09522807), (3, 0.08363795)]\n"
|
"[(0, 0.09401487), (1, 0.08991001), (2, 0.08514047), (3, 0.7309346)]\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -406,7 +424,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 78,
|
"execution_count": 24,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -427,14 +445,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 79,
|
"execution_count": 25,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"[(0, 0.06320839), (1, 0.80878526), (2, 0.06274223), (3, 0.065264106)]\n"
|
"[(0, 0.06678458), (1, 0.8006135), (2, 0.06974816), (3, 0.062853776)]\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -446,14 +464,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 80,
|
"execution_count": 26,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
|
"0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -464,15 +482,15 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 81,
|
"execution_count": 27,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"[(0, 0.10564032), (1, 0.67894983), (2, 0.104482815), (3, 0.11092702)]\n",
|
"[(0, 0.110989906), (1, 0.670005), (2, 0.11422917), (3, 0.10477593)]\n",
|
||||||
"0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
|
"0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -492,7 +510,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 82,
|
"execution_count": 28,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -508,23 +526,23 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 83,
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"[(0,\n",
|
"[(0,\n",
|
||||||
" '0.769*\"god\" + 0.346*\"jesus\" + 0.235*\"bible\" + 0.204*\"christian\" + 0.148*\"christians\" + 0.107*\"christ\" + 0.090*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
|
" '0.769*\"god\" + 0.345*\"jesus\" + 0.235*\"bible\" + 0.203*\"christian\" + 0.149*\"christians\" + 0.108*\"christ\" + 0.089*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
|
||||||
" (1,\n",
|
" (1,\n",
|
||||||
" '-0.863*\"thanks\" + -0.255*\"please\" + -0.159*\"hello\" + -0.152*\"hi\" + 0.124*\"god\" + -0.111*\"sorry\" + -0.088*\"could\" + -0.074*\"windows\" + -0.067*\"jpeg\" + -0.063*\"gif\"'),\n",
|
" '-0.863*\"thanks\" + -0.255*\"please\" + -0.160*\"hello\" + -0.153*\"hi\" + 0.123*\"god\" + -0.112*\"sorry\" + -0.088*\"could\" + -0.075*\"windows\" + -0.068*\"jpeg\" + -0.062*\"gif\"'),\n",
|
||||||
" (2,\n",
|
" (2,\n",
|
||||||
" '-0.780*\"well\" + 0.229*\"god\" + -0.165*\"yes\" + 0.154*\"thanks\" + -0.133*\"ico\" + -0.133*\"tek\" + -0.130*\"queens\" + -0.130*\"bronx\" + -0.130*\"beauchaine\" + -0.130*\"manhattan\"'),\n",
|
" '-0.779*\"well\" + 0.229*\"god\" + -0.164*\"yes\" + 0.153*\"thanks\" + -0.135*\"ico\" + -0.135*\"tek\" + -0.132*\"beauchaine\" + -0.132*\"queens\" + -0.132*\"bronx\" + -0.131*\"manhattan\"'),\n",
|
||||||
" (3,\n",
|
" (3,\n",
|
||||||
" '-0.338*\"well\" + 0.336*\"ico\" + 0.334*\"tek\" + 0.328*\"bronx\" + 0.328*\"beauchaine\" + 0.328*\"queens\" + 0.326*\"manhattan\" + 0.305*\"com\" + 0.305*\"bob\" + 0.072*\"god\"')]"
|
" '0.343*\"well\" + -0.335*\"ico\" + -0.334*\"tek\" + -0.328*\"bronx\" + -0.328*\"beauchaine\" + -0.328*\"queens\" + -0.325*\"manhattan\" + -0.305*\"com\" + -0.303*\"bob\" + -0.073*\"god\"')]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 83,
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -536,7 +554,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 84,
|
"execution_count": 30,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -585,15 +603,6 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"datacleaner": {
|
|
||||||
"position": {
|
|
||||||
"top": "50px"
|
|
||||||
},
|
|
||||||
"python": {
|
|
||||||
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
|
|
||||||
},
|
|
||||||
"window_display": false
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
@ -609,7 +618,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.8"
|
"version": "3.7.1"
|
||||||
},
|
},
|
||||||
"latex_envs": {
|
"latex_envs": {
|
||||||
"LaTeX_envs_menu_present": true,
|
"LaTeX_envs_menu_present": true,
|
||||||
|
Loading…
Reference in New Issue
Block a user