Updated with the new libraries

updated to last version of OpenGym
Fixed broken link and bug of sklearn-deap with scikit 0.24
2025-06-15 04:22:22 +00:00 · 2021-05-07 11:10:21 +02:00 · 2021-04-19 19:10:03 +02:00 · 2021-04-19 17:47:22 +02:00 · 2021-04-06 10:21:25 +02:00 · 2021-04-06 10:20:29 +02:00
4 changed files with 439 additions and 91 deletions
--- a/ml2/3_5_Exercise_1.ipynb
+++ b/ml2/3_5_Exercise_1.ipynb
@ -46,7 +46,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@ -209,12 +209,315 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>PassengerId</th>\n",
+       "      <th>Survived</th>\n",
+       "      <th>Pclass</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Sex</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>SibSp</th>\n",
+       "      <th>Parch</th>\n",
+       "      <th>Ticket</th>\n",
+       "      <th>Fare</th>\n",
+       "      <th>Cabin</th>\n",
+       "      <th>Embarked</th>\n",
+       "      <th>FamilySize</th>\n",
+       "      <th>AgeGroup</th>\n",
+       "      <th>Deck</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Braund, Mr. Owen Harris</td>\n",
+       "      <td>male</td>\n",
+       "      <td>22.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>A/5 21171</td>\n",
+       "      <td>7.2500</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
+       "      <td>female</td>\n",
+       "      <td>38.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>PC 17599</td>\n",
+       "      <td>71.2833</td>\n",
+       "      <td>C85</td>\n",
+       "      <td>C</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Heikkinen, Miss. Laina</td>\n",
+       "      <td>female</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>STON/O2. 3101282</td>\n",
+       "      <td>7.9250</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
+       "      <td>female</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>113803</td>\n",
+       "      <td>53.1000</td>\n",
+       "      <td>C123</td>\n",
+       "      <td>S</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Allen, Mr. William Henry</td>\n",
+       "      <td>male</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>373450</td>\n",
+       "      <td>8.0500</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>886</th>\n",
+       "      <td>887</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Montvila, Rev. Juozas</td>\n",
+       "      <td>male</td>\n",
+       "      <td>27.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>211536</td>\n",
+       "      <td>13.0000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>887</th>\n",
+       "      <td>888</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Graham, Miss. Margaret Edith</td>\n",
+       "      <td>female</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>112053</td>\n",
+       "      <td>30.0000</td>\n",
+       "      <td>B42</td>\n",
+       "      <td>S</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>B</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>888</th>\n",
+       "      <td>889</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
+       "      <td>female</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>W./C. 6607</td>\n",
+       "      <td>23.4500</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>S</td>\n",
+       "      <td>3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>X</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>889</th>\n",
+       "      <td>890</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Behr, Mr. Karl Howell</td>\n",
+       "      <td>male</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>111369</td>\n",
+       "      <td>30.0000</td>\n",
+       "      <td>C148</td>\n",
+       "      <td>C</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>C</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>890</th>\n",
+       "      <td>891</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Dooley, Mr. Patrick</td>\n",
+       "      <td>male</td>\n",
+       "      <td>32.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>370376</td>\n",
+       "      <td>7.7500</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Q</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>X</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>891 rows × 15 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     PassengerId  Survived  Pclass  \\\n",
+       "0              1         0       3   \n",
+       "1              2         1       1   \n",
+       "2              3         1       3   \n",
+       "3              4         1       1   \n",
+       "4              5         0       3   \n",
+       "..           ...       ...     ...   \n",
+       "886          887         0       2   \n",
+       "887          888         1       1   \n",
+       "888          889         0       3   \n",
+       "889          890         1       1   \n",
+       "890          891         0       3   \n",
+       "\n",
+       "                                                  Name     Sex   Age  SibSp  \\\n",
+       "0                              Braund, Mr. Owen Harris    male  22.0      1   \n",
+       "1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
+       "2                               Heikkinen, Miss. Laina  female  26.0      0   \n",
+       "3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
+       "4                             Allen, Mr. William Henry    male  35.0      0   \n",
+       "..                                                 ...     ...   ...    ...   \n",
+       "886                              Montvila, Rev. Juozas    male  27.0      0   \n",
+       "887                       Graham, Miss. Margaret Edith  female  19.0      0   \n",
+       "888           Johnston, Miss. Catherine Helen \"Carrie\"  female   NaN      1   \n",
+       "889                              Behr, Mr. Karl Howell    male  26.0      0   \n",
+       "890                                Dooley, Mr. Patrick    male  32.0      0   \n",
+       "\n",
+       "     Parch            Ticket     Fare Cabin Embarked  FamilySize  AgeGroup  \\\n",
+       "0        0         A/5 21171   7.2500   NaN        S           1       3.0   \n",
+       "1        0          PC 17599  71.2833   C85        C           1       3.0   \n",
+       "2        0  STON/O2. 3101282   7.9250   NaN        S           0       3.0   \n",
+       "3        0            113803  53.1000  C123        S           1       3.0   \n",
+       "4        0            373450   8.0500   NaN        S           0       3.0   \n",
+       "..     ...               ...      ...   ...      ...         ...       ...   \n",
+       "886      0            211536  13.0000   NaN        S           0       3.0   \n",
+       "887      0            112053  30.0000   B42        S           0       3.0   \n",
+       "888      2        W./C. 6607  23.4500   NaN        S           3       NaN   \n",
+       "889      0            111369  30.0000  C148        C           0       3.0   \n",
+       "890      0            370376   7.7500   NaN        Q           0       3.0   \n",
+       "\n",
+       "    Deck  \n",
+       "0      X  \n",
+       "1      C  \n",
+       "2      X  \n",
+       "3      C  \n",
+       "4      X  \n",
+       "..   ...  \n",
+       "886    X  \n",
+       "887    B  \n",
+       "888    X  \n",
+       "889    C  \n",
+       "890    X  \n",
+       "\n",
+       "[891 rows x 15 columns]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "df['FamilySize'] = df['SibSp'] + df['Parch']\n",
-    "df.head()"
+    "df"
   ]
  },
  {
@ -303,9 +606,31 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "'Salutation'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3079\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3080\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3081\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'Salutation'",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-8-515fd9f54fd1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     13\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     14\u001b[0m                     \u001b[0;32mreturn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Others'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 15\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup_salutation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     16\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Salutation'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m   3022\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlevels\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3023\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_multilevel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3024\u001b[0;31m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3025\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3026\u001b[0m                 \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mget_loc\u001b[0;34m(self, key, method, tolerance)\u001b[0m\n\u001b[1;32m   3080\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_loc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcasted_key\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3081\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3082\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3083\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3084\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mtolerance\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'Salutation'"
+     ]
+    }
+   ],
   "source": [
    "def group_salutation(old_salutation):\n",
    "    if old_salutation == 'Mr':\n",
@ -372,13 +697,13 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group ages to simplify machine learning algorithms.  0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n",
-    "df['AgeGroup'] = 0\n",
-    "df.loc[(.Age<6),'AgeGroup'] = 0\n",
+    "df['AgeGroup'] = np.nan\n",
+    "df.loc[(df.Age<6),'AgeGroup'] = 0\n",
    "df.loc[(df.Age>=6) & (df.Age < 11),'AgeGroup'] = 1\n",
    "df.loc[(df.Age>=11) & (df.Age < 16),'AgeGroup'] = 2\n",
    "df.loc[(df.Age>=16) & (df.Age < 60),'AgeGroup'] = 3\n",
@ -395,7 +720,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
@ -404,8 +729,8 @@
    "        if np.isnan(big_string):\n",
    "            return 'X'\n",
    "    for substring in substrings:\n",
-    "        if big_string.find(substring) != 1:\n",
-    "            return substring\n",
+    "        if substring in big_string:\n",
+    "            return substring[0::]\n",
    "    print(big_string)\n",
    "    return 'X'\n",
    " \n",
@ -478,6 +803,15 @@
  }
 ],
 "metadata": {
+  "datacleaner": {
+   "position": {
+    "top": "50px"
+   },
+   "python": {
+    "varRefreshCmd": "try:\n    print(_datacleaner.dataframe_metadata())\nexcept:\n    print([])"
+   },
+   "window_display": false
+  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
@ -493,7 +827,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.7.9"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
--- a/ml4/2_5_1_Exercise.ipynb
+++ b/ml4/2_5_1_Exercise.ipynb
@ -56,7 +56,7 @@
   "metadata": {},
   "source": [
    "# Genetic Algorithms\n",
-    "In this section we are going to use the library DEAP [References](#References) for implementing a genetic algorithms.\n",
+    "In this section we are going to use the library DEAP [[References](#References)] for implementing genetic algorithms.\n",
    "\n",
    "We are going to implement the OneMax problem as seen in class.\n",
    "\n",
@ -200,11 +200,13 @@
   "source": [
    "## Optimizing ML hyperparameters\n",
    "\n",
-    "One of the applications of Genetic Algorithms is the optimization of ML hyperparameters. Previously we have used GridSearch from Scikit. Using (sklearn-deap)[#References], optimize the Titatic hyperparameters using both GridSearch and Genetic Algorithms. \n",
+    "One of the applications of Genetic Algorithms is the optimization of ML hyperparameters. Previously we have used GridSearch from Scikit. Using sklearn-deap [[References](#References)], optimize the Titanic hyperparameters using both GridSearch and Genetic Algorithms. \n",
    "\n",
    "The same exercise (using the digits dataset) can be found in this [notebook](https://github.com/rsteca/sklearn-deap/blob/master/test.ipynb).\n",
    "\n",
-    "Submit a notebook where you include well-crafted conclusions about the exercises, discussing the pros and cons of using genetic algorithms for this purpose.\n"
+    "Submit a notebook where you include well-crafted conclusions about the exercises, discussing the pros and cons of using genetic algorithms for this purpose.\n",
+    "\n",
+    "Note: There is a problem between sklearn-deap and version 0.24 of scikit-learn. If it affects you, just comment on the different approaches."
   ]
  },
  {
@ -261,6 +263,15 @@
  }
 ],
 "metadata": {
+  "datacleaner": {
+   "position": {
+    "top": "50px"
+   },
+   "python": {
+    "varRefreshCmd": "try:\n    print(_datacleaner.dataframe_metadata())\nexcept:\n    print([])"
+   },
+   "window_display": false
+  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
@ -276,7 +287,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.7.9"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
--- a/ml5/2_6_1_Q-Learning.ipynb
+++ b/ml5/2_6_1_Q-Learning.ipynb
@ -97,16 +97,19 @@
   "source": [
    "import gym\n",
    "\n",
-    "env = gym.make('CartPole-v0')\n",
+    "env = gym.make(\"CartPole-v1\")\n",
    "#env = gym.make('MountainCar-v0')\n",
    "#env = gym.make('Taxi-v2')\n",
    "\n",
-    "#env = gym.make('Jamesbond-ram-v0')\n",
-    "\n",
-    "env.reset()\n",
+    "observation = env.reset()\n",
    "for _ in range(1000):\n",
-    "    env.render()\n",
-    "    env.step(env.action_space.sample()) # take a random action"
+    "  env.render()\n",
+    "  action = env.action_space.sample() # your agent here (this takes random actions)\n",
+    "  observation, reward, done, info = env.step(action)\n",
+    "\n",
+    "  if done:\n",
+    "    observation = env.reset()\n",
+    "env.close()"
   ]
  },
  {
@ -403,6 +406,15 @@
  }
 ],
 "metadata": {
+  "datacleaner": {
+   "position": {
+    "top": "50px"
+   },
+   "python": {
+    "varRefreshCmd": "try:\n    print(_datacleaner.dataframe_metadata())\nexcept:\n    print([])"
+   },
+   "window_display": false
+  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
@ -418,7 +430,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.5.5"
+   "version": "3.7.9"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
--- a/nlp/4_5_Semantic_Models.ipynb
+++ b/nlp/4_5_Semantic_Models.ipynb
@ -76,7 +76,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
@ -85,7 +85,7 @@
       "(2034, 2807)"
      ]
     },
-     "execution_count": 1,
+     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -126,12 +126,15 @@
   "source": [
    "Although scikit-learn provides an LDA implementation, the package *gensim* is more popular; it also provides an LSI implementation, as well as other functionalities. Fortunately, scikit-learn sparse matrices can be used in Gensim using the function *matutils.Sparse2Corpus()*. Anyway, if you are using LDA intensively, it can be convenient to create the corpus with their functions.\n",
    "\n",
-    "You should install first *gensim*. Run 'conda install -c anaconda gensim=0.12.4' in a terminal."
+    "You should first install:\n",
+    "\n",
+    "* *gensim*. Run 'conda install gensim' in a terminal.\n",
+    "* *python-Levenshtein*. Run 'conda install python-Levenshtein' in a terminal."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
@ -159,7 +162,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
@ -173,23 +176,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0,\n",
-       "  '0.007*\"car\" + 0.006*\"increased\" + 0.006*\"closely\" + 0.006*\"groups\" + 0.006*\"center\" + 0.006*\"88\" + 0.006*\"offer\" + 0.005*\"archie\" + 0.005*\"beginning\" + 0.005*\"comets\"'),\n",
+       "  '0.011*\"baptist\" + 0.010*\"koresh\" + 0.009*\"bible\" + 0.006*\"reality\" + 0.006*\"virtual\" + 0.005*\"scarlet\" + 0.005*\"shag\" + 0.004*\"tootsie\" + 0.004*\"kinda\" + 0.004*\"captain\"'),\n",
       " (1,\n",
-       "  '0.005*\"allow\" + 0.005*\"discuss\" + 0.005*\"condition\" + 0.004*\"certain\" + 0.004*\"member\" + 0.004*\"manipulation\" + 0.004*\"little\" + 0.003*\"proposal\" + 0.003*\"heavily\" + 0.003*\"obvious\"'),\n",
+       "  '0.010*\"targa\" + 0.008*\"thanks\" + 0.008*\"moon\" + 0.007*\"craig\" + 0.007*\"zoroastrians\" + 0.006*\"yayayay\" + 0.005*\"unfortunately\" + 0.005*\"windows\" + 0.005*\"rayshade\" + 0.004*\"tdb\"'),\n",
       " (2,\n",
-       "  '0.002*\"led\" + 0.002*\"mechanism\" + 0.002*\"frank\" + 0.002*\"platform\" + 0.002*\"mormons\" + 0.002*\"concepts\" + 0.002*\"proton\" + 0.002*\"aeronautics\" + 0.002*\"header\" + 0.002*\"foreign\"'),\n",
+       "  '0.009*\"mary\" + 0.007*\"whatever\" + 0.006*\"god\" + 0.005*\"ns\" + 0.005*\"lucky\" + 0.005*\"joseph\" + 0.005*\"ssrt\" + 0.005*\"samaritan\" + 0.005*\"crusades\" + 0.004*\"phobos\"'),\n",
       " (3,\n",
-       "  '0.004*\"objects\" + 0.003*\"activity\" + 0.003*\"manhattan\" + 0.003*\"obtained\" + 0.003*\"eyes\" + 0.003*\"education\" + 0.003*\"netters\" + 0.003*\"complex\" + 0.003*\"europe\" + 0.002*\"missions\"')]"
+       "  '0.009*\"islam\" + 0.008*\"western\" + 0.008*\"plane\" + 0.008*\"jeff\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.007*\"joy\" + 0.007*\"khomeini\" + 0.007*\"davidian\" + 0.006*\"basically\"')]"
      ]
     },
-     "execution_count": 4,
+     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -208,7 +211,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
@ -240,7 +243,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
@ -253,14 +256,14 @@
   ],
   "source": [
    "# You can save the dictionary\n",
-    "dictionary.save('newsgroup.dict')\n",
+    "dictionary.save('newsgroup.dict.texts')\n",
    "\n",
    "print(dictionary)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
@ -271,7 +274,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
@ -283,28 +286,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:root:random_state not set so using default value\n",
-      "WARNING:root:failed to load state from newsgroups.dict.state: [Errno 2] No such file or directory: 'newsgroups.dict.state'\n"
-     ]
-    }
-   ],
-   "source": [
-    "# You can optionally save the  dictionary \n",
-    "\n",
-    "dictionary.save('newsgroups.dict')\n",
-    "lda = LdaModel.load('newsgroups.dict')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
@ -323,7 +305,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
@ -333,7 +315,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
@ -346,7 +328,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
@ -364,7 +346,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
@ -377,23 +359,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0,\n",
-       "  '0.011*\"thanks\" + 0.010*\"targa\" + 0.008*\"mary\" + 0.008*\"western\" + 0.007*\"craig\" + 0.007*\"jeff\" + 0.006*\"yayayay\" + 0.006*\"phobos\" + 0.005*\"unfortunately\" + 0.005*\"martian\"'),\n",
+       "  '0.009*\"whatever\" + 0.007*\"plane\" + 0.007*\"ns\" + 0.007*\"joy\" + 0.006*\"happy\" + 0.005*\"bob\" + 0.004*\"phil\" + 0.004*\"nasa\" + 0.003*\"purdue\" + 0.003*\"neie\"'),\n",
       " (1,\n",
-       "  '0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"'),\n",
+       "  '0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"'),\n",
       " (2,\n",
-       "  '0.009*\"whatever\" + 0.009*\"baptist\" + 0.007*\"cheers\" + 0.007*\"kent\" + 0.006*\"khomeini\" + 0.006*\"davidian\" + 0.005*\"gerald\" + 0.005*\"bull\" + 0.005*\"sorry\" + 0.005*\"jesus\"'),\n",
+       "  '0.010*\"moon\" + 0.007*\"phobos\" + 0.006*\"unfortunately\" + 0.006*\"martian\" + 0.006*\"russian\" + 0.005*\"rayshade\" + 0.005*\"anybody\" + 0.005*\"perturbations\" + 0.005*\"thanks\" + 0.004*\"apollo\"'),\n",
       " (3,\n",
-       "  '0.005*\"pd\" + 0.004*\"baltimore\" + 0.004*\"also\" + 0.003*\"ipx\" + 0.003*\"dam\" + 0.003*\"feiner\" + 0.003*\"foley\" + 0.003*\"ideally\" + 0.003*\"srgp\" + 0.003*\"thank\"')]"
+       "  '0.008*\"islam\" + 0.008*\"western\" + 0.007*\"jeff\" + 0.007*\"zoroastrians\" + 0.006*\"davidian\" + 0.006*\"basically\" + 0.005*\"bull\" + 0.005*\"gerald\" + 0.005*\"sorry\" + 0.004*\"kent\"')]"
      ]
     },
-     "execution_count": 21,
+     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -405,14 +387,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "[(0, 0.09401487), (1, 0.08991001), (2, 0.08514047), (3, 0.7309346)]\n"
+      "[(0, 0.7154438), (1, 0.10569019), (2, 0.09522807), (3, 0.08363795)]\n"
     ]
    }
   ],
@ -424,7 +406,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
@ -445,14 +427,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "[(0, 0.06678458), (1, 0.8006135), (2, 0.06974816), (3, 0.062853776)]\n"
+      "[(0, 0.06320839), (1, 0.80878526), (2, 0.06274223), (3, 0.065264106)]\n"
     ]
    }
   ],
@ -464,14 +446,14 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n"
+      "0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
     ]
    }
   ],
@ -482,15 +464,15 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "[(0, 0.110989906), (1, 0.670005), (2, 0.11422917), (3, 0.10477593)]\n",
-      "0.007*\"islam\" + 0.006*\"koresh\" + 0.006*\"moon\" + 0.006*\"bible\" + 0.006*\"plane\" + 0.006*\"ns\" + 0.005*\"zoroastrians\" + 0.005*\"joy\" + 0.005*\"lucky\" + 0.005*\"ssrt\"\n"
+      "[(0, 0.10564032), (1, 0.67894983), (2, 0.104482815), (3, 0.11092702)]\n",
+      "0.009*\"god\" + 0.008*\"mary\" + 0.008*\"targa\" + 0.007*\"baptist\" + 0.007*\"thanks\" + 0.007*\"koresh\" + 0.006*\"really\" + 0.006*\"bible\" + 0.005*\"lot\" + 0.005*\"lucky\"\n"
     ]
    }
   ],
@ -510,7 +492,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
@ -526,23 +508,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0,\n",
-       "  '0.769*\"god\" + 0.345*\"jesus\" + 0.235*\"bible\" + 0.203*\"christian\" + 0.149*\"christians\" + 0.108*\"christ\" + 0.089*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
+       "  '0.769*\"god\" + 0.346*\"jesus\" + 0.235*\"bible\" + 0.204*\"christian\" + 0.148*\"christians\" + 0.107*\"christ\" + 0.090*\"well\" + 0.085*\"koresh\" + 0.081*\"kent\" + 0.080*\"christianity\"'),\n",
       " (1,\n",
-       "  '-0.863*\"thanks\" + -0.255*\"please\" + -0.160*\"hello\" + -0.153*\"hi\" + 0.123*\"god\" + -0.112*\"sorry\" + -0.088*\"could\" + -0.075*\"windows\" + -0.068*\"jpeg\" + -0.062*\"gif\"'),\n",
+       "  '-0.863*\"thanks\" + -0.255*\"please\" + -0.159*\"hello\" + -0.152*\"hi\" + 0.124*\"god\" + -0.111*\"sorry\" + -0.088*\"could\" + -0.074*\"windows\" + -0.067*\"jpeg\" + -0.063*\"gif\"'),\n",
       " (2,\n",
-       "  '-0.779*\"well\" + 0.229*\"god\" + -0.164*\"yes\" + 0.153*\"thanks\" + -0.135*\"ico\" + -0.135*\"tek\" + -0.132*\"beauchaine\" + -0.132*\"queens\" + -0.132*\"bronx\" + -0.131*\"manhattan\"'),\n",
+       "  '-0.780*\"well\" + 0.229*\"god\" + -0.165*\"yes\" + 0.154*\"thanks\" + -0.133*\"ico\" + -0.133*\"tek\" + -0.130*\"queens\" + -0.130*\"bronx\" + -0.130*\"beauchaine\" + -0.130*\"manhattan\"'),\n",
       " (3,\n",
-       "  '0.343*\"well\" + -0.335*\"ico\" + -0.334*\"tek\" + -0.328*\"bronx\" + -0.328*\"beauchaine\" + -0.328*\"queens\" + -0.325*\"manhattan\" + -0.305*\"com\" + -0.303*\"bob\" + -0.073*\"god\"')]"
+       "  '-0.338*\"well\" + 0.336*\"ico\" + 0.334*\"tek\" + 0.328*\"bronx\" + 0.328*\"beauchaine\" + 0.328*\"queens\" + 0.326*\"manhattan\" + 0.305*\"com\" + 0.305*\"bob\" + 0.072*\"god\"')]"
      ]
     },
-     "execution_count": 29,
+     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -554,7 +536,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
@ -603,6 +585,15 @@
  }
 ],
 "metadata": {
+  "datacleaner": {
+   "position": {
+    "top": "50px"
+   },
+   "python": {
+    "varRefreshCmd": "try:\n    print(_datacleaner.dataframe_metadata())\nexcept:\n    print([])"
+   },
+   "window_display": false
+  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
@ -618,7 +609,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.8"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
Author	SHA1	Message	Date
cif2cif	ae8d3d3ba2	Updated with the new libraries	2021-05-07 11:10:21 +02:00
cif2cif	2ba0e2f3d9	updated to last version of OpenGym	2021-04-19 19:10:03 +02:00
cif2cif	c9114cc796	Fixed broken link and bug of sklearn-deap with scikit 0.24	2021-04-19 17:47:22 +02:00
cif2cif	b80c097362	Merge branch 'master' of https://github.com/gsi-upm/sitc	2021-04-06 10:21:25 +02:00
cif2cif	161cd8492b	Fixed bug in substrings_in_string and set default df[AgeGroup] to np.nan	2021-04-06 10:20:29 +02:00