Remove outputs and metadata

2025-10-09 23:22:22 +00:00 · 2019-02-28 15:30:33 +01:00
parent a1be167cc0
commit c1d3ca38ea
25 changed files with 989 additions and 14268 deletions
--- a/ml2/3_1_Read_Data.ipynb
+++ b/ml2/3_1_Read_Data.ipynb
--- a/ml2/3_2_Pandas.ipynb
+++ b/ml2/3_2_Pandas.ipynb
@@ -84,25 +84,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0     5\n",
-       "1    10\n",
-       "2    15\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
@@ -124,25 +108,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "a     5\n",
-       "b    10\n",
-       "c    15\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "d = {'a': 5, 'b': 10, 'c': 15}\n",
    "s = Series(d)\n",
@@ -151,22 +119,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['a', 'b', 'c'], dtype='object')"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# We can get the list of indexes\n",
    "s.index"
@@ -174,22 +129,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([ 5, 10, 15])"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# and the values\n",
    "s.values"
@@ -204,28 +146,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3141991\n",
-       "Barcelona    1604555\n",
-       "Valencia      786189\n",
-       "Sevilla       693878\n",
-       "Zaragoza      664953\n",
-       "Malaga        569130\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Series with population in 2015 of more populated cities in Spain\n",
    "s = Series([3141991, 1604555, 786189, 693878, 664953, 569130], index=['Madrid', 'Barcelona', 'Valencia', 'Sevilla', \n",
@@ -235,22 +158,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "3141991"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Population of Madrid\n",
    "s['Madrid']"
@@ -272,28 +182,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid        True\n",
-       "Barcelona     True\n",
-       "Valencia     False\n",
-       "Sevilla      False\n",
-       "Zaragoza     False\n",
-       "Malaga       False\n",
-       "dtype: bool"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "#Boolean condition\n",
    "s > 1000000"
@@ -301,24 +192,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3141991\n",
-       "Barcelona    1604555\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Cities with population greater than 1.000.000\n",
    "s[s > 1000000]"
@@ -333,24 +209,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3141991\n",
-       "Barcelona    1604555\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Cities with population greater than the mean\n",
    "s[s > s.mean()]"
@@ -358,25 +219,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3141991\n",
-       "Barcelona    1604555\n",
-       "Valencia      786189\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Cities with population greater than the median\n",
    "s[s > s.median()]"
@@ -384,28 +229,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid        True\n",
-       "Barcelona     True\n",
-       "Valencia      True\n",
-       "Sevilla      False\n",
-       "Zaragoza     False\n",
-       "Malaga       False\n",
-       "dtype: bool"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Check cities with a population greater than 700.000\n",
    "s > 700000"
@@ -413,25 +239,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3141991\n",
-       "Barcelona    1604555\n",
-       "Valencia      786189\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# List cities with a population greater than 700.000\n",
    "s[s > 700000]"
@@ -439,28 +249,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid        True\n",
-       "Barcelona     True\n",
-       "Valencia      True\n",
-       "Sevilla      False\n",
-       "Zaragoza     False\n",
-       "Malaga       False\n",
-       "dtype: bool"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "#Another way to write the same boolean indexing selection\n",
    "bigger_than_700000 = s > 700000\n",
@@ -469,25 +260,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3141991\n",
-       "Barcelona    1604555\n",
-       "Valencia      786189\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "#Cities with population > 700000\n",
    "s[bigger_than_700000]"
@@ -509,28 +284,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       1570995.5\n",
-       "Barcelona     802277.5\n",
-       "Valencia      393094.5\n",
-       "Sevilla       346939.0\n",
-       "Zaragoza      332476.5\n",
-       "Malaga        284565.0\n",
-       "dtype: float64"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Divide population by 2\n",
    "s / 2"
@@ -538,22 +294,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1243449.3333333333"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Get the average population\n",
    "s.mean()"
@@ -561,22 +304,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "3141991"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Get the highest population\n",
    "s.max()"
@@ -598,28 +328,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3320000\n",
-       "Barcelona    1604555\n",
-       "Valencia      786189\n",
-       "Sevilla       693878\n",
-       "Zaragoza      664953\n",
-       "Malaga        569130\n",
-       "dtype: int64"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Change population of one city\n",
    "s['Madrid'] = 3320000\n",
@@ -628,28 +339,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Madrid       3652000.0\n",
-       "Barcelona    1765010.5\n",
-       "Valencia      864807.9\n",
-       "Sevilla       693878.0\n",
-       "Zaragoza      664953.0\n",
-       "Malaga        569130.0\n",
-       "dtype: float64"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# Increase by 10% cities with population greater than 700000\n",
    "s[s > 700000] = 1.1 * s[s > 700000]\n",
@@ -672,61 +364,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>one</th>\n",
-       "      <th>two</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>a</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>b</th>\n",
-       "      <td>2.0</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>c</th>\n",
-       "      <td>3.0</td>\n",
-       "      <td>3.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>d</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   one  two\n",
-       "a  1.0  1.0\n",
-       "b  2.0  2.0\n",
-       "c  3.0  3.0\n",
-       "d  NaN  4.0"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# We are going to create a DataFrame from a dict of Series\n",
    "d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),\n",
@@ -748,55 +388,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>one</th>\n",
-       "      <th>two</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>d</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>b</th>\n",
-       "      <td>2.0</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>a</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   one  two\n",
-       "d  NaN  4.0\n",
-       "b  2.0  2.0\n",
-       "a  1.0  1.0"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# We can filter\n",
    "df = DataFrame(d, index=['d', 'b', 'a'])\n",
@@ -812,55 +406,9 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>two</th>\n",
-       "      <th>three</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>d</th>\n",
-       "      <td>4.0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>b</th>\n",
-       "      <td>2.0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>a</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   two three\n",
-       "d  4.0   NaN\n",
-       "b  2.0   NaN\n",
-       "a  1.0   NaN"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "df = DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])\n",
    "df"
--- a/ml2/3_3_Data_Munging_with_Pandas.ipynb
+++ b/ml2/3_3_Data_Munging_with_Pandas.ipynb
--- a/ml2/3_4_Visualisation_Pandas.ipynb
+++ b/ml2/3_4_Visualisation_Pandas.ipynb
--- a/ml2/3_5_Exercise_1.ipynb
+++ b/ml2/3_5_Exercise_1.ipynb
@@ -46,10 +46,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": null,
+   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
@@ -82,9 +80,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
@@ -105,9 +101,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
@@ -121,9 +115,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
@@ -137,9 +129,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
@@ -153,17 +143,13 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "source": [
    "How many passsengers have survived? List them grouped by Sex and Pclass.\n",
    "\n",
@@ -173,17 +159,13 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "source": [
    "Visualise df_1 as an histogram."
   ]
@@ -191,17 +173,13 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "source": [
    "# Feature Engineering"
   ]
@@ -232,9 +210,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df['FamilySize'] = df['SibSp'] + df['Parch']\n",
@@ -258,9 +234,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df['Alone'] = (df.FamilySize == 0)\n",
@@ -284,9 +258,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "#Taken from http://www.analyticsvidhya.com/blog/2014/09/data-munging-python-using-pandas-baby-steps-python/\n",
@@ -307,9 +279,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df['Salutation'].unique()"
@@ -318,9 +288,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df.groupby(['Salutation']).size()"
@@ -336,9 +304,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "def group_salutation(old_salutation):\n",
@@ -362,9 +328,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# Distribution\n",
@@ -375,9 +339,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df.boxplot(column='Age', by = 'Salutation', sym='k.')"
@@ -393,9 +355,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# Specific features for Children and Female since there are more survivors\n",
@@ -413,9 +373,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "# Group ages to simplify machine learning algorithms.  0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n",
@@ -437,10 +395,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "collapsed": false
-   },
+   "execution_count": null,
+   "metadata": {},
   "outputs": [],
   "source": [
    "def substrings_in_string(big_string, substrings):\n",
@@ -475,9 +431,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df['FarePerPerson']= df['Fare'] / (df['FamilySize'] + 1)"
@@ -500,9 +454,7 @@
  {
   "cell_type": "code",
   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
+   "metadata": {},
   "outputs": [],
   "source": [
    "df['AgeClass']=df['Age']*df['Pclass']"
--- a/ml2/3_7_SVM.ipynb
+++ b/ml2/3_7_SVM.ipynb
--- a/ml2/plot_learning_curve.py
+++ b/ml2/plot_learning_curve.py
@@ -19,11 +19,10 @@ samples.

 import numpy as np
 import matplotlib.pyplot as plt
-from sklearn import cross_validation
 from sklearn.naive_bayes import GaussianNB
 from sklearn.svm import SVC
 from sklearn.datasets import load_digits
-from sklearn.learning_curve import learning_curve
+from sklearn.model_selection import learning_curve


 def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
@@ -53,7 +52,7 @@ def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
    cv : integer, cross-validation generator, optional
        If an integer is passed, it is the number of folds (defaults to 3).
        Specific cross-validation objects can be passed, see
-        sklearn.cross_validation module for the list of possible objects
+        sklearn.model_selection module for the list of possible objects

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).