diff --git a/ml2/3_5_Exercise_1.ipynb b/ml2/3_5_Exercise_1.ipynb index 76920c7..80dcc56 100644 --- a/ml2/3_5_Exercise_1.ipynb +++ b/ml2/3_5_Exercise_1.ipynb @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -83,7 +83,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [] @@ -420,11 +420,11 @@ "source": [ "# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n", "df['AgeGroup'] = 0\n", - "df.loc[(.AgeFill<6),'AgeGroup'] = 0\n", - "df.loc[(df.AgeFill>=6) & (df.AgeFill < 11),'AgeGroup'] = 1\n", - "df.loc[(df.AgeFill>=11) & (df.AgeFill < 16),'AgeGroup'] = 2\n", - "df.loc[(df.AgeFill>=16) & (df.AgeFill < 60),'AgeGroup'] = 3\n", - "df.loc[(df.AgeFill>=60),'AgeGroup'] = 4" + "df.loc[(df.Age<6),'AgeGroup'] = 0\n", + "df.loc[(df.Age>=6) & (df.Age < 11),'AgeGroup'] = 1\n", + "df.loc[(df.Age>=11) & (df.Age < 16),'AgeGroup'] = 2\n", + "df.loc[(df.Age>=16) & (df.Age < 60),'AgeGroup'] = 3\n", + "df.loc[(df.Age>=60),'AgeGroup'] = 4" ] }, { @@ -437,18 +437,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def substrings_in_string(big_string, substrings):\n", + " if type(big_string) == float:\n", + " if np.isnan(big_string):\n", + " return 'X'\n", " for substring in substrings:\n", - " if big_string.find(substring) != 1:\n", - " return substring\n", + " if big_string.find(substring) != -1: # str.find returns -1 when the substring is absent\n", + " return substring\n", " print(big_string)\n", - " return np.nan\n", + " return 'X'\n", + " \n", "#Turning cabin number into Deck\n", "cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n", "df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))" @@ -537,7 +541,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.1+" + "version": "3.5.2" } }, 
"nbformat": 4,