From 01d87762955be807d4aa84cc443334d89422ab0d Mon Sep 17 00:00:00 2001 From: "Carlos A. Iglesias" Date: Thu, 23 Mar 2017 16:40:30 +0100 Subject: [PATCH 1/3] Corrected typo AgeFill instead of Age --- ml2/3_5_Exercise_1.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ml2/3_5_Exercise_1.ipynb b/ml2/3_5_Exercise_1.ipynb index 76920c7..cf7dfd7 100644 --- a/ml2/3_5_Exercise_1.ipynb +++ b/ml2/3_5_Exercise_1.ipynb @@ -420,11 +420,11 @@ "source": [ "# Group ages to simplify machine learning algorithms. 0: 0-5, 1: 6-10, 2: 11-15, 3: 16-59 and 4: 60-80\n", "df['AgeGroup'] = 0\n", - "df.loc[(.AgeFill<6),'AgeGroup'] = 0\n", - "df.loc[(df.AgeFill>=6) & (df.AgeFill < 11),'AgeGroup'] = 1\n", - "df.loc[(df.AgeFill>=11) & (df.AgeFill < 16),'AgeGroup'] = 2\n", - "df.loc[(df.AgeFill>=16) & (df.AgeFill < 60),'AgeGroup'] = 3\n", - "df.loc[(df.AgeFill>=60),'AgeGroup'] = 4" + "df.loc[(.Age<6),'AgeGroup'] = 0\n", + "df.loc[(df.Age>=6) & (df.Age < 11),'AgeGroup'] = 1\n", + "df.loc[(df.Age>=11) & (df.Age < 16),'AgeGroup'] = 2\n", + "df.loc[(df.Age>=16) & (df.Age < 60),'AgeGroup'] = 3\n", + "df.loc[(df.Age>=60),'AgeGroup'] = 4" ] }, { From 9a1ac84072c4d4474d0432fca9d2600fc469d8cf Mon Sep 17 00:00:00 2001 From: cif2cif Date: Thu, 23 Mar 2017 20:46:02 +0100 Subject: [PATCH 2/3] Arreglado error en Feature Dec --- ml2/3_5_Exercise_1.ipynb | 234 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 223 insertions(+), 11 deletions(-) diff --git a/ml2/3_5_Exercise_1.ipynb b/ml2/3_5_Exercise_1.ipynb index cf7dfd7..c59fba3 100644 --- a/ml2/3_5_Exercise_1.ipynb +++ b/ml2/3_5_Exercise_1.ipynb @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -81,12 +81,147 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { - "collapsed": true + "collapsed": false }, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass \\\n", + "0 1 0 3 \n", + "1 2 1 1 \n", + "2 3 1 3 \n", + "3 4 1 1 \n", + "4 5 0 3 \n", + "\n", + " Name Sex Age SibSp \\\n", + "0 Braund, Mr. Owen Harris male 22.0 1 \n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", + "2 Heikkinen, Miss. Laina female 26.0 0 \n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", + "4 Allen, Mr. William Henry male 35.0 0 \n", + "\n", + " Parch Ticket Fare Cabin Embarked \n", + "0 0 A/5 21171 7.2500 NaN S \n", + "1 0 PC 17599 71.2833 C85 C \n", + "2 0 STON/O2. 3101282 7.9250 NaN S \n", + "3 0 113803 53.1000 C123 S \n", + "4 0 373450 8.0500 NaN S " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url=\"https://raw.githubusercontent.com/gsi-upm/sitc/master/ml2/data-titanic/train.csv\"\n", + "df = pd.read_csv(url)\n", + "df_original = df.copy() # Copy to have a version of df without modifications\n", + "df.head()" + ] }, { "cell_type": "markdown", @@ -437,21 +572,98 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 X\n", + "1 A\n", + "2 X\n", + "3 A\n", + "4 X\n", + "5 X\n", + "6 A\n", + "7 X\n", + "8 X\n", + "9 X\n", + "10 A\n", + "11 A\n", + "12 X\n", + "13 X\n", + "14 X\n", + "15 X\n", + "16 X\n", + "17 X\n", + "18 X\n", + "19 X\n", + "20 X\n", + "21 A\n", + "22 X\n", + "23 A\n", + "24 X\n", + "25 X\n", + "26 X\n", + "27 A\n", + "28 X\n", + "29 X\n", + " ..\n", + "861 X\n", + "862 A\n", + "863 X\n", + "864 X\n", + "865 X\n", + "866 X\n", + "867 A\n", + "868 X\n", + "869 X\n", + "870 X\n", + "871 A\n", + "872 A\n", + "873 X\n", + "874 X\n", + "875 X\n", + "876 X\n", + "877 X\n", + "878 X\n", + "879 A\n", + "880 X\n", + "881 X\n", + "882 X\n", + "883 X\n", + "884 X\n", + "885 X\n", + "886 X\n", + "887 A\n", + "888 X\n", + "889 A\n", + "890 X\n", + "Name: Deck, dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def substrings_in_string(big_string, substrings):\n", + " if type(big_string) == float:\n", + " if np.isnan(big_string):\n", + " return 'X'\n", " for substring in substrings:\n", " if big_string.find(substring) != 1:\n", - " return substring\n", + " return substring\n", " print(big_string)\n", - " return np.nan\n", + " return 'X'\n", + " \n", "#Turning cabin number into Deck\n", "cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n", - "df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))" + "df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))\n", + "df['Deck']\n" ] }, { @@ -537,7 +749,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.5.1+" + "version": "3.5.2" } }, "nbformat": 4, From eb43e50ae78373333cff02092d7211078b69e6df Mon Sep 17 00:00:00 2001 From: cif2cif Date: Thu, 23 Mar 2017 20:47:47 +0100 Subject: [PATCH 3/3] Arreglado error en Feature Dec --- ml2/3_5_Exercise_1.ipynb | 220 ++------------------------------------- 1 file changed, 6 insertions(+), 214 deletions(-) diff --git a/ml2/3_5_Exercise_1.ipynb b/ml2/3_5_Exercise_1.ipynb index c59fba3..80dcc56 100644 --- a/ml2/3_5_Exercise_1.ipynb +++ b/ml2/3_5_Exercise_1.ipynb @@ -81,147 +81,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", - "
" - ], - "text/plain": [ - " PassengerId Survived Pclass \\\n", - "0 1 0 3 \n", - "1 2 1 1 \n", - "2 3 1 3 \n", - "3 4 1 1 \n", - "4 5 0 3 \n", - "\n", - " Name Sex Age SibSp \\\n", - "0 Braund, Mr. Owen Harris male 22.0 1 \n", - "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", - "2 Heikkinen, Miss. Laina female 26.0 0 \n", - "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", - "4 Allen, Mr. William Henry male 35.0 0 \n", - "\n", - " Parch Ticket Fare Cabin Embarked \n", - "0 0 A/5 21171 7.2500 NaN S \n", - "1 0 PC 17599 71.2833 C85 C \n", - "2 0 STON/O2. 3101282 7.9250 NaN S \n", - "3 0 113803 53.1000 C123 S \n", - "4 0 373450 8.0500 NaN S " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "url=\"https://raw.githubusercontent.com/gsi-upm/sitc/master/ml2/data-titanic/train.csv\"\n", - "df = pd.read_csv(url)\n", - "df_original = df.copy() # Copy to have a version of df without modifications\n", - "df.head()" - ] + "outputs": [], + "source": [] }, { "cell_type": "markdown", @@ -572,83 +437,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "collapsed": false }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 X\n", - "1 A\n", - "2 X\n", - "3 A\n", - "4 X\n", - "5 X\n", - "6 A\n", - "7 X\n", - "8 X\n", - "9 X\n", - "10 A\n", - "11 A\n", - "12 X\n", - "13 X\n", - "14 X\n", - "15 X\n", - "16 X\n", - "17 X\n", - "18 X\n", - "19 X\n", - "20 X\n", - "21 A\n", - "22 X\n", - "23 A\n", - "24 X\n", - "25 X\n", - "26 X\n", - "27 A\n", - "28 X\n", - "29 X\n", - " ..\n", - "861 X\n", - "862 A\n", - "863 X\n", - "864 X\n", - "865 X\n", - "866 X\n", - "867 A\n", - "868 X\n", - "869 X\n", - "870 X\n", - "871 A\n", - "872 A\n", - "873 X\n", - "874 X\n", - "875 X\n", - "876 X\n", - "877 X\n", - "878 X\n", - "879 A\n", - "880 X\n", - "881 X\n", - "882 X\n", - "883 X\n", - "884 X\n", - "885 X\n", - "886 X\n", - "887 A\n", - "888 X\n", - "889 A\n", - "890 X\n", - "Name: Deck, dtype: object" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "def substrings_in_string(big_string, substrings):\n", " if type(big_string) == float:\n", @@ -662,8 +455,7 @@ " \n", "#Turning cabin number into Deck\n", "cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n", - "df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))\n", - "df['Deck']\n" + "df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))" ] }, {