1
0
mirror of https://github.com/gsi-upm/sitc synced 2024-11-21 22:12:30 +00:00

Arreglado error en Feature Dec

This commit is contained in:
cif2cif 2017-03-23 20:47:47 +01:00
parent 9a1ac84072
commit eb43e50ae7

View File

@ -81,147 +81,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"metadata": { "metadata": {
"collapsed": false "collapsed": false
}, },
"outputs": [ "outputs": [],
{ "source": []
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url=\"https://raw.githubusercontent.com/gsi-upm/sitc/master/ml2/data-titanic/train.csv\"\n",
"df = pd.read_csv(url)\n",
"df_original = df.copy() # Copy to have a version of df without modifications\n",
"df.head()"
]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -572,83 +437,11 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 8,
"metadata": { "metadata": {
"collapsed": false "collapsed": false
}, },
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"0 X\n",
"1 A\n",
"2 X\n",
"3 A\n",
"4 X\n",
"5 X\n",
"6 A\n",
"7 X\n",
"8 X\n",
"9 X\n",
"10 A\n",
"11 A\n",
"12 X\n",
"13 X\n",
"14 X\n",
"15 X\n",
"16 X\n",
"17 X\n",
"18 X\n",
"19 X\n",
"20 X\n",
"21 A\n",
"22 X\n",
"23 A\n",
"24 X\n",
"25 X\n",
"26 X\n",
"27 A\n",
"28 X\n",
"29 X\n",
" ..\n",
"861 X\n",
"862 A\n",
"863 X\n",
"864 X\n",
"865 X\n",
"866 X\n",
"867 A\n",
"868 X\n",
"869 X\n",
"870 X\n",
"871 A\n",
"872 A\n",
"873 X\n",
"874 X\n",
"875 X\n",
"876 X\n",
"877 X\n",
"878 X\n",
"879 A\n",
"880 X\n",
"881 X\n",
"882 X\n",
"883 X\n",
"884 X\n",
"885 X\n",
"886 X\n",
"887 A\n",
"888 X\n",
"889 A\n",
"890 X\n",
"Name: Deck, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def substrings_in_string(big_string, substrings):\n", "def substrings_in_string(big_string, substrings):\n",
" if type(big_string) == float:\n", " if type(big_string) == float:\n",
@ -662,8 +455,7 @@
" \n", " \n",
"#Turning cabin number into Deck\n", "#Turning cabin number into Deck\n",
"cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n", "cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n",
"df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))\n", "df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))"
"df['Deck']\n"
] ]
}, },
{ {