Arreglado error en Feature Dec

pull/1/head
cif2cif 7 years ago
parent 9a1ac84072
commit eb43e50ae7

@ -81,147 +81,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url=\"https://raw.githubusercontent.com/gsi-upm/sitc/master/ml2/data-titanic/train.csv\"\n",
"df = pd.read_csv(url)\n",
"df_original = df.copy() # Copy to have a version of df without modifications\n",
"df.head()"
]
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
@ -572,83 +437,11 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 X\n",
"1 A\n",
"2 X\n",
"3 A\n",
"4 X\n",
"5 X\n",
"6 A\n",
"7 X\n",
"8 X\n",
"9 X\n",
"10 A\n",
"11 A\n",
"12 X\n",
"13 X\n",
"14 X\n",
"15 X\n",
"16 X\n",
"17 X\n",
"18 X\n",
"19 X\n",
"20 X\n",
"21 A\n",
"22 X\n",
"23 A\n",
"24 X\n",
"25 X\n",
"26 X\n",
"27 A\n",
"28 X\n",
"29 X\n",
" ..\n",
"861 X\n",
"862 A\n",
"863 X\n",
"864 X\n",
"865 X\n",
"866 X\n",
"867 A\n",
"868 X\n",
"869 X\n",
"870 X\n",
"871 A\n",
"872 A\n",
"873 X\n",
"874 X\n",
"875 X\n",
"876 X\n",
"877 X\n",
"878 X\n",
"879 A\n",
"880 X\n",
"881 X\n",
"882 X\n",
"883 X\n",
"884 X\n",
"885 X\n",
"886 X\n",
"887 A\n",
"888 X\n",
"889 A\n",
"890 X\n",
"Name: Deck, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"def substrings_in_string(big_string, substrings):\n",
" if type(big_string) == float:\n",
@ -662,8 +455,7 @@
" \n",
"#Turning cabin number into Deck\n",
"cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n",
"df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))\n",
"df['Deck']\n"
"df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))"
]
},
{

Loading…
Cancel
Save