1
0
mirror of https://github.com/gsi-upm/sitc synced 2024-11-22 06:22:29 +00:00

Arreglado error en Feature Deck

This commit is contained in:
cif2cif 2017-03-23 20:46:02 +01:00
parent 01d8776295
commit 9a1ac84072

View File

@ -46,7 +46,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"collapsed": true
},
@ -81,12 +81,147 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"collapsed": true
"collapsed": false
},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url=\"https://raw.githubusercontent.com/gsi-upm/sitc/master/ml2/data-titanic/train.csv\"\n",
"df = pd.read_csv(url)\n",
"df_original = df.copy() # Copy to have a version of df without modifications\n",
"df.head()"
]
},
{
"cell_type": "markdown",
@ -437,21 +572,98 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"0 X\n",
"1 A\n",
"2 X\n",
"3 A\n",
"4 X\n",
"5 X\n",
"6 A\n",
"7 X\n",
"8 X\n",
"9 X\n",
"10 A\n",
"11 A\n",
"12 X\n",
"13 X\n",
"14 X\n",
"15 X\n",
"16 X\n",
"17 X\n",
"18 X\n",
"19 X\n",
"20 X\n",
"21 A\n",
"22 X\n",
"23 A\n",
"24 X\n",
"25 X\n",
"26 X\n",
"27 A\n",
"28 X\n",
"29 X\n",
" ..\n",
"861 X\n",
"862 A\n",
"863 X\n",
"864 X\n",
"865 X\n",
"866 X\n",
"867 A\n",
"868 X\n",
"869 X\n",
"870 X\n",
"871 A\n",
"872 A\n",
"873 X\n",
"874 X\n",
"875 X\n",
"876 X\n",
"877 X\n",
"878 X\n",
"879 A\n",
"880 X\n",
"881 X\n",
"882 X\n",
"883 X\n",
"884 X\n",
"885 X\n",
"886 X\n",
"887 A\n",
"888 X\n",
"889 A\n",
"890 X\n",
"Name: Deck, dtype: object"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def substrings_in_string(big_string, substrings):\n",
" if type(big_string) == float:\n",
" if np.isnan(big_string):\n",
" return 'X'\n",
" for substring in substrings:\n",
" if big_string.find(substring) != 1:\n",
" return substring\n",
" return substring\n",
" print(big_string)\n",
" return np.nan\n",
" return 'X'\n",
" \n",
"#Turning cabin number into Deck\n",
"cabin_list = ['A', 'B', 'C', 'D', 'E', 'F', 'T', 'G', 'Unknown']\n",
"df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))"
"df['Deck']=df['Cabin'].map(lambda x: substrings_in_string(x, cabin_list))\n",
"df['Deck']\n"
]
},
{
@ -537,7 +749,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1+"
"version": "3.5.2"
}
},
"nbformat": 4,