mirror of
				https://github.com/gsi-upm/sitc
				synced 2025-11-04 01:18:16 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			579 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			579 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
{
 | 
						||
 "cells": [
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "849ad57e-6adb-4c2e-afd6-73db37eef572",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    ""
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "179cc802-9f1d-40b0-bf0c-9d4fb7ea1262",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Course Notes for Learning Intelligent Systems"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "9858d815-0390-4e77-a5ff-a8d2a1960981",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "Department of Telematic Systems Engineering, Universidad Politécnica de Madrid, © Carlos A. Iglesias"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "markdown",
 | 
						||
   "id": "238bab60-75f0-4d29-ab05-66afc463b506",
 | 
						||
   "metadata": {},
 | 
						||
   "source": [
 | 
						||
    "# Autoclean\n",
 | 
						||
    "[AutoClean](https://github.com/elisemercury/AutoClean) is a simple library for cleaning data.\n",
 | 
						||
    "AutoClean supports:\n",
 | 
						||
    "\n",
 | 
						||
    "* Handling of duplicates\n",
 | 
						||
    "* Various imputation methods for missing values\n",
 | 
						||
    "* Handling of outliers\n",
 | 
						||
    "* Encoding of categorical data (OneHot, Label)\n",
 | 
						||
    "* Extraction of datetime values\n",
 | 
						||
    "\n",
 | 
						||
    "Install the package with `pip install py-AutoClean`.\n",
 | 
						||
    "\n",
 | 
						||
    "Parameters:\n",
 | 
						||
    "\n",
 | 
						||
    "* **duplicates**\n",
 | 
						||
    "    *  default: False,\n",
 | 
						||
    "    *  other values: 'auto', True\n",
 | 
						||
    "* **missing_num**\n",
 | 
						||
    "    * default: False,\n",
 | 
						||
    "    * other values:\t'auto', 'linreg', 'knn', 'mean', 'median', 'most_frequent', 'delete', False\n",
 | 
						||
    "* **missing_categ**\n",
 | 
						||
    "    * default: False,\n",
 | 
						||
    "    * other values:\t'auto', 'logreg', 'knn', 'most_frequent', 'delete', False\n",
 | 
						||
    "* **encode_categ**\n",
 | 
						||
    "    * default: False,\n",
 | 
						||
    "    * other values:\t'auto', ['onehot'], ['label'], False ; to encode only specific columns add a list of column names or indexes: ['auto', ['col1', 2]]\n",
 | 
						||
    "* **extract_datetime**\n",
 | 
						||
    "    * default:\tFalse,\n",
 | 
						||
    "    * other values:\t'auto', 'D', 'M', 'Y', 'h', 'm', 's'\n",
 | 
						||
    "* **outliers**\n",
 | 
						||
    "    * default:\tFalse,\n",
 | 
						||
    "    * other values:\t'auto', 'winz', 'delete'\n",
 | 
						||
    "* **outlier_param**\n",
    "    * default: 1.5,\n",
    "    * other values: any int or float, False\n",
 | 
						||
    "* **logfile**\n",
 | 
						||
    "    * default: True,\n",
 | 
						||
    "    * other values:\tFalse\n",
 | 
						||
    "* **verbose**\n",
 | 
						||
    "    * default: False,\n",
 | 
						||
    "    * other values:\tTrue"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 29,
 | 
						||
   "id": "491b034b-994e-4f06-b4bc-df0590a62aab",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>PassengerId</th>\n",
 | 
						||
       "      <th>Survived</th>\n",
 | 
						||
       "      <th>Pclass</th>\n",
 | 
						||
       "      <th>Name</th>\n",
 | 
						||
       "      <th>Sex</th>\n",
 | 
						||
       "      <th>Age</th>\n",
 | 
						||
       "      <th>SibSp</th>\n",
 | 
						||
       "      <th>Parch</th>\n",
 | 
						||
       "      <th>Ticket</th>\n",
 | 
						||
       "      <th>Fare</th>\n",
 | 
						||
       "      <th>Cabin</th>\n",
 | 
						||
       "      <th>Embarked</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Braund, Mr. Owen Harris</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>22.0</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>A/5 21171</td>\n",
 | 
						||
       "      <td>7.2500</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>38.0</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>PC 17599</td>\n",
 | 
						||
       "      <td>71.2833</td>\n",
 | 
						||
       "      <td>C85</td>\n",
 | 
						||
       "      <td>C</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Heikkinen, Miss. Laina</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>26.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>STON/O2. 3101282</td>\n",
 | 
						||
       "      <td>7.9250</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>35.0</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>113803</td>\n",
 | 
						||
       "      <td>53.1000</td>\n",
 | 
						||
       "      <td>C123</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Allen, Mr. William Henry</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>35.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>373450</td>\n",
 | 
						||
       "      <td>8.0500</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>...</th>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "      <td>...</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>886</th>\n",
 | 
						||
       "      <td>887</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>Montvila, Rev. Juozas</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>27.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>211536</td>\n",
 | 
						||
       "      <td>13.0000</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>887</th>\n",
 | 
						||
       "      <td>888</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>Graham, Miss. Margaret Edith</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>19.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>112053</td>\n",
 | 
						||
       "      <td>30.0000</td>\n",
 | 
						||
       "      <td>B42</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>888</th>\n",
 | 
						||
       "      <td>889</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>W./C. 6607</td>\n",
 | 
						||
       "      <td>23.4500</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>889</th>\n",
 | 
						||
       "      <td>890</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>Behr, Mr. Karl Howell</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>26.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>111369</td>\n",
 | 
						||
       "      <td>30.0000</td>\n",
 | 
						||
       "      <td>C148</td>\n",
 | 
						||
       "      <td>C</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>890</th>\n",
 | 
						||
       "      <td>891</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Dooley, Mr. Patrick</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>32.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>370376</td>\n",
 | 
						||
       "      <td>7.7500</td>\n",
 | 
						||
       "      <td>NaN</td>\n",
 | 
						||
       "      <td>Q</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "<p>891 rows × 12 columns</p>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "     PassengerId  Survived  Pclass  \\\n",
 | 
						||
       "0              1         0       3   \n",
 | 
						||
       "1              2         1       1   \n",
 | 
						||
       "2              3         1       3   \n",
 | 
						||
       "3              4         1       1   \n",
 | 
						||
       "4              5         0       3   \n",
 | 
						||
       "..           ...       ...     ...   \n",
 | 
						||
       "886          887         0       2   \n",
 | 
						||
       "887          888         1       1   \n",
 | 
						||
       "888          889         0       3   \n",
 | 
						||
       "889          890         1       1   \n",
 | 
						||
       "890          891         0       3   \n",
 | 
						||
       "\n",
 | 
						||
       "                                                  Name     Sex   Age  SibSp  \\\n",
 | 
						||
       "0                              Braund, Mr. Owen Harris    male  22.0      1   \n",
 | 
						||
       "1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
 | 
						||
       "2                               Heikkinen, Miss. Laina  female  26.0      0   \n",
 | 
						||
       "3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
 | 
						||
       "4                             Allen, Mr. William Henry    male  35.0      0   \n",
 | 
						||
       "..                                                 ...     ...   ...    ...   \n",
 | 
						||
       "886                              Montvila, Rev. Juozas    male  27.0      0   \n",
 | 
						||
       "887                       Graham, Miss. Margaret Edith  female  19.0      0   \n",
 | 
						||
       "888           Johnston, Miss. Catherine Helen \"Carrie\"  female   NaN      1   \n",
 | 
						||
       "889                              Behr, Mr. Karl Howell    male  26.0      0   \n",
 | 
						||
       "890                                Dooley, Mr. Patrick    male  32.0      0   \n",
 | 
						||
       "\n",
 | 
						||
       "     Parch            Ticket     Fare Cabin Embarked  \n",
 | 
						||
       "0        0         A/5 21171   7.2500   NaN        S  \n",
 | 
						||
       "1        0          PC 17599  71.2833   C85        C  \n",
 | 
						||
       "2        0  STON/O2. 3101282   7.9250   NaN        S  \n",
 | 
						||
       "3        0            113803  53.1000  C123        S  \n",
 | 
						||
       "4        0            373450   8.0500   NaN        S  \n",
 | 
						||
       "..     ...               ...      ...   ...      ...  \n",
 | 
						||
       "886      0            211536  13.0000   NaN        S  \n",
 | 
						||
       "887      0            112053  30.0000   B42        S  \n",
 | 
						||
       "888      2        W./C. 6607  23.4500   NaN        S  \n",
 | 
						||
       "889      0            111369  30.0000  C148        C  \n",
 | 
						||
       "890      0            370376   7.7500   NaN        Q  \n",
 | 
						||
       "\n",
 | 
						||
       "[891 rows x 12 columns]"
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 29,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "import pandas as pd\n",
 | 
						||
    "import numpy as np\n",
 | 
						||
    "\n",
 | 
						||
    "from AutoClean import AutoClean\n",
 | 
						||
    "\n",
 | 
						||
    "df = pd.read_csv('https://raw.githubusercontent.com/gsi-upm/sitc/master/ml2/data-titanic/train.csv')\n",
 | 
						||
    "df"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 36,
 | 
						||
   "id": "d842eedf-3971-4966-a8b4-543bb56dd60d",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "name": "stdout",
 | 
						||
     "output_type": "stream",
 | 
						||
     "text": [
 | 
						||
      "AutoClean process completed in 0.289385 seconds\n",
 | 
						||
      "Logfile saved to: /home/cif/GoogleDrive/cursos/summer-school-romania/2019/notebooks/preprocessing/autoclean.log\n"
 | 
						||
     ]
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "autoclean = AutoClean(df, mode='auto')\n",
 | 
						||
    "\n",
 | 
						||
    "# Each preprocessing step can also be configured explicitly through keyword arguments:\n",
 | 
						||
    "#autoclean = AutoClean(df, mode='auto', duplicates=False, missing_num=False, missing_categ=False, encode_categ=False, extract_datetime=False, outliers=False, outlier_param=1.5, logfile=True, verbose=False)\n"
 | 
						||
   ]
 | 
						||
  },
 | 
						||
  {
 | 
						||
   "cell_type": "code",
 | 
						||
   "execution_count": 38,
 | 
						||
   "id": "4ede7c55-475a-4748-8cc4-788f46c88b26",
 | 
						||
   "metadata": {},
 | 
						||
   "outputs": [
 | 
						||
    {
 | 
						||
     "data": {
 | 
						||
      "text/html": [
 | 
						||
       "<div>\n",
 | 
						||
       "<style scoped>\n",
 | 
						||
       "    .dataframe tbody tr th:only-of-type {\n",
 | 
						||
       "        vertical-align: middle;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe tbody tr th {\n",
 | 
						||
       "        vertical-align: top;\n",
 | 
						||
       "    }\n",
 | 
						||
       "\n",
 | 
						||
       "    .dataframe thead th {\n",
 | 
						||
       "        text-align: right;\n",
 | 
						||
       "    }\n",
 | 
						||
       "</style>\n",
 | 
						||
       "<table border=\"1\" class=\"dataframe\">\n",
 | 
						||
       "  <thead>\n",
 | 
						||
       "    <tr style=\"text-align: right;\">\n",
 | 
						||
       "      <th></th>\n",
 | 
						||
       "      <th>PassengerId</th>\n",
 | 
						||
       "      <th>Survived</th>\n",
 | 
						||
       "      <th>Pclass</th>\n",
 | 
						||
       "      <th>Name</th>\n",
 | 
						||
       "      <th>Sex</th>\n",
 | 
						||
       "      <th>Age</th>\n",
 | 
						||
       "      <th>SibSp</th>\n",
 | 
						||
       "      <th>Parch</th>\n",
 | 
						||
       "      <th>Ticket</th>\n",
 | 
						||
       "      <th>Fare</th>\n",
 | 
						||
       "      <th>Cabin</th>\n",
 | 
						||
       "      <th>Embarked</th>\n",
 | 
						||
       "      <th>Sex_female</th>\n",
 | 
						||
       "      <th>Sex_male</th>\n",
 | 
						||
       "      <th>Embarked_C</th>\n",
 | 
						||
       "      <th>Embarked_Q</th>\n",
 | 
						||
       "      <th>Embarked_S</th>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </thead>\n",
 | 
						||
       "  <tbody>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>0</th>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Braund, Mr. Owen Harris</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>22.0</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>A/5 21171</td>\n",
 | 
						||
       "      <td>7.2500</td>\n",
 | 
						||
       "      <td>C128</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>1</th>\n",
 | 
						||
       "      <td>2</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>38.0</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>PC 17599</td>\n",
 | 
						||
       "      <td>65.6344</td>\n",
 | 
						||
       "      <td>C85</td>\n",
 | 
						||
       "      <td>C</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>2</th>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Heikkinen, Miss. Laina</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>26.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>STON/O2. 3101282</td>\n",
 | 
						||
       "      <td>7.9250</td>\n",
 | 
						||
       "      <td>C128</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>3</th>\n",
 | 
						||
       "      <td>4</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
 | 
						||
       "      <td>female</td>\n",
 | 
						||
       "      <td>35.0</td>\n",
 | 
						||
       "      <td>1</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>113803</td>\n",
 | 
						||
       "      <td>53.1000</td>\n",
 | 
						||
       "      <td>C123</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "    <tr>\n",
 | 
						||
       "      <th>4</th>\n",
 | 
						||
       "      <td>5</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>3</td>\n",
 | 
						||
       "      <td>Allen, Mr. William Henry</td>\n",
 | 
						||
       "      <td>male</td>\n",
 | 
						||
       "      <td>35.0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>0</td>\n",
 | 
						||
       "      <td>373450</td>\n",
 | 
						||
       "      <td>8.0500</td>\n",
 | 
						||
       "      <td>C128</td>\n",
 | 
						||
       "      <td>S</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>False</td>\n",
 | 
						||
       "      <td>True</td>\n",
 | 
						||
       "    </tr>\n",
 | 
						||
       "  </tbody>\n",
 | 
						||
       "</table>\n",
 | 
						||
       "</div>"
 | 
						||
      ],
 | 
						||
      "text/plain": [
 | 
						||
       "   PassengerId  Survived  Pclass  \\\n",
 | 
						||
       "0            1         0       3   \n",
 | 
						||
       "1            2         1       1   \n",
 | 
						||
       "2            3         1       3   \n",
 | 
						||
       "3            4         1       1   \n",
 | 
						||
       "4            5         0       3   \n",
 | 
						||
       "\n",
 | 
						||
       "                                                Name     Sex   Age  SibSp  \\\n",
 | 
						||
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
 | 
						||
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
 | 
						||
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
 | 
						||
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
 | 
						||
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
 | 
						||
       "\n",
 | 
						||
       "   Parch            Ticket     Fare Cabin Embarked  Sex_female  Sex_male  \\\n",
 | 
						||
       "0      0         A/5 21171   7.2500  C128        S       False      True   \n",
 | 
						||
       "1      0          PC 17599  65.6344   C85        C        True     False   \n",
 | 
						||
       "2      0  STON/O2. 3101282   7.9250  C128        S        True     False   \n",
 | 
						||
       "3      0            113803  53.1000  C123        S        True     False   \n",
 | 
						||
       "4      0            373450   8.0500  C128        S       False      True   \n",
 | 
						||
       "\n",
 | 
						||
       "   Embarked_C  Embarked_Q  Embarked_S  \n",
 | 
						||
       "0       False       False        True  \n",
 | 
						||
       "1        True       False       False  \n",
 | 
						||
       "2       False       False        True  \n",
 | 
						||
       "3       False       False        True  \n",
 | 
						||
       "4       False       False        True  "
 | 
						||
      ]
 | 
						||
     },
 | 
						||
     "execution_count": 38,
 | 
						||
     "metadata": {},
 | 
						||
     "output_type": "execute_result"
 | 
						||
    }
 | 
						||
   ],
 | 
						||
   "source": [
 | 
						||
    "df_clean = autoclean.output\n",
 | 
						||
    "df_clean[0:5]"
 | 
						||
   ]
 | 
						||
  }
 | 
						||
 ],
 | 
						||
 "metadata": {
 | 
						||
  "kernelspec": {
 | 
						||
   "display_name": "Python 3 (ipykernel)",
 | 
						||
   "language": "python",
 | 
						||
   "name": "python3"
 | 
						||
  },
 | 
						||
  "language_info": {
 | 
						||
   "codemirror_mode": {
 | 
						||
    "name": "ipython",
 | 
						||
    "version": 3
 | 
						||
   },
 | 
						||
   "file_extension": ".py",
 | 
						||
   "mimetype": "text/x-python",
 | 
						||
   "name": "python",
 | 
						||
   "nbconvert_exporter": "python",
 | 
						||
   "pygments_lexer": "ipython3",
 | 
						||
   "version": "3.11.7"
 | 
						||
  }
 | 
						||
 },
 | 
						||
 "nbformat": 4,
 | 
						||
 "nbformat_minor": 5
 | 
						||
}
 |