{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "![](images/EscUpmPolit_p.gif \"UPM\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Course Notes for Learning Intelligent Systems" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Department of Telematic Engineering Systems, Universidad Politécnica de Madrid, © 2016 Carlos A. Iglesias" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## [Introduction to Machine Learning II](3_0_0_Intro_ML_2.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Introduction SVM " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In this notebook we are going to train a classifier with the preprocessed Titanic dataset. \n", "\n", "We are going to use the dataset we obtained in the [pandas munging notebook](3_3_Data_Munging_with_Pandas.ipynb) for simplicity. You can try some of the techniques learnt in the previous notebook." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load and clean" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# General import and load data\n", "import pandas as pd\n", "import numpy as np\n", "\n", "from pandas import Series, DataFrame\n", "\n", "# Training and test splitting\n", "# sklearn.cross_validation and sklearn.grid_search were removed in scikit-learn 0.20;\n", "# their contents moved to sklearn.model_selection. The legacy module is tried first so\n", "# that environments where this notebook already runs keep exactly the same objects.\n", "# NOTE: model_selection.KFold / StratifiedKFold take n_splits and receive the data in\n", "# split(), so any cell that builds them with the legacy signature needs updating on\n", "# scikit-learn >= 0.20.\n", "try:\n", "    from sklearn.cross_validation import train_test_split\n", "except ImportError:\n", "    from sklearn.model_selection import train_test_split\n", "from sklearn import preprocessing\n", "\n", "# Estimators\n", "from sklearn.svm import SVC\n", "\n", "# Evaluation\n", "from sklearn import metrics\n", "try:\n", "    from sklearn.cross_validation import cross_val_score, KFold, StratifiedKFold\n", "except ImportError:\n", "    from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold\n", "from sklearn.metrics import classification_report\n", "from sklearn.metrics import roc_curve\n", "from sklearn.metrics import roc_auc_score\n", "\n", "# Optimization\n", "try:\n", "    from sklearn.grid_search import GridSearchCV\n", "except ImportError:\n", "    from sklearn.model_selection import GridSearchCV\n", "\n", "# Visualisation\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "sns.set(color_codes=True)\n", "\n", "\n", "# if matplotlib is not set 
inline, you will not see plots\n", "#alternatives auto gtk gtk2 inline osx qt qt5 wx tk\n", "#%matplotlib auto\n", "#%matplotlib qt\n", "%matplotlib inline\n", "%run plot_learning_curve" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | PassengerId | \n", "Survived | \n", "Pclass | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Fare | \n", "Embarked | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "0 | \n", "3 | \n", "0 | \n", "22.0 | \n", "1 | \n", "0 | \n", "7.2500 | \n", "0 | \n", "
1 | \n", "2 | \n", "1 | \n", "1 | \n", "1 | \n", "38.0 | \n", "1 | \n", "0 | \n", "71.2833 | \n", "1 | \n", "
2 | \n", "3 | \n", "1 | \n", "3 | \n", "1 | \n", "26.0 | \n", "0 | \n", "0 | \n", "7.9250 | \n", "0 | \n", "
3 | \n", "4 | \n", "1 | \n", "1 | \n", "1 | \n", "35.0 | \n", "1 | \n", "0 | \n", "53.1000 | \n", "0 | \n", "
4 | \n", "5 | \n", "0 | \n", "3 | \n", "0 | \n", "35.0 | \n", "0 | \n", "0 | \n", "8.0500 | \n", "0 | \n", "