mirror of
https://github.com/gsi-upm/sitc
synced 2025-01-08 04:01:27 +00:00
81 lines
2.4 KiB
Python
81 lines
2.4 KiB
Python
from patsy import dmatrices
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
from sklearn import svm
|
|
|
|
# Taken from http://nbviewer.jupyter.org/github/agconti/kaggle-titanic/blob/master/Titanic.ipynb
|
|
|
|
def plot_svm(df):
|
|
# set plotting parameters
|
|
plt.figure(figsize=(8,6))
|
|
|
|
# # Create an acceptable formula for our machine learning algorithms
|
|
formula_ml = 'Survived ~ C(Pclass) + C(Sex) + Age + SibSp + Parch + C(Embarked)'
|
|
# create a regression friendly data frame
|
|
y, x = dmatrices(formula_ml, data=df, return_type='matrix')
|
|
|
|
# select which features we would like to analyze
|
|
# try chaning the selection here for diffrent output.
|
|
# Choose : [2,3] - pretty sweet DBs [3,1] --standard DBs [7,3] -very cool DBs,
|
|
# [3,6] -- very long complex dbs, could take over an hour to calculate!
|
|
feature_1 = 2
|
|
feature_2 = 3
|
|
|
|
X = np.asarray(x)
|
|
X = X[:,[feature_1, feature_2]]
|
|
|
|
|
|
y = np.asarray(y)
|
|
# needs to be 1 dimensional so we flatten. it comes out of dmatrices with a shape.
|
|
y = y.flatten()
|
|
|
|
n_sample = len(X)
|
|
|
|
np.random.seed(0)
|
|
order = np.random.permutation(n_sample)
|
|
|
|
X = X[order]
|
|
y = y[order].astype(np.float)
|
|
|
|
# do a cross validation
|
|
nighty_precent_of_sample = int(.9 * n_sample)
|
|
X_train = X[:nighty_precent_of_sample]
|
|
y_train = y[:nighty_precent_of_sample]
|
|
X_test = X[nighty_precent_of_sample:]
|
|
y_test = y[nighty_precent_of_sample:]
|
|
|
|
# create a list of the types of kerneks we will use for your analysis
|
|
types_of_kernels = ['linear', 'rbf', 'poly']
|
|
|
|
# specify our color map for plotting the results
|
|
color_map = plt.cm.RdBu_r
|
|
|
|
# fit the model
|
|
for fig_num, kernel in enumerate(types_of_kernels):
|
|
clf = svm.SVC(kernel=kernel, gamma=3)
|
|
clf.fit(X_train, y_train)
|
|
|
|
plt.figure(fig_num)
|
|
plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=color_map)
|
|
|
|
# circle out the test data
|
|
plt.scatter(X_test[:, 0], X_test[:, 1], s=80, facecolors='none', zorder=10)
|
|
|
|
plt.axis('tight')
|
|
x_min = X[:, 0].min()
|
|
x_max = X[:, 0].max()
|
|
y_min = X[:, 1].min()
|
|
y_max = X[:, 1].max()
|
|
|
|
XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
|
|
Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()])
|
|
|
|
# put the result into a color plot
|
|
Z = Z.reshape(XX.shape)
|
|
plt.pcolormesh(XX, YY, Z > 0, cmap=color_map)
|
|
plt.contour(XX, YY, Z, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'],
|
|
levels=[-.5, 0, .5])
|
|
|
|
plt.title(kernel)
|
|
plt.show()
|