Compare commits

...

6 Commits

File diff suppressed because one or more lines are too long

@ -74,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@ -124,25 +124,9 @@
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=3,\n",
" max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, presort=False, random_state=1,\n",
" splitter='best')"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier\n",
"import numpy as np\n",
@ -161,24 +145,9 @@
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Prediction [1 0 1 1 1 0 0 1 0 2 0 0 1 2 0 1 2 2 1 1 0 0 2 0 0 2 1 1 2 2 2 2 0 0 1 1 0\n",
" 1 2 1 2 0 2 0 1 0 2 1 0 2 2 0 0 2 0 0 0 2 2 0 1 0 1 0 1 1 1 1 1 0 1 0 1 2\n",
" 0 0 0 0 2 2 0 1 1 2 1 0 0 2 1 1 0 1 1 0 2 1 2 1 2 0 1 0 0 0 2 1 2 1 2 1 2\n",
" 0]\n",
"Expected [1 0 1 1 1 0 0 1 0 2 0 0 1 2 0 1 2 2 1 1 0 0 2 0 0 2 1 1 2 2 2 2 0 0 1 1 0\n",
" 1 2 1 2 0 2 0 1 0 2 1 0 2 2 0 0 2 0 0 0 2 2 0 1 0 1 0 1 1 1 1 1 0 1 0 1 2\n",
" 0 0 0 0 2 2 0 1 1 2 1 0 0 1 1 1 0 1 1 0 2 2 2 1 2 0 1 0 0 0 2 1 2 1 2 1 2\n",
" 0]\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Prediction \", model.predict(x_train))\n",
"print(\"Expected \", y_train)"
@ -193,26 +162,9 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Predicted probabilities [[0. 0.97368421 0.02631579]\n",
" [1. 0. 0. ]\n",
" [0. 0.97368421 0.02631579]\n",
" [0. 0.97368421 0.02631579]\n",
" [0. 0.97368421 0.02631579]\n",
" [1. 0. 0. ]\n",
" [1. 0. 0. ]\n",
" [0. 0.97368421 0.02631579]\n",
" [1. 0. 0. ]\n",
" [0. 0. 1. ]]\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print the \n",
"print(\"Predicted probabilities\", model.predict_proba(x_train[:10]))"
@ -220,17 +172,9 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy in training 0.9821428571428571\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Evaluate Accuracy in training\n",
"\n",
@ -241,17 +185,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy in testing 0.9210526315789473\n"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Now we evaluate error in testing\n",
"y_test_pred = model.predict(x_test)\n",
@ -273,24 +209,12 @@
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'pydotplus'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-1bf5ec7fb043>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdisplay\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexternals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msix\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mStringIO\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpydotplus\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpydot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mdot_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mStringIO\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pydotplus'"
]
}
],
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import Image \n",
"from sklearn.externals.six import StringIO\n",
"from six import StringIO\n",
"import pydotplus as pydot\n",
"\n",
"dot_data = StringIO() \n",
@ -529,6 +453,15 @@
}
],
"metadata": {
"datacleaner": {
"position": {
"top": "50px"
},
"python": {
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
},
"window_display": false
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
@ -544,7 +477,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
"version": "3.7.9"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,

@ -117,7 +117,7 @@
"outputs": [],
"source": [
"# save model\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"joblib.dump(model, 'filename.pkl') \n",
"\n",
"#load model\n",
@ -151,6 +151,15 @@
}
],
"metadata": {
"datacleaner": {
"position": {
"top": "50px"
},
"python": {
"varRefreshCmd": "try:\n print(_datacleaner.dataframe_metadata())\nexcept:\n print([])"
},
"window_display": false
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
@ -166,7 +175,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.7.9"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,

@ -2,6 +2,7 @@ import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
# Taken from http://scikit-learn.org/stable/auto_examples/neighbors/plot_classification.html
@ -19,9 +20,9 @@ def plot_classification_iris():
h = .02 # step size in the mesh
n_neighbors = 15
# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
# Create color maps
cmap_light = ListedColormap(['orange', 'cyan', 'cornflowerblue'])
cmap_bold = ['darkorange', 'c', 'darkblue']
for weights in ['uniform', 'distance']:
# we create an instance of Neighbours Classifier and fit the data.
@ -29,7 +30,7 @@ def plot_classification_iris():
clf.fit(X, y)
# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, m_max]x[y_min, y_max].
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
@ -38,14 +39,17 @@ def plot_classification_iris():
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
plt.figure(figsize=(8, 6))
plt.contourf(xx, yy, Z, cmap=cmap_light)
# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=iris.target_names[y],
palette=cmap_bold, alpha=1.0, edgecolor="black")
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title("3-Class classification (k = %i, weights = '%s')"
% (n_neighbors, weights))
% (n_neighbors, weights))
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.show()
plt.show()
Loading…
Cancel
Save