mirror of
https://github.com/gsi-upm/sitc
synced 2026-03-02 01:38:17 +00:00
Compare commits
4 Commits
470a3d692d
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5d01f26e72 | ||
|
|
8e177963af | ||
|
|
ecd53ceee5 | ||
|
|
99f8032d05 |
@@ -330,7 +330,7 @@
|
|||||||
"# Saving the resulting axes as ax each time causes the resulting plot to be shown\n",
|
"# Saving the resulting axes as ax each time causes the resulting plot to be shown\n",
|
||||||
"# on top of the previous axes\n",
|
"# on top of the previous axes\n",
|
||||||
"ax = sns.boxplot(x=\"species\", y=\"petal length (cm)\", data=iris_df)\n",
|
"ax = sns.boxplot(x=\"species\", y=\"petal length (cm)\", data=iris_df)\n",
|
||||||
"ax = sns.stripplot(x=\"species\", y=\"petal length (cm)\", data=iris_df, jitter=True, edgecolor=\"gray\")"
|
"ax = sns.stripplot(x=\"species\", y=\"petal length (cm)\", data=iris_df, jitter=True, edgecolor=\"auto\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -348,7 +348,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# A violin plot combines the benefits of the previous two plots and simplifies them\n",
|
"# A violin plot combines the benefits of the previous two plots and simplifies them\n",
|
||||||
"# Denser regions of the data are fatter, and sparser thinner in a violin plot\n",
|
"# Denser regions of the data are fatter, and sparser thinner in a violin plot\n",
|
||||||
"sns.violinplot(x=\"species\", y=\"petal length (cm)\", data=iris_df, size=6)"
|
"sns.violinplot(x=\"species\", y=\"petal length (cm)\", data=iris_df)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -72,7 +72,7 @@
|
|||||||
"Machine learning algorithms are programs that learn a model from a dataset to make predictions or learn structures to organize the data.\n",
|
"Machine learning algorithms are programs that learn a model from a dataset to make predictions or learn structures to organize the data.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In scikit-learn, machine learning algorithms take as input a *numpy* array (n_samples, n_features), where\n",
|
"In scikit-learn, machine learning algorithms take as input a *numpy* array (n_samples, n_features), where\n",
|
||||||
"* **n_samples**: number of samples. Each sample is an item to process (i.e., classify). A sample can be a document, a picture, a sound, a video, a row in a database or CSV file, or whatever you can describe with a fixed set of quantitative traits.\n",
|
"* **n_samples**: number of samples. Each sample is an item to be processed (i.e., classified). A sample can be a document, a picture, a sound, a video, a row in a database or CSV file, or whatever you can describe with a fixed set of quantitative traits.\n",
|
||||||
"* **n_features**: The number of features or distinct traits that can be used to describe each item quantitatively.\n",
|
"* **n_features**: The number of features or distinct traits that can be used to describe each item quantitatively.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The number of features should be defined in advance. A specific type of feature set is high-dimensional (e.g., millions of features), but most values are zero for a given sample. Using (numpy) arrays, all those zero values would also take up memory. For this reason, these feature sets are often represented with sparse matrices (scipy.sparse) instead of (numpy) arrays.\n",
|
"The number of features should be defined in advance. A specific type of feature set is high-dimensional (e.g., millions of features), but most values are zero for a given sample. Using (numpy) arrays, all those zero values would also take up memory. For this reason, these feature sets are often represented with sparse matrices (scipy.sparse) instead of (numpy) arrays.\n",
|
||||||
@@ -112,7 +112,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In *unsupervised machine learning models*, the machine learning algorithm takes the feature vectors as input. It produces a model whose parameters are fitted to summarize the main regularities found in the data.\n",
|
"In *unsupervised machine learning models*, the machine learning algorithm takes the feature vectors as input. It produces a model whose parameters are fitted to summarize the main regularities found in the data.\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -140,7 +140,7 @@
|
|||||||
" * **model.fit_transform()**: Some estimators implement this method, which performs a fit and a transform on the same input data.\n",
|
" * **model.fit_transform()**: Some estimators implement this method, which performs a fit and a transform on the same input data.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -53,10 +53,10 @@ import matplotlib.pyplot as plt
|
|||||||
|
|
||||||
from sklearn.datasets import load_iris
|
from sklearn.datasets import load_iris
|
||||||
from sklearn.tree import DecisionTreeClassifier
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
from sklearn.inspection import DecisionBoundaryDisplay
|
||||||
|
|
||||||
def plot_tree_iris():
|
def plot_tree_iris():
|
||||||
"""
|
"""
|
||||||
|
|
||||||
Taken from http://scikit-learn.org/stable/auto_examples/tree/plot_iris.html
|
Taken from http://scikit-learn.org/stable/auto_examples/tree/plot_iris.html
|
||||||
"""
|
"""
|
||||||
# Parameters
|
# Parameters
|
||||||
@@ -67,11 +67,11 @@ def plot_tree_iris():
|
|||||||
# Load data
|
# Load data
|
||||||
iris = load_iris()
|
iris = load_iris()
|
||||||
|
|
||||||
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
|
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
|
||||||
[1, 2], [1, 3], [2, 3]]):
|
|
||||||
# We only take the two corresponding features
|
# We only take the two corresponding features
|
||||||
X = iris.data[:, pair]
|
X = iris.data[:, pair]
|
||||||
y = iris.target
|
y = iris.target
|
||||||
|
'''
|
||||||
|
|
||||||
# Shuffle
|
# Shuffle
|
||||||
idx = np.arange(X.shape[0])
|
idx = np.arange(X.shape[0])
|
||||||
@@ -84,34 +84,38 @@ def plot_tree_iris():
|
|||||||
mean = X.mean(axis=0)
|
mean = X.mean(axis=0)
|
||||||
std = X.std(axis=0)
|
std = X.std(axis=0)
|
||||||
X = (X - mean) / std
|
X = (X - mean) / std
|
||||||
|
'''
|
||||||
# Train
|
# Train
|
||||||
model = DecisionTreeClassifier(max_depth=3, random_state=1).fit(X, y)
|
clf = DecisionTreeClassifier(max_depth=3, random_state=1).fit(X, y)
|
||||||
|
|
||||||
# Plot the decision boundary
|
# Plot the decision boundary
|
||||||
plt.subplot(2, 3, pairidx + 1)
|
# Taken from https://scikit-learn.org/stable/auto_examples/tree/plot_iris_dtc.html
|
||||||
|
# Plot the decision boundary
|
||||||
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
|
ax = plt.subplot(2, 3, pairidx + 1)
|
||||||
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
|
plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
|
||||||
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
|
DecisionBoundaryDisplay.from_estimator(
|
||||||
np.arange(y_min, y_max, plot_step))
|
clf,
|
||||||
|
X,
|
||||||
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
|
cmap=plt.cm.RdYlBu,
|
||||||
Z = Z.reshape(xx.shape)
|
response_method="predict",
|
||||||
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
|
ax=ax,
|
||||||
|
xlabel=iris.feature_names[pair[0]],
|
||||||
plt.xlabel(iris.feature_names[pair[0]])
|
ylabel=iris.feature_names[pair[1]],
|
||||||
plt.ylabel(iris.feature_names[pair[1]])
|
)
|
||||||
plt.axis("tight")
|
|
||||||
|
|
||||||
# Plot the training points
|
# Plot the training points
|
||||||
for i, color in zip(range(n_classes), plot_colors):
|
for i, color in zip(range(n_classes), plot_colors):
|
||||||
idx = np.where(y == i)
|
idx = np.asarray(y == i).nonzero()
|
||||||
plt.scatter(X[idx, 0], X[idx, 1], c=color, label=iris.target_names[i],
|
plt.scatter(
|
||||||
cmap=plt.cm.Paired)
|
X[idx, 0],
|
||||||
|
X[idx, 1],
|
||||||
plt.axis("tight")
|
c=color,
|
||||||
|
label=iris.target_names[i],
|
||||||
|
edgecolor="black",
|
||||||
|
s=15
|
||||||
|
)
|
||||||
|
plt.axis("tight")
|
||||||
plt.suptitle("Decision surface of a decision tree using paired features")
|
plt.suptitle("Decision surface of a decision tree using paired features")
|
||||||
plt.legend()
|
#plt.legend()
|
||||||
|
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|||||||
Reference in New Issue
Block a user