Dataset
Predict the species of a
new iris based on its sepal and petal length and width.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
# Load the iris dataset and split it with a fixed seed so that every run
# produces the same partition.
dataset = load_iris()
X1, X2, y1, y2 = train_test_split(
    dataset['data'],
    dataset['target'],
    random_state=0,
)

# Pairwise scatter plots of the X1 features, with points coloured by y1.
df = pd.DataFrame(X1, columns=dataset.feature_names)
scatter_options = dict(
    c=y1,
    figsize=(15, 15),
    marker='o',
    s=60,
    alpha=.8,
    diagonal='none',
)
pd.plotting.scatter_matrix(df, **scatter_options)
plt.suptitle('Iris features matrix')
plt.show()

# Textual summary of the dataset and of the split, one "label value" line
# per entry, printed in the order listed here.
summary = (
    ('Description: \n ', dataset['DESCR'][:193]),
    ('Keys: ', dataset.keys()),
    ('Target names: ', dataset['target_names']),
    ('Feature_names: ', dataset['feature_names']),
    ('Shape: ', dataset['data'].shape),
    ('Data[:2]:\n ', dataset['data'][:2]),
    ('Target[:2]: ', dataset['target'][:2]),
    ('Target[148:]: ', dataset['target'][148:]),
    ('X1 shape: ', X1.shape),
    ('X2 shape: ', X2.shape),
    ('y1 shape: ', y1.shape),
    ('y2 shape: ', y2.shape),
)
for label, value in summary:
    print(label, value)
Prediction
Build the actual ML model with the
KNeighborsClassifier algorithm.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import matplotlib.pyplot as plt
# Load the iris dataset and split it with a fixed seed so that every run
# produces the same partition.
dataset = load_iris()
X1, X2, y1, y2 = train_test_split(
    dataset['data'], dataset['target'], random_state=0
)

# Fit a 1-nearest-neighbour classifier on the X1/y1 portion of the split.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X1, y1)

# A single new sample, shaped as one row of four feature values so that it
# can be passed to predict().
X_new = np.array([5, 2.9, 1, 0.2]).reshape(1, 4)
y_new = knn.predict(X_new)

df = pd.DataFrame(X1, columns=dataset.feature_names)

# One figure per flower part: the X1 samples coloured by y1, with the new
# sample overlaid as a red cross. The new sample's coordinates are looked up
# through the same column names used for the training data, so the two
# scatters cannot drift out of sync.
panels = (
    ("Petals", 'petal length (cm)', 'petal width (cm)'),
    ("Sepals", 'sepal length (cm)', 'sepal width (cm)'),
)
for title, length_col, width_col in panels:
    fig, ax = plt.subplots()
    ax.set_title(title)
    ax.set_xlabel('length (cm)')
    ax.set_ylabel('width (cm)')
    ax.scatter(df[length_col], df[width_col], c=y1)
    ax.scatter(
        X_new[0][df.columns.get_loc(length_col)],
        X_new[0][df.columns.get_loc(width_col)],
        c='r', marker='x', s=100,
    )
    ax.grid()

# Scatter matrix of all feature pairs, then the new sample marked on every
# off-diagonal panel (feature j on the x axis, feature i on the y axis).
axes = pd.plotting.scatter_matrix(
    df, c=y1, figsize=(15, 15), marker='o',
    s=60, alpha=.8, diagonal='none'
)
n_features = X_new.shape[1]
for i in range(n_features):
    for j in range(n_features):
        if i == j:
            # Diagonal panels are skipped: no marker is drawn there.
            continue
        ax = axes[i, j]
        ax.scatter(X_new[:, j], X_new[:, i], c='r', marker='x', s=200)
plt.show()

# Report the predicted class index and the matching species name.
print("Prediction class:", y_new)
print("Predicted target:", dataset['target_names'][y_new])
Evaluation
The
test dataset is useful when evaluating the model.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
# Load the iris dataset and split it with a fixed seed so that every run
# produces the same partition.
dataset = load_iris()
X1, X2, y1, y2 = train_test_split(
    dataset['data'],
    dataset['target'],
    random_state=0,
)

# Fit a 1-nearest-neighbour classifier on X1/y1, then predict labels for X2.
knn = KNeighborsClassifier(n_neighbors=1).fit(X1, y1)
y_new = knn.predict(X2)

# Accuracy computed two ways: by hand, as the fraction of predictions that
# equal y2, and through the classifier's own score() method.
score1 = (y_new == y2).mean()
score2 = knn.score(X2, y2)

print("Score using mean():", round(score1, 2))
print("Score using knn object:", round(score2, 2))