# minte9 LearnRemember

### Euclidean Distance

KNN uses Euclidean distance between two points.

""" K-Nearest Neighbors Classifier / Algorithm

The algorithm works by finding the k nearest neighbors to the input data point,
and then assigning the output label based on the majority
vote of the k-nearest neighbors.

In classical algorithms, the f(x) is provided by the programmer.
In ML, the computer uses data to learn the best f(x) fit.

2. Select a value for k
3. Calculate distances to the new point
4. Select the k-nearest points
5. Get the most common class
6. Assign the new point to that class
"""

import numpy as np
import matplotlib.pyplot as plt

# Train dataset
X = np.array([
[2, 2], [2, 2.5], [2.5, 2.5], [2.5, 2], [2.25, 2.25],
[3, 3], [3, 3.5], [3.5, 3.5], [3.5, 3], [3.25, 3.25],
[4, 4], [4, 4.5], [4.5, 4.5], [4.5, 4], [4.25, 4.25],
])
y = np.array([
1, 1, 1, 1, 1,
2, 2, 2, 2, 2,
3, 3, 3, 3, 3,
])

k_nearest = 3
x_unknown = np.array([3.6, 1.8])

# --------------------------------------------------------------

# Square distances matrix
SD = np.sqrt(np.sum((X - x_unknown)**2, axis=1)) # axis=1 means rows of X
keys = np.argsort(SD)

# Neighbors target matrix
keys_knn = keys[:k_nearest]
targets_knn = y[keys_knn]

# Optim target
most_common = np.bincount(targets_knn) # by number of occurrences
knn_class = most_common.argmax() # max value in array

# --------------------------------------------------------------

# Plot the point and lines to th k neighbors
fig, ax = plt.subplots()
ax.set_xlabel('x1')
ax.set_ylabel('x2')

z = x_unknown
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.scatter(z[0], z[1], marker='x', color='r', label='Class =%s' %knn_class)

for i in keys_knn:
plt.plot((z[0], X[i][0]), (z[1], X[i][1]), color='gray', linestyle='--')

plt.title('K-nearest Neigbors')
plt.xlim(0, 6)
plt.ylim(0, 6)
plt.legend()
plt.show()

print("Square distances:", SD)
print("Keys ordered by distances:", keys)
print("Nearest neighbors keys:", keys_knn)
print("Nearest neighbors targets:", targets_knn)
print("Algorithm target response:", knn_class)
print("Class prediction for", x_unknown, "=", knn_class)

"""
Square distances:           [1.61245155 1.74642492 1.30384048 ...]
Keys ordered by distances:  [ 3  8  2  5  4  9  0  7 ...]
Nearest neighbors keys:     [3 8 2]
Nearest neighbors targets:  [1 2 1]
Algorithm target response:  1
Class prediction:           [3.6, 1.8] = 1
"""

$$d(x,y) = \sqrt{\sum_{i=1}^{n} (x_i - y_i)^2}$$

### KNN Class

Encapsulate the algorithm into a class that classify items.

""" K-Nearest Neighbors Classifier / Usage Example

The algorithm works by finding the k nearest neighbors to the input data point,
and then assigning the output label based on the majority
vote of the k-nearest neighbors.
"""

import numpy as np
import matplotlib.pyplot as plt

# Train datasets
X = [
[2, 2], [2, 2.5], [2.5, 2.5], [2.5, 2], [2.25, 2.25],
[3, 3], [3, 3.5], [3.5, 3.5], [3.5, 3], [3.25, 3.25],
[4, 4], [4, 4.5], [4.5, 4.5], [4.5, 4], [4.25, 4.25],
]
y = [
1, 1, 1, 1, 1,
2, 2, 2, 2, 2,
3, 3, 3, 3, 3,
]

class KNeighborsClassifier:

def __init__(self, n_neighbors):
self.k = n_neighbors

def fit(self, X_train, y_train):
self.X = np.array(X_train)
self.y = np.array(y_train)

def predict(self, x_unknown):
z = np.array(x_unknown)

# Square distances
SD = np.sqrt(np.sum((self.X - z)**2, axis=1))
keys = np.argsort(SD)

# Neighbors targets
keys_knn = keys[:self.k]
targets_knn = self.y[keys_knn]

# Optim target
most_common = np.bincount(targets_knn)
result = most_common.argmax()

return result

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X, y)

x_unknown = [3.6, 1.8]
knn_class = knn.predict(x_unknown)
print("Prediction for", x_unknown, "= class", knn_class)

"""
Prediction for [3.6, 1.8] = class 1
"""