# minte9 LearnRemember

### Linear Regression Lines

Explore linear regression models with different slopes (parameter 'a'). Let's pretend that the intercept is already known: b = -18.
$$f(x) = ax -18$$

""" Linear Regression lines with know intercept parameter (b = -18)
"""

import numpy as np
import matplotlib.pyplot as plt

# Training data: inputs shaped as a (6, 1) column, targets as a flat vector
X = np.array([30, 46, 60, 65, 77, 95]).reshape(-1, 1)
Y = np.array([31, 30, 80, 49, 70, 118])

# Candidate slopes (parameter 'a') for f(x) = a*x - 18: 13 evenly spaced values
A = np.linspace(-2, 4.5, 13)

# Show which slopes are going to be drawn
print("Slope range: \n", A)

# One figure holding the training points and one line per candidate slope
fig, ax = plt.subplots()
ax.set_ylim(0, 140)
ax.set_xlim(0, 140)

# Training points
ax.plot(X, Y, 'o', color='g', label='training data')

# One regression line per slope, labelled with its (rounded) equation
for slope in A:
    ax.plot(X, -18 + slope * X, label='f(x) = -18 + %sx' % slope.round(1))

ax.set_xlabel("x")
ax.set_ylabel("f(x)")
ax.legend()
plt.show()

"""
Slope range:
[-2.         -1.45833333 -0.91666667 -0.375       0.16666667  0.70833333
1.25        1.79166667  2.33333333  2.875       3.41666667  3.95833333
4.5       ]
"""


### Cost function J(a)

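The cost function is named SSR(a) or J(a): the sum of squared residuals, where each residual $R_i$ is the difference between the observed value $y_i$ and the prediction $f(x_i)$.
$$J(a) = \sum_{i=1}^{n} R_i^2 = \sum_{i=1}^{n} \left(y_i - (a x_i - 18)\right)^2$$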

""" Cost function J(a) visualization
"""

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D

# Training dataset: inputs shaped as a (6, 1) column, targets as a flat vector
X = np.array([30, 46, 60, 65, 77, 95]).reshape(6, 1)
Y = np.array([31, 30, 80, 49, 70, 118])

# Define a range of slope values (parameter 'a') to explore
A = np.linspace(-2, 4.5, 13) # 13 values

# Sum of Squared Residuals (SSR) for each 'a', with the intercept fixed at -18.
# X is flattened so that the predictions line up element-wise with Y, and each
# result is stored as a plain Python float so the printed list looks the same
# on every NumPy version (NumPy 2 prints its own scalars as "np.float64(...)").
SSR = []
for a in A:
    predictions = -18 + a * X.ravel()
    residuals = Y - predictions
    SSR.append(float(np.sum(residuals ** 2).round()))

# Output results
print("SSR(a -18): \n", SSR, "\n")
print("SSR optim:", min(SSR))

# Define a generic cost function SSR(a) = J
def J(a, b=-18):
    """Return the sum of squared residuals SSR(a, b) over the training set.

    Reads the module-level X and Y and adds up (Y[i] - (a*X[i] + b))**2 for
    every training point. `a` may be a scalar or an array of slopes, in which
    case one cost per slope is returned.
    """
    total = 0
    for i, x_i in enumerate(X):
        residual = Y[i] - (a * x_i + b)
        total += residual ** 2
    return total

# Draw the cost curve J(a, -18) and mark the cost of every sampled slope
fig, ax = plt.subplots()
ax.plot(A, J(A)) # the curve J(a)
for a in A:
    # One labelled marker per slope, sitting on the curve
    ax.plot(a, J(a), 'o', label='J(%.1f, -18)' % a)
ax.set_xlabel("a")
ax.set_ylabel("SSR(a)")
ax.legend()
plt.show()

"""
SSR(a -18):
[282654.0, 197923.0, 128329.0, 73872.0, 34552.0, 10368.0, 1320.0, 7409.0,
28635.0, 64998.0, 116497.0, 183133.0, 264906.0]

SSR optim: 1320.0
"""


### Cost function J(a, b)

We can also visualize the cost function J(a, b) when both the slope a and the intercept b vary. The result is a 3D surface.
$$J(a, b) = \sum_{i=1}^{n} R_i^2 = \sum_{i=1}^{n} \left(y_i - (a x_i + b)\right)^2$$

""" Cost function J(a,b) visualization
"""

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.axes3d import Axes3D

# Training dataset: six samples, X written directly as a 6x1 column
X = np.array([[30], [46], [60], [65], [77], [95]])
Y = np.array([31, 30, 80, 49, 70, 118])

# Define a generic cost function SSR(a,b) = J
def J(a, b=-18):
    """Return the sum of squared residuals SSR(a, b) over the training set.

    Reads the module-level X and Y. Each X[i] has shape (1,), so `a` and `b`
    may be scalars or arrays (for example a meshgrid); the result then has
    the broadcast shape of the inputs.
    """
    squared_residuals = (
        (Y[i] - (a * X[i] + b)) ** 2
        for i in range(len(X))  # one term per training point
    )
    return sum(squared_residuals)

# Create a 3D plot of the cost function J(a, b)
fig = plt.figure()
a = np.linspace(-1, 4, 20)
b = np.linspace(-100, 100, 10)
aa, bb = np.meshgrid(a, b)
ax.plot_surface(aa, bb, J(aa, bb)) # Plot the 3D surface of the cost function
ax.view_init(50,-150) # Set the view angle
plt.show()


The gradient descent algorithm starts with an arbitrary initial value of the parameter a (the intercept stays fixed at b = -18). Then it finds the direction in which the cost function decreases fastest and takes a step in that direction. This is repeated until it reaches the optimal value of the coefficient.

""" Find the optimal value of a linear regression parameter 'a' for a given dataset
"""

import matplotlib.pyplot as plt
import numpy as np

# Training dataset (Y is a flat vector, X a single-column matrix)
Y = np.array([31, 30, 80, 49, 70, 118])
X = np.array([30, 46, 60, 65, 77, 95]).reshape(-1, 1)

# Cost function
def J(a):
    """Return the sum of squared residuals for slope `a`, intercept fixed at -18.

    Reads the module-level X and Y and accumulates one squared error per
    training point.
    """
    cost = 0
    for i, x_i in enumerate(X):
        prediction = a * x_i + -18
        cost += (Y[i] - prediction) ** 2
    return cost

# Derivative of the cost function
def dJ(a):
    """Return dJ/da, the derivative of the cost for slope `a` (intercept -18).

    Reads the module-level X and Y. The accumulated one-element array is
    converted to a plain Python scalar with .item() before it is returned.
    """
    slope = 0
    for i, x_i in enumerate(X):
        residual = Y[i] - (a * x_i + -18)
        # Chain rule on residual**2: 2 * residual * d(residual)/da, with
        # d(residual)/da = -x_i
        slope += -2 * x_i * residual
    return slope.item()

def gradient_descent(X, Y, b=-18, lr=0.00001, loops=15):
    """Fit the slope `a` of f(x) = a*x + b by gradient descent.

    Starting from a = 0, performs `loops` updates ``a <- a - lr * dJ/da``
    where ``J(a) = sum((Y - (a*X + b))**2)``, printing the slope after
    every step.

    Args:
        X: training inputs (any shape; flattened before use).
        Y: training targets, one per input.
        b: fixed intercept of the model.
        lr: learning rate (step size).
        loops: number of descent steps to take.

    Returns:
        The slope after the last step, rounded to 5 decimals.
    """
    # Flatten both arrays so a column-shaped X lines up element-wise with Y
    # instead of broadcasting into a 2-D grid.
    x = np.ravel(X)
    y = np.ravel(Y)

    a = 0
    for step in range(loops):
        # dJ/da = sum(-2 * x * residual), computed from the arguments rather
        # than from module-level data so X, Y and b are actually honoured.
        gradient = float(np.sum(-2 * x * (y - (a * x + b))))
        a = a - gradient * lr

        print(f'Step {step + 1} a = {round(a, 5)}')
    return round(a, 5)

# Result
# Run the full descent first: it prints every step and returns the slope
# that is marked as the optimum on the plot below.
optim_a = gradient_descent(X, Y)

# Compute values to print and plot
a = 0       # start value
l = 0.00001 # learning rate

# The first three descent steps, recomputed by hand so they can be drawn
a0 = a
a1 = a0 - l * dJ(a0) # step 1
a2 = a1 - l * dJ(a1) # step 2
a3 = a2 - l * dJ(a2) # step 3
descent_path = [a0, a1, a2, a3]

# Plot lines SSR curve
fig, ax = plt.subplots()
A = np.linspace(-2, 4.5, 23) # 23 values
ax.plot(A, J(A), label='J(a) = sum(R(X)^2)') # J(a)

# Mark the minimum SSR(a) (optim_a)
ax.plot(optim_a, J(optim_a), 'o', color='g', label='optim_a = 1.3029')

# Draw points (as gradient descends)
for point in descent_path:
    ax.plot(point, J(point), 'o', color='r')

# Draw lines to minimum (one segment between consecutive steps)
for start, end in zip(descent_path, descent_path[1:]):
    ax.plot([start, end], [J(start), J(end)], color='r')

# Customize the plot
plt.xlim(-2, 5)
plt.ylim(-10000, 70000)
plt.xlabel("a")
plt.ylabel("SSR(a)")
ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')
plt.legend()

# Show the plot
plt.show()

# Print results
print('Derivative of cost function J(0) = ', dJ(0))
print('Step 1 a =', round(a1, 5))
print('Step 2 a =', round(a2, 5))
print('Step 3 a =', round(a3, 5), "\n")
print("Gradient descent optim_a slope: \n", round(optim_a, 4))

"""
Step 1 a = 0.67218
Step 2 a = 0.99758
Step 3 a = 1.15511
Step 4 a = 1.23137
Step 5 a = 1.26829
Step 6 a = 1.28616
Step 7 a = 1.29481
Step 8 a = 1.299
Step 9 a = 1.30102
Step 10 a = 1.30201
Step 11 a = 1.30248
Step 12 a = 1.30271
Step 13 a = 1.30282
Step 14 a = 1.30288
Step 15 a = 1.3029
"""


### Learning (a, b)

Finding the optimal values for both the coefficient and the intercept.

""" Gradient descent (two params, a and b)
Algorithm starts with a random value of the parameter a, b
"""

import matplotlib.pyplot as plt
import numpy as np

# The model (linear)
def predict(X, a, b):
    """Evaluate the linear model f(x) = a*x + b, rounded with np.round.

    X may be a single number or a NumPy array; the result has the same shape.
    """
    return np.round(X * a + b)

# Cost function
def J(a, b):
    """Return the sum of squared residuals for parameters (a, b).

    Compares the module-level targets Y with predict(X, a, b) on the
    module-level inputs X.
    """
    residuals = Y - predict(X, a, b)
    return np.sum(residuals ** 2)

# Derivatives
def dJ(a, b):
    """Return the partial derivatives (dJ/da, dJ/db) of the cost at (a, b).

    Uses the module-level X and Y together with predict(); the residuals are
    computed once and shared by both derivatives.
    """
    residuals = Y - predict(X, a, b)
    da = np.sum(-2 * X * residuals)  # derivative w.r.t. a, b held fixed
    db = np.sum(-2 * 1 * residuals)  # derivative w.r.t. b, a held fixed
    return da, db

# Gradient descent over both parameters.
# NOTE(review): the `def` line of this function and the two training calls
# further down were missing from the listing (the body started at `a = 0`
# and ended in a module-level `return`). The function name, its parameters
# and the default hyperparameters are reconstructed -- confirm them against
# the documented output at the bottom of this section.
def gradient_descent(X, Y, lr=0.00001, loops=1000):
    """Learn slope `a` and intercept `b` of f(x) = a*x + b.

    Both parameters start at 0. Each of the `loops` iterations takes the
    gradients from dJ(a, b), moves `a` by one step of size ``lr * da`` and
    moves `b` by `loops` steps of size ``lr * db``.

    Args:
        X, Y: training inputs and targets. dJ() reads the module-level X and
            Y, so these must be the same arrays that are bound globally.
        lr: learning rate.
        loops: number of outer iterations (and of inner `b` steps).

    Returns:
        Tuple ``(a, b)``, each rounded to one decimal.
    """
    a = 0
    b = 0
    for i in range(loops):
        da, db = dJ(a, b)
        a = a - lr * da
        # The same db is applied `loops` times, so b effectively moves with
        # a learning rate of lr * loops.
        for j in range(loops):
            b = b - lr * db
    return round(a, 1), round(b, 1)

# Train dataset 1
X = np.array([30, 46, 60, 65, 77, 95])
Y = np.array([31, 30, 80, 49, 70, 118])
print("\nLearning 1")

# Learning a,b
a, b = gradient_descent(X, Y)
print('a =', a, ' b =', b)
print('Predictions:', f'f(x) = {a}x + {b}')

# Predictions
for x in (33, 45, 62):
    y = predict(x, a, b)
    print("f(%s) =" %x, y)

fig, ax = plt.subplots()
ax.set_xlabel('x')
ax.set_ylabel('f(x)')
ax.grid(True, which='both')
ax.axhline(y=0, color='k')
ax.axvline(x=0, color='k')

# Draw dataset 1
ax.plot(X, Y, 'x', color='g', label='training data')
ax.plot(X, a*X + b, label=f'f(x) = {b} + {a}x') # line
ax.plot(55, predict(55, a, b), 'o', color='r')
plt.legend(loc='upper right')

# Train dataset 2
X = np.array([15, 18, 20, 21, 23, 25, 27, 28, 29, 30, 32, 34, 35, 36])
Y = np.array([23, 74, 65, 82, 135, 321, 440, 400, 290, 620, 630, 610, 560, 568])
print("\nLearning 2")

# Learning a,b
a, b = gradient_descent(X, Y)
print('a =', a, ' b =', b)
print('Predictions:', f'f(x) = {a}x + {b}')  # intercept is b, not a second a

for x in (20, 24, 33):
    y = predict(x, a, b)
    print("f(%s) =" %x, y)

# Draw dataset 2
ax.plot(X, Y, 'x', color='g')
ax.plot(X, a*X + b, label=f'f(x) = {b} + {a}x') # line
ax.plot(33, predict(33, a, b), 'o', color='r')  # marker sits at the x it predicts
plt.legend(loc='upper right')
plt.show()

"""
Learning 1
a = 1.3  b = -17.3
Predictions: f(x) = 1.3x + -17.3
f(33) = 26.0
f(45) = 41.0
f(62) = 63.0

Learning 2
a = 32.9  b = -533.1
Predictions: f(x) = 32.9x + -533.1
f(20) = 125.0
f(24) = 256.0
f(33) = 553.0
"""


### Algorithm

Can be used to optimize the parameters of any ML model, not just linear regression.
$$J(\theta_{0}, \theta_{1}) = \frac{1}{2m} \sum_{i=1}^{m} \left(h_{\theta}(x^{(i)}) - y^{(i)}\right)^2$$


""" Gradient descent algorithm

1. Initialize the parameters
select initial set of params for the model

2. Compute the cost function
differences between predictions and actual values

3. Compute the gradients
partial derivatives of cost function

4. Update the parameters
use the gradients and a learning rate

5. Repeat steps 2-4
"""

import numpy as np

def cost(theta, x, y):
    """Return the halved mean squared error of the linear model x . theta.

    Computes 1 / (2 * len(y)) times the squared length of the residual
    (x . theta - y). With column-vector `theta` and `y` the result is a
    1x1 array.
    """
    residual = np.dot(x, theta) - y
    squared_error = np.dot(residual.T, residual)
    scale = 1 / (2 * len(y))
    return scale * squared_error

def gradient_descent(x, y, theta, lr, num_iterations):
    """Run batch gradient descent on the linear model x . theta.

    Args:
        x: feature matrix, one row per sample.
        y: targets, one row per sample.
        theta: initial parameters.
        lr: learning rate.
        num_iterations: number of update steps.

    Returns:
        Tuple ``(theta, cost_history)``: the parameters after the last step
        and a 1-D array holding cost(theta, x, y) after every step.
    """
    sample_count = len(y)
    cost_history = np.zeros(num_iterations)
    for i in range(num_iterations):
        error = np.dot(x, theta) - y

        # Step against the gradient of the cost, averaged over the samples
        theta = theta - (lr / sample_count) * np.dot(x.T, error)

        # cost() yields a 1x1 array for column-vector inputs; take its scalar
        # explicitly, because assigning an array with ndim > 0 to a single
        # element is deprecated since NumPy 1.25.
        cost_history[i] = np.asarray(cost(theta, x, y)).item()

    return theta, cost_history

# Feature matrix whose first column is all ones (so theta[0] acts as the
# constant term), and the matching column of targets
x = np.array([[1, 2], [1, 3], [1, 4], [1, 5]])
y = np.array([[7], [6], [5], [7]])

# Hyperparameters, then a random starting point for the two parameters
lr = 0.01
num_iterations = 1000
theta = np.random.randn(2, 1)

theta, cost_history = gradient_descent(x, y, theta, lr, num_iterations)
print("Theta: ", theta)
# Sample output from one run (the start is random and unseeded):
# [[4.55230192] [0.43431721]]


Last update: 272 days ago