# minte9 LearnRemember

### Population Variance

Variance is a measure of the spread of the data. The formula to calculate population variance is:
$$\sigma^2 = \frac{1}{N} \sum_{i=1}^{N} (x_i - \mu)^2$$
A high variance means that data are spread over a large range. A low variance means that data are clustered close together.

""" Matrices / Population Variance
"""

import numpy as np

# A: 3x3 matrix holding the integers 1..9
A = np.array([
[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
])
# B: identical to A except for the last element, which is 900 instead of 9
B = np.array([
[1, 2, 3],
[4, 5, 6],
[7, 8, 900], # Look Here: the only element that differs from A
])

def population_variance(X):
    """Return the population variance of all elements of X.

    Implements sigma^2 = (1/N) * sum((x_i - mu)^2), where N is the total
    number of elements and mu is their mean. The input is treated as one
    flat collection of values, whatever its shape.

    Parameters
    ----------
    X : array_like
        Numeric data. Plain lists/tuples are accepted and converted with
        np.asarray; ndarrays are used as-is.

    Returns
    -------
    NumPy floating-point scalar
        The population variance (divides by N, not N - 1).

    Raises
    ------
    ZeroDivisionError
        If X contains no elements (N == 0).
    """
    X = np.asarray(X)  # accept plain sequences, not only ndarrays
    N = X.size
    avg = np.mean(X)
    variance = (1/N) * np.sum((X - avg)**2)
    return variance

# Population variance of each matrix, computed with the hand-written function
A_variance = population_variance(A)
B_variance = population_variance(B)

# B differs from A only by the 900 entry, so its values are more spread out
assert B_variance > A_variance
print("A_variance = ", A_variance.round(2))
print("B_variance = ", B_variance.round(2))
# Same quantities from NumPy's np.var, printed for comparison
print("np.var(A) = ",  np.var(A).round(2)) # built-in
print("np.var(B) = ",  np.var(B).round(2))

"""
A_variance =  6.67
B_variance =  79206.67
np.var(A) =  6.67
np.var(B) =  79206.67
"""


### Standard Deviation

Standard deviation is the square root of the variance. It is more intuitive than variance because it is expressed in the same units as the data.
$$\sigma = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (x_i - \mu)^2}$$

""" Matrices / Standard deviation
"""

import numpy as np

# A: 3x3 matrix holding the integers 1..9
A = np.array([
[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
])

# B: identical to A except for the last element, which is 900 instead of 9
B = np.array([
[1, 2, 3],
[4, 5, 6],
[7, 8, 900],
])

# Algorithms
def variation(M):
    """Return the population variance of all elements of M.

    Computes (1/N) * sum((x_i - mean)^2) over every element of M, where N is
    the total element count. The function is named "variation" but the
    quantity it returns is the variance.

    Parameters
    ----------
    M : numpy.ndarray
        Numeric array of any shape; must expose ``.size``.

    Returns
    -------
    NumPy floating-point scalar
        The population variance of M.
    """
    # Use the argument M throughout; reading a module-level array here would
    # make the result independent of what the caller passed in.
    N = M.size
    mean = np.mean(M)
    return (1/N) * np.sum((M - mean)**2) # population variance

def standard_deviation(M):
    """Return the standard deviation of M.

    Defined as the square root of variation(M), so it is expressed in the
    same units as the values in M.

    Parameters
    ----------
    M : numpy.ndarray
        Numeric array, passed unchanged to variation().

    Returns
    -------
    NumPy floating-point scalar
        sqrt(variation(M)).
    """
    return np.sqrt(variation(M))

# Native
def variation_np(M):
    """Return the variance of M as computed by NumPy's np.var with default arguments."""
    np_variance = np.var(M)
    return np_variance

def standard_deviation_np(M):
    """Return the standard deviation of M as computed by NumPy's np.std with default arguments."""
    np_std = np.std(M)
    return np_std

# Cross-check the hand-written functions against NumPy's built-ins.
# The two implementations may differ in the last floating-point bits, so
# compare with a tolerance (np.isclose) rather than with exact equality.
assert np.isclose(variation(A), variation_np(A))
assert np.isclose(standard_deviation(A), standard_deviation_np(A))

# Show the input matrices, then NumPy's variance and standard deviation of each
print("A = \n", A)
print("B = \n", B)
print("np.var(A) variation =", np.var(A))
print("np.var(B) variation =", np.var(B))
print("np.std(A) standard deviation = ", np.std(A))
print("np.std(B) standard deviation = ", np.std(B))

"""
A =
[[1 2 3]
[4 5 6]
[7 8 9]]
B =
[[  1   2   3]
[  4   5   6]
[  7   8 900]]
np.var(A) variation = 6.666666666666667
np.var(B) variation = 79206.66666666667
np.std(A) standard deviation =  2.581988897471611
np.std(B) standard deviation =  281.43678982440565
"""


Last update: 278 days ago