minte9
LearnRemember




Population Variance

Variance is a measure of the spread of the data. The formula to calculate population variance is:
\( \sigma^2 = \frac{1}{N} \sum_{i=1}^{N} (x_i - \mu)^2 \)
A high variance means that data are spread over a large range. A low variance means that data are clustered close together.
 
""" Matrices / Population Variance
"""

import numpy as np

# A: the integers 1..9 laid out as a 3x3 matrix.
A = np.arange(1, 10).reshape(3, 3)

# B: the same matrix with one extreme value in the last cell, so that
# its spread is much larger than A's.
B = A.copy()
B[2, 2] = 900  # Look Here

def population_variance(X):
    """Return the population variance of every element of X.

    Implements sigma^2 = (1/N) * sum((x_i - mu)^2), where N is the
    total number of elements in X and mu is their mean.

    X must be a NumPy array (its ``size`` attribute is read).
    """
    count = X.size
    centered = X - np.mean(X)  # deviation of each element from the mean
    return (1 / count) * np.sum(np.square(centered))

A_variance, B_variance = population_variance(A), population_variance(B)

# The outlier in B must make its spread larger than A's.
assert B_variance > A_variance

# Print the hand-written results next to NumPy's built-in np.var.
for label, value in (
    ("A_variance = ", A_variance),
    ("B_variance = ", B_variance),
    ("np.var(A) = ", np.var(A)),  # built-in
    ("np.var(B) = ", np.var(B)),
):
    print(label, value.round(2))

"""
    A_variance =  6.67
    B_variance =  79206.67
    np.var(A) =  6.67
    np.var(B) =  79206.67
"""

Standard Deviation

Standard deviation is the square root of the variance. It is more intuitive than variance because it is expressed in the same units as the data.
\( \sigma = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (x_i - \mu)^2} \)
 
""" Matrices / Standard deviation
"""

import numpy as np

# A: evenly spread values 1..9.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# B: same as A except for the last element (900 instead of 9).
B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 900]])

# Algorithms
def variation(M):
    """Return the population variance of every element of M.

    Implements sigma^2 = (1/N) * sum((x_i - mu)^2), where N is the
    total number of elements in M and mu is their mean. Despite the
    function's name, the quantity computed is the variance.

    M must be a NumPy array (its ``size`` attribute is read).
    """
    # Compute everything from the argument M, not from a module-level
    # array, so the result reflects whichever matrix the caller passes.
    N = M.size
    mean = np.mean(M)
    return (1/N) * np.sum((M - mean)**2)  # population variance

def standard_deviation(M):
    """Return the square root of ``variation(M)``.

    M is passed through unchanged to ``variation``.
    """
    return np.sqrt(variation(M))

# Native
def variation_np(M):
    """Return the variance of M as computed by NumPy's ``np.var``.

    ``np.var`` is called with its default arguments.
    """
    variance = np.var(M)
    return variance

def standard_deviation_np(M):
    """Return the standard deviation of M as computed by ``np.std``.

    ``np.std`` is called with its default arguments.
    """
    deviation = np.std(M)
    return deviation

# Cross-check the hand-written algorithms against NumPy's built-ins.
# The two paths can differ in the last floating-point bit, so compare
# with a tight relative tolerance rather than exact equality.
assert np.isclose(variation(A), variation_np(A), rtol=1e-12, atol=0.0)
assert np.isclose(standard_deviation(A), standard_deviation_np(A), rtol=1e-12, atol=0.0)

print("A = \n", A)
print("B = \n", B)
print("np.var(A) variation =", np.var(A))
print("np.var(B) variation =", np.var(B))
print("np.std(A) standard deviation = ", np.std(A))
print("np.std(B) standard deviation = ", np.std(B))

"""
    A = 
    [[1 2 3]
     [4 5 6]
     [7 8 9]]
    B = 
    [[  1   2   3]
     [  4   5   6]
     [  7   8 900]]
    np.var(A) variation = 6.666666666666667
    np.var(B) variation = 79206.66666666667
    np.std(A) standard deviation =  2.581988897471611
    np.std(B) standard deviation =  281.43678982440565
"""



  Last update: 71 days ago