Population Variance
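Variance measures how far the values in a data set are spread out from their mean. The population variance is the average of the squared deviations from the mean:\(
\sigma^2 = \frac{1}{N} \sum_{i=1}^{N} (x_i - \mu)^2
\)
where \(N\) is the number of values, \(x_i\) is the \(i\)-th value, and \(\mu\) is the population mean.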
A high variance means that data are spread over a large range.
A low variance means that data are clustered close together.
""" Matrices / Population Variance
"""
import numpy as np
# Baseline data: the integers 1..9 arranged as a 3x3 matrix.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Same values as the baseline except for the last cell, which holds an
# extreme outlier.
B = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 900]])  # Look Here
def population_variance(X):
    """Return the population variance of every element in the array ``X``.

    The result is the mean of the squared deviations from the overall mean,
    taken over all ``X.size`` elements regardless of the array's shape.
    """
    count = X.size
    center = np.mean(X)
    squared_deviations = (X - center) ** 2
    # Divide by N (not N - 1): this is the population, not the sample, variance.
    return (1 / count) * np.sum(squared_deviations)
# Variance of each matrix according to the hand-written implementation.
A_variance, B_variance = population_variance(A), population_variance(B)

# The single outlier in B must make its variance larger than A's.
assert B_variance > A_variance

# Hand-written results.
print("A_variance = ", A_variance.round(2))
print("B_variance = ", B_variance.round(2))

# NumPy's built-in population variance, shown for comparison.
print("np.var(A) = ", np.var(A).round(2))
print("np.var(B) = ", np.var(B).round(2))
"""
A_variance = 6.67
B_variance = 79206.67
np.var(A) = 6.67
np.var(B) = 79206.67
"""
Standard Deviation
Standard deviation is the square root of the variance. It is usually more intuitive than variance because it is expressed in the same units as the data.\(
\sigma = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (x_i - \mu)^2}
\)
""" Matrices / Standard deviation
"""
import numpy as np
# Baseline data: the integers 1..9 arranged as a 3x3 matrix.
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# B is an independent copy of A whose bottom-right cell is replaced by an
# extreme outlier.
B = A.copy()
B[-1, -1] = 900
# Algorithms
def variation(M):
    """Return the population variance of every element in the array ``M``.

    Despite its name, this computes the *variance*: the mean of the squared
    deviations from the overall mean, dividing by the element count ``N``.

    Parameters:
        M: NumPy array of numbers (any shape); all elements are used.

    Returns:
        The population variance as a NumPy floating-point scalar.
    """
    N = M.size
    mean = np.mean(M)
    # Operate on the argument M rather than a module-level array, so the
    # result reflects the data actually passed in.
    return (1 / N) * np.sum((M - mean) ** 2)  # population variance
def standard_deviation(M):
    """Return the square root of ``variation(M)``.

    Parameters:
        M: NumPy array of numbers; passed unchanged to ``variation``.

    Returns:
        ``np.sqrt`` of the value returned by ``variation(M)``.
    """
    return np.sqrt(variation(M))
# Native
def variation_np(M):
    """Return the population variance of ``M`` using NumPy's ``np.var``."""
    # ddof=0 is NumPy's default: divide by N, i.e. the population variance.
    return np.var(M, ddof=0)
def standard_deviation_np(M):
    """Return the population standard deviation of ``M`` using ``np.std``."""
    # ddof=0 is NumPy's default: divide by N, i.e. the population statistic.
    return np.std(M, ddof=0)
# Cross-check the hand-written functions against NumPy. The two compute the
# same quantity with a different order of floating-point operations, so the
# results may differ in the last bits; compare with a tight relative
# tolerance instead of exact equality.
assert np.isclose(variation(A), variation_np(A), rtol=1e-12, atol=0.0)
assert np.isclose(
    standard_deviation(A), standard_deviation_np(A), rtol=1e-12, atol=0.0
)

# Show the input matrices.
print("A = \n", A)
print("B = \n", B)

# Population variance as computed by NumPy.
print("np.var(A) variation =", np.var(A))
print("np.var(B) variation =", np.var(B))

# Population standard deviation as computed by NumPy.
print("np.std(A) standard deviation = ", np.std(A))
print("np.std(B) standard deviation = ", np.std(B))
"""
A =
[[1 2 3]
[4 5 6]
[7 8 9]]
B =
[[ 1 2 3]
[ 4 5 6]
[ 7 8 900]]
np.var(A) variation = 6.666666666666667
np.var(B) variation = 79206.66666666667
np.std(A) standard deviation = 2.581988897471611
np.std(B) standard deviation = 281.43678982440565
"""
Last update: 452 days ago