Function Fitting - Question 1

import numpy as np
from matplotlib import pyplot as plt

# generate x,y for y = 3x + 2 + z

# Number of (x, y) samples per dataset; passed to generate() and
# error_bootstrap() below and read as a global by error_independant().
num_points = 100

def generate(num_points):
    """Draw a noisy sample of the line y = 3x + 2.

    Parameters
    ----------
    num_points : int
        Number of (x, y) pairs to draw.

    Returns
    -------
    tuple of ndarray
        ``(x, y)`` where ``x`` comes from ``np.random.random`` and
        ``y = 3*x + 2 + z`` with ``z`` drawn from ``np.random.normal``
        using ``scale=0.5``.
    """
    abscissa = np.random.random(num_points)
    # Additive noise term z with standard deviation 0.5.
    noise = np.random.normal(scale=0.5, size=num_points)
    return abscissa, (3*abscissa) + 2 + noise

def get_fit(x,y):
    """Least-squares fit of the line ``y = a1*x + a2`` via the SVD pseudo-inverse.

    The design matrix ``A`` has the columns ``[x, 1]``.  With ``A = U W V^T``
    the coefficients are ``V W^-1 U^T y``.

    Parameters
    ----------
    x, y : 1-D array-like of equal length
        Sample abscissae and ordinates.

    Returns
    -------
    a1 : float
        Fitted slope.
    a2 : float
        Fitted intercept.
    U : ndarray, shape (len(x), len(x))
        Left singular vectors of ``A`` (full matrices).
    W_i : ndarray, shape (2, len(x))
        Rectangular matrix holding the inverted singular values on its
        main diagonal and zeros elsewhere.
    V : ndarray, shape (2, 2)
        Right singular vectors of ``A`` stored as columns.
    """
    A = np.column_stack((x, np.ones(len(x))))
    U, E, Vt = np.linalg.svd(A)
    V = Vt.transpose()

    # Invert only singular values that are significant relative to the
    # largest one.  An exact ``!= 0`` test lets round-off-sized values
    # (e.g. when all x are equal) blow up into huge coefficients.
    tol = np.finfo(E.dtype).eps * max(A.shape) * E.max()
    significant = E > tol
    E_i = np.zeros_like(E)
    E_i[significant] = 1.0 / E[significant]

    # There are min(len(x), 2) singular values.  Size the diagonal block from
    # that count so a single data point does not broadcast one value into
    # both rows of W_i.
    W_i = np.zeros((2, len(x)))
    k = len(E_i)
    W_i[:k, :k] = np.diag(E_i)

    coeffs = np.dot(np.dot(V, np.dot(W_i, U.T)), y)

    return coeffs[0], coeffs[1], U, W_i, V

def error_eq(V, W_i, sigma=0.5):
    """Analytic variances of the fitted coefficients from the SVD factors.

    For coefficient ``j`` the variance is
    ``sigma**2 * sum_i (V[j, i] / w_i)**2``, where ``1 / w_i`` are the
    inverted singular values stored on the main diagonal of ``W_i``.

    Parameters
    ----------
    V : ndarray, shape (2, 2)
        Right singular vectors as columns, as returned by ``get_fit``.
    W_i : ndarray, shape (2, n)
        Rectangular matrix with the inverted singular values on its main
        diagonal, as returned by ``get_fit``.
    sigma : float, optional
        Standard deviation of the measurement noise.  Defaults to 0.5, the
        value previously hard-coded here.

    Returns
    -------
    tuple of float
        ``(var_a1, var_a2)``.
    """
    # The inverted singular values live on the diagonal of W_i.  Indexing
    # W_i by column instead would zero out every cross term V[j, i], i != j.
    inv_w = np.diagonal(W_i)
    # Use only as many columns of V as there are singular values.
    terms = (np.asarray(V)[:, :inv_w.size] * inv_w) ** 2
    return (sigma**2)*np.sum(terms[0]), (sigma**2)*np.sum(terms[1])

# Number of bootstrap resamples passed to error_bootstrap() below.
num_datasets = 100

def error_bootstrap(x, y, num_datasets, num_points):
    """Estimate the spread of the fit coefficients by bootstrap resampling.

    Parameters
    ----------
    x, y : ndarray
        The dataset to resample from.
    num_datasets : int
        How many resampled datasets to fit.
    num_points : int
        Size of each resample; indices are drawn from ``[0, num_points)``.

    Returns
    -------
    tuple
        ``(var_a1, var_a2, mean_a1, mean_a2)`` computed with ``np.var`` and
        ``np.mean`` over the coefficients of all resampled fits.
    """
    slopes, intercepts = [], []
    for _ in range(num_datasets):
        # Random indices with replacement select the resampled points.
        picks = np.random.randint(0, num_points, num_points)
        fit = get_fit(x[picks], y[picks])
        slopes.append(fit[0])
        intercepts.append(fit[1])

    variances = (np.var(slopes), np.var(intercepts))
    means = (np.mean(slopes), np.mean(intercepts))
    return variances + means

# Number of independently generated datasets passed to error_independant().
num_sets = 100
def error_independant(num_sets):
    """Estimate the spread of the fit coefficients from fresh datasets.

    Each iteration generates a new dataset with ``generate`` (using the
    module-level ``num_points``) and fits it with ``get_fit``.

    Parameters
    ----------
    num_sets : int
        How many independent datasets to generate and fit.

    Returns
    -------
    tuple
        ``(var_a1, var_a2, mean_a1, mean_a2)`` computed with ``np.var`` and
        ``np.mean`` over the coefficients of all fits.
    """
    slopes, intercepts = [], []
    for _ in range(num_sets):
        xs, ys = generate(num_points)
        fit = get_fit(xs, ys)
        slopes.append(fit[0])
        intercepts.append(fit[1])

    variances = (np.var(slopes), np.var(intercepts))
    means = (np.mean(slopes), np.mean(intercepts))
    return variances + means

# Generate one dataset, fit it with the SVD, and plot the data (red dots)
# together with the fitted line.  x, y, a1, a2, U, W_i and V are reused by
# the cells below.
x,y = generate(num_points)
a1, a2, U, W_i, V = get_fit(x,y)
print "a1: ", a1, "a2: ", a2
plt.plot(x,y, 'ro')
# Ten evenly spaced abscissae on [0, 1] for drawing the fitted line.
t = np.linspace(0,1,num=10)
plt.plot(t,(a1*t) + a2)
plt.show()

a1:  2.92164371533 a2:  2.10082956622

# Report the shapes of the SVD factors, then the coefficient statistics
# from the three estimators: the analytic formula (error_eq), bootstrap
# resampling of (x, y), and independently generated datasets.
print W_i.shape
print V.shape
print "error equation 12.34", error_eq(V, W_i)
print "error equation bootstrap", error_bootstrap(x,y,num_datasets, num_points)
print "error equation independant sets", error_independant(num_sets)

(2, 100)
(2, 2)
error equation 12.34 (0.00050282400450206845, 0.011138088739828684)
error equation bootstrap (0.034323310993653655, 0.014784304022981941)
error equation independant sets (0.024830484676770924, 0.0084067775734672189)

# Re-run the bootstrap and draw the line given by the mean bootstrap
# coefficients (m1, m2) over the original data.  m1 and m2 are reused by
# the combined plot further down.
v1, v2, m1, m2 = error_bootstrap(x,y,num_datasets, num_points)
plt.figure()
plt.plot(x,y, 'ro')
t = np.linspace(0,1,num=10)
plt.title("Bootstrap Fit")
plt.plot(t,(m1*t) + m2)
plt.show()

# Re-run the independent-datasets estimate and draw the line given by its
# mean coefficients (m1_i, m2_i) over the original data.  m1_i and m2_i are
# reused by the combined plot further down.
v1_i, v2_i, m1_i, m2_i = error_independant(num_sets)
plt.figure()
plt.plot(x,y, 'ro')
t = np.linspace(0,1,num=10)
plt.title("Independent Sets Fit")
# Plot the independent-sets means; the bootstrap means m1, m2 belong to the
# previous figure.
plt.plot(t,(m1_i*t) + m2_i)
plt.show()

# combined plot

# Overlay the three fitted lines on the original data: the single SVD fit
# (a1, a2), the bootstrap means (m1, m2) and the independent-sets means
# (m1_i, m2_i), all computed in the cells above.
plt.plot(x,y, 'ro')
t = np.linspace(0,1,num=10)
plt.title("All Methods")
plt.plot(t,(a1*t) + a2, label="SVD Original")
plt.plot(t,(m1*t) + m2, label="Bootstrap")
plt.plot(t,(m1_i*t) + m2_i, label="Independent")
plt.legend()
plt.show()