# generate x,y for y = 3x + 2 + z
num_points = 100
def generate(num_points):
x = np.random.random(num_points)
z = np.random.normal(scale=0.5, size=num_points)
y = (3*x) + 2 + z
return x,y
def get_fit(x,y):
A = np.dstack((x, np.ones(len(x))))[0]
U,E,V = np.linalg.svd(A)
V = V.transpose()
E_i = np.array([1/e if e != 0 else 0 for e in E])
W_i = np.zeros((2,len(x)))
W_i[:2,:2] = np.diag(E_i)
coeffs = np.dot(np.dot(V,np.dot(W_i,U.T)), y)
return coeffs[0], coeffs[1], U, W_i, V
def error_eq(V, W_i):
return (0.5**2)*np.sum((V[0]**2)*(W_i[:,0])**2), (0.5**2)*np.sum((V[1]**2)*(W_i[:,1])**2)
num_datasets = 100
def error_bootstrap(x, y, num_datasets, num_points):
bootstrap_a1 = []
bootstrap_a2 = []
for i in range(num_datasets):
ridx = np.random.randint(0, num_points, num_points)
a1, a2, U, W_i, V = get_fit(x[ridx], y[ridx])
bootstrap_a1.append(a1)
bootstrap_a2.append(a2)
mean_a1 = np.mean(bootstrap_a1)
mean_a2 = np.mean(bootstrap_a2)
var_a1 = np.var(bootstrap_a1)
var_a2 = np.var(bootstrap_a2)
return var_a1, var_a2, mean_a1, mean_a2
num_sets = 100
def error_independant(num_sets):
ind_a1 = []
ind_a2 = []
for i in range(num_sets):
x,y = generate(num_points)
a1, a2, U, E, V = get_fit(x,y)
ind_a1.append(a1)
ind_a2.append(a2)
mean_a1 = np.mean(ind_a1)
mean_a2 = np.mean(ind_a2)
var_a1 = np.var(ind_a1)
var_a2 = np.var(ind_a2)
return var_a1, var_a2, mean_a1, mean_a2