#dhaval adjodah April 10, 2011 #written with help from Ari Kardasis's code import matplotlib.pyplot as plt import numpy as np import random import math N = 100 a = 2 b = 3 mu = 0 sd = 0.5 ini_x = [ random.random() for i in range(N) ] #ini_y = [ a + b*i + random.gauss(mu, sd) for i in ini_x] ini_y = [ a + b*i + random.gauss(mu, sd) for i in ini_x] #print x #print y def normalize(v): mean = sum(v)/len(v) diff = [ i-mean for i in v] v_sd = math.sqrt(sum([i*i for i in diff])/len(v)) return [i/v_sd for i in diff], mean, v_sd def least_squares(ini_x, ini_y): n = len(ini_x) x, mean_x, std_x = normalize(ini_x) y, mean_y, std_y = normalize(ini_y) #filling in the elements of A # A = np.array([[1, i] for i in x]) A = np.array([[1, i] for i in x]) U, W_diag, V = np.linalg.svd(A, full_matrices=0) W_inv_diag = [] #doing the weird magic: for i in W_diag: if i == 0.0: W_inv_diag.append(0) else: W_inv_diag.append(1/i) W_inv = np.diag(W_inv_diag) #using equations 12.12 # coefs = np.matrix(V) * np.matrix(W_inv) * np.matrix(np.transpose(U)) * np.transpose(np.matrix(y)) coefs = np.matrix(V) * np.matrix(W_inv) * np.matrix(np.transpose(U)) * np.transpose(np.matrix(y)) #x-intercept c0 = coefs.item(0,0)*std_y - coefs.item(1,0) * mean_x * std_y/std_x + mean_y #slope c1 = coefs.item(1,0)*std_y/std_x fit_error = [] for i in range(len(W_diag)): fit_error.append(sum([(V.item(i,j)/W_diag.item(j))**2 for j in range(len(W_diag))])) return c0, c1, fit_error def bootstrap(raw_x, raw_y): bs_x_samples=[] bs_y_samples=[] for j in range(len(raw_x)): i = random.randint(0, len(raw_x)-1) bs_x_samples.append(raw_x[i]) bs_y_samples.append(raw_y[i]) return least_squares(bs_x_samples, bs_y_samples) if __name__ == "__main__": c0, c1, fit = least_squares(ini_x, ini_y) print "error using first ensemble", fit plt.figure(1) plt.title('1st ensemble scatter plot and line graph') plt.scatter(ini_x, ini_y) plt.plot([0,1], [c0, c1+c0]) b_c0s = [] b_c1s = [] for i in range(N): b_c0, b_c1, b_fits = bootstrap(ini_x, ini_y) b_c0s.append(b_c0) b_c1s.append(b_c1) stuff1, stuff2, b_c0_sd = normalize(b_c0s) stuff3, stuff4, b_c1_sd = normalize(b_c1s) print "bootstrap standard dev of a and b for 100 runs", b_c0_sd**2, b_c1_sd**2 plt.figure(2) plt.title('Histogram of 100 bootstrap runs of a and b values') plt.hist(b_c0s, 10) plt.hist(b_c1s, 10) e_c0s = [] e_c1s = [] for i in range(N): x = [ random.random() for i in range(N) ] y = [ a + b*i + random.gauss(mu, sd) for i in x] e_c0, e_c1, e_fit = least_squares(x, y) e_c0s.append(e_c0) e_c1s.append(e_c1) stuff1, stuff2, e_c0_sd = normalize(e_c0s) stuff3, stuff4, e_c1_sd = normalize(e_c1s) print "100 ensembles' standard dev of a and b", e_c0_sd**2, e_c1_sd**2 plt.figure(3) plt.title('Histogram of 100 ensembles of a and b values') plt.hist(e_c0s, 10) plt.hist(e_c1s, 10) plt.show()