import numpy as np import matplotlib.pyplot as plt import random, math def norm(data): mean = sum(data)/len(data) centered = [datum - mean for datum in data] std_diviation = math.sqrt(sum([datum**2 for datum in centered])/len(centered)) norm = [datum/std_diviation for datum in centered] return norm,mean,std_diviation def svd_fit(x,y): norm_x, mean_x, std_x = norm(x) norm_y, mean_y, std_y = norm(y) A = np.array([[1,elm] for elm in norm_x]) U, s, V = np.linalg.svd(A, full_matrices=False) #s is the diagolan component of W s_inv = [] for elm in s: if elm == 0.0: s_inv.append(elm) else: s_inv.append(1/elm) W_inverse = np.diag(s_inv) v = np.matrix(V)*np.matrix(W_inverse)*np.transpose(np.matrix(U))*np.transpose(np.matrix(norm_y)) coef_a = v.item(0,0)*std_y - v.item(1,0) * mean_x * std_y/std_x + mean_y coef_b = v.item(1,0)*std_y/std_x error = [] for i in range(len(s)): error.append(sum([(V.item(i,j)/s[j])**2 for j in range(len(s))])) return coef_a, coef_b, error #create data n = 100 x = [random.random() for e in xrange(n)] y = [2 + 3*i + random.gauss(0,.5) for i in x] print norm(x) #svd coef_a , coef_b, error = svd_fit(x,y) print error #bootstapping bs_ca = [] bs_cb = [] for i in xrange(10): sel_x=[] sel_y=[] for data in range(100): rand = random.randint(0,len(x)) sel_x.append(x[i]) sel_y.append(y[i]) bs_c_a, bs_c_b, bs_err = svd_fit(sel_x,sel_y) bs_ca.append(bs_c_a) bs_cb.append(bs_c_b) bs_c0_norm, bs_c0_mean, bs_c0_std = norm(bs_ca) bs_c1_norm, bs_c1_mean, bs_c1_std = norm(bs_cb) print "bootstrapping error", [bs_c0_std**2, bs_c1_std**2] #display plt.scatter(x,y) plt.plot([0,1],[coef_a+coef_b*0,coef_a+coef_b*1]) plt.show()