#!/usr/bin/env python from numpy import * from matplotlib import pyplot as plt def fit(x,y,return_all=False): #we want to fit y=a1*x+a2, so linear sys is Aa=y, where A = (x,1) n = shape(x)[-1] A = dstack((x,ones_like(x)))[0] U,w,V = linalg.svd(A) V = V.T #notational #W = zeros((n,2)) #W[:2,:2] = diag(w) #assert allclose(A,dot(U,dot(W, V.T))) #checking interpretation w_inv = array([1/wi if wi!=0 else 0 for wi in w]) W_inv = zeros((2,n)); W_inv[:2,:2] = diag(w_inv) if return_all: return dot(dot(V,dot(W_inv,U.T)), y), U,w,w_inv,V else: return dot(dot(V,dot(W_inv,U.T)), y) def find_var_sq(data): n = shape(data)[-2] mu = sum(data,axis=0)/n return sum((data-mu)**2,axis=0)/n def main(): n=100 en = 100 #number of error trials dev = .5 x = random.random(n) z = random.normal(loc=0.,scale=dev,size=n) y = 2 + 3*x + z a,U,w,w_inv,V = fit(x,y,return_all=True) print a #12.34 print "%.3f, %.3f: 12.34 var squared"%(dev**2*sum(V[0]**2*w_inv**2), dev**2*sum(V[1]**2*w_inv**2)) #bootstrap bsd = [] for i in range(en): rand_i = random.randint(0,n,n) bsd.append(fit(x[rand_i],y[rand_i])) print "%.3f, %.3f: bootstrap var squared"%tuple(find_var_sq(asarray(bsd))) #independent ind = [] for i in range(en): x = random.random(n) z = random.normal(loc=0.,scale=dev,size=n) y = 2 + 3*x + z ind.append(fit(x,y)) print "%.3f, %.3f: independent ensemble var squared"%tuple(find_var_sq(asarray(ind))) plt.plot(x,y,marker='.',linestyle='',label='samples') endpts = array([0,1]) plt.plot(endpts,a[0]*endpts+a[1],label='fit: $%.3f x + %.3f$'%tuple(a)) plt.plot(endpts,3*endpts+2,linestyle='--',label='true: $3x+2$') plt.title('Linear least squares fitting') plt.legend(loc='lower right') plt.show() if __name__ == '__main__': main()