import numpy as np from numpy import * import matplotlib matplotlib.use('TkAgg') from matplotlib import pyplot as plt from matplotlib import animation from mpl_toolkits.mplot3d import Axes3D from matplotlib import cm import sys import os import random N = 100 M = 2 def gen_x(): global N x = zeros(N) for i in range(N): x[i] = random.random() return x def func_y(x): global N y = zeros(N) for i in range(N): gamma = random.gauss(0, 0.5) y[i] = 2 + 3*x[i] + gamma return y def one_over(w): out = zeros(len(w)) for i in range(len(w)): if w[i] == 0: out[i] = 0 else: out[i] = 1/w[i] return out def lin_svd(x,y): global N, M A = np.matrix(np.append(ones([N,1]), x.reshape(N,1), 1)) U, s, V = np.linalg.svd(A) w = np.matrix(np.append(np.diag(one_over(s)), zeros([M, N-M]),1)) y = np.matrix(y).T xx = V.T*w*U.T*y b = xx[0,0] a = xx[1,0] return a, b, V, s x = gen_x() y = func_y(x) # SVD # fit data to y = a1*x + a0 # x = V * diag(1/wj) * transpose(U) * b # [1 x1] [a0] = [y1] # [1 x2] [a1] [y2] # [1 x3] [y3] # [1 xN] [yN] a,b,V,s = lin_svd(x,y) # evaluate errors in a and b # eq. 12.34 err_a = (0.5)*pow(sum((np.asarray(V)[1,:]**2)/(np.asarray(s)**2)),0.5) err_b = (0.5)*pow(sum((np.asarray(V)[0,:]**2)/(np.asarray(s)**2)),0.5) print "12.34 error:" print "a = " + str(a) + " b = " + str(b) print "error a = " + str(err_a) + " error b = " + str(err_b) print " " x_fit1 = arange(0,1,0.01) y_fit1 = x_fit1*a + b # bootstrap error nsamples = 100 err_aa = []; err_bb = [] aa = []; bb = [] for i in range(nsamples): i = np.random.randint(N, size=nsamples) xbs = x[i] ybs = y[i] a,b,V,s = lin_svd(xbs, ybs) aa.append(a) bb.append(b) aa = np.asarray(aa); bb = np.asarray(bb) mean_a = sum(aa)/nsamples var_a = sum(aa**2)/nsamples - mean_a**2 mean_b = sum(bb)/nsamples var_b = sum(bb**2)/nsamples - mean_b**2 print "Bootstrap error:" print "mean_a = " + str(mean_a) + " mean_b = " + str(mean_b) print "error a = " + str(sqrt(var_a)) + " error b = " + str(sqrt(var_b)) print " " x_fit2 = arange(0,1,0.01) y_fit2 = x_fit2*mean_a + mean_b # ensemble error err_aa = []; err_bb = [] aa = []; bb = [] for i in range(nsamples): x = gen_x() y = func_y(x) a,b,V,s = lin_svd(x, y) aa.append(a) bb.append(b) aa = np.asarray(aa); bb = np.asarray(bb) mean_a = sum(aa)/nsamples var_a = sum(aa**2)/nsamples - mean_a**2 mean_b = sum(bb)/nsamples var_b = sum(bb**2)/nsamples - mean_b**2 print "Ensemble error:" print "mean_a = " + str(mean_a) + " mean_b = " + str(mean_b) print "error a = " + str(sqrt(var_a)) + " error b = " + str(sqrt(var_b)) print " " x_fit3 = arange(0,1,0.01) y_fit3 = x_fit3*mean_a + mean_b plt.figure() ax = plt.axes(xlim=(0,1), ylim=(min(y)/1.2,max(y)*1.2)) ax.plot(x,y,'.') ax.plot(np.squeeze(np.asarray(x_fit1)),np.squeeze(np.asarray(y_fit1))) ax.plot(np.squeeze(np.asarray(x_fit2)),np.squeeze(np.asarray(y_fit2))) ax.plot(np.squeeze(np.asarray(x_fit3)),np.squeeze(np.asarray(y_fit3))) ax.legend(['data','12.34 Error','Boostrap Error','Ensemble Error']) plt.show()