MAS.864 Week 7, Function Fitting, Benjamin Preis


In [ ]:
import numpy as np
import pandas as pd
import sympy as sym
from matplotlib import pyplot as plt
%matplotlib inline

9.1 a

To solve least squares with an SVD, we first decompose the design matrix $\mathbf{A}$ with numpy's svd function, and then solve $\mathbf{A}\vec x=\vec b$ by substituting the decomposition $\mathbf{A}=\mathbf{U}\mathbf{W}\mathbf{V}^T$. This gives us eqn. 12.12: $\vec x=\mathbf{V}\mathbf{W}^{-1}\mathbf{U}^T\vec b$.

In [346]:
def svd_ols(X,Y):
    n = len(X)
    #Create the A matrix
    A = np.empty((n,2))
    #First column of ones represents the intercept
    A[:,0] = np.ones(n)
    #Second column represents the slope (the x values)
    A[:,1] = X
    u, s, vh = np.linalg.svd(A)
    W = np.diag(s)
    #Eqn 12.12 from the book
    #the concatenate term pads W^-1 with zeros to the 2xN shape needed to multiply U^T
    ols = vh.transpose()@np.concatenate((np.linalg.inv(W),np.zeros((2,n-2))),axis=1)@u.transpose()@Y
    return ols[0], ols[1]
In [347]:
np.random.seed(15217)
x = np.random.uniform(size=100)
zeta = np.random.normal(scale=.5,size=100)
y = 2 + 3*x + zeta
In [348]:
a, b = svd_ols(x,y)
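
As a quick sanity check (my addition, not required by the problem), the coefficients from svd_ols should agree with numpy's built-in least-squares solver to machine precision:

In [ ]:
#Sanity check: the SVD solution should match np.linalg.lstsq on the same design matrix
A_check = np.column_stack((np.ones(len(x)), x))
coef_lstsq, *_ = np.linalg.lstsq(A_check, y, rcond=None)
print(coef_lstsq)          #should match (a, b) from svd_ols(x, y)
print(svd_ols(x, y))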
In [349]:
x_line = np.linspace(0,1,100)
plt.plot(x_line,a+b*x_line,label="SVD Solution")
plt.scatter(x,y,label="Data")
plt.plot(x_line,2+3*x_line,label="True Process")
plt.legend(loc="upper left")
plt.show()

Now, to calculate the error we use eqn. 12.34 from the book:

$$ \frac{\sigma^2_{\eta,i}}{\sigma^2_\zeta}=\sum_j\frac{V_{ij}^2}{w^2_j} $$

We know $\sigma^2_\zeta=0.25$, since we set $\sigma_\zeta=0.5$ when generating the data above.

In [350]:
#creating our A matrix and SVD again,
#since our svd_ols doesn't return V or the singular values
A_error = np.empty((100,2))
#First column of ones represents the intercept
A_error[:,0] = np.ones(100)
#Second column represents the slope (the x values)
A_error[:,1] = x

U, W, VT = np.linalg.svd(A_error)
In [351]:
{"SD_a": np.sqrt(.25*np.sum(VT[:,0]**2/W[0]**2)), "SD_b":np.sqrt(.25*np.sum(VT[:,1]**2/W[1]**2))}
Out[351]:
{'SD_a': 0.04502580788346778, 'SD_b': 0.197184116510552}

9.1 b

To bootstrap, we resample the data with replacement and refit the model; we'll do this 100 times. We sample index locations so that the x and y values stay paired.

In [352]:
np.random.seed(15217)
#each of the 100 columns holds the resampled indices for one bootstrap replicate
bootstrap_indices = np.random.choice(np.arange(0,100),(100,100))
coefs = np.empty([bootstrap_indices.shape[1],2])

for bootstrap in range(0,bootstrap_indices.shape[1]):
    x_bootstrap = x[bootstrap_indices[:,bootstrap]]
    y_bootstrap = y[bootstrap_indices[:,bootstrap]]
    bootstrap_a, bootstrap_b = svd_ols(x_bootstrap,y_bootstrap)
    coefs[bootstrap,0], coefs[bootstrap,1] = bootstrap_a, bootstrap_b
In [353]:
error = np.quantile(coefs,(.025,.975),axis=0)
#lower band from the 2.5% quantiles, upper band from the 97.5% quantiles
error_below = error[0,0]+error[0,1]*x_line
error_above = error[1,0]+error[1,1]*x_line
error = pd.DataFrame(error)
error.columns = ['Intercept','Slope']
error.index = ['2.5%','97.5%']
print(error)
       Intercept     Slope
2.5%    1.636437  2.905694
97.5%   2.079508  3.783398
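
For a more direct comparison with the analytic error estimates of eqn. 12.34 in part (a), we can also look at the standard deviations of the bootstrapped coefficients (this check is my addition, not required by the problem):

In [ ]:
#standard deviation of the bootstrapped intercept and slope estimates,
#the quantities to compare against SD_a and SD_b computed above
print({"SD_a_bootstrap": np.std(coefs[:,0]), "SD_b_bootstrap": np.std(coefs[:,1])})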
In [354]:
plt.plot(x_line,a+b*x_line,label="SVD Solution",color='#1C2833')
plt.fill_between(x_line,error_above,error_below)
plt.show()

9.1 c

Instead of bootstrapping, we now generate 100 independent data sets from the same process and fit each one, so the spread of the fitted coefficients measures the error directly.

In [355]:
x_c = np.random.uniform(size=(100,100))
zeta_c = np.random.normal(scale=.5,size=(100,100))
y_c = 2 + 3*x_c + zeta_c
In [356]:
coefs_c = np.empty([100,2])
for i in range(0,100):
    coefs_c[i,0], coefs_c[i,1] = svd_ols(x_c[:,i],y_c[:,i])
In [357]:
error_c = np.quantile(coefs_c,(.025,.975),axis=0)
#lower band from the 2.5% quantiles, upper band from the 97.5% quantiles
error_below_c = error_c[0,0]+error_c[0,1]*x_line
error_above_c = error_c[1,0]+error_c[1,1]*x_line
error_c = pd.DataFrame(error_c)
error_c.columns = ['Intercept','Slope']
error_c.index = ['2.5%','97.5%']
print(error_c)
       Intercept     Slope
2.5%    1.833166  2.648160
97.5%   2.201332  3.303202
In [359]:
plt.plot(x_line,a+b*x_line,label="SVD Solution",color='#1C2833')
plt.title('Graph of Least Squares, with Different Error Calculations')
#plt.fill_between(x_line,error_above,error_below,alpha=.75,label="Bootstrap Error")
plt.fill_between(x_line,error_above_c,error_below_c,alpha=.5,label="Re-Sample Error")
plt.legend(loc="upper left")
plt.show()

9.2

In [135]:
x = np.random.uniform(size=100)
zeta = np.random.normal(scale=.1,size=100)
y = np.sin(2+3*x) + zeta

To solve this problem, we need eqn. 12.30 ($\delta\vec a=-\mathbf{M}^{-1}\nabla\chi^2$), which defines the update step. $\mathbf{M}$ is given by eqn. 12.28:

$$ M_{ii}=\frac{1}{2}\frac{\partial^2\chi^2}{\partial a_i^2}(1+\lambda),\qquad M_{ij}=\frac{1}{2}\frac{\partial^2\chi^2}{\partial a_i\partial a_j}\quad(i\neq j) $$

In turn, the gradient and (approximate) Hessian of $\chi^2$ are given by eqns. 12.21 and 12.22.
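
For our model $y(x)=\sin(a+bx)$ these quantities are easy to write out explicitly (this expansion is mine, for reference against the code below, assuming a constant measurement error $\sigma$ for every point):

$$ \frac{\partial y}{\partial a}=\cos(a+bx),\qquad \frac{\partial y}{\partial b}=x\cos(a+bx) $$

so that, following eqn. 12.21,

$$ \frac{\partial\chi^2}{\partial a}=-\frac{2}{\sigma^2}\sum_n\left[y_n-\sin(a+bx_n)\right]\cos(a+bx_n),\qquad \frac{\partial\chi^2}{\partial b}=-\frac{2}{\sigma^2}\sum_n\left[y_n-\sin(a+bx_n)\right]x_n\cos(a+bx_n) $$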

In [136]:
def partials(a_vec,X):
    #Define the model y(x) = sin(a + b*x) symbolically
    a, b, x = sym.symbols('a b x')
    y_x = sym.sin(a+b*x)
    #Turn the two partial derivatives into functions that numpy can evaluate
    partial_1 = sym.lambdify([a,b,x],sym.diff(y_x,a),'numpy')
    partial_2 = sym.lambdify([a,b,x],sym.diff(y_x,b),'numpy')
    a_0 = a_vec[0,]
    a_1 = a_vec[1,]
    #Evaluate dy/da and dy/db at the current parameter values
    return partial_1(a_0,a_1,X), partial_2(a_0,a_1,X)
In [147]:
def gradient(Y,a_vec,X,sigma = .1):
    #Eqn 12.21: dchi^2/da_i = -2 sum (y_n - f)/sigma^2 * df/da_i
    partial_1, partial_2 = partials(a_vec,X)
    grad_1 = -2*np.sum((Y-np.sin(a_vec[0,]+X*a_vec[1,]))*partial_1/sigma**2)
    grad_2 = -2*np.sum((Y-np.sin(a_vec[0,]+X*a_vec[1,]))*partial_2/sigma**2)
    return grad_1, grad_2
In [149]:
def hessian(Y,a_vec,X,sigma = .1):
    #Eqn 12.22 (dropping the second-derivative term): d2chi^2/da_i da_j ~ 2 sum (1/sigma^2) df/da_i df/da_j
    partial_1, partial_2 = partials(a_vec,X)
    return 2*np.sum((1/sigma**2)*partial_1*partial_2)
In [205]:
def LM(Y, a_vec,X,lamb,sigma=.1):
    a_0 = a_vec[0,]
    a_1 = a_vec[1,]
    #Assemble the M matrix, with the (1+lambda) factor on the diagonal (eqn. 12.28)
    M = np.array([[.5*hessian(Y,np.array([[a_0],[a_0]]),X,sigma)*(1+lamb),
                   .5*hessian(Y,np.array([[a_0],[a_1]]),X,sigma)],
                 [.5*hessian(Y,np.array([[a_1],[a_0]]),X,sigma),
                   .5*hessian(Y,np.array([[a_1],[a_1]]),X,sigma)*(1+lamb)]])
    M_inv = np.linalg.inv(M)
    #Eqn. 12.30: the step is -M^{-1} grad(chi^2)
    step = -np.dot(M_inv,gradient(Y,a_vec,X,sigma=sigma))
    return step

The LM method iterates on $\vec a$ using the step from eqn. 12.30 above. In the non-adaptive stepper we keep $\lambda$ fixed and iterate until the residual stops decreasing.

In the adaptive stepper we also adjust $\lambda$ as we go, for efficiency: when a step lowers the residual we shrink $\lambda$ (more Newton-like), and when it raises the residual we grow $\lambda$ (more like gradient descent), as sketched below.
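
Schematically, one pass of that adaptive rule looks like the following (a minimal sketch of my own, not the cell actually run below; it assumes the x, y data from In [135] and the LM function above, with the same lamb_up/lamb_down factors used later):

In [ ]:
#one adaptive LM update: try a step, keep it only if the residual improves,
#and adjust lambda accordingly (lamb_down < 1 < lamb_up)
def lm_update(a_current, lamb, lamb_up=1.1, lamb_down=.9):
    step = LM(y, a_current, x, lamb).reshape(2,1)
    a_trial = a_current + step
    r_current = np.sum((y - np.sin(a_current[0,] + a_current[1,]*x))**2)
    r_trial = np.sum((y - np.sin(a_trial[0,] + a_trial[1,]*x))**2)
    if r_trial < r_current:
        return a_trial, lamb*lamb_down   #accept the step, trust the quadratic model more
    return a_current, lamb*lamb_up       #reject the step, lean toward gradient descent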

In [328]:
#Non-adaptive LM: fixed lambda, iterate until the residual stops decreasing
r = np.sum((y - np.sin(1+1*x))**2)
r_new = np.sum((y - np.sin(1+1*x))**2)
a0 = np.array([[1.],[1.]])
i = 1
while r <=r_new:
    r_new = r
    step = LM(y,a0,x,2)
    a0[0,] = a0[0,] + step[0]
    a0[1,] = a0[1,] + step[1]
    r_test = np.sum((y - np.sin(a0[0,]+a0[1,]*x))**2)
    i +=1
    r = r_test

print(a0)
print(i)
[[2.01794605]
 [2.93394245]]
198
In [336]:
#Adaptive LM: decrease lambda when the residual improves, increase it when it does not
r = np.sum((y - np.sin(1+1*x))**2)
r_new = np.sum((y - np.sin(1+1*x))**2)
a0 = np.array([[1.],[1.]])
a_new = np.empty([2,1])
lamb_up = 1.1
lamb_down = .9
lamb0 = 2
i = 1
while r <= r_new:
    r_new = r
    step = LM(y,a0,x,lamb0)
    a_new[0,] = a0[0,] + step[0]
    a_new[1,] = a0[1,] + step[1]
    r_test = np.sum((y - np.sin(a_new[0,]+a_new[1,]*x))**2)
    if r_test < r:
        a0 = a_new
        lamb0 = lamb0 * lamb_down
    elif r_test > r:
        lamb0 = lamb0 * lamb_up
    i +=1
    r = r_test
print(a0)
print(i)
[[2.01806688]
 [2.93420458]]
27

As we can see, the non-adaptive stepper took 198 steps to reach the final value for $\vec a$, while the adaptive stepper only took 27 steps. The final estimates are nearly identical, as would be expected.

In [343]:
x_line = np.linspace(0,1,100)
plt.plot(x_line,np.sin(a0[0,]+a0[1,]*x_line),label="LM Solution",color="red")
plt.scatter(x,y,label="Data")
plt.plot(x_line,np.sin(2+3*x_line),label="True Process",color="green")
plt.legend(loc="upper right")
plt.show()