In [173]:
import numpy as np
from matplotlib import rc
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
rc('animation', html='html5')
sns.set(style='whitegrid', rc={'figure.figsize':(8,6)})

(12.1) Linear least squares - SVD

In [171]:
def generate_dataset(size, sigma):
    x = np.random.uniform(size=size)
    noise = np.random.normal(0, sigma, size)
    y = 2 + 3*x + noise
    return x, y
In [172]:
size = 100
sigma = 0.5
x, y = generate_dataset(size, sigma)

plt.plot(x, y, 'o')
plt.show()
In [149]:
# fit y = a + bx: solve the linear least-squares problem via SVD
def svd_solve(x, y):
    # design matrix A with columns [1, x], shape (N, 2)
    mat = np.stack([np.ones(len(x)), x], axis=-1)

    # thin SVD: A = U W V^T
    u, s, vh = np.linalg.svd(mat, full_matrices=False)

    # pseudo-inverse A^+ = V W^{-1} U^T
    mat_inv = (vh.T * (1 / s)).dot(u.T)

    # least-squares solution [a, b] = A^+ y
    sol = mat_inv.dot(y)
    return sol
In [150]:
sol = svd_solve(x, y)
print("Solution: ", sol)
Solution:  [2.09730609 2.79190181]
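
As a quick sanity check (not part of the exercise itself), numpy's built-in least-squares solver, which also uses an SVD internally, should reproduce the same coefficients:

In [ ]:
# cross-check with np.linalg.lstsq
A = np.stack([np.ones(size), x], axis=-1)
sol_lstsq, residuals, rank, sv = np.linalg.lstsq(A, y, rcond=None)
print("lstsq solution:", sol_lstsq)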

(a) Error estimation according to equation (12.34) - known error model

In [151]:
# recompute the SVD of the design matrix to get W and V^T
# (they are local to svd_solve above)
mat = np.stack([np.ones(size), x], axis=-1)
u, s, vh = np.linalg.svd(mat, full_matrices=False)

# eq. (12.34): sigma^2(p_i) = sigma^2 * sum_j (V_ij / w_j)^2
errs = []
for i in range(2):
    err = 0
    for j in range(2):
        err += (vh[j, i] / s[j]) ** 2
    errs.append(err)

print("a error: %.4f" % np.sqrt(errs[0] * (sigma ** 2)))
print("b error: %.4f" % np.sqrt(errs[1] * (sigma ** 2)))
a error: 0.1077
b error: 0.1807
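
Equivalently (a small cross-check using the same SVD factors from the cell above), one can form the full parameter covariance matrix $C = \sigma^2\, V\, W^{-2}\, V^T$ and read the errors off its diagonal:

In [ ]:
# covariance matrix C = sigma^2 * V W^{-2} V^T; its diagonal reproduces
# the variances computed in the loop above
C = (vh.T * (1 / s ** 2)).dot(vh) * sigma ** 2
print("a error: %.4f" % np.sqrt(C[0, 0]))
print("b error: %.4f" % np.sqrt(C[1, 1]))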

(b) Error estimation by Bootstrap sampling

In [152]:
def bootstrap_sample(x, y):
    # draw a resampled dataset (with replacement) and refit it
    idx = np.random.choice(size, size, replace=True)
    tmp_sol = svd_solve(x[idx], y[idx])
    return tmp_sol

def bootstrap_error(x, y, times):
    a = []
    b = []
    for i in range(times):
        sol = bootstrap_sample(x, y)
        # collect all solutions
        a.append(sol[0])
        b.append(sol[1])
        
    # calculate standard deviation for solutions
    return np.std(a), np.std(b)
In [153]:
err_a, err_b = bootstrap_error(x, y, 100)
print("a error: %.4f" % err_a)
print("b error: %.4f" % err_b)
a error: 0.1113
b error: 0.1767
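
The same bootstrap distribution can also yield percentile intervals; a small extension sketch (the choice of 1000 resamples is an assumption, enough for the intervals to stabilise):

In [ ]:
# keep the full bootstrap distributions and report 68% percentile intervals
boot = np.array([bootstrap_sample(x, y) for _ in range(1000)])
print("a: %.4f +/- %.4f" % (boot[:, 0].mean(), boot[:, 0].std()))
print("b: %.4f +/- %.4f" % (boot[:, 1].mean(), boot[:, 1].std()))
print("a 68%% interval: [%.4f, %.4f]" % tuple(np.percentile(boot[:, 0], [16, 84])))
print("b 68%% interval: [%.4f, %.4f]" % tuple(np.percentile(boot[:, 1], [16, 84])))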

(c) Ensemble of independent datasets

In [154]:
def ensemble_error(times):
    a = []
    b = []
    for i in range(times):
        x, y = generate_dataset(size, sigma)
        sol = svd_solve(x, y)
        # collect all solutions
        a.append(sol[0])
        b.append(sol[1])
        
    # calculate standard deviation for solutions
    return np.std(a), np.std(b)
In [155]:
err_a, err_b = ensemble_error(100)
print("a error: %.4f" % err_a)
print("b error: %.4f" % err_b)
a error: 0.1041
b error: 0.1825
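
For a quick side-by-side view (a small summary sketch reusing the globals defined above; the exact numbers vary from run to run), all three estimates should land in the same ballpark:

In [ ]:
# the three error estimates side by side
boot_a, boot_b = bootstrap_error(x, y, 100)
ens_a, ens_b = ensemble_error(100)
print("analytic : a %.4f  b %.4f" % (np.sqrt(errs[0]) * sigma, np.sqrt(errs[1]) * sigma))
print("bootstrap: a %.4f  b %.4f" % (boot_a, boot_b))
print("ensemble : a %.4f  b %.4f" % (ens_a, ens_b))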

(12.2) Non-linear least squares - Levenberg-Marquardt

In [186]:
size = 100
sigma = 0.1
x = np.random.uniform(size=size)
noise = np.random.normal(0, sigma, size)
y = np.sin(2 + 3*x) + noise

plt.plot(x, y, 'o')
plt.show()

Let's calculate some derivatives

$$ y = \sin(a + bx) $$

$$ \chi^2(a, b) = \sum_{n = 1}^{N}\Big(\frac{y_n - \sin(a + bx_n)}{\sigma_n}\Big)^2 $$ Question... what is $\sigma_n$? If it is constant across the sample it only rescales $\chi^2$, so it does not affect the best-fit $(a, b)$ (it still matters for the error estimates).

Now calculate first order derivatives:

$$ \frac{\partial \chi^2(a, b)}{\partial a} = -2\sum_{n = 1}^{N}\frac{(y_n - \sin(a + bx_n))\cos(a + bx_n)}{\sigma_n^2} $$

$$ \frac{\partial \chi^2(a, b)}{\partial b} = -2\sum_{n = 1}^{N}\frac{(y_n - \sin(a + bx_n))\,x_n\cos(a + bx_n)}{\sigma_n^2} $$

And the approximate Hessian, dropping the second-derivative term as usual for Gauss-Newton / Levenberg-Marquardt:

$$ \frac{\partial^2 \chi^2(a, b)}{\partial a^2} = 2\sum_{n = 1}^{N}\frac{\cos^2(a + bx_n)}{\sigma_n^2} $$

$$ \frac{\partial^2 \chi^2(a, b)}{\partial b^2} = 2\sum_{n = 1}^{N}\frac{x_n^2\cos^2(a + bx_n)}{\sigma_n^2} $$

$$ \frac{\partial^2 \chi^2(a, b)}{\partial a \partial b} = 2\sum_{n = 1}^{N}\frac{x_n\cos^2(a + bx_n)}{\sigma_n^2} $$
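
Before plugging these into the update, a quick finite-difference check of the analytic gradient is cheap insurance (a small sketch; chi2 and grad_chi2 are ad-hoc helpers, and $\sigma_n$ is set to 1 since it only rescales $\chi^2$):

In [ ]:
# central finite differences vs. the analytic gradient (sigma_n = 1)
def chi2(a, b):
    return np.sum((y - np.sin(a + b * x)) ** 2)

def grad_chi2(a, b):
    r = y - np.sin(a + b * x)
    c = np.cos(a + b * x)
    return np.array([-2 * np.sum(r * c), -2 * np.sum(r * c * x)])

eps = 1e-6
a0, b0 = 1.0, 1.0
numeric = np.array([(chi2(a0 + eps, b0) - chi2(a0 - eps, b0)) / (2 * eps),
                    (chi2(a0, b0 + eps) - chi2(a0, b0 - eps)) / (2 * eps)])
print("analytic:", grad_chi2(a0, b0))
print("numeric: ", numeric)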

In [187]:
def levenberg_marquardt_step(x, y, a, b, lambd):
    # curvature matrix (0.5 * approximate Hessian above),
    # with the diagonal inflated by (1 + lambda)
    M = np.zeros((2, 2))
    M[0, 0] = 0.5 * (1 + lambd) * 2 * np.sum(np.cos(a + b * x) ** 2)
    M[1, 1] = 0.5 * (1 + lambd) * 2 * np.sum((x * np.cos(a + b * x)) ** 2)

    M[0, 1] = 0.5 * 2 * np.sum(x * (np.cos(a + b * x) ** 2))
    M[1, 0] = M[0, 1]

    M_inv = np.linalg.inv(M)

    # gradient of chi^2 (sigma_n = 1)
    delta = np.zeros(2)
    delta[0] = -2 * np.sum((y - np.sin(a + b*x)) * np.cos(a + b * x))
    delta[1] = -2 * np.sum((y - np.sin(a + b*x)) * np.cos(a + b * x) * x)

    # step in (a, b): solve M * step = -gradient
    step = -M_inv.dot(delta)

    return step
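
A quick non-animated usage sketch of this step function (assumption: a fixed $\lambda = 1$ converges from this starting point, as the animation further down suggests):

In [ ]:
# iterate the step until it becomes negligible (fixed lambda = 1)
a_fit, b_fit = 1.0, 1.0
for _ in range(200):
    da, db = levenberg_marquardt_step(x, y, a_fit, b_fit, 1.0)
    a_fit, b_fit = a_fit + da, b_fit + db
    if np.hypot(da, db) < 1e-8:
        break
print("fit: a = %.4f, b = %.4f" % (a_fit, b_fit))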
In [196]:
a = 1
b = 1
lambd = 20
steps = 100

cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]

fig = plt.figure()  
ax = fig.add_subplot(1, 1, 1)

ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()

def update_anim(i):
    global a, b
    
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    
    # update a,b 
    a += step[0]
    b += step[1]
    
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))

animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
Out[196]:

Large $\lambda$ - nice convergence regardless of the starting point, but it slows down significantly close to the minimum: for large $\lambda$ the update approaches a small gradient-descent step, so its length shrinks together with the gradient.

In [197]:
a = 1
b = 1
lambd = 1
steps = 100

cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]

fig = plt.figure()  
ax = fig.add_subplot(1, 1, 1)

ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()

def update_anim(i):
    global a, b
    
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    
    # update a,b 
    a += step[0]
    b += step[1]
    
    #plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))

animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
Out[197]:

$\lambda = 1$ - nice convergence regardless of the starting point, and it gets much closer to the minimum.

In [198]:
a = 1
b = 1
lambd = 0
steps = 100

cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]

fig = plt.figure()  
ax = fig.add_subplot(1, 1, 1)

ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()

def update_anim(i):
    global a, b
    
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    
    # update a,b 
    a += step[0]
    b += step[1]
    
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))

animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
Out[198]:
In [206]:
a = 1
b = 1
lambd = 0.5
steps = 100

cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]

fig = plt.figure()  
ax = fig.add_subplot(1, 1, 1)

ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()

def update_anim(i):
    global a, b
    
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    
    # update a,b 
    a += step[0]
    b += step[1]
    
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))

animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
Out[206]:

$\lambda = 0$ - plain Newton's method, no convergence (at least not to the right $(a, b)$; since $\sin$ is periodic, other pairs such as $(a + 2\pi, b)$ fit the data equally well, and the iteration appears to end up in one of those).

In [201]:
def calc_error(x, y, a, b):
    # unweighted chi^2: sum of squared residuals
    return np.sum((np.sin(a + b * x) - y) ** 2)
In [205]:
# adaptive lambda

a = 1
b = 1
lambd = 20 # initial
steps = 100

last_err = calc_error(x, y, a, b)

cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]

fig = plt.figure()  
ax = fig.add_subplot(1, 1, 1)

ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()

def update_anim(i):
    global a, b, lambd, last_err
    
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    
    # update a, b
    a += step[0]
    b += step[1]
    
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))
    
    # check error
    new_err = calc_error(x, y, a, b)
    if new_err > last_err:
        # if bigger, increase
        lambd = lambd * 1.1
    else:
        # else, decrease
        lambd = lambd * 0.9
    # update last error
    last_err = new_err

animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
Out[205]:
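
The schedule above always accepts the step and only then adjusts $\lambda$; the textbook Levenberg-Marquardt recipe instead rejects a step that increases $\chi^2$ and retries it with a larger $\lambda$. A minimal non-animated sketch of that variant (the factor-of-10 updates and the iteration budget are assumptions):

In [ ]:
# accept/reject variant: keep the current point on an uphill step and stiffen lambda
a_fit, b_fit, lam = 1.0, 1.0, 20.0
err = calc_error(x, y, a_fit, b_fit)
for _ in range(100):
    da, db = levenberg_marquardt_step(x, y, a_fit, b_fit, lam)
    trial = calc_error(x, y, a_fit + da, b_fit + db)
    if trial < err:
        # accept the step and relax lambda towards Newton
        a_fit, b_fit, err = a_fit + da, b_fit + db, trial
        lam *= 0.1
    else:
        # reject the step and make the update more gradient-descent-like
        lam *= 10.0
print("fit: a = %.4f, b = %.4f (lambda = %.2g)" % (a_fit, b_fit, lam))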