import numpy as np
from matplotlib import rc
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import seaborn as sns
rc('animation', html='html5')
sns.set(style='whitegrid', rc={'figure.figsize':(8,6)})
def generate_dataset(size, sigma):
    # linear model y = 2 + 3x with Gaussian noise of width sigma
    x = np.random.uniform(size=size)
    noise = np.random.normal(0, sigma, size)
    y = 2 + 3*x + noise
    return x, y
size = 100
sigma = 0.5
x, y = generate_dataset(size, sigma)
plt.plot(x, y, 'o')
plt.show()
# fit y = a + b*x by linear least squares via SVD
def svd_solve(x, y):
    # design matrix A with columns [1, x]
    mat = np.stack([np.ones(len(x)), x], axis=-1)
    u, s, vh = np.linalg.svd(mat, full_matrices=False)
    # pseudo-inverse V * W^{-1} * U^T
    mat_inv = (vh.T * (1 / s)).dot(u.T)
    sol = mat_inv.dot(y)
    return sol
sol = svd_solve(x, y)
print("Solution: ", sol)
# analytic errors on (a, b) from the SVD of the design matrix:
# Var(sol_i) = sigma^2 * sum_j (V_ij / w_j)^2
mat = np.stack([np.ones(size), x], axis=-1)
_, s, vh = np.linalg.svd(mat, full_matrices=False)
errs = []
for i in range(2):
    err = 0
    for j in range(2):
        err += (vh[j, i] / s[j]) ** 2
    errs.append(err)
print("a error: %.4f" % np.sqrt(errs[0] * (sigma ** 2)))
print("b error: %.4f" % np.sqrt(errs[1] * (sigma ** 2)))
def bootstrap_sample(x, y):
    # resample (x, y) pairs with replacement and refit
    idx = np.random.choice(len(x), len(x), replace=True)
    tmp_sol = svd_solve(x[idx], y[idx])
    return tmp_sol
def bootstrap_error(x, y, times):
    a = []
    b = []
    for i in range(times):
        sol = bootstrap_sample(x, y)
        # collect all solutions
        a.append(sol[0])
        b.append(sol[1])
    # calculate standard deviation for solutions
    return np.std(a), np.std(b)
err_a, err_b = bootstrap_error(x, y, 100)
print("a error: %.4f" % err_a)
print("b error: %.4f" % err_b)
def ensemble_error(times):
    a = []
    b = []
    for i in range(times):
        x, y = generate_dataset(size, sigma)
        sol = svd_solve(x, y)
        # collect all solutions
        a.append(sol[0])
        b.append(sol[1])
    # calculate standard deviation for solutions
    return np.std(a), np.std(b)
err_a, err_b = ensemble_error(100)
print("a error: %.4f" % err_a)
print("b error: %.4f" % err_b)
# new dataset: a nonlinear model, y = sin(2 + 3x), with less noise
size = 100
sigma = 0.1
x = np.random.uniform(size=size)
noise = np.random.normal(0, sigma, size)
y = np.sin(2 + 3*x) + noise
plt.plot(x, y, 'o')
plt.show()
Let's calculate some derivatives.
$$ y = \sin(a + bx) $$
$$ \chi^2(a, b) = \sum_{n = 1}^{N}\Big(\frac{y_n - \sin(a + bx_n)}{\sigma_n}\Big)^2 $$ Question... what is $\sigma_n$? If it is constant across the sample it doesn't matter: it only rescales $\chi^2$, so the location of the minimum is unchanged.
Now calculate the first-order derivatives:
$$ \frac{\partial \chi^2(a, b)}{\partial a} = -2\sum_{n = 1}^{N}\frac{(y_n - \sin(a + bx_n))\cos(a + bx_n)}{\sigma_n^2} $$
$$ \frac{\partial \chi^2(a, b)}{\partial b} = -2\sum_{n = 1}^{N}\frac{(y_n - \sin(a + bx_n))\,x_n\cos(a + bx_n)}{\sigma_n^2} $$
And the Hessian (dropping the term with second derivatives of the model, as is customary for Levenberg-Marquardt):
$$ \frac{\partial^2 \chi^2(a, b)}{\partial a^2} = 2\sum_{n = 1}^{N}\frac{\cos^2(a + bx_n)}{\sigma_n^2} $$
$$ \frac{\partial^2 \chi^2(a, b)}{\partial b^2} = 2\sum_{n = 1}^{N}\frac{x_n^2\cos^2(a + bx_n)}{\sigma_n^2} $$
$$ \frac{\partial^2 \chi^2(a, b)}{\partial a \partial b} = 2\sum_{n = 1}^{N}\frac{x_n\cos^2(a + bx_n)}{\sigma_n^2} $$
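Each Levenberg-Marquardt step then solves the damped system below (written to match the code that follows: $M$ is half the approximate Hessian with its diagonal inflated by $1 + \lambda$; note that, as written, this makes the $\lambda = 0$ step twice the pure Newton step):
$$ M\,\delta = -\nabla\chi^2(a, b), \qquad M_{kl} = \frac{1}{2}\frac{\partial^2 \chi^2}{\partial a_k \partial a_l} \times \begin{cases} 1 + \lambda & k = l \\ 1 & k \neq l \end{cases} $$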
def levenberg_marquardt_step(x, y, a, b, lambd):
    # curvature matrix: half the (approximate) Hessian of chi^2,
    # with the diagonal inflated by (1 + lambda)
    M = np.zeros((2, 2))
    M[0, 0] = 0.5 * (1 + lambd) * 2 * np.sum(np.cos(a + b * x) ** 2)
    M[1, 1] = 0.5 * (1 + lambd) * 2 * np.sum((x * np.cos(a + b * x)) ** 2)
    M[0, 1] = 0.5 * 2 * np.sum(x * (np.cos(a + b * x) ** 2))
    M[1, 0] = M[0, 1]
    M_inv = np.linalg.inv(M)
    # gradient of chi^2 with respect to (a, b)
    delta = np.zeros(2)
    delta[0] = -2 * np.sum((y - np.sin(a + b * x)) * np.cos(a + b * x))
    delta[1] = -2 * np.sum((y - np.sin(a + b * x)) * np.cos(a + b * x) * x)
    # step in parameter space
    step = -M_inv.dot(delta)
    return step
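Before animating, a quick numerical check (my addition, not in the original): the gradient vector used inside levenberg_marquardt_step should match a central finite difference of $\chi^2$:
# finite-difference check of the analytic gradient at a trial point
def chi2(x, y, a, b):
    return np.sum((y - np.sin(a + b * x)) ** 2)
eps = 1e-6
a0, b0 = 1.0, 1.0
num_grad_a = (chi2(x, y, a0 + eps, b0) - chi2(x, y, a0 - eps, b0)) / (2 * eps)
num_grad_b = (chi2(x, y, a0, b0 + eps) - chi2(x, y, a0, b0 - eps)) / (2 * eps)
ana_grad_a = -2 * np.sum((y - np.sin(a0 + b0 * x)) * np.cos(a0 + b0 * x))
ana_grad_b = -2 * np.sum((y - np.sin(a0 + b0 * x)) * np.cos(a0 + b0 * x) * x)
print("numeric gradient:  %.6f %.6f" % (num_grad_a, num_grad_b))
print("analytic gradient: %.6f %.6f" % (ana_grad_a, ana_grad_b))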
a = 1
b = 1
lambd = 20
steps = 100
cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()
def update_anim(i):
    global a, b
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    # update a, b
    a += step[0]
    b += step[1]
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))
animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
Large $\lambda$ - nice convergence regardless of the starting point (the inflated diagonal makes each step a short, gradient-descent-like step), but it slows down significantly close to the minimum.
a = 1
b = 1
lambd = 1
steps = 100
cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()
def update_anim(i):
    global a, b
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    # update a, b
    a += step[0]
    b += step[1]
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))
animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
$\lambda = 1$ - nice convergence regardless of the starting point, and it gets much closer to the minimum.
a = 1
b = 1
lambd = 0
steps = 100
cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()
def update_anim(i):
    global a, b
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    # update a, b
    a += step[0]
    b += step[1]
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))
animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
$\lambda = 0$ - Newton's method, no convergence (at least not to the right $(a, b)$: since $\sin$ is periodic, pairs such as $(a + 2\pi k, b)$ fit the function equally well).
a = 1
b = 1
lambd = 0.5
steps = 100
cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()
def update_anim(i):
    global a, b
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    # update a, b
    a += step[0]
    b += step[1]
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))
animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
def calc_error(x, y, a, b):
    # chi^2 of the current fit (constant sigma dropped)
    return np.sum((np.sin(a + b * x) - y) ** 2)
# adaptive lambda
a = 1
b = 1
lambd = 20 # initial
steps = 100
last_err = calc_error(x, y, a, b)
cmap = sns.color_palette("coolwarm", steps + 1)
seq_colors = [cmap[i] for i in range(steps + 1)]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(a, b, 'o', color=seq_colors[0])
ax.plot(2, 3, 'x', color='gray', markersize=15)
plt.close()
def update_anim(i):
    global a, b, lambd, last_err
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    # update a, b
    a += step[0]
    b += step[1]
    # plot
    ax.plot(a, b, 'o', color=seq_colors[i + 1])
    ax.set_title("Levenberg-Marquardt, Lambda: %.1f, Step: %d" % (lambd, i))
    # adapt the damping: if the error grew, increase lambda; otherwise
    # decrease it (textbook LM would also reject the bad step; we keep it
    # for simplicity)
    new_err = calc_error(x, y, a, b)
    if new_err > last_err:
        lambd = lambd * 1.1
    else:
        lambd = lambd * 0.9
    # update last error
    last_err = new_err
animation.FuncAnimation(fig, update_anim, range(steps), blit=False, interval=100, repeat=True)
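For completeness, here is the same adaptive-$\lambda$ scheme as a plain loop without the animation machinery (a minimal sketch reusing the functions above; the iteration cap and convergence tolerance are my choices):
a, b, lambd = 1.0, 1.0, 20.0
last_err = calc_error(x, y, a, b)
for i in range(200):
    step = levenberg_marquardt_step(x, y, a, b, lambd)
    a += step[0]
    b += step[1]
    new_err = calc_error(x, y, a, b)
    # same heuristic as above: more damping if chi^2 grew, less if it shrank
    lambd = lambd * 1.1 if new_err > last_err else lambd * 0.9
    # stop once chi^2 has stabilized
    if abs(last_err - new_err) < 1e-10:
        break
    last_err = new_err
print("a = %.4f, b = %.4f, lambda = %.3f after %d steps" % (a, b, lambd, i + 1))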