Find the first five diagonal Pade approximants $[1/1],\dots,[5/5]$ to $e^x$ around the origin. Remember that the numerator and denominator can be multiplied by a constant to make the numbers as convenient as possible. Evaluate the approximations at $x = 1$ and compare with the correct value of $e = 2.718281828459045$. How is the error improving with the order? How does that compare to the polynomial error?
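Writing the diagonal approximant as $[N/N](x) = \sum_{n=0}^{N} a_n x^n \big/ \left(1 + \sum_{m=1}^{M} b_m x^m\right)$ with $M = N$, and requiring that its Taylor expansion match the coefficients $c_n = 1/n!$ of $e^x$ through order $N+M$, gives the standard matching conditions that the code below solves:
$$\sum_{m=1}^{M} b_m\, c_{N+l-m} = -c_{N+l} \quad (l = 1,\dots,M), \qquad a_n = c_n + \sum_{m=1}^{n} b_m\, c_{n-m} \quad (n = 0,\dots,N).$$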
# From Neil's pade.py code
import numpy as np
np.set_printoptions(precision=1, suppress=True)
for N in range(1, 6):
    M = N
    # Taylor coefficients of exp(x): c[n] = 1/n!
    c = 1/np.hstack([1, np.cumprod(np.arange(1, N + M + 1))])
    # matrix of the linear system for the denominator coefficients b
    C = c[N + 1 - 1:N + 1 - M - 1:-1].reshape(1, M)
    for l in range(N + 2, N + M + 1):
        C = np.vstack([C, c[l - 1:l - M - 1:-1]])
    b = -np.linalg.inv(C)@c[N + 1:N + M + 1]
    b = np.hstack([1, b])
    # numerator coefficients a follow from the b's and the Taylor coefficients
    a = np.zeros(N + 1)
    a[0] = c[0]
    for n in range(1, N + 1):
        a[n] = c[n]
        for m in range(1, n + 1):
            a[n] += b[m]*c[n-m]
    # evaluate the order N+M Taylor polynomial and the [N/N] Pade approximant at x = 1
    x = 1
    xp = x**(np.arange(N + M + 1))
    y = np.dot(c, xp)
    poly_error = np.exp(x) - y
    xp = x**(np.arange(N + 1))
    y = np.dot(a, xp)/np.dot(b, xp)
    pade_error = np.exp(x) - y
    print("Order: ", N)
    print(" Polynomial Error: %.3g" % poly_error)
    print(" Pade Error: %.3g" % pade_error)
    print(" a: ", a/np.min(np.abs(a)))
    print(" b: ", b/np.min(np.abs(a)))
The Pade error shrinks faster than exponentially as the order increases, and much faster than the error of the Taylor polynomial of the same total order.
Take as a data set $x = \{-10, -9, \dots, 9, 10\}$, and $y(x) = 0$ if $x \le 0$ and $y(x) = 1$ if $x \gt 0$.
(a) Fit the data with a polynomial with 5, 10, and 15 terms, using a pseudo-inverse of the Vandermonde matrix.
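For a polynomial with $n$ terms the model is $y_i \approx \sum_{m=0}^{n-1} a_m x_i^m$, i.e. $\mathbf{y} = V\mathbf{a}$ with the Vandermonde matrix $V_{im} = x_i^m$; the least-squares coefficients are $\mathbf{a} = V^{+}\mathbf{y}$, where $V^{+}$ is the SVD-based pseudo-inverse that np.linalg.pinv computes in the code below.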
# From Neil's fit.py code
import matplotlib.pyplot as plt
import numpy as np
fit_range = [5, 10, 15]
# fit points at the integers, cross-validation points at the half-integers
x_fit = np.arange(-10, 11).reshape((-1, 1))
y_fit = 1*(x_fit > 0)
x_cross = np.arange(-10.5, 11).reshape((-1, 1))
y_cross = 1*(x_cross > 0)
x_test = np.arange(-10.5, 10.6, 0.1).reshape((-1, 1))
plt.ion()
poly_error = []
plt.plot(x_fit, y_fit, '.-', markersize=10)
for n_fit in fit_range:
    # Vandermonde matrix on the fit points, coefficients from its pseudo-inverse
    A = np.ones(x_fit.shape)
    for i in range(1, n_fit):
        A = np.hstack([A, x_fit**i])
    coeff = np.linalg.pinv(A)@y_fit
    # out-of-sample error on the cross-validation points
    A = np.ones(x_cross.shape)
    for i in range(1, n_fit):
        A = np.hstack([A, x_cross**i])
    poly_error.append(np.std(y_cross - A@coeff))
    # dense evaluation for plotting
    A = np.ones(x_test.shape)
    for i in range(1, n_fit):
        A = np.hstack([A, x_test**i])
    plt.plot(x_test, A@coeff, markersize=10, label=str(n_fit)+' terms')
plt.xlim(-11, 11)
plt.ylim(-0.5, 1.5)
plt.title("Polynomial Fit")
plt.legend()
plt.show()
(b) Fit the data with 5, 10, and 15 $r^3$ RBFs uniformly distributed between $x = -10$ and $x = 10$.
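Here the model is a sum of cubic radial basis functions, $y(x) = \sum_{i=1}^{n} a_i\,|x - c_i|^3$, with the centers $c_i$ spaced uniformly on $[-10, 10]$; the coefficients again come from the pseudo-inverse, $\mathbf{a} = A^{+}\mathbf{y}$ with $A_{ji} = |x_j - c_i|^3$, computed below as $(r^2)^{3/2}$.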
rbf_error = []
plt.plot(x_fit, y_fit, '.-', markersize=10)
for n_fit in fit_range:
    # centers spaced uniformly over [-10, 10]
    centers = -10 + 20*np.arange(n_fit)/(n_fit - 1)
    # basis matrix of r^3 = (r^2)^(3/2) RBFs on the fit points
    r2 = (x_fit - centers[0])**2
    A = (r2**(3/2)).reshape(-1, 1)
    for i in range(1, n_fit):
        r2 = (x_fit - centers[i])**2
        A = np.hstack([A, r2**(3/2)])
    coeff = np.linalg.pinv(A)@y_fit
    # out-of-sample error on the cross-validation points
    r2 = (x_cross - centers[0])**2
    A = (r2**(3/2)).reshape(-1, 1)
    for i in range(1, n_fit):
        r2 = (x_cross - centers[i])**2
        A = np.hstack([A, r2**(3/2)])
    rbf_error.append(np.std(y_cross - A@coeff))
    # dense evaluation for plotting
    r2 = (x_test - centers[0])**2
    A = (r2**(3/2)).reshape(-1, 1)
    for i in range(1, n_fit):
        r2 = (x_test - centers[i])**2
        A = np.hstack([A, r2**(3/2)])
    plt.plot(x_test, A@coeff, markersize=10, label=str(n_fit)+' terms')
plt.xlim(-11, 11)
plt.ylim(-0.5, 1.5)
plt.title("RBF Fit")
plt.legend()
plt.show()
(c) Using the coefficients found for these six fits, evaluate the total out-of-sample error at $x = \{-10.5, -9.5, \dots, 9.5, 10.5 \}$.
plt.plot(fit_range, poly_error, '.-', markersize=10, label='polynomial')
plt.plot(fit_range, rbf_error, '.-', markersize=10, label='RBF')
plt.title("Out-of-Sample Error")
plt.legend()
plt.show()
(d) Using a 15th-order polynomial, fit the data with the curvature regularizer in equation (14.53), plot the fits, and compare the out-of-sample errors for $\lambda = 0, 0.01, 0.1, 1$ (this part is harder than the others).
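Taking equation (14.53) to be the integrated squared-curvature penalty, the quantity minimized is
$$I = \sum_n \left[y_n - f(x_n)\right]^2 + \lambda \int_{-1}^{1} \left(\frac{d^2 f}{dx^2}\right)^2 dx, \qquad f(x) = \sum_{m=0}^{M-1} a_m x^m,$$
with the data rescaled so that $x/10 \in [-1, 1]$. Setting $\partial I/\partial a_l = 0$ gives the linear system $(B + C)\,\mathbf{a} = \mathbf{A}$ with
$$A_l = \sum_n y_n x_n^l, \qquad B_{lm} = \sum_n x_n^{l+m}, \qquad C_{lm} = \lambda\, l(l-1)\, m(m-1)\, \frac{1 - (-1)^{l+m-3}}{l+m-3} \quad (l, m \ge 2),$$
which is what the code below constructs and solves.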
M = 15
lambd = [0, 0.01, 0.1, 1]
plt.plot(x_fit, y_fit, '.-', markersize=10)
for i in range(len(lambd)):
    # right-hand side A_l = sum_n y_n (x_n/10)^l
    A = np.zeros((M, 1))
    for l in range(M):
        A[l] = np.sum(y_fit*(x_fit/10)**l)
    # data term B_lm = sum_n (x_n/10)^(l+m)
    B = np.zeros((M, M))
    for l in range(M):
        for m in range(M):
            B[l,m] = np.sum((x_fit/10)**(m + l))
    # curvature term C_lm = lambda l(l-1) m(m-1) * integral of x^(l+m-4) over [-1, 1]
    C = np.zeros((M, M))
    for l in range(2, M):
        for m in range(2, M):
            C[l,m] = lambd[i]*l*(l - 1)*m*(m - 1)*(1 - (-1)**(m + l - 3))/(m + l - 3)
    # the regularizer adds to the data term in the normal equations
    coeff = np.linalg.inv(B + C)@A
    y_out = np.zeros(x_cross.shape)
    for m in range(M):
        y_out += coeff[m]*(x_cross/10)**m
    error = np.std(y_out - y_cross)
    y_pred = np.zeros(x_test.shape)
    for m in range(M):
        y_pred += coeff[m]*(x_test/10)**m
    plt.plot(x_test, y_pred, label='lambda '+str(lambd[i])+' error %.2f'%error)
plt.xlim(-11, 11)
plt.ylim(-0.5, 1.5)
plt.title("Regularized Fit")
plt.legend()
plt.show()
Train a neural network on the output from an order 4 maximal LFSR (Problem 6.3) and learn to reproduce it. How do the results depend on the network depth and architecture? For extra credit, try learning a longer sequence (Table 6.1).
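The code below trains the network by gradient descent on the squared error. With layer activations $y^{(\ell)}_i = \sum_j w^{(\ell)}_{ij} x^{(\ell-1)}_j$, $x^{(\ell)}_i = g(y^{(\ell)}_i)$, and $g = \tanh$, the error terms used in backward() are
$$\delta^{(L)}_i = 2\left(x^{(L)}_i - t_i\right) g'(y^{(L)}_i), \qquad \delta^{(\ell)}_j = g'(y^{(\ell)}_j) \sum_i w^{(\ell+1)}_{ij}\, \delta^{(\ell+1)}_i,$$
and every weight is updated as $w^{(\ell)}_{ij} \leftarrow w^{(\ell)}_{ij} - \alpha\, \delta^{(\ell)}_i\, x^{(\ell-1)}_j$.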
# From Neil's lfsrdnn.py
import numpy as np
M = 4
taps = [1, 4]
network = [4, 20, 20, 1]  # layer sizes: M inputs, two hidden layers, one output
alpha = 0.01
n_steps = 100
lfsr = np.ones(M + 1)
def lfsr_step():
    global lfsr
    lfsr[1:] = lfsr[0:-1]
    lfsr[0] = 0
    for j in taps:
        lfsr[0] += lfsr[j]
    lfsr[0] = lfsr[0]%2
# training set: the M-bit LFSR state as input, the next bit as output
n_pts = 2**M
inputs = np.zeros((n_pts, M))
outputs = np.zeros((n_pts, 1))
for i in range(2**M):
    lfsr_step()
    inputs[i][:] = lfsr[1:]
    outputs[i][0] = lfsr[0]
def g(x):
    return np.tanh(x)
def dg(x):
    return 1/(np.cosh(x)**2)
def init():
    global x, y, network, weights
    x = []
    y = []
    for layer in range(len(network)):
        x.append(np.zeros(network[layer]))
        y.append(np.zeros(network[layer]))
    weights = [[]]
    for layer in range(1, len(network)):
        weights.append(2*np.random.rand(network[layer], network[layer-1]) - 1)
def forward(n):
    global x, y, network, weights, inputs
    x[0][:] = inputs[n]
    for layer in range(1, len(network)):
        y[layer][:] = np.zeros(network[layer])
        for i in range(network[layer]):
            y[layer][i] = np.sum(weights[layer][i][:]*x[layer-1][:])
        x[layer][:] = g(y[layer][:])
def backward(n):
    global outputs
    # output layer: gradient of the squared error
    layer = len(network) - 1
    delta = np.zeros(network[layer])
    for i in range(network[layer]):
        delta[i] = 2*(x[layer][i] - outputs[n][i])*dg(y[layer][i])
        weights[layer][i][:] -= alpha*delta[i]*x[layer-1][:]
    # hidden layers: back-propagate the errors
    for layer in range(len(network) - 2, 0, -1):
        last_delta = np.copy(delta)
        delta = np.zeros(network[layer])
        for j in range(network[layer]):
            for i in range(network[layer+1]):
                delta[j] += weights[layer+1][i][j]*last_delta[i]
            delta[j] *= dg(y[layer][j])
            weights[layer][j][:] -= alpha*delta[j]*x[layer-1][:]
def update():
    for n in range(n_pts):
        forward(n)
        backward(n)
def error():
    global outputs
    err = 0
    for n in range(n_pts):
        forward(n)
        err += np.abs(np.sum(x[-1] - outputs[n]))
        print(f"{x[-1][0]:.2f}:{outputs[n][0]:.0f}")
    return err/n_pts
init()
print("output:")
print(f"average error: {error():.3f}")
for i in range(n_steps):
    update()
print("output:")
print(f"average error: {error():.3f}")