Before we compute the approximants in Python, let's rewrite equation (14.7) in matrix form.
For an $[N/M]$ Padé approximant, the denominator coefficients satisfy, for $l = N+1,\dots,N+M$, $$\begin{align} 0 &= c_l + \sum_{m=1}^{M}b_m c_{l-m} \\ \begin{bmatrix}0 \\ 0 \\ \vdots \\ 0 \end{bmatrix} &= \begin{bmatrix}c_{N+1} \\ c_{N+2} \\ \vdots \\ c_{N+M} \end{bmatrix} + \begin{bmatrix} c_N & c_{N-1} & \dots & c_{N+1-M} \\ \vdots & \ddots & & \vdots \\ c_{N+M-1} & \dots & & c_{N} \end{bmatrix} \begin{bmatrix}b_1 \\ b_2 \\ \vdots \\ b_M \end{bmatrix} \\ \begin{bmatrix}b_1 \\ b_2 \\ \vdots \\ b_M \end{bmatrix} &= -\begin{bmatrix} c_N & c_{N-1} & \dots & c_{N+1-M} \\ \vdots & \ddots & & \vdots \\ c_{N+M-1} & \dots & & c_{N} \end{bmatrix}^{-1} \begin{bmatrix}c_{N+1} \\ c_{N+2} \\ \vdots \\ c_{N+M} \end{bmatrix} \end{align}$$
The Taylor expansion of $e^x$ is $1 + \sum_{m=1}^{\infty}\frac{x^m}{m!}$, so the series coefficients are simply $c_m = 1/m!$.
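As a quick check, the $[1/1]$ approximant follows directly from these formulas: with $c_0 = 1$, $c_1 = 1$, $c_2 = \tfrac{1}{2}$, the single equation $0 = c_2 + b_1 c_1$ gives $b_1 = -\tfrac{1}{2}$, and the numerator coefficients $a_n = c_n + \sum_{m=1}^{n} b_m c_{n-m}$ come out to $a_0 = 1$, $a_1 = \tfrac{1}{2}$, so $$e^x \approx \frac{1 + x/2}{1 - x/2} = \frac{2 + x}{2 - x},$$ which is exactly the scaled $[1/1]$ output the code below should print.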
# Finding Pade approximants
import numpy as np
def exp_pade_approximant(M, N, x=None):
# Calculate Taylor expansion for exp up to N+M order
c = 1. / np.insert(np.cumprod(range(1, M+N+1), dtype=np.float32),0,1)
# Calculate b coefficients by doing the matrix multiplication described above
C = np.zeros((M, M))
for i, l in enumerate(range(N, N+M)):
C[i] = c[l:l-M:-1]
b = - np.linalg.inv(C) @ c[N+1:N+M+1]
b = np.insert(b, 0, 1) # b_0 = 1
# Calculate a coefficients
a = np.zeros_like(b)
a[0] = c[0]
for n in range(1, N+1):
a[n] = c[n] + b[1:n+1]@np.flip(c[:n], 0)
    if x is not None:
        powers = np.array([x**i for i in range(N+M+1)])
        y = (a @ powers[:N+1]) / (b @ powers[:M+1])
        err = np.abs(np.exp(x) - y)
        # Scale numerator and denominator so the smallest-magnitude numerator coefficient is 1
scale = np.min(np.abs(a))
return a/scale, b/scale, y, err
else:
return a, b
np.set_printoptions(precision=2)
for i in range(1,6):
a, b, y, err = exp_pade_approximant(i, i, x=1.)
print('For the [{0}/{0}] approximant'.format(i))
print('-----------------------------')
print('Error: %.2g' % err)
print('a', a)
print('b', b)
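As an optional cross-check (assuming SciPy is available), scipy.interpolate.pade builds the same rational approximant directly from the Taylor coefficients; a minimal sketch for the $[3/3]$ case:
from scipy.interpolate import pade
# Taylor coefficients of exp(x) up to order 6, then the [3/3] approximant p/q
c = 1. / np.insert(np.cumprod(np.arange(1, 7, dtype=np.float64)), 0, 1)
p, q = pade(c, 3)
print(p(1.) / q(1.), np.exp(1.))  # both should be close to e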
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook')
x_train = np.arange(-10,10,1)
y_train = (x_train>0).astype(np.float32)
x_valid = np.arange(-10.5,10.5,1)
y_valid = (x_valid>0).astype(np.float32)
x_vis = np.arange(-10.5, 10.5, .1)
def fit_polynomial(x_train, y_train, x_valid, y_valid, x_vis, order):
# Create Vandermonde matrix
A = np.zeros((order, len(x_train)))
for i in range(order):
A[i] = x_train**i
c = np.linalg.pinv(A.T)@y_train
A = np.zeros((order, len(x_valid)))
for i in range(order):
A[i] = x_valid**i
error = np.mean(np.square(y_valid - A.T@c))
A = np.zeros((order, len(x_vis)))
for i in range(order):
A[i] = x_vis**i
y_vis = A.T@c
return error, y_vis
plt.figure()
plt.scatter(x_train, y_train, color='k', marker='x')
plt.xlim([-10.5, 10.5])
plt.ylim([-.5, 1.5])
for order in [5, 10, 15]:
error, y_vis = fit_polynomial(x_train, y_train, x_valid, y_valid, x_vis, order)
plt.plot(x_vis, y_vis, label='%dth Order, Error: %.3f' % (order, error))
plt.legend()
plt.show()
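For reference, the same least-squares polynomial fit can be done with NumPy's built-in helpers; a minimal sketch using the x_train, y_train and x_vis arrays defined above:
# Equivalent least-squares fit of a 10-term polynomial via np.vander and np.linalg.lstsq
order = 10
V = np.vander(x_train, order, increasing=True)        # columns are x^0 ... x^(order-1)
coef, *_ = np.linalg.lstsq(V, y_train, rcond=None)
y_fit = np.vander(x_vis, order, increasing=True) @ coef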
def fit_rbf(x_train, y_train, x_valid, y_valid, x_vis, order):
    # Position RBF centers evenly across the domain
    c = np.linspace(-10, 10, order)
# Create RBF matrix
R = np.zeros((order, len(x_train)))
for i in range(order):
R[i] = np.abs(x_train - c[i])**3
a = np.linalg.pinv(R.T)@y_train
R = np.zeros((order, len(x_valid)))
for i in range(order):
R[i] = np.abs(x_valid - c[i])**3
error = np.mean(np.square(y_valid - R.T@a))
R = np.zeros((order, len(x_vis)))
for i in range(order):
R[i] = np.abs(x_vis - c[i])**3
y_vis = R.T@a
return error, y_vis
plt.figure()
plt.scatter(x_train, y_train, color='k', marker='x')
plt.xlim([-10.5, 10.5])
plt.ylim([-.5, 1.5])
for order in [5, 10, 15]:
error, y_vis = fit_rbf(x_train, y_train, x_valid, y_valid, x_vis, order)
plt.plot(x_vis, y_vis, label='%dth Order, Error: %.3f' % (order, error))
plt.legend()
plt.show()
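For comparison, SciPy's Rbf interpolator places a cubic RBF at every training point, so it interpolates the data exactly rather than fitting a reduced set of centers like fit_rbf above; a sketch, assuming SciPy is available:
from scipy.interpolate import Rbf
# Cubic RBF with one center per training point (exact interpolation, not a reduced-order fit)
rbf = Rbf(x_train, y_train, function='cubic')
plt.figure()
plt.scatter(x_train, y_train, color='k', marker='x')
plt.plot(x_vis, rbf(x_vis), label='SciPy cubic Rbf')
plt.legend()
plt.show()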
plt.figure()
polynomial_err = [fit_polynomial(x_train, y_train, x_valid, y_valid, x_vis, order)[0] for order in [5, 10, 15]]
rbf_err = [fit_rbf(x_train, y_train, x_valid, y_valid, x_vis, order)[0] for order in [5, 10, 15]]
plt.plot([5, 10, 15], polynomial_err, label='Polynomial Fit', marker='o')
plt.plot([5, 10, 15], rbf_err, label='RBF Fit', marker='*', markersize=10)
plt.xlabel('Order')
plt.ylabel('Error')
plt.xticks([5, 10, 15])
plt.title('Error vs Order')
plt.legend()
plt.show()
Adding the curvature regularizer, we want to minimize $$I = \sum_{n=1}^{N}[y_n-f(x_n,\vec{a})]^2 + \lambda \int \Big[\frac{d^2f}{dx^2}\Big]^2dx$$
We have $$f(x,\vec{a}) = \sum_{m=0}^{10}a_mx^m$$ and, since the $m=0,1$ terms drop out, $$\frac{d^2f}{dx^2} = \sum_{m=2}^{10}a_m m (m-1)x^{m-2}$$
Thus, $$I = \sum_{n=1}^{N}\Big[y_n-\sum_{m=0}^{10}a_mx_n^m\Big]^2 + \lambda \int_{x_-}^{x_+} \Big[\sum_{m=2}^{10}a_m m (m-1)x^{m-2}\Big]^2dx$$
To minimize, we set the derivative with respect to each coefficient to zero: $$\frac{\partial I}{\partial a_i} = -2\sum_{n=1}^{N}x_n^{i}\Big[y_n-\sum_{m=0}^{10}a_mx_n^m\Big] + 2\lambda \int_{x_-}^{x_+} i(i-1)x^{i-2}\sum_{m=2}^{10}a_m m (m-1)x^{m-2}\,dx = 0$$ Dividing by two and carrying out the integral term by term, $$-\sum_{n=1}^{N}x_n^{i}y_n + \sum_{m=0}^{10}a_m\sum_{n=1}^{N}x_n^{m+i} + \lambda\, i(i-1)\sum_{m=2}^{10}a_m m (m-1)\frac{x_+^{m+i-3}-x_-^{m+i-3}}{m+i-3} = 0$$
Wow, that took some careful bookkeeping to push the partial derivative through the integral and all the sums!
The coefficients are what we are after, so we collect these equations (one for each $i$) into a linear system. Defining $$v_i = \sum_{n=1}^{N}x_n^{i}y_n,\qquad B_{im} = \sum_{n=1}^{N}x_n^{m+i},\qquad D_{im} = i(i-1)\,m(m-1)\frac{x_+^{m+i-3}-x_-^{m+i-3}}{m+i-3}$$ (with $D_{im}=0$ when $i<2$ or $m<2$), the condition above reads $(B+\lambda D)\vec{a} = \vec{v}$, so $$\vec{a} = (B+\lambda D)^{-1}\vec{v}$$
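Before trusting the algebra, a quick finite-difference check of $\partial I/\partial a_i$ against the closed form is cheap; a sketch that uses the same rescaled $x$ and bounds $x_\pm = \pm 1$ as the implementation below:
# Finite-difference check of dI/da_i against the closed-form gradient derived above
rng = np.random.default_rng(0)
order, lam, i, eps = 6, 0.1, 3, 1e-6
xs = np.arange(-10, 10, 1) / 10.       # rescaled sample points
ys = (xs > 0).astype(float)
a = rng.normal(size=order)
def I_of(a):
    fit = sum(a[m] * xs**m for m in range(order))
    reg = sum(a[m] * a[k] * m*(m-1)*k*(k-1) * (1 - (-1)**(m+k-3)) / (m+k-3)
              for m in range(2, order) for k in range(2, order))
    return np.sum((ys - fit)**2) + lam * reg
a_plus, a_minus = a.copy(), a.copy()
a_plus[i] += eps
a_minus[i] -= eps
fd = (I_of(a_plus) - I_of(a_minus)) / (2 * eps)
closed = (-2 * np.sum(xs**i * ys)
          + 2 * sum(a[m] * np.sum(xs**(m + i)) for m in range(order))
          + 2 * lam * sum(a[m] * m*(m-1)*i*(i-1) * (1 - (-1)**(m+i-3)) / (m+i-3)
                          for m in range(2, order)))
print(fd, closed)  # the two values should agree to about 1e-5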
Python implementation follows
def fit_poly_regularizer(x_train, y_train, x_valid, y_valid, x_vis, order, lam):
    # Right-hand-side vector v_i = sum_n y_n x_n^i, with x rescaled by the order (roughly [-1, 1] here)
    A = np.zeros((order,))
    for i in range(order):
        A[i] = y_train @ (x_train/order)**i
    # Normal-equations matrix B_im = sum_n x_n^(m+i)
    B = np.zeros((order, order))
    for i in range(order):
        for j in range(order):
            B[i,j] = np.sum((x_train/order)**(i+j))
    # Curvature-penalty matrix, with the integral taken over the scaled domain x_- = -1, x_+ = 1
    C = np.zeros((order, order))
    for i in range(2, order):
        for j in range(2, order):
            C[i,j] = lam * i * (i-1) * j * (j-1) * (1**(j+i-3) - (-1)**(j+i-3))/(j+i-3)
    # Solve the regularized normal equations (B + C) a = v from the derivation (C already carries lambda)
    c = np.linalg.inv(B + C)@A
A = np.zeros((order, len(x_valid)))
for i in range(order):
A[i] = (x_valid/order)**i
error = np.mean(np.square(y_valid - A.T@c))
A = np.zeros((order, len(x_vis)))
for i in range(order):
A[i] = (x_vis/order)**i
y_vis = A.T@c
return error, y_vis
plt.figure()
plt.scatter(x_train, y_train, color='k', marker='x')
plt.xlim([-10.5, 10.5])
plt.ylim([-.5, 1.5])
for lam in [0, 0.01, 0.1, 1]:
error, y_vis = fit_poly_regularizer(x_train, y_train, x_valid, y_valid, x_vis, 10, lam)
plt.plot(x_vis, y_vis, label='Lambda %.2f, Error: %.3f' % (lam, error))
plt.legend()
plt.show()
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook')
import keras
import keras.backend as K
from keras.models import Model
from keras.layers import Dense, Activation, Input
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
# Generate the LFSR sequence: the next bit is the XOR of taps 0 and 3 of the register
x_prev = [0, 0, 0, 1]
x_data = []
y_data = []
while len(x_data) < 10000:
    x = (x_prev[0] + x_prev[3]) % 2
    y_data.append(x)
    x_data.append(list(x_prev))   # copy the register; appending x_prev itself would alias every row
    x_prev.pop()
    x_prev.insert(0, x)
x_data = np.array(x_data, dtype=np.float32)
y_data = np.array(y_data, dtype=np.float32)
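# Sanity check: with taps at positions 0 and 3 this is a maximal-length 4-bit LFSR,
# so the output bit stream should repeat with period 2**4 - 1 = 15
period = 15
print(all(y_data[i] == y_data[i + period] for i in range(len(y_data) - period)))  # expect True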
# Prepare datasets
# Dataset for the feed forward
x_train, x_valid, y_train, y_valid = train_test_split(x_data, y_data, test_size=.2)
# Dataset for the recurrent
x_seq = y_data
# Dataset for the autoencoder
x_train_auto, x_valid_auto = train_test_split(x_data, test_size=.2)
def train_feed_forward(x_train, y_train, x_test, y_test, n_neurons=None, activation='sigmoid'):
inp = Input(batch_shape=(None, 4))
x = inp
for i in n_neurons:
x = Dense(i)(x)
x = Activation(activation)(x)
out = Dense(1,use_bias=False, activation=activation)(x)
model = Model(inp, out)
model.compile(loss='mse',
optimizer='sgd')
history = model.fit(x_train, y_train,
batch_size=100,
epochs=1000,
verbose=0,
validation_split=0,
callbacks=[EarlyStopping(patience=20, min_delta=.001, monitor='loss')])
y_pred = model.predict(x_test, batch_size=50, verbose=1)
error = model.evaluate(x_test, y_test, batch_size=50, verbose=1)
return y_pred, model.count_params(), error
layers = [1, 2, 3, 5]
neurons = [2, 5, 10, 20]
error = np.zeros((len(layers), len(neurons)))
params = np.zeros_like(error)
for i,l in enumerate(layers):
for j,n in enumerate(neurons):
y_pred, params[i,j], error[i,j] = train_feed_forward(x_train, y_train, x_valid, y_valid, n_neurons=[n for _ in range(l)], activation='sigmoid')
print('Plot of error (heatmap) and number of params (annotation) for layers vs neurons')
ax = sns.heatmap(error, xticklabels=neurons, yticklabels=layers, annot=params)
ax.set_xlabel('Number of Neurons per layer')
ax.set_ylabel('Number of Layers')
plt.show()