We begin with the Taylor expansion for $e^x$, $\sum_{l=0}^{\infty}\frac{x^l}{l!}$. Comparing to equation 14.3 in the book, we see that $c_l=\frac{1}{l!}$.
Substituting into eqn. 14.6:
$$ a_n=c_n+\sum_{m=1}^M b_mc_{n-m} \;\;\;\;(n=0,\dots,N) $$As we're evaluating the [1/1] Padé approximant, this yields:
$$ a_0=c_0=\frac{1}{0!}=1\\ a_1=c_1+b_1c_0=\frac{1}{1!}+b_1=1+b_1\\ $$and, using Equation 14.7 to get the third equation for solving the third unknown:
$$ 0=c_l+\sum_{m=1}^M b_mc_{l-m} \;\;\;\;(l=N+1,\dots,N+M) $$yields:
$$ 0 = c_2+b_1c_1 $$This is easily solvable:
$$ 0=\frac{1}{2}+b_1\\ b_1=-.5 $$Therefore,
$$ a_1=1-.5 = .5 $$Thus: $$ e=\frac{a_0+a_1}{1+b_1}=\frac{1+.5}{1-.5}=3 $$
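As a quick numerical check of the [1/1] result (a minimal sketch; the estimate is crude this far from the expansion point):
import numpy as np
# [1/1] Padé approximant of e^x: (1 + 0.5x)/(1 - 0.5x), evaluated at x = 1
a0, a1, b1 = 1.0, 0.5, -0.5
approx = (a0 + a1)/(1 + b1)
print(approx, np.e, abs(approx - np.e))   # 3.0 vs 2.71828..., error of roughly 0.28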
For the [5/5] Padé approximant, the same procedure yields:
$$ a_0=c_0=\frac{1}{0!}=1\\ a_1=c_1+b_1c_0=\frac{1}{1!}+b_1=1+b_1\\ a_2=c_2+b_1c_1+b_2c_0=\frac{1}{2}+b_1+b_2\\ a_3=c_3+b_1c_2+b_2c_1+b_3c_0=\frac{1}{6}+\frac{b_1}{2}+b_2+b_3\\ a_4=c_4+b_1c_3+b_2c_2+b_3c_1+b_4c_0=\frac{1}{24}+\frac{b_1}{6}+\frac{b_2}{2}+b_3+b_4\\ a_5=c_5+b_1c_4+b_2c_3+b_3c_2+b_4c_1+b_5c_0=\frac{1}{120}+\frac{b_1}{24}+\frac{b_2}{6}+\frac{b_3}{2}+b_4+b_5 $$These express $a_0,\dots,a_5$ in terms of $b_1,\dots,b_5$. Utilizing equation 14.7 again gives five equations in the five unknowns $b_1,\dots,b_5$: $$ 0=c_6+b_1c_5+b_2c_4+b_3c_3+b_4c_2+b_5c_1\\ 0=c_7+b_1c_6+b_2c_5+b_3c_4+b_4c_3+b_5c_2\\ 0=c_8+b_1c_7+b_2c_6+b_3c_5+b_4c_4+b_5c_3\\ 0=c_9+b_1c_8+b_2c_7+b_3c_6+b_4c_5+b_5c_4\\ 0=c_{10}+b_1c_9+b_2c_8+b_3c_7+b_4c_6+b_5c_5 $$
# Solve the 5x5 linear system from eqn. 14.7 for b_1 ... b_5
import numpy as np
import math
B = np.array([-1/math.factorial(6), -1/math.factorial(7), -1/math.factorial(8),
              -1/math.factorial(9), -1/math.factorial(10)])
A = np.array([[1/math.factorial(5), 1/math.factorial(4), 1/math.factorial(3), 1/math.factorial(2), 1/math.factorial(1)],
              [1/math.factorial(6), 1/math.factorial(5), 1/math.factorial(4), 1/math.factorial(3), 1/math.factorial(2)],
              [1/math.factorial(7), 1/math.factorial(6), 1/math.factorial(5), 1/math.factorial(4), 1/math.factorial(3)],
              [1/math.factorial(8), 1/math.factorial(7), 1/math.factorial(6), 1/math.factorial(5), 1/math.factorial(4)],
              [1/math.factorial(9), 1/math.factorial(8), 1/math.factorial(7), 1/math.factorial(6), 1/math.factorial(5)]])
bs = np.linalg.inv(A).dot(B)
b1, b2, b3, b4, b5 = (bs[k].item() for k in range(5))
# Back-substitute into eqn. 14.6 for a_0 ... a_5
a0 = 1
a1 = 1 + b1
a2 = 1/2 + b1 + b2
a3 = 1/6 + b1/2 + b2 + b3
a4 = 1/24 + b1/6 + b2/2 + b3 + b4
a5 = 1/120 + b1/24 + b2/6 + b3/2 + b4 + b5
# Estimate e^1 with the [5/5] Padé approximant
(a0+a1+a2+a3+a4+a5)/(1+b1+b2+b3+b4+b5)
np.e
The [5/5] approximant agrees with np.e to about nine digits, which is pretty impressive!
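Assuming SciPy is available, its built-in Padé routine offers an independent check of the hand-solved coefficients (a sketch, not part of the original derivation):
import math
from scipy.interpolate import pade
taylor = [1/math.factorial(k) for k in range(11)]   # c_0 ... c_10 of e^x
p, q = pade(taylor, 5)                              # numerator and denominator as poly1d
print(p(1)/q(1), np.e)                              # should agree to roughly 9-10 digits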
When fitting data with a rational function (a ratio of polynomials), we use equation 14.9 from the book:
$$ \sum_{n=0}^N x_i^na_n-\sum_{n=N+1}^{N+M}x_i^{n-N}y_ia_n=y_i $$This can be rewritten as a linear system $\mathbf{M}\vec a = \vec y$ with design matrix:
$$ \mathbf{M}= \begin{bmatrix} \vec x^0&\vec x^1&\cdots&\vec x^N&-\vec x^1\vec y&\cdots&-\vec x^M\vec y \end{bmatrix} $$and
$$ \vec a^T = [a_0,a_1,\dots,a_{N+M}] $$
import numpy as np
import copy
from matplotlib import pyplot as plt
# Step-function data: 0 for x <= 0, 1 for x > 0
xi = np.arange(-10, 11)
yi = np.multiply(xi > 0, 1)
poly = [5, 10, 15]
a5, a10, a15 = [], [], []
VM5, VM10, VM15 = [], [], []
for x in range(3):
    # Build the design matrix and solve via the SVD pseudo-inverse
    VM = np.empty([21, 1 + 2*poly[x]])
    # Polynomial columns x^0 ... x^N
    for n in range(poly[x] + 1):
        VM[:, n] = xi**n
    # Columns y*x^1 ... y*x^M (without the minus sign of eqn. 14.9;
    # the fit stays self-consistent because yhat below uses the same matrix)
    for m in range(poly[x] + 1, 2*poly[x] + 1):
        VM[:, m] = yi * xi**(m - poly[x])
    u, w, vh = np.linalg.svd(VM)
    W = np.diag(w)
    if x == 0:
        VM5 = copy.deepcopy(VM)
        a5 = vh.T@np.concatenate((np.linalg.inv(W), np.zeros((11, 10))), axis=1)@u.T@yi
    if x == 1:
        VM10 = copy.deepcopy(VM)
        a10 = vh.T@np.linalg.inv(W)@u.T@yi
    if x == 2:
        VM15 = copy.deepcopy(VM)
        a15 = vh.T@np.concatenate((np.linalg.inv(W), np.zeros((10, 21))), axis=0)@u.T@yi
# Reconstruct the fitted values for each order
yhat5 = VM5@a5
yhat10 = VM10@a10
yhat15 = VM15@a15
plt.plot(xi, yi)
plt.plot(xi, yhat5)
plt.plot(xi, yhat10)
plt.plot(xi, yhat15)
plt.legend(['True DGP', 'Poly of 5', '10', '15'], loc='upper left')
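For comparison, NumPy's built-in pseudo-inverse performs the same SVD-based inversion in one call (a sketch assuming VM5, a5, and yi from the loop above are still in scope):
a5_alt = np.linalg.pinv(VM5) @ yi
# Should match the manually assembled solution, provided no tiny singular values are truncated
print(np.allclose(a5_alt, a5))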
import copy
def taps(x0):
    # x0 is a length-4 state vector holding x_{n-1}, x_{n-2}, x_{n-3}, x_{n-4}
    return (x0[0] + x0[3]) % 2
def LFSR(x0):
    # Generate 16 output bits and the corresponding 16 register states
    xn = []
    xnew = x0
    for i in range(1, 17):
        xn.append(taps(xnew[:, i-1]).item())
        # Shift the register contents down by one
        x = copy.deepcopy(xnew[:, i-1])
        x[1:4] = x[0:3]
        # Insert the new feedback bit at the front
        x[0] = xn[i-1]
        x = x.reshape((4, 1))
        xnew = np.hstack((xnew, x))
    return (np.array(xn).reshape((16, 1)), xnew[:, 0:16].T)
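As a quick sanity check of the register (a sketch): the recurrence x_n = x_{n-1} XOR x_{n-4} used here corresponds to the primitive polynomial x^4 + x^3 + 1, so the 4-bit register should cycle through all 15 nonzero states before repeating.
bits, states = LFSR(np.array([1, 1, 1, 1]).reshape((4, 1)))
# State 15 should equal state 0 for a maximal-length (period-15) sequence
print(np.array_equal(states[0], states[15]))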
# Sigmoid activation
def sig(t):
    return 1/(1 + np.exp(-t))
# Derivative of the sigmoid, written in terms of its output p = sig(t), since sig'(t) = sig(t)*(1 - sig(t))
def dsig(p):
    return p * (1 - p)
class NN:
    def __init__(self, xtrain, ytrain, nodes1, nodes2):
        # Adapted from James Loy's 2-layer NN
        self.input = xtrain
        # Weights between the input and the first hidden layer
        self.weights_input = np.random.rand(self.input.shape[1], nodes1)
        # Weights between the first and second hidden layers
        self.weights12 = np.random.rand(nodes1, nodes2)
        # Weights between the second hidden layer and the output
        self.weights23 = np.random.rand(nodes2, 1)
        # Targets for training
        self.y = ytrain
        # Output
        self.output = np.zeros(ytrain.shape)
        # Step size for gradient descent (defined but unused below; backprop applies the raw gradient)
        self.alpha = .01
    def feedforward(self):
        self.layer1 = sig(np.dot(self.input, self.weights_input))
        self.layer2 = sig(np.dot(self.layer1, self.weights12))
        self.output = sig(np.dot(self.layer2, self.weights23))
        return self.output
    def backprop(self):
        # Propagate the error backwards and accumulate the weight updates
        delta_in = 2*(self.y - self.output)*dsig(self.output)
        dweights23 = np.dot(self.layer2.T, delta_in)
        delta_jn = np.dot(delta_in, self.weights23.T)*dsig(self.layer2)
        dweights12 = np.dot(self.layer1.T, delta_jn)
        delta_kn = np.dot(delta_jn, self.weights12.T)*dsig(self.layer1)
        dweights_input = np.dot(self.input.T, delta_kn)
        self.weights_input += dweights_input
        self.weights12 += dweights12
        self.weights23 += dweights23
    def Train(self, xtrain, ytrain):
        self.output = self.feedforward()
        self.backprop()
    def Test(self, xtest):
        self.input = xtest
        self.output = self.feedforward()
        return self.output
# Train a 3-3 network on the full 16-step LFSR sequence
yy, xx = LFSR(np.array([1,1,1,1]).reshape((4,1)))
nn = NN(xx, yy, 3, 3)
tolerance = 10e-5
loss = 100
i = 1
while loss > tolerance:
    out = nn.feedforward()
    loss = np.mean(np.square(yy - out))
    i += 1
    nn.Train(xx, yy)
print(np.round(out, 0) == yy)
print(loss)
print(i)
# Let's test against all possible LFSR initializations
correct = 0
total = 0
for i in range(1, 15):
    yyy, xxx = LFSR(xx[i].reshape((4,1)))
    test_out = np.round(nn.Test(xxx))
    correct += sum(test_out == yyy).item()
    total += test_out.shape[0]
print(correct/total)
So, by training on the full 16-step LFSR sequence, the network is able to reproduce the entire LFSR sequence for any initialization.
# Now train on only the first 4 steps of the sequence
yy, xx = LFSR(np.array([1,1,1,1]).reshape((4,1)))
yy = yy[0:4]
xx = xx[0:4,]
nn1 = NN(xx, yy, 4, 10)
tolerance = 10e-5
loss = 100
i = 1
while loss > tolerance:
    out = nn1.feedforward()
    loss = np.mean(np.square(yy - out))
    i += 1
    nn1.Train(xx, yy)
print(np.round(out, 0) == yy)
print(loss)
print(i)
# Evaluate on the remaining 12 steps of the same sequence
yyy, xxx = LFSR(np.array([1,1,1,1]).reshape((4,1)))
yyy = yyy[4:16]
xxx = xxx[4:16,]
np.sum(np.round(nn1.Test(xxx)) == yyy)/np.shape(yyy)[0]
However, when trained on only the first 4 samples, it guesses only about 42% correctly, even on the remainder of the same sequence. Across all initializations, that leads to:
# Let's test against all possible LFSR initializations
yy, xx = LFSR(np.array([1,1,1,1]).reshape((4,1)))
correct = 0
total = 0
for i in range(1, 15):
    yyy, xxx = LFSR(xx[i].reshape((4,1)))
    test_out = np.round(nn1.Test(xxx))
    correct += sum(test_out == yyy).item()
    total += test_out.shape[0]
print(correct/total)
We can thus see how many it gets correct as a function of how many training samples we give it:
rate = []
for j in range(1, 16):
    y, x = LFSR(np.array([1,0,0,0]).reshape((4,1)))
    # Train on the first j samples, test on the rest
    ytrain = y[0:j]
    xtrain = x[0:j,]
    xtest = x[j:,]
    ytest = y[j:,]
    # Initialize the new neural net
    nniters = NN(xtrain, ytrain, 4, 10)
    tolerance = 10e-4
    loss = 100
    # Train the neural net
    while loss > tolerance:
        out = nniters.feedforward()
        loss = np.mean(np.square(ytrain - out))
        nniters.Train(xtrain, ytrain)
    # Fraction of the held-out sequence predicted correctly
    test_out = np.round(nniters.Test(xtest))
    temp = sum(test_out == ytest)
    temp = temp/ytest.shape[0]
    rate.append(temp)
%matplotlib inline
from matplotlib import pyplot as plt
x = np.linspace(1,15,15)
plt.plot(x,rate)
plt.ylabel('Fraction Correct')
plt.xlabel('Size of Training Data')