NMM 2020 Jake Read 10 Neural Nets

In [17]:
import tensorflow as tf
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
# also the bread-and-butter imports 
import numpy as np 
import matplotlib.pyplot as plt
In [18]:
# ok, let's make the lfsr, 
In [19]:
# maximal-length 4-bit lfsr, taps at bits 0 and 3; seed it with [1, 0, 0, 1]
init = [1, 0, 0, 1]
def lfsr(state):
    # next bit is the xor of the tap bits (0 and 3)
    x = (state[0] + state[3]) % 2 
    # shift everything right and push the new bit in at the front
    nstate = [x, state[0], state[1], state[2]]
    return nstate

def lfsr_bit(state):
    # just the next bit, without building the shifted state
    return (state[0] + state[3]) % 2 

lfsr(init)
Out[19]:
[0, 1, 0, 0]
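A quick check, not part of the original run, that this tap choice (bits 0 and 3) really is maximal: iterating the lfsr above from the seed should pass through every nonzero 4-bit state before repeating, i.e. have period 15.
In [ ]:
# sketch: measure the period of the lfsr defined above
state = lfsr(init)
period = 1
while state != init:
    state = lfsr(state)
    period += 1
print(period)  # expect 15, the maximum for 4 bits (the all-zero state is excluded)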
In [28]:
# now I have an oracle: given (state), produce (next state) - want to train 
# the net to do the same, 
states = np.ndarray((16, 4))
states[0] = init
for i in range(15):  # fill all 16 rows; the maximal cycle wraps back to the seed at row 15
    states[i + 1] = lfsr(states[i])
# a maximal 4-bit lfsr visits 15 of the 16 possible states (all-zeros maps to itself 
# and is excluded), each with one outcome: the next state, 
# really this net should reduce to a single output, predicting the next bit 
# rather than the whole state - then we just expect it to learn to weight 
# state[0] and state[3] 
states
Out[28]:
array([[1., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.],
       [1., 1., 0., 0.],
       [1., 1., 1., 0.],
       [1., 1., 1., 1.],
       [0., 1., 1., 1.],
       [1., 0., 1., 1.],
       [0., 1., 0., 1.],
       [1., 0., 1., 0.],
       [1., 1., 0., 1.],
       [0., 1., 1., 0.],
       [0., 0., 1., 1.],
       [1., 0., 0., 1.]])
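Before building the net, it is worth writing out what the target bit actually is: the XOR of bits 0 and 3, the classic example of a mapping that a single linear layer cannot represent, which is one reason hidden layers are needed at all. A small sketch of the truth table, using lfsr_bit from above:
In [ ]:
# sketch: the next bit depends only on state[0] and state[3] - it is their XOR
for s0 in (0, 1):
    for s3 in (0, 1):
        print(s0, s3, '->', lfsr_bit([s0, 0, 0, s3]))
# 0 0 -> 0, 0 1 -> 1, 1 0 -> 1, 1 1 -> 0: the XOR truth table,
# which no single linear layer can fit on its own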
In [71]:
# let's try this anyways, 
#x = tf.placeholder('float', [None, 4]) # the input,
#y = tf.placeholder('float', [None, 1]) # outputs 

# the kind of model, 
model = tf.keras.models.Sequential()

# add dense layers: three hidden layers of 4 relu units each, then a single linear output,
model.add(tf.keras.layers.Dense(4, input_shape=[4], activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1))
#model.add(tf.keras.layers.Dense(4, input_shape=(4,), activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(1, activation=tf.nn.softmax))

# model parameters 
optimizer = tf.keras.optimizers.RMSprop(0.0001) 
# rmsprop is 'similar to gradient descent with momentum'
model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mse'])

# make the training data: the 16 states generated above, and the next bit for each
xs = states 
ys = np.zeros(16)
for i in range(len(ys)):
    ys[i] = lfsr_bit(states[i])

print(xs[0])
print(ys[0])
[1. 0. 0. 1.]
0.0
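An aside, not what was run here: since the target is a single bit, this could also be framed as binary classification - a sigmoid output with binary cross-entropy instead of a linear output with mse. A sketch of that alternative with the same keras API:
In [ ]:
# sketch (alternative formulation, not the model trained below):
# next-bit prediction as binary classification
clf = tf.keras.models.Sequential([
    tf.keras.layers.Dense(4, input_shape=[4], activation=tf.nn.relu),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)  # output squashed into (0, 1)
])
clf.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
            loss='binary_crossentropy', metrics=['accuracy'])
# clf.fit(xs, ys, epochs=2000, verbose=0)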
In [72]:
# minimal progress callback: a dot every 20 epochs, a newline every 1000
class PrintDot(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if epoch % 1000 == 0: print('')
        if epoch % 20 == 0: print('.', end='')
model.summary()
history = model.fit(xs[0:8], ys[0:8], epochs=8000, verbose = 0, callbacks=[PrintDot()])
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_39 (Dense)             (None, 4)                 20        
_________________________________________________________________
dense_40 (Dense)             (None, 4)                 20        
_________________________________________________________________
dense_41 (Dense)             (None, 4)                 20        
_________________________________________________________________
dense_42 (Dense)             (None, 1)                 5         
=================================================================
Total params: 65
Trainable params: 65
Non-trainable params: 0
_________________________________________________________________

.................................................. (training progress dots elided: one per 20 epochs over 8000 epochs)
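The earlier comment expected the net to key on state[0] and state[3]; a quick way to eyeball that (a sketch, reusing the trained model) is to print the first layer's kernel and see whether rows 0 and 3 carry most of the weight.
In [ ]:
# sketch: inspect the learned first-layer weights
w, b = model.layers[0].get_weights()  # kernel shape (4, 4): rows = input bits, cols = hidden units
print(np.round(w, 2))
print(np.round(b, 2))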
In [73]:
# plot learning curves 
plt.plot(history.epoch, history.history['loss'])
plt.plot(history.epoch, history.history['mean_squared_error'])   # newer tf.keras versions name this key 'mse'
plt.plot(history.epoch, history.history['mean_absolute_error'])  # and this one 'mae'
#history.history
Out[73]:
[<matplotlib.lines.Line2D at 0x24828bbab88>]
[figure: training loss, mean squared error, and mean absolute error vs. epoch]
In [76]:
# would want a nice way to plot success, 
test_sample = xs  # all 16 states: the first 8 were used for training, the last 8 held out
predictions = model.predict(test_sample)
# awkward vis, 
#print(ys)
#print(predictions)
xpos = np.zeros(len(ys))
xpinned = np.zeros(len(ys))
for i in range(len(ys)):
    xpos[i] = i
    # pin each prediction to 0.1 or 0.9 by thresholding at 0.5
    # (offset from 0 / 1 so the pinned dots don't sit on top of the true bits)
    if predictions[i] < 0.5:
        xpinned[i] = 0.1
    else:
        xpinned[i] = 0.9
plt.scatter(xpos, ys)           # true next bits
plt.axvline(8)                  # boundary between training rows (left) and held-out rows (right)
plt.scatter(xpos, predictions)  # raw predictions
plt.scatter(xpos, xpinned)      # pinned predictions
Out[76]:
<matplotlib.collections.PathCollection at 0x2482b953688>
[figure: true bits, raw predictions, and pinned predictions vs. state index; vertical line at x = 8 marks the train / held-out split]
In [ ]:
# this seems to work well with just 2 hidden layers, although in that case it is 
# sensitive to initial conditions; three hidden layers of 4 units each is more reliable. 
# however, with everything in the training set I should assume it is simply over-fitting 
# (memorizing) the 16 possible states, so I've tried it again with just half of the samples 
# (8 training inputs only); plotted above are the in-sample predictions (left of the vertical 
# line) and out-of-sample (right) - it still does well when the output is pinned (green dots) 
# to 0 or 1 according to whether the prediction falls above or below 0.5
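To put a number on the claim above (a sketch, reusing xs, ys, and the trained model): threshold the predictions at 0.5 and score them against the true bits, separately for the 8 rows used in training and the 8 held-out rows.
In [ ]:
# sketch: thresholded accuracy, in-sample (first 8 rows) vs out-of-sample (last 8 rows)
preds = model.predict(xs).flatten()
pinned = (preds > 0.5).astype(float)
print('in-sample: ', np.mean(pinned[:8] == ys[:8]))
print('out-of-sample:', np.mean(pinned[8:] == ys[8:]))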