import tensorflow as tf
# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)
# also the usual bread-and-butter imports,
import numpy as np
import matplotlib.pyplot as plt
# ok, let's make the lfsr,
# maximal lfsr begins with [1, 0, 0, 1]
init = [1, 0, 0, 1]
def lfsr(state):
    # next bit is the xor of the first and last taps,
    x = (state[0] + state[3]) % 2
    # shift right, with the new bit at the front,
    nstate = [x, state[0], state[1], state[2]]
    return nstate

def lfsr_bit(state):
    # just the next output bit, without advancing the state,
    return (state[0] + state[3]) % 2
lfsr(init)
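# a quick sanity check, just to convince myself the taps really are maximal:
# a maximal 4-bit lfsr should cycle through all 15 non-zero states before
# the seed comes back around.
s = lfsr(init)
period = 1
while s != init:
    s = lfsr(s)
    period += 1
print('lfsr period:', period)  # expect 15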
# now I have an oracle: given (state), have (next) - want to train
# the net to do the same,
states = np.zeros((16, 4))
states[0] = init
for i in range(15):
    states[i + 1] = lfsr(states[i])
# there are only 16 possible 4-bit states; the maximal sequence cycles through
# the 15 non-zero ones, so the last row wraps back to the seed,
# each outcome just being the next item,
# well, ok, this net should reduce to a single output, predicting the next
# bit rather than the whole state -
# then we just expect it to learn to weight state[0] and state[3]
states
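# note: that next bit is state[0] xor state[3], and an xor of two inputs is
# not linearly separable - presumably why a lone Dense(1) with no hidden layer
# wouldn't fit it. the target truth table over those two bits:
for b0 in (0, 1):
    for b3 in (0, 1):
        print(b0, b3, '->', (b0 + b3) % 2)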
# let's try this anyways,
#x = tf.placeholder('float', [None, 4]) # the input,
#y = tf.placeholder('float', [None, 1]) # outputs
# the kind of model,
model = tf.keras.models.Sequential()
# add a dense layer,
model.add(tf.keras.layers.Dense(4, input_shape=[4], activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1))
#model.add(tf.keras.layers.Dense(4, input_shape=(4,), activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(4, activation=tf.nn.relu))
#model.add(tf.keras.layers.Dense(1, activation=tf.nn.softmax))
# model parameters
optimizer = tf.keras.optimizers.RMSprop(0.0001)
# rmsprop is 'similar to gradient descent with momentum'
model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mse'])
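# for intuition, roughly what rmsprop does per parameter - a sketch, not the
# exact keras implementation: keep a decaying average of squared gradients
# and scale each step by its inverse square root.
def rmsprop_step(param, grad, avg_sq, lr=0.0001, rho=0.9, eps=1e-7):
    avg_sq = rho * avg_sq + (1 - rho) * grad ** 2
    param = param - lr * grad / (np.sqrt(avg_sq) + eps)
    return param, avg_sq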
# make the training data: all 16 possible states, and their lfsr outputs
xs = states
ys = np.zeros(16)
for i in range(len(ys)):
    ys[i] = lfsr_bit(states[i])
print(xs[0])
print(ys[0])
class PrintDot(tf.keras.callbacks.Callback):
    # print a dot every 20 epochs so the long, silent fit shows progress
    def on_epoch_end(self, epoch, logs=None):
        if epoch % 1000 == 0: print('')
        if epoch % 20 == 0: print('.', end='')
model.summary()
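# sanity check on the summary: the first Dense(4) on a 4-wide input has
# 4*4 + 4 = 20 parameters, the next two Dense(4)s have 20 each, and the
# final Dense(1) has 4 + 1 = 5, so 65 trainable parameters in total.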
history = model.fit(xs[0:8], ys[0:8], epochs=8000, verbose=0, callbacks=[PrintDot()])
# plot learning curves
plt.plot(history.epoch, history.history['loss'])
plt.plot(history.epoch, history.history['mean_squared_error'])
plt.plot(history.epoch, history.history['mean_absolute_error'])
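# note: the history key names depend on the keras version - newer releases
# record these metrics as 'mse' / 'mae', so check history.history.keys()
# if the plots above raise a KeyError.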
#history.history
# would want a nice way to plot success,
test_sample = xs#[0:1]
predictions = model.predict(test_sample)
# awkward vis,
#print(ys)
#print(predictions)
xpos = np.zeros(len(ys))
xpinned = np.zeros(len(ys))
for i in range(len(ys)):
    xpos[i] = i
    # pin each prediction to 0.1 or 0.9 depending on which side of 0.5 it falls
    if predictions[i] < 0.5:
        xpinned[i] = 0.1
    else:
        xpinned[i] = 0.9
plt.scatter(xpos, ys)
plt.axvline(8)  # marks the train / test split: only samples 0-7 were fit
plt.scatter(xpos, predictions)
plt.scatter(xpos, xpinned)
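# one way to probe whether it really learned to lean on state[0] and state[3]:
# sum the absolute first-layer weights per input. a rough look, not a proper
# attribution method.
w_in = model.layers[0].get_weights()[0]  # kernel, shape (4 inputs, 4 units)
print('per-input weight mass:', np.abs(w_in).sum(axis=1))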
# seems like this can work well with just 2 layers,
# although it seems sensitive to starting conditions in that case;
# three layers, each with 4 units, is more reliable.
# however, I should assume that this is simply over-fitting the 16 possible states.
# I've tried this again with just half of the samples (so 8 inputs only); plotted above are
# the in-sample (left of the line) and out-of-sample (right) points - this still does well
# when the output is pinned (green dots) to 0 or 1 according to which side of 0.5
# the prediction falls on.
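# to put a number on the pinned plot, a quick sketch: threshold the predictions
# at 0.5 and count matches, separately for the fitted half and the held-out half.
pinned = (predictions[:, 0] > 0.5).astype(float)
print('in-sample accuracy:    ', np.mean(pinned[:8] == ys[:8]))
print('out-of-sample accuracy:', np.mean(pinned[8:] == ys[8:]))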