import numpy as np
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'png'
plot rosenbrock "banana" function
def f(x,y):
return (1-x)**2 + 100*(y-x**2)**2
x = np.linspace(-2.5,2.5,100)
y = np.linspace(-2.5,2.5,100)
X,Y = np.meshgrid(x,y)
banana = f(X,Y)
# print(banana.shape)
# print(banana)
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.plot_surface(X,Y,banana,cmap='plasma')
plt.title("banana 2d")
plt.show()
simplex
# hmm. let's make a generic banana func... maybe not yet
def banana(x):
dim = x.shape[1]
length = x.shape[0]
# print(dim)
# print(length)
# print(dim)
f = np.zeros(length)
for d in range(1,dim):
f += ((1-x[:,d-1])**2 + 100*(x[:,d]-x[:,d-1]**2)**2)
return f
# let's check it still works for dim 2
x = np.linspace(-2.5,2.5,100)
y = np.linspace(-2.5,2.5,100)
X,Y = np.meshgrid(x,y)
# print(X.shape)
stack = np.column_stack((X.flatten(),Y.flatten()))
# print(stack)
# print(stack.shape)
ban = banana(stack).reshape(x.shape[0],y.shape[0])
# print(ban)
# X,Y = np.meshgrid(x,y)
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
ax.plot_surface(X,Y,ban,cmap='plasma')
plt.title("banana 2d - generic dimenension function")
plt.show()
def simplex(init,f,stop_cond=0.0001,max_iter=500):
"""
init: starting point
f: function to pass in
stop_cond: stopping condition, stdev between simplex points
max_iter: maximum iterations before breaking
"""
dim = len(init) # what dimension are we in
init_spacing = 0.25 # how to generate initial spacing
# construct initial simplex:
simplex = np.zeros((dim+1,dim)) # array of nodes -> dim+1 points in dim space
simplex[0] = init # our first point
# generate the next points: we just move out in each dimension by init spacing
for d in range(dim):
simplex[d+1] = np.array(init)
simplex[d+1][d] += init_spacing
# we add another column of function evaluations
f_simplex = f(simplex)
simplex = np.column_stack((simplex,f_simplex))
stdev = np.sum(np.array(np.std(simplex,axis=0))) # the stopping condition
i = 0
# for plotting
simplex_list = [simplex]
while (stdev >=stop_cond) and (i < max_iter):
# the main loop:
# sort simplex according to f values
sort_simplex = simplex[simplex[:, dim].argsort()[::-1]]
x_mean = np.mean(sort_simplex[1:], axis=0)
# Find reflection and evaluate
x1_new = 2*x_mean - sort_simplex[0]
x1_new[dim] = f(x1_new[:dim].reshape(1,dim))
# if x1_new is the best, try reflecting double the distance
if x1_new[dim] < sort_simplex[-1,dim]:
x1_new_new = 3*x_mean - 2*sort_simplex[0]
x1_new_new[dim] = f(x1_new_new[:dim].reshape(1,dim))
if x1_new_new[dim] < x1_new[dim]:
# if this is better, then this is our new simplex point
sort_simplex[0] = x1_new_new
else:
# otherwise we use just the single level reflection
sort_simplex[0] = x1_new
#print('b')
elif x1_new[dim] > sort_simplex[0,dim]:
# if it gets worse
# we try shrinking
x1_new_new = 3/2*x_mean - 1/2*sort_simplex[0]
x1_new_new[dim] = f(x1_new_new[:dim].reshape(1,dim))
if x1_new_new[dim] < sort_simplex[0,dim]:
# we accept this move
sort_simplex[0] = x1_new_new
else:
# try shrinking again
x1_new_new = 1/2*(x_mean-sort_simplex[0])
x1_new_new[dim] = f(x1_new_new[:dim].reshape(1,dim))
if x1_new_new[dim] <= sort_simplex[dim-1,dim]:
# we accept this move
sort_simplex[0] = x1_new_new
else:
# shrink all vertices toward best one.
for d in range(dim):
sort_simplex[d] = 1/2*(sort_simplex[d] + sort_simplex[-1])
else:
# if it is neither best nor worst, we replace the worst with it and keep going.
sort_simplex[0] = x1_new
# Update our new simplex with the sorted version
simplex = sort_simplex
stdev = np.sum(np.array(np.std(simplex,axis=0)))
i+=1 # step iterations forward
#print(i)
simplex_list.append(simplex)
return simplex_list, stdev, i
#simplex([-1,-1],banana)
simplexes, stdev, iters = simplex([-1,-1],banana)
print(f"iterations = {iters}")
print(f"standard deviation = {stdev}")
print(f"final simplex nodes = {simplexes[-1]}")
iterations = 80 standard deviation = 5.786516701697217e-05 final simplex nodes = [[1.00011637e+00 1.00024133e+00 5.06170031e-08] [1.00016185e+00 1.00033461e+00 4.84447327e-08] [1.00014584e+00 1.00030609e+00 4.19529621e-08]]
# generate the function
x = np.linspace(-1.5,1.5,100)
y = np.linspace(-1.5,1.5,100)
X,Y = np.meshgrid(x,y)
stack = np.column_stack((X.flatten(),Y.flatten()))
ban = banana(stack).reshape(x.shape[0],y.shape[0])
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final simplex
final_sim = simplexes[-1]
ax.scatter(final_sim[:,0],final_sim[:,1],final_sim[:,2], c='r')
# preceeding simplexes
simplex_array = np.array(simplexes) # convert to array
# we duplicate the last point to close our triangles
simplex_plot = np.zeros((simplex_array.shape[0],4,3))
simplex_plot[:,:3,:] = simplex_array[:,:,:]
simplex_plot[:,3,:] = simplex_array[:,0,:]
# ok, now we're going to plot.
for simp in simplex_plot:
ax.plot(simp[:,0],simp[:,1],simp[:,2],'k-')
# simplex_array = np.column_stack((simplex_array,simplex_array[:,0]))
plt.title("banana 2d - downhill simplex")
plt.show()
import matplotlib.animation as animation
# don't bother running the animation in the notebook... womp womp
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final simplex
final_sim = simplexes[-1]
ax.scatter(final_sim[:,0],final_sim[:,1],final_sim[:,2], c='r')
# preceeding simplexes
simplex_array = np.array(simplexes) # convert to array
# we duplicate the last point to close our triangles
simplex_plot = np.zeros((simplex_array.shape[0],4,3))
simplex_plot[:,:3,:] = simplex_array[:,:,:]
simplex_plot[:,3,:] = simplex_array[:,0,:]
# ok, now we're going to plot.
# for simp in simplex_plot:
# ax.plot(simp[:,0],simp[:,1],simp[:,2],'k-')
def update_simplex(i):
ax.plot(simplex_plot[i,:,0],simplex_plot[i,:,1],simplex_plot[i,:,2],'k-')
# simplex_array = np.column_stack((simplex_array,simplex_array[:,0]))
# plt.title("banana 2d - downhill simplex")
# plt.show()
ani = animation.FuncAnimation(fig, update_simplex, range(iters), blit=False, interval=200, repeat=True)
plt.show()
10-D
init_x = -1*np.ones(10)
simplexes, stdev, iters = simplex(init_x,banana,max_iter=5000)
print(f"iterations = {iters}")
print(f"standard deviation = {stdev}")
print(f"final node vals = {simplexes[-1][-1]}")
iterations = 2438 standard deviation = 6.741947394245437e-05 final node vals = [9.99997972e-01 9.99990658e-01 9.99995978e-01 9.99994117e-01 9.99983279e-01 9.99981523e-01 9.99973239e-01 9.99955118e-01 9.99903423e-01 9.99808785e-01 8.53031775e-08]
so these are also converging towards all 1s? and one zero is from mystery extra dimension? i don't know how to visualize this. ¯_(ツ)_/¯
gradient descent
# Let's copy our banana back again
def banana(x):
dim = x.shape[1]
length = x.shape[0]
f = np.zeros(length)
for d in range(1,dim):
f += ((1-x[:,d-1])**2 + 100*(x[:,d]-x[:,d-1]**2)**2)
return f
# Our gradient
def banana_grad(x):
dim = x.shape[1]
length = x.shape[0]
f = np.zeros((length,dim))
for i in range(1,dim-1):
f[:,i] = (-2*(1-x[:,i]) - 400*x[:,i]*(x[:,i+1]-x[:,i])**2 + 200*(x[:,i]-x[:,i-1]**2))
#f[:,0] = 400*x[:,0]*(x[:,1]-x[:,0])**2 - 2*(1-x[:,0])
f[:,0] = - 2*(1-x[:,0]) - 400*x[:,0]*(x[:,1]-x[:,0]**2)
f[:,-1] = 200*(x[:,-1]-x[:,-2]**2)
return f
# check it sounds reasonable ish?
x = np.linspace(-2.5,2.5,100)
y = np.linspace(-2.5,2.5,100)
X,Y = np.meshgrid(x,y)
stack = np.column_stack((X.flatten(),Y.flatten()))
ban = banana(stack).reshape(x.shape[0],y.shape[0])
# ban_grad = banana_grad(stack)
def grad_descent(init_x,f,df,gamma=0.002,min_err=0.0001,max_iters=1000):
# we start at init_x
pos = init_x
# first step
pos_next = pos - gamma*df(pos)
# rate of change will be difference between them
err = abs(f(pos)-f(pos_next))
# iterations
pos_list = [pos]
i = 0
while (i<=max_iters) and (err> min_err):
pos = pos_next
pos_list.append(pos)
pos_next = pos - gamma*df(pos)
err = abs(f(pos)-f(pos_next))
i+=1
return pos, i, err,pos_list
init_x = np.array([[-1,-1]])
pos, i, err,pos_list = grad_descent(init_x,banana,banana_grad,min_err=0.00001)
print(pos)
print(i)
# generate the function
x = np.linspace(-1.5,1.5,100)
y = np.linspace(-1.5,1.5,100)
X,Y = np.meshgrid(x,y)
stack = np.column_stack((X.flatten(),Y.flatten()))
ban = banana(stack).reshape(x.shape[0],y.shape[0])
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final point
final_pt = pos
ax.scatter(final_pt[:,0],final_pt[:,1],banana(final_pt), c='r')
# the path there
for pt in pos_list:
ax.plot(pt[0][0],pt[0][1],banana(pt)[0],c='k',ls = '-',linewidth=10,marker='x')
plt.title("banana 2d - gradient descent, gamma=0.002")
plt.show()
[[0.83974915 0.70447367]] 1001
def banana_grad_2d(x):
dim = x.shape[1]
assert dim ==2, f"dimension must be 2d, not {dim}"
length = x.shape[0]
f = np.zeros((length,dim))
f[:,0] = - 2*(1-x[:,0]) - 400*x[:,0]*(x[:,1]-x[:,0]**2)
f[:,1] = 200*(x[:,1]-x[:,0]**2)
return f
def grad_descent_momentum1(init_x,f,df,alpha = 0.05,gamma=0.002,min_err=0.0001,max_iters=1000):
# we start at init_x
pos = init_x
# first step
pos_next = pos - gamma*df(pos)
grad_prev = df(pos)
# err will be difference between z heights
err = abs(f(pos)-f(pos_next))
# iterations
pos_list = [pos]
i = 0
while (i<=max_iters) and (err> min_err):
pos = pos_next
pos_list.append(pos)
pos_next = pos - gamma*df(pos) - alpha*grad_prev*gamma
grad_prev = df(pos_next)
err = abs(f(pos)-f(pos_next))
i+=1
return pos, i, err,pos_list
init_x = np.array([[-1,-1]])
pos, i, err,pos_list = grad_descent_momentum1(init_x,banana,banana_grad_2d,alpha=0.35,gamma = 0.002,min_err=0.00001,max_iters=1000)
print(pos)
print(i)
print(err)
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final point
final_pt = pos
ax.scatter(final_pt[:,0],final_pt[:,1],banana(final_pt), c='r')
# the path there
for pt in pos_list:
ax.plot(pt[0][0],pt[0][1],banana(pt)[0],c='k',ls = '-',linewidth=10,marker='x')
plt.title("banana 2d - gradient descent, with momentum")
plt.show()
[[0.87635505 0.7682998 ]] 737 [8.95607747e-06]
def grad_descent_momentum(init_x,f,df,alpha = 0.05,gamma=0.002,min_err=0.0001,max_iters=1000):
# we start at init_x
pos = init_x
# first step
pos_next = pos - gamma*df(pos)
# rate of change will be difference between them
err = abs(f(pos)-f(pos_next))
# iterations
pos_list = [pos]
i = 0
while (i<=max_iters) and (err> min_err):
pos = pos_next
pos_list.append(pos)
pos_next = pos - gamma*df(pos)
pos_next += alpha*(np.random.rand(pos.shape[0],pos.shape[1])-0.5)#alpha*gamma*(df(pos_next)-df(pos))
# randomly do something bad
bad = np.random.rand()
if bad< 0.85:
if abs(np.sum(df(pos_next)-df(pos)))<1.75:
pos_next+=alpha*(np.random.rand()-0.5)
#print('h')
if bad > 0.98:
pos_next += 2*(np.random.rand()-0.5)
#print('bad')
err = abs(f(pos)-f(pos_next))
i+=1
return pos, i, err,pos_list
init_x = np.array([[-1,-1]])
pos, i, err,pos_list = grad_descent_momentum(init_x,banana,banana_grad_2d,alpha=0.07)
print(pos)
print(i)
print(err)
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final point
final_pt = pos
ax.scatter(final_pt[:,0],final_pt[:,1],banana(final_pt), c='r')
# the path there
for pt in pos_list:
ax.plot(pt[0][0],pt[0][1],banana(pt)[0],c='k',ls = '-',linewidth=10,marker='x')
plt.title("banana 2d - gradient descent, with massive random jitters")
plt.show()
[[0.82356317 0.93651589]] 1001 [1.08819806]
def grad_descent_approx(init_x,f,gamma=0.02,min_err=0.00001,max_iters=5000):
# we start at init_x
pos = init_x
# our fake gradient
def df(pos):
w = 0.35*np.random.normal(0, (0.25)**0.5, 2)
grad = (f(pos+w) - f(pos))*w
return grad
# first step
pos_next = pos - gamma*df(pos)
# rate of change will be difference between them
err = abs(f(pos)-f(pos_next))
# iterations
pos_list = [pos]
i = 0
while (i<=max_iters) and (err> min_err):
pos = pos_next
pos_list.append(pos)
pos_next = pos - gamma*df(pos)
while (abs(pos_next[0,0])>=1.5) or (abs(pos_next[0,1])>=1.5):
pos_next = pos- gamma*df(pos)
err = abs(f(pos)-f(pos_next))
i+=1
return pos, i, err,pos_list
init_x = np.array([[-1,-1]])
pos, i, err,pos_list = grad_descent_approx(init_x,banana)
print(pos)
print(i)
print(err)
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final point
final_pt = pos
ax.scatter(final_pt[:,0],final_pt[:,1],banana(final_pt), c='r')
# the path there
for pt in pos_list:
ax.plot(pt[0][0],pt[0][1],banana(pt)[0],c='k',ls = '-',linewidth=10,marker='x')
plt.title("banana 2d - fake gradient descent")
plt.show()
[[0.23793656 0.03814389]] 104 [1.00855447e-06]
def grad_descent_baseline(init_x,f,baseline=0.0,gamma=0.02,min_err=0.0001,max_iters=1000):
# we start at init_x
pos = init_x
# our fake gradient
def df(pos):
w = 0.1*np.random.normal(0, (0.25)**0.5, 2)
grad = (f(pos+w) - baseline)*w
return grad
# first step
pos_next = pos - gamma*df(pos)
# rate of change will be difference between them
err = abs(f(pos)-f(pos_next))
# iterations
pos_list = [pos]
i = 0
while (i<=max_iters) and (err> min_err):
pos = pos_next
pos_list.append(pos)
pos_next = pos - gamma*df(pos)
while (abs(pos_next[0,0])>=1.5) or (abs(pos_next[0,1])>=1.5):
pos_next = pos- gamma*df(pos)
err = abs(f(pos)-f(pos_next))
i+=1
return pos, i, err,pos_list
init_x = np.array([[-1,-1]])
pos, i, err,pos_list = grad_descent_baseline(init_x,banana,baseline=2)
print(pos)
print(i)
print(err)
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final point
final_pt = pos
ax.scatter(final_pt[:,0],final_pt[:,1],banana(final_pt), c='r')
# the path there
for pt in pos_list:
ax.plot(pt[0][0],pt[0][1],banana(pt)[0],c='k',ls = '-',linewidth=10,marker='x')
plt.title("banana 2d - faker gradient descent")
plt.show()
[[ 0.01110981 -0.0931228 ]] 238 [4.14559127e-05]
init_x = -1*np.ones((1,10))
pos, i, err,pos_list = grad_descent(init_x,banana,banana_grad)
print(pos)
print(i)
print(err)
# ok ish
[[0.9958193 0.99168183 0.98368305 0.96867959 0.94183782 0.89525658 0.82368141 0.70417671 0.5662131 0.31220521]] 245 [9.36227659e-05]
conjugate gradient
def conjugate_gradient(init_x,f,df, rate = 0.001, min_err=0.001, max_iters = 1000):
pos = init_x
d_i = df(pos) # initial direction will be the gradient
g_i = df(pos)
# next step
pos_next = pos - rate*d_i
#error
err = abs(f(pos)-f(pos_next))
# iterations
pos_list = [pos]
i = 0
while (i<=max_iters) and (err> min_err):
# advance our current position
pos = pos_next
pos_list.append(pos)
# Find next gradient
g_i_next = df(pos) # since we've updated already
# find gamma
#gamma_i = (g_i_next * (g_i_next-g_i))/(g_i*g_i)
gamma_i = (np.dot(g_i_next, (g_i_next-g_i).T))/(g_i@g_i.T)
# find next direction
d_i = g_i_next + gamma_i*d_i
# make next step
pos_next = pos - rate*d_i
#update gradients
g_i = g_i_next
# update errors and iteration
err = abs(f(pos)-f(pos_next))
i+=1
return pos, i, err,pos_list
init_x = np.array([[-1,-1]])
pos, i, err,pos_list = conjugate_gradient(init_x, banana, banana_grad_2d,rate = 0.001, min_err=0.0001, max_iters = 5000)
print(pos)
print(i)
# plot in 3D
fig,ax = plt.subplots(subplot_kw=dict(projection='3d'))
# banana function
ax.plot_surface(X,Y,ban,cmap='plasma',alpha=0.3)
# final point
final_pt = pos
ax.scatter(final_pt[:,0],final_pt[:,1],banana(final_pt), c='r')
# the path there
for pt in pos_list:
ax.plot(pt[0][0],pt[0][1],banana(pt)[0],c='k',ls = '-',linewidth=10,marker='x')
plt.title("banana 2d - conjugate gradient")
plt.show()
[[0.72212234 0.52015854]] 1282
x = np.linspace(-1,1,500)
y = np.linspace(-1,1,500)
X,Y = np.meshgrid(x,y)
stack = np.column_stack((X.flatten(),Y.flatten()))
ban = banana(stack).reshape(x.shape[0],y.shape[0])
fig,ax = plt.subplots()
# for pt in pos_list:
# ax.plot(pt[0,0],pt[0,1],c='k',ls = '-',linewidth=10,marker='.')
pos_array = np.array(pos_list).reshape(1283,2)
ax.contourf(X,Y,ban,100,cmap='plasma',alpha=0.85)
ax.plot(pos_array[:,0],pos_array[:,1],'k-')
plt.title("contour version - conjugate gradient descent")
plt.show()
CMAES
def CMAES(trawlers,f,B,rate = 0.001, max_iters=100):
trawlers_next = np.zeros_like(trawlers)
time = len(B)
N = trawlers.shape[0] # how many of them
dim = trawlers.shape[1]
for t in range(time):
temperature = B[t]
next_move = rate*(np.random.rand(N,dim)-0.5)
next_move[np.nonzero((next_move.any()>0) and (np.random.rand()>temperature))] = 0 # reject bad moves
trawlers_next = trawlers + next_move
trawlers = trawlers_next
min_point = np.amin(f(trawlers),axis=0)
return min_point
trawlers = np.random.rand(10,2)-0.5 # starting positions (2D)
B = np.zeros(100) # temperature
pos = CMAES(trawlers,banana,B)
print(pos)
1.0759811512047548