import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from ipywidgets import *
%matplotlib notebook
def rosenbrock(X, Y):
return (1 - X)**2 + 100*(Y - X**2)**2
def show_rosenbrock(x_max, y_max, points=[], title='Rosenbrock function'):
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
X = np.linspace(-x_max, x_max, 100)
Y = np.linspace(-y_max, y_max, 100)
X, Y = np.meshgrid(X, Y)
Z = rosenbrock(X, Y)
ax.plot_surface(X, Y, Z, label="wow")
ax.set_title(title)
ax.set_xlabel("x")
ax.set_ylabel("y")
if len(points)>0:
for point in points:
line = ax.plot(point[0][:,0],point[0][:,1], point[1])
plt.setp(line, color='r', linewidth=2.0)
plt.show()
def interactive_plot_rosenbrock(x_max, y_max):
show_rosenbrock(x_max, y_max)
interact(interactive_plot_rosenbrock, x_max=(0,10,1), y_max=(0,10,1))
def nelder_mead_update(X, Z, a, l, p, s, debug=False):
inds = np.argsort(Z)
X = X[inds,:]
Z = Z[inds]
centroid = np.mean(X[:-1,:], axis=0)
x_r = centroid + a*(centroid - X[-1,:])
z_r = rosenbrock(x_r[0], x_r[1])
if z_r < Z[-2]:
if z_r > Z[0]:
X[-1,:] = x_r
Z[-1] = z_r
if debug: print 'reflect'
else:
x_e = centroid + l*(x_r - centroid)
z_e = rosenbrock(x_e[0], x_e[1])
if z_e < z_r:
X[-1,:] = x_e
Z[-1] = z_e
if debug: print 'use expand'
else:
X[-1,:] = x_r
Z[-1] = z_r
if debug: print 'use reflect'
else:
x_s = centroid + p*(centroid - X[-1])
z_s = rosenbrock(x_s[0], x_s[1])
if z_s < Z[-1]:
X[-1,:] = x_s
Z[-1] = z_s
if debug: print 'reflect and shrink'
else:
x_c = centroid - p*(centroid - X[-1])
z_c = rosenbrock(x_c[0], x_c[1])
if z_c < Z[-1]:
X[-1,:] = x_c
Z[-1] = z_c
if debug: print 'contract'
else:
X[1:,:] = X[1:,:] + s*(X[1:,:] - X[0,:])
Z[1:] = rosenbrock(X[1:,0], X[1:,1])
if debug: print 'shrink'
return X, Z
def nelder_mead(X, a=1., l=2., p=0.5, s=0.5, max_iters=200, threshold=1e-7):
Z = rosenbrock(X[:,0], X[:,1])
points = [[X,Z]]
for i in range(max_iters):
X, Z = nelder_mead_update(X, Z, a, l, p, s)
points.append([X, Z])
if np.linalg.norm(points[-1][0]-points[-2][0])<threshold:
return X, Z, points, i
return X, Z, points, max_iters
delta = 0.1
start_X = np.array([[-1,-1],[-1+delta,-1],[-1,-1+delta]])
X, Z, points1, i1 = nelder_mead(start_X)
print "Nelder Mead solution:", X[0]
print "iterations:", i1
show_rosenbrock(2,2, points=points1, title="Nelder Mead Method on the Rosenbrock function")
#golden ratio
g_r = (1+5**0.5)/2
def line_min(point, direction, length=4., threshold=0.1):
x0 = point
x1 = point + length*direction
x2 = point + length*direction*(g_r + 1)
x0_val = rosenbrock(x0[0], x0[1])
x1_val = rosenbrock(x1[0], x1[1])
x2_val = rosenbrock(x2[0], x2[1])
i=0
while np.linalg.norm(x2 - x0) > threshold:
x3 = x2 + (x1 - x2)*g_r
x3_val = rosenbrock(x3[0], x3[1])
if x3_val > x1_val:
x0 = x2
x0_val = x2_val
x2 = x3
x2_val = x3_val
else:
x2 = x1
x2_val = x1_val
x1 = x3
x1_val = x3_val
i += 1
return x1
def direction_set(point, threshold=1e-7, max_iters=200):
directions = np.array([[0.,1.],[1.,0.]])
points = []
costs = []
i=0
while i in range(max_iters):
points.append([point[0],point[1]])
costs.append(rosenbrock(point[0], point[1]))
update_points = np.array([line_min(point, direction) for direction in directions])
vals = update_points - point
direction = np.mean(vals, axis=0)
direction_norm = np.linalg.norm(direction)
directions[np.argmax(np.linalg.norm(vals, axis=1))] = direction / direction_norm
point += direction
i+=1
if(direction_norm < threshold):
return point, [[np.array(points), np.array(costs)]], i
print "hit max iterations"
return point, [[np.array(points), np.array(costs)]], max_iters
point, points2, i2 = direction_set(np.array([-1.,-1.]))
print "Direction Set solution:", point
print "iterations", i2
show_rosenbrock(3,3, points=points2, title="Direction Set Method on the Rosenbrock function")
def rosenbrock_gradient(x, y):
return np.array([-2*(1-x)-400*x*(y - x**2), 200*(y-x**2)])
def rosenbrock_second_partial(x, y):
return np.array([[2-400*(y-3*x**2), -400*x], [-400*x, 200]])
def calc_direction(a, b, direction):
g_a = rosenbrock_gradient(a[0], a[1])
g_b = rosenbrock_gradient(b[0], b[1])
l = g_b.dot(g_b-g_a)/(g_a.dot(g_a))
direction2 = g_b + l*direction
return direction2 / np.linalg.norm(direction2)
def conjugate_gradient(point, max_iters=100, threshold=1e-7):
direction = np.array([0., 1.])
i = 0
points = []
costs = []
while i < max_iters:
points.append([point[0],point[1]])
costs.append(rosenbrock(point[0], point[1]))
point2 = line_min(point, direction)
direction = calc_direction(point, point2, direction)
if np.linalg.norm(point2-point) < threshold:
return point2, [[np.array(points), np.array(costs)]], i
point = point2
i+=1
print "hit max iters"
return point, [[np.array(points), np.array(costs)]], max_iters
print "testing gradients"
print "numeric:", (rosenbrock(-1,-1)-rosenbrock(-1.0001,-1))/.0001, (rosenbrock(-1,-1)-rosenbrock(-1,-1.0001))/.0001
print "analytic:", rosenbrock_gradient(-1,-1)
point3, points3, i3 = conjugate_gradient(np.array([-1.,-1.]))
print "Conjugate Gradient solution:", point3
print "iterations", i3
show_rosenbrock(3,3, points=points3, title="Conjugate Gradient Method on the Rosenbrock function")
def Levenberg(point, n=500, threshold=1e-7, max_iters=200, vary=True):
oldDiff = float('inf')
diff = rosenbrock(point[0], point[1])
M = np.zeros((2,2))
i = 0;
i = 0
points = [[point[0], point[1]]]
costs = [diff]
for i in range(max_iters):
gradient = rosenbrock_gradient(point[0], point[1])
M = rosenbrock_second_partial(point[0], point[1])
M[0,0] *= (1+n)
M[1,1] *= (1+n)
if vary:
if diff>oldDiff:
n*=1.5
else:
n/=1.4
dA = np.linalg.solve(-M,gradient)
point += dA
oldDiff = diff
diff = rosenbrock(point[0], point[1])
points.append([point[0],point[1]])
costs.append(rosenbrock(point[0], point[1]))
if abs(diff-oldDiff) < threshold:
return point, [[np.array(points), np.array(costs)]], i
print "hit max iters"
return point, [[np.array(points), np.array(costs)]], max_iters
point4, points4, i4 = Levenberg(np.array([-1.,-1.]))
print "Levenberg Marquardt solution:", point4
print "iterations", i4
show_rosenbrock(3,3, points=points4, title="Levenberg Marquardt Method on the Rosenbrock function")
N=100
iterations = 5000
J = np.random.normal(size=N)
S = np.random.randint(3, size=N) - 1
def cost(S, J):
length = len(S)
return sum(J[i]*S[i]*S[-length+i+1] for i in range(length))
def flip(S, J, t, a):
cost1 = cost(S, J)
s = np.random.randint(len(S))
S[s] = -S[s]
cost2 = cost(S, J)
if(cost2 > cost1):
p = np.random.rand()
if p > np.exp(-a*t*(cost2-cost1)):
S[s] = - S[s]
def anneal(S, J, a, iters=10):
costs = []
for i in range(iters):
costs.append(cost(S, J))
flip(S, J, i, a)
return S, costs
plt.figure()
minVal = -sum(np.abs((J)))
plt.plot(range(iterations), np.ones(iterations)*minVal, label='min bound')
for a in [0.1, 0.01, 0.001]:
S_flip = [x for x in S]
plt.plot(range(iterations), anneal(S_flip, J, a, iterations)[1], label='a='+str(a))
plt.legend()
plt.title("Cost during Simulated Annealing for different values of A")
plt.show()
plt.figure()
rands = np.random.rand(100)
plt.plot(range(100), rands)
plt.plot(range(100), rands < np.exp(-0.1*np.arange(100)))
plt.plot(range(100), np.exp(-0.1*np.arange(100)))
plt.ylim(0,2)
plt.show()
iterations2 = 50
spins = np.random.randint(3, size=(100,N))-1
def probs(spins, b):
probs = np.exp(-b*(np.array([cost(spins[i,:], J) for i in range(100)])-minVal))
probs /= sum(probs)
return probs
def evolution(spins, J, b, iters=20):
costs = []
for i in range(iters):
probs_i = probs(spins, b)
costs.append(cost(spins[np.argmax(probs_i),:], J))
spins2 = np.zeros((100,N))
rand1 = np.random.choice(100, 100, p=probs_i)
rand2 = np.random.choice(100, 100, p=probs_i)
rand3 = np.random.randint(N, size=100)
rand4 = np.random.randint(N, size=100)
for j in range(100):
spins[j, :rand3[j]] = spins[rand1[j], :rand3[j]]
spins[j, rand3[j]:] = spins[rand2[j], rand3[j]:]
spins[j, rand4[j]] *= -1
return costs
plt.figure()
plt.plot(range(iterations2), np.ones(iterations2)*minVal, label='min bound')
for b in [0.1, 0.01, 0.001]:
spins_flip = np.array([[x for x in y] for y in spins])
plt.plot(range(iterations2), evolution(spins_flip, J, b, iterations2), label='b='+str(b))
plt.legend()
plt.title("Cost during Genetic Algorithm evolution for different values of b")
plt.show()