(a) Plot the Rosenbrock function [Rosenbrock, 1960]
$f = (1 - x)^2 + 100(y - x^2)^2$
Its global minimum, $f = 0$ at $(x, y) = (1, 1)$, sits at the end of a long, curved, nearly flat valley, which is what makes it a standard stress test for optimization algorithms.
# From Neil's nm.py code
import copy as cp
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
evals = 0   # global counter of function evaluations

def f(x):
    global evals
    evals += 1
    return (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2
plot_size = 100
xy_min = -1.1
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])
fig = plt.figure()
ax = fig.add_subplot(projection='3d')   # fig.gca(projection=...) no longer works in current Matplotlib
ax.set_proj_type('ortho')
surf = ax.plot_surface(X, Y, Z)
ax.view_init(35, 59)
plt.title("Rosenbrock Function")
plt.show()
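As a quick sanity check (an addition to the original script), the implementation can be evaluated at the known minimizer:
# f should be exactly 0 at the global minimum (1, 1)
print(f([1.0, 1.0]))   # expected: 0.0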
(b) Pick a stopping criterion and use the downhill simplex (Nelder-Mead) method to search for its minimum starting from $x = y = -1$, plotting the search path, and counting the number of algorithm iterations and function evaluations used.
The implementation below uses the standard moves: reflect the worst vertex through the centroid of the others; if that beats the current best point, try doubling the move; if it merely beats the second-worst, keep it; otherwise try contracting by half on either side of the centroid, and as a last resort shrink the whole simplex toward the best vertex. The chosen stopping criterion is that the best function value changes by a nonzero amount smaller than the tolerance between successive iterations.
evals = 0
def nelder_mead(f, start, start_size, tolerance):
    global evals
    evals = 0
    d = start.size
    # build the initial simplex: the start point plus one vertex offset
    # by start_size along each coordinate axis
    x = []
    x.append({'pt':cp.deepcopy(start), 'fn':f(start)})
    for i in range(d):
        x.append({'pt':cp.deepcopy(start), 'fn':0.0})
        x[-1]['pt'][i] += start_size
        x[-1]['fn'] = f(x[-1]['pt'])
    x_s = []
    x.sort(key=lambda item:item['fn'])
    x_s.append(cp.deepcopy(x))
    while True:
        # centroid of the d best vertices (all but the worst)
        mid = np.zeros(d)
        for i in range(d):
            mid += x[i]['pt']
        mid = mid/d
        # reflect the worst vertex through the centroid
        y = mid + (mid - x[-1]['pt'])
        f_y = f(y)
        if (f_y < x[0]['fn']):
            # better than the best: try doubling the reflection
            z = mid + 2.0*(mid - x[-1]['pt'])
            f_z = f(z)
            if (f_z < f_y):
                x[-1] = {'pt':z, 'fn':f_z}
            else:
                x[-1] = {'pt':y, 'fn':f_y}
        elif (f_y < x[-2]['fn']):
            # better than the second worst: keep the reflection
            x[-1] = {'pt':y, 'fn':f_y}
        else:
            # try contracting on the far side of the centroid
            y = mid + 0.5*(mid - x[-1]['pt'])
            f_y = f(y)
            if (f_y < x[-2]['fn']):
                x[-1] = {'pt':y, 'fn':f_y}
            else:
                # try contracting on the near side, toward the worst vertex
                y = mid - 0.5*(mid - x[-1]['pt'])
                f_y = f(y)
                if (f_y < x[-2]['fn']):
                    x[-1] = {'pt':y, 'fn':f_y}
                else:
                    # last resort: shrink the whole simplex toward the best vertex
                    for i in range(1, d+1):
                        x[i]['pt'] = (x[0]['pt'] + x[i]['pt'])/2.0
                        x[i]['fn'] = f(x[i]['pt'])
        x.sort(key=lambda item:item['fn'])
        x_s.append(cp.deepcopy(x))
        # stop when the best value changes by a nonzero amount below tolerance
        error = abs(x_s[-1][0]['fn'] - x_s[-2][0]['fn'])
        if (0 < error < tolerance):
            break
    return x_s
D = 2
start_size = 0.1
tolerance = 1e-6
start = -1*np.ones(D)
x_s = nelder_mead(f, start, start_size, tolerance)
search_evals = evals   # snapshot: the surface plot below calls f once more
plot_size = 100
xy_min = -1.1
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])
plt.ion()
point = ''
for i in range(len(start)):
point += '%.3f '%x_s[-1][0]['pt'][i]
title = 'Rosenbrock Nelder-Mead search'\
+'\nfinal point: ' + point\
+'\ntolerance: ' + str(tolerance)\
+'\niterations: ' + str(len(x_s))\
+' function evaluations: ' + str(search_evals)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.set_proj_type('ortho')
surf = ax.plot_surface(X, Y, Z)
ax.view_init(35, 59)
for i in range(len(x_s)):
    # collect the simplex vertices, repeating the first to close the triangle
    x = []
    y = []
    z = []
    for j in [0, 1, 2, 0]:
        x.append(x_s[i][j]['pt'][0])
        y.append(x_s[i][j]['pt'][1])
        z.append(x_s[i][j]['fn'])
    ax.plot(x, y, z, color='black', marker='.')
plt.title(title)
plt.show()
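As a sanity check on the implementation (this block is an addition, not part of the original code), SciPy's Nelder-Mead can be run from the same starting point; exact agreement is not expected, since SciPy builds its initial simplex and applies its tolerances differently.
# Optional cross-check against SciPy's Nelder-Mead (requires scipy)
from scipy.optimize import minimize

evals = 0   # reset the global counter so it mirrors res.nfev
res = minimize(f, -1*np.ones(2), method='Nelder-Mead',
               options={'xatol': 1e-6, 'fatol': 1e-6})
print('scipy NM:', res.x, res.fun, res.nit, res.nfev)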
(c) Now repeat the simplex search for $D = 10$ in the D-dimensional generalizations of the Rosenbrock function:
$f = \sum_{i=1}^{D-1} \left[ (1 - x_{i-1})^2 + 100(x_i - x_{i-1}^2)^2 \right]$
Start all the $x_i$ at -1, plot a 2D projection of the search path, and again count algorithm iterations and function evaluations.
plot_size = 100
xy_min = -1.5
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])   # evaluate the background contours with the 2D f from (b), before f is redefined below
def f(x):
    # D-dimensional generalization of the Rosenbrock function
    global evals, D
    evals += 1
    total = 0
    for i in range(1, D):
        total += (1 - x[i-1])**2 + 100*(x[i] - x[i-1]**2)**2
    return total
D = 10
start_size = 0.1
tolerance = 1e-6
start = -1*np.ones(D)
x_s = nelder_mead(f, start, start_size, tolerance)
point = ''
for i in range(len(start)):
point += '%.3f '%x_s[-1][0]['pt'][i]
title = '10D Rosenbrock Nelder-Mead search'\
+ '\nfinal point: ' + point\
+ '\ntolerance: ' + str(tolerance)\
+ '\niterations: ' + str(len(x_s))\
+ ' function evaluations: ' + str(evals)
fig, ax = plt.subplots(1,1)
ax.contourf(X, Y, Z, 1000, vmax=100, cmap=cm.rainbow)
ax.contour(X, Y, Z, 50, linewidths=0.5)
# columns of these 4-row lists are single simplexes (the three projected
# vertices with the first repeated to close the triangle); ax.plot draws
# one line per column
x = [[],[],[],[]]
y = [[],[],[],[]]
for i in range(len(x_s)):
    for k, j in enumerate([0, 1, 2, 0]):
        x[k].append(x_s[i][j]['pt'][0])
        y[k].append(x_s[i][j]['pt'][1])
ax.plot(x, y, color='white', marker='.')
plt.title(title)
plt.show()
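Nelder-Mead is known to struggle as the dimension grows. As a hedged cross-check (again an addition, not part of the original code), SciPy's implementation can be run on the same 10D problem; its 'adaptive' option (SciPy >= 1.1) rescales the reflection and contraction coefficients with dimension, which usually helps in higher dimensions.
# Optional 10D cross-check against SciPy's adaptive Nelder-Mead
from scipy.optimize import minimize

evals = 0
res = minimize(f, -1*np.ones(D), method='Nelder-Mead',
               options={'adaptive': True, 'xatol': 1e-6, 'fatol': 1e-6,
                        'maxfev': 100000})
print('scipy NM 10D:', res.x, res.fun, res.nit, res.nfev)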
(d) Repeat and compare the 2D and 10D Rosenbrock search with the gradient descent method.
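For reference, the analytic gradient coded in df below comes from differentiating the D-dimensional sum; each coordinate appears in at most two terms of the sum, so the endpoints get one-sided contributions:
$\partial f/\partial x_0 = -2(1 - x_0) - 400\, x_0 (x_1 - x_0^2)$
$\partial f/\partial x_i = -2(1 - x_i) - 400\, x_i (x_{i+1} - x_i^2) + 200 (x_i - x_{i-1}^2)$ for $0 < i < D-1$
$\partial f/\partial x_{D-1} = 200 (x_{D-1} - x_{D-2}^2)$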
# From Neil's gd.py code
import copy as cp
import math
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
evals = 0
def gradient_descent(f, df, start, start_scale, rate, momentum, tolerance):
    scale = start_scale
    x = np.copy(start)
    last_x = np.copy(start)
    last_last_x = np.copy(start)
    pts = [{'pt':np.copy(x), 'fn':f(x)}]
    while True:
        last_last_x = np.copy(last_x)
        last_x = np.copy(x)
        # gradient step plus a momentum term from the last accepted move
        x_new = x - scale*df(x) + momentum*(last_x - last_last_x)
        f_n = f(x_new)
        if (f_n <= pts[-1]['fn']):
            # downhill: accept the step and grow the step scale
            x = np.copy(x_new)
            pts.append({'pt':np.copy(x), 'fn':f_n})
            scale *= rate
            error = abs(pts[-1]['fn'] - pts[-2]['fn'])
            if (error < tolerance):
                break
        else:
            # uphill: reject the step and shrink the step scale (note that
            # last_x has already advanced to x, so the momentum term is
            # zero on the retry)
            scale *= 1/rate
    return pts
def f(x):
global evals
evals += 1
return (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2
def df(x):
    # analytic gradient; count it as two function evaluations (one per component)
    global evals
    evals += 2
    return np.array([-2*(1 - x[0]) - 400*(x[1] - x[0]**2)*x[0], 200*(x[1] - x[0]**2)])
D = 2
start = -1*np.ones(D)
start_scale = 1e-5
rate = 1.5
momentum = 0.95
tolerance = 1e-6
pts = gradient_descent(f, df, start, start_scale, rate, momentum, tolerance)
search_evals = evals   # snapshot: the surface plot below calls f once more
plot_size = 100
xy_min = -1.1
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])
x = []
y = []
z = []
for i in range(len(pts)):
x.append(pts[i]['pt'][0])
y.append(pts[i]['pt'][1])
z.append(pts[i]['fn'])
point = ''
for i in range(len(start)):
point += '%.3f '%pts[-1]['pt'][i]
title = '2D Rosenbrock gradient descent search (with momentum)'\
+ '\nfinal point: ' + point\
+ '\ntolerance: ' + str(tolerance)\
+ '\niterations: ' + str(len(pts))\
+ ' function evaluations: ' + str(search_evals)
plt.ion()
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.set_proj_type('ortho')
ax.plot_surface(X, Y, Z)
ax.view_init(35, 59)
ax.plot(x, y, z, color='black')
plt.title(title)
plt.show()
plot_size = 100
xy_min = -1.5
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])   # background contours again use the 2D f, before it is redefined below
def f(x):
    # D-dimensional Rosenbrock, as in part (c)
    global evals, D
    evals += 1
    total = 0
    for i in range(1, D):
        total += (1 - x[i-1])**2 + 100*(x[i] - x[i-1]**2)**2
    return total
def df(x):
    # analytic gradient of the D-dimensional Rosenbrock; count as D evaluations
    global evals, D
    evals += D
    grad = np.zeros(D)
    grad[0] = -2*(1 - x[0]) - 400*(x[1] - x[0]**2)*x[0]
    grad[D-1] = 200*(x[D-1] - x[D-2]**2)
    for i in range(1, D-1):
        # interior components pick up terms from both neighboring summands
        grad[i] = -2*(1 - x[i]) - 400*(x[i+1] - x[i]**2)*x[i] + 200*(x[i] - x[i-1]**2)
    return grad
D = 10
start = -1*np.ones(D)   # start all coordinates at -1, as the problem specifies
start_scale = 1e-5
rate = 1.5
tolerance = 1e-6
momentum = 0.95
evals = 0   # reset the counter, which still holds the 2D search total
pts = gradient_descent(f, df, start, start_scale, rate, momentum, tolerance)
point = ''
for i in range(len(start)):
point += '%.3f '%pts[-1]['pt'][i]
title = '10D Rosenbrock gradient descent search (with momentum)'\
+ '\nfinal point: ' + point\
+ '\ntolerance: ' + str(tolerance)\
+ '\niterations: ' + str(len(pts))\
+ ' function evaluations: ' + str(evals)
x = []
y = []
z = []
for i in range(len(pts)):
x.append(pts[i]['pt'][0])
y.append(pts[i]['pt'][1])
z.append(pts[i]['fn'])
fig, ax = plt.subplots(1,1)
ax.contourf(X, Y, Z, 1000, vmax=100, cmap=cm.rainbow)
ax.contour(X, Y, Z, 50, linewidths=0.5)
ax.plot(x, y, color='white')
plt.title(title)
plt.show()
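Before comparing evaluation counts, it is worth confirming that the hand-coded 10D gradient matches a finite-difference estimate; scipy.optimize.check_grad does this directly (an addition, not in the original script; it calls f and df itself, so the evals counter should be reset afterwards).
# Verify the analytic gradient against finite differences
from scipy.optimize import check_grad

err = check_grad(f, df, -1*np.ones(D))
print('gradient check error:', err)   # should be tiny relative to the gradient norm
evals = 0   # check_grad called f and df, so reset the counter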
(e) Repeat and compare the 2D and 10D Rosenbrock search with the conjugate gradient method.
# From Neil's cg.py code
import copy as cp
import math
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
evals = 0
def conjugate_gradient(f, df, start, start_step, tolerance):
    golden = (1 + math.sqrt(5))/2
    pts = []
    # t0..t3 bracket the minimum during the line search along each direction
    t0 = {}
    t1 = {'pt':np.copy(start), 'fn':f(start)}
    t2 = {}
    t3 = {}
    pts.append(cp.deepcopy(t1))
    grad = df(start)
    direction = np.copy(grad)   # initial search direction: steepest descent
    step = 0
    step_size = start_step
    d = start.size              # problem dimension, for periodic restarts
    while True:
        step += 1
        direction_old = np.copy(direction)
        grad_old = np.copy(grad)
        grad = df(t1['pt'])
        # Polak-Ribiere update of the conjugate direction
        gamma = np.dot(grad, grad - grad_old)/np.dot(grad_old, grad_old)
        direction = grad + gamma*direction_old
        # restart with steepest descent every d+1 steps
        if (step % (d + 1) == 0):
            direction = np.copy(grad)
        norm_dir = -direction/np.sqrt(np.dot(direction, direction))
        t0 = cp.deepcopy(t1)
        # shrink the step until it goes downhill, then place a third
        # point a golden ratio beyond it
        while True:
            t1['pt'] = t0['pt'] + step_size*norm_dir
            t1['fn'] = f(t1['pt'])
            if (t1['fn'] > t0['fn']):
                step_size *= 0.5
                continue
            t2['pt'] = t0['pt'] + (t1['pt'] - t0['pt'])*golden
            t2['fn'] = f(t2['pt'])
            break
        # expand the bracket until the middle point is the lowest
        while True:
            if (t1['fn'] < t2['fn']):
                break
            t0 = cp.deepcopy(t1)
            t1 = cp.deepcopy(t2)
            t2['pt'] = t1['pt'] + golden*(t1['pt'] - t0['pt'])
            t2['fn'] = f(t2['pt'])
        # golden-section refinement of the bracketed minimum
        while True:
            t3['pt'] = t1['pt'] + (t1['pt'] - t0['pt'])/golden
            t3['fn'] = f(t3['pt'])
            if (t3['fn'] > t1['fn']):
                t2 = cp.deepcopy(t0)
                t0 = cp.deepcopy(t3)
            else:
                t0 = cp.deepcopy(t1)
                t1 = cp.deepcopy(t3)
            error = abs(t1['fn'] - t0['fn']) + abs(t2['fn'] - t1['fn'])
            if (error < tolerance):
                break
        pts.append(cp.deepcopy(t1))
        error = abs(pts[-1]['fn'] - pts[-2]['fn'])
        if (error < tolerance):
            break
    return pts
def f(x):
global evals
evals += 1
return (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2
def df(x):
global evals
evals += 2
return np.array([-2*(1 - x[0]) - 400*(x[1] - x[0]**2)*x[0], 200*(x[1] - x[0]**2)])
dim = 2
start = -1*np.ones(dim)
start_step = 0.1
tolerance = 1e-6
pts = conjugate_gradient(f, df, start, start_step, tolerance)
search_evals = evals   # snapshot: the surface plot below calls f once more
plot_size = 100
xy_min = -1.1
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])
x = []
y = []
z = []
for i in range(len(pts)):
x.append(pts[i]['pt'][0])
y.append(pts[i]['pt'][1])
z.append(pts[i]['fn'])
point = ''
for i in range(len(start)):
point += '%.3f '%pts[-1]['pt'][i]
title = '2D Rosenbrock conjugate gradient search'\
+ '\nfinal point: ' + point\
+ '\ntolerance: ' + str(tolerance)\
+ '\niterations: ' + str(len(pts))\
+ ' function evaluations: ' + str(search_evals)
plt.ion()
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.set_proj_type('ortho')
ax.plot_surface(X, Y, Z)
ax.view_init(35, 59)
ax.plot(x, y, z, color='black')
plt.title(title)
plt.show()
plot_size = 100
xy_min = -1.5
xy_max = 1.3
x = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
y = np.arange(xy_min, xy_max, (xy_max - xy_min)/plot_size)
(X,Y) = np.meshgrid(x,y)
Z = f([X,Y])   # background contours use the 2D f, before it is redefined below
def f(x):
    # dim-dimensional Rosenbrock, as in part (c)
    global evals, dim
    evals += 1
    total = 0
    for i in range(1, dim):
        total += (1 - x[i-1])**2 + 100*(x[i] - x[i-1]**2)**2
    return total
def df(x):
    # analytic gradient; count as dim function evaluations
    global evals, dim
    evals += dim
    grad = np.zeros(dim)
    grad[0] = -2*(1 - x[0]) - 400*(x[1] - x[0]**2)*x[0]
    grad[dim-1] = 200*(x[dim-1] - x[dim-2]**2)
    for i in range(1, dim-1):
        grad[i] = -2*(1 - x[i]) - 400*(x[i+1] - x[i]**2)*x[i] + 200*(x[i] - x[i-1]**2)
    return grad
dim = 10
start = -1*np.ones(dim)
start_step = 0.1
tolerance = 1e-6
evals = 0   # reset the counter, which still holds the 2D totals plus the contour evaluation
pts = conjugate_gradient(f, df, start, start_step, tolerance)
point = ''
for i in range(len(start)):
point += '%.3f '%pts[-1]['pt'][i]
title = '10D Rosenbrock conjugate gradient search'\
+ '\nfinal point: ' + point\
+ '\ntolerance: ' + str(tolerance)\
+ '\niterations: ' + str(len(pts))\
+ ' function evaluations: ' + str(evals)
x = []
y = []
for i in range(len(pts)):
x.append(pts[i]['pt'][0])
y.append(pts[i]['pt'][1])
fig, ax = plt.subplots(1,1)
ax.contourf(X, Y, Z, 1000, vmax=100, cmap=cm.rainbow)
ax.contour(X, Y, Z, 50, linewidths=0.5)
ax.plot(x, y, color='white')
plt.title(title)
plt.show()
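Finally, as a hedged reference point (an addition, not part of the original code), SciPy's nonlinear conjugate gradient, which also uses a Polak-Ribiere update but a different line search, can be run on the same 10D problem with the analytic gradient:
# Optional cross-check against SciPy's nonlinear conjugate gradient
from scipy.optimize import minimize

evals = 0
res = minimize(f, -1*np.ones(dim), method='CG', jac=df, tol=1e-6)
print('scipy CG 10D:', res.x, res.fun, res.nit, res.nfev, res.njev)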