import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)
x,y = np.meshgrid(x,y)
rosen = ((1 - x)**2) + 100.0*((y-(x**2))**2)
fig = plt.figure()
ax = fig.gca(projection='3d')
surf = ax.plot_surface(x, y, rosen, alpha=0.5)
print np.min(rosen)
plt.show()
def rosen(x,y):
return (1 - x)**2 + 100.0*((y-x**2)**2)
class simplex_2D:
def __init__(self, function, bound_low, bound_high):
self.function = function
delta = 1.0 #?
# choose random starting point and unit vector simplex edges
# start_x0 = -1
# start_y0 = -1
# start_x1 = 1
# start_y1 = 0
# start_x2 = 0
# start_y2 = 1
start_x0 = -1
start_y0 = -1
start_x1 = -0.9
start_y1 = -1
start_x2 = -1
start_y2 = -0.9
self.vertices = np.array([[start_x0, start_y0], [start_x1, start_y1], [start_x2, start_y2]])
def eval_vertices(self):
evald = np.zeros((3,3))
for i, v in enumerate(self.vertices):
r = self.function(v[0], v[1])
evald[i] = [v[0], v[1], r]
return evald
def eval_others(self, others):
evald = np.zeros((3,3))
for i, v in enumerate(others):
r = self.function(v[0], v[1])
evald[i] = [v[0], v[1], r]
return evald
def get_worst_others(self):
evald = self.eval_vertices()
worst_idx = np.argmax(evald[:,2])
worst = self.vertices[worst_idx]
others = np.delete(self.vertices, worst_idx, 0)
return worst, others
def get_mean(self, others):
new_x = np.sum(others[:,0]) / 2.0
new_y = np.sum(others[:,1]) / 2.0
return np.array([new_x, new_y])
def reflect(self, worst, xmean):
return 2*xmean - worst
def double_step(self, worst, xmean):
return 3*xmean - 2*worst
def reflect_shrink(self, worst, xmean):
return (3/2.0)*xmean - (1.0/2)*worst
def shrink(self, worst, xmean):
return (1/2.0)*(xmean + worst)
def shrink_to_best(self):
evald = self.eval_vertices()
best_idx = np.argmin(evald[:,2])
best = self.vertices[best_idx]
v = self.vertices.copy()
i = 0
for xi in self.vertices:
if not np.array_equal(xi, best):
v[i] = (1.0/2) * (xi + best)
i += 1
v[i] = best
return v
def quit_conditions(self):
#fix this
# print "self.vertices", self.vertices
print "current_min", np.min(self.eval_vertices()[:,2])
evald = self.eval_vertices()
print "evald", evald
return np.min(evald[:,2]) <= 0.0002 and np.min(evald[:,2]) >= 0.0000001
def run(self):
all_vertice_sets = []
count = 0
while (not self.quit_conditions()) and count <= 500:
count += 1
all_vertice_sets.append(self.vertices)
# get the vertice with worst value (maximum for minimization task) and the list of remaining vertices
worst, others = self.get_worst_others()
xmean = self.get_mean(others)
# print "worst vertex", worst
# first - try reflection
new_worst = self.reflect(worst, xmean)
# is it better than the best?
evald = self.eval_vertices()
best_idx = np.argmin(evald[:,2])
best = self.vertices[best_idx]
# print "best vertex", evald[best_idx]
if self.function(new_worst[0], new_worst[1]) <= self.function(best[0], best[1]):
# try to double step
new_double_worst = self.double_step(worst, xmean)
if self.function(new_double_worst[0], new_double_worst[1]) <= self.function(new_worst[0], new_worst[1]):
new_worst = new_double_worst
self.vertices = np.vstack((others, new_worst))
continue
# if not worst, keep and cycle to next point
others_evald = self.eval_others(others)
others_worst_idx = np.argmax(others_evald[:,2])
others_worst = others[others_worst_idx]
if self.function(new_worst[0], new_worst[1]) <= self.function(others_worst[0], others_worst[1]):
self.vertices = np.vstack((others, new_worst))
# print "reflect"
continue
# if is still the worst - try reflecting and shrinking
else:
new_worst = self.reflect_shrink(worst, xmean)
if self.function(new_worst[0], new_worst[1]) <= self.function(others_worst[0], others_worst[1]):
self.vertices = np.vstack((others, new_worst))
continue
# still worse? try just shrinking
else:
new_worst = self.shrink(worst, xmean)
if self.function(new_worst[0], new_worst[1]) <= self.function(others_worst[0], others_worst[1]):
self.vertices = np.vstack((others, new_worst))
continue
# STILL the worst? give up and shrink all towards best
else:
self.vertices = self.shrink_to_best()
return self.vertices, all_vertice_sets
s = simplex_2D(rosen, -1, 1)
v, all_v = s.run()
all_v_plot = np.vstack(all_v)
all_v_plot.shape
# plot the simplex
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)
x,y = np.meshgrid(x,y)
rosen_f = ((1 - x)**2) + 100.0*((y-(x**2))**2)
fig = plt.figure()
ax = fig.gca(projection='3d')
surf = ax.plot_surface(x, y, rosen_f, alpha=0.5)
r_vals = []
for i in range(all_v_plot.shape[0]):
x = all_v_plot[i,0]
y = all_v_plot[i,1]
r_vals.append(rosen(x,y))
vertices = ax.plot3D(all_v_plot[:,0], all_v_plot[:,1], r_vals, color="black")
#print all_v_plot
#print r_vals
ax.view_init(elev=40., azim=150)
plt.show()
# line minimization / direction sets
gr = (1 + np.sqrt(5)) / 2.0
def get_spanning_set(x0, y0, direc):
d0, d1 = direc
point0 = [x0, y0, rosen(x0, y0)]
point1 = [x0 + d0, y0 + d1, rosen(x0 + d0, y0 + d1)]
# do these points span minimum?
if point1[2] > point0[2]:
# switch
temp0 = point0[:]
point0 = point1[:]
point1 = temp0[:]
# get point2
point2 = [None, None, None]
point2[:2] = point1[:2] + (np.subtract(point1[:2],point0[:2])) * gr
point2[2] = (rosen(point2[0], point2[1]))
while point2[2] < point1[2]:
point0 = point1[:]
point1 = point2[:]
point2[:2] = point1[:2] + (np.subtract(point1[:2],point0[:2])) * gr
point2[2] = rosen(point2[0], point2[1])
return point0, point1, point2
def get_minimum(point0, point1, point2):
t = 0.001
point3 = [None, None, None]
while np.linalg.norm(np.subtract(point2[:2], point0[:2])) > t:
# compute new point
point3[:2] = point0[:2] + np.subtract(point2[:2], point1[:2])
point3[2] = rosen(point3[0], point3[1])
if point3[2] > point1[2]:
# switch
point2 = point0[:]
point0 = point3[:]
else:
point0 = point1[:]
point1 = point3[:]
return point1
def powell():
x_past = -1.0
y_past = -0.5
ds = np.eye(2,2) * 0.1
f_vals = np.zeros(2)
x = x_past
y = y_past
count = 0
min_points = []
while count <= 20:
# for each direction saved
for i in range(2):
direc = ds[:,i]
p0, p1, p2 = get_spanning_set(x,y,direc)
point1 = get_minimum(p0, p1, p2)
x,y = point1[:2]
new_direc = np.subtract(point1[:2],[x_past, y_past])
#print "dotted vec", np.dot(np.transpose(ds), new_direc)
i = np.argmax(np.dot(np.transpose(ds), new_direc))
#print "ds", ds
#print "new direc", new_direc
ds[:,i] = new_direc
#print "ds with new direc", ds
x_past = point1[0]
y_past = point1[1]
print "x,y", x, y
min_points.append([x,y])
count += 1
return min_points
min_points = powell()
min_points = np.vstack(min_points)
min_points.shape
# plot the powell method
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)
x,y = np.meshgrid(x,y)
rosen_f = ((1 - x)**2) + 100.0*((y-(x**2))**2)
fig = plt.figure()
ax = fig.gca(projection='3d')
surf = ax.plot_surface(x, y, rosen_f, alpha=0.5)
r_mp_vals = []
for i in range(min_points.shape[0]):
x = min_points[i,0]
y = min_points[i,1]
r_mp_vals.append(rosen(x,y))
#print min_points
#print r_mp_vals
points = ax.plot3D(min_points[:,0], min_points[:,1], r_mp_vals, color="black")
ax.view_init(elev=40., azim=150)
plt.show()
# conjugate gradient descent
# rosen function gradient
def d_rosen(x,y):
#list with del-x and del-y
return [-2*(1-x) - 4*100*(y-x**2)*x, 2*100*(y-x**2)]
def conjugate_gradient_descent():
x = -1.0
y = -0.5
r = rosen(x,y)
g = d_rosen(x,y)
print g
direc = [-1.0 * elem for elem in g]
print direc
point = [x,y]
count = 0
t = 0.0000001
all_points = []
while count <= 500:
# direction needs to be normalized?
#print "direc_before", direc
[x,y] = point
p0, p1, p2 = get_spanning_set(x,y,direc / np.linalg.norm(direc))
new_point = get_minimum(p0, p1, p2)
if np.linalg.norm(np.subtract(new_point[:2], point)) < t:
print "Finished!"
break
else:
new_g = d_rosen(new_point[0], new_point[1])
Y = np.dot(new_g, (np.subtract(new_g,g))) / np.dot(g,g)
if Y < 0:
Y = 0
#print "Y", Y
#print "direc", direc
#print "new_g", new_g
direc = np.add(new_g, [Y*d for d in direc])
point = new_point[:2]
count += 1
all_points.append(new_point)
print new_point
return all_points
all_points = conjugate_gradient_descent()
# plot the congujate descent
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)
x,y = np.meshgrid(x,y)
rosen_f = ((1 - x)**2) + 100.0*((y-(x**2))**2)
fig = plt.figure()
ax = fig.gca(projection='3d')
surf = ax.plot_surface(x, y, rosen_f, alpha=0.5)
all_points = np.vstack(all_points)
points = ax.plot3D(all_points[:,0], all_points[:,1], all_points[:,2], color="black")
ax.view_init(elev=40., azim=150)
plt.show()