In [21]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

Part (a) - Plot

In [22]:
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)

x,y = np.meshgrid(x,y)

rosen = ((1 - x)**2) + 100.0*((y-(x**2))**2)

fig = plt.figure()
ax = fig.gca(projection='3d')

surf = ax.plot_surface(x, y, rosen, alpha=0.5)
print np.min(rosen)
plt.show()
0.000101
In [23]:
def rosen(x,y):
    return (1 - x)**2 + 100.0*((y-x**2)**2)

class simplex_2D:
    def __init__(self, function, bound_low, bound_high):
        self.function = function
        
        delta = 1.0 #?
        
        # choose random starting point and unit vector simplex edges

#         start_x0 = -1
#         start_y0 = -1
        
#         start_x1 = 1
#         start_y1 = 0
        
#         start_x2 = 0
#         start_y2 = 1

        start_x0 = -1
        start_y0 = -1
        
        start_x1 = -0.9
        start_y1 = -1
        
        start_x2 = -1
        start_y2 = -0.9
        
        self.vertices = np.array([[start_x0, start_y0], [start_x1, start_y1], [start_x2, start_y2]])
        
    def eval_vertices(self):
        evald = np.zeros((3,3))
        for i, v in enumerate(self.vertices):
            r = self.function(v[0], v[1])
            evald[i] = [v[0], v[1], r]
        return evald
        
    def eval_others(self, others):
        evald = np.zeros((3,3))
        for i, v in enumerate(others):
            r = self.function(v[0], v[1])
            evald[i] = [v[0], v[1], r]
        return evald
    
    def get_worst_others(self):
        evald = self.eval_vertices()
        worst_idx = np.argmax(evald[:,2])
        worst = self.vertices[worst_idx]
        others = np.delete(self.vertices, worst_idx, 0)
        return worst, others        
        
    def get_mean(self, others):        
        new_x = np.sum(others[:,0]) / 2.0
        new_y = np.sum(others[:,1]) / 2.0
        return np.array([new_x, new_y])
    
    def reflect(self, worst, xmean):
        return 2*xmean - worst
    
    def double_step(self, worst, xmean):
        return 3*xmean - 2*worst
    
    def reflect_shrink(self, worst, xmean):
        return (3/2.0)*xmean - (1.0/2)*worst
    
    def shrink(self, worst, xmean):
        return (1/2.0)*(xmean + worst)
    
    def shrink_to_best(self):
        evald = self.eval_vertices()
        best_idx = np.argmin(evald[:,2]) 
        best = self.vertices[best_idx]
        v = self.vertices.copy()
        i = 0
        for xi in self.vertices:
            if not np.array_equal(xi, best):
                v[i] = (1.0/2) * (xi + best)
                i += 1
                
        v[i] = best
        return v
    
    def quit_conditions(self):
        #fix this
#         print "self.vertices", self.vertices
        print "current_min", np.min(self.eval_vertices()[:,2])
        evald = self.eval_vertices()
        print "evald", evald
        return np.min(evald[:,2]) <= 0.0002 and np.min(evald[:,2]) >= 0.0000001

    
    def run(self):
        all_vertice_sets = []
        count = 0
        while (not self.quit_conditions()) and count <= 500:
            count += 1
            all_vertice_sets.append(self.vertices)
            # get the vertice with worst value (maximum for minimization task) and the list of remaining vertices
            worst, others = self.get_worst_others()
            xmean = self.get_mean(others)
            
#             print "worst vertex", worst

            # first - try reflection
            new_worst = self.reflect(worst, xmean)

            
            # is it better than the best?
            evald = self.eval_vertices()
            best_idx = np.argmin(evald[:,2])
            best = self.vertices[best_idx]
#             print "best vertex", evald[best_idx]
 
            if self.function(new_worst[0], new_worst[1]) <= self.function(best[0], best[1]):
                # try to double step
                new_double_worst = self.double_step(worst, xmean)
                if self.function(new_double_worst[0], new_double_worst[1]) <= self.function(new_worst[0], new_worst[1]):
                    new_worst = new_double_worst
                    self.vertices = np.vstack((others, new_worst))
                    continue
                    
            # if not worst, keep and cycle to next point
            others_evald = self.eval_others(others)
            others_worst_idx = np.argmax(others_evald[:,2])
            others_worst = others[others_worst_idx]
            
          
            if self.function(new_worst[0], new_worst[1]) <= self.function(others_worst[0], others_worst[1]):
                self.vertices = np.vstack((others, new_worst))
#                 print "reflect"
                continue
            # if is still the worst - try reflecting and shrinking
            else:
                new_worst = self.reflect_shrink(worst, xmean)  
                if self.function(new_worst[0], new_worst[1]) <= self.function(others_worst[0], others_worst[1]):
                    self.vertices = np.vstack((others, new_worst))
                    continue
                # still worse? try just shrinking
                else:
                    new_worst = self.shrink(worst, xmean)
                    if self.function(new_worst[0], new_worst[1]) <= self.function(others_worst[0], others_worst[1]):
                        self.vertices = np.vstack((others, new_worst))
                        continue
                    # STILL the worst? give up and shrink all towards best    
                    else:
                        self.vertices = self.shrink_to_best()
                        
        return self.vertices, all_vertice_sets
In [24]:
s = simplex_2D(rosen, -1, 1)
v, all_v = s.run()
current_min 331.22
evald [[  -1.     -1.    404.  ]
 [  -0.9    -1.    331.22]
 [  -1.     -0.9   365.  ]]
current_min 250.698125
evald [[  -0.9        -1.        331.22    ]
 [  -1.         -0.9       365.      ]
 [  -0.85       -0.85      250.698125]]
current_min 189.133789062
evald [[  -0.9          -1.          331.22      ]
 [  -0.85         -0.85        250.698125  ]
 [  -0.625        -0.975       189.13378906]]
current_min 84.3791430664
evald [[  -0.85         -0.85        250.698125  ]
 [  -0.625        -0.975       189.13378906]
 [  -0.4125       -0.7375       84.37914307]]
current_min 79.8389015198
evald [[ -6.25000000e-01  -9.75000000e-01   1.89133789e+02]
 [ -4.12500000e-01  -7.37500000e-01   8.43791431e+01]
 [  1.43750000e-01  -8.68750000e-01   7.98389015e+01]]
current_min 57.8956886292
evald [[ -0.4125      -0.7375      84.37914307]
 [  0.14375     -0.86875     79.83890152]
 [  0.35625     -0.63125     57.89568863]]
current_min 57.4718482971
evald [[  0.14375     -0.86875     79.83890152]
 [  0.35625     -0.63125     57.89568863]
 [ -0.08125     -0.74375     57.4718483 ]]
current_min 12.3681640625
evald [[  0.35625     -0.63125     57.89568863]
 [ -0.08125     -0.74375     57.4718483 ]
 [  0.125       -0.325       12.36816406]]
current_min 12.3681640625
evald [[ -0.08125     -0.74375     57.4718483 ]
 [  0.125       -0.325       12.36816406]
 [ -0.3125      -0.4375      30.36187744]]
current_min 1.31402359009
evald [[  1.25000000e-01  -3.25000000e-01   1.23681641e+01]
 [ -3.12500000e-01  -4.37500000e-01   3.03618774e+01]
 [ -1.06250000e-01  -1.87500000e-02   1.31402359e+00]]
current_min 0.472751617432
evald [[  0.125       -0.325       12.36816406]
 [ -0.10625     -0.01875      1.31402359]
 [  0.33125      0.09375      0.47275162]]
current_min 0.472751617432
evald [[ 0.228125   -0.115625    3.4069803 ]
 [ 0.1125      0.0375      0.84937744]
 [ 0.33125     0.09375     0.47275162]]
current_min 0.472751617432
evald [[ 0.2796875  -0.0109375   1.31384698]
 [ 0.221875    0.065625    0.63236299]
 [ 0.33125     0.09375     0.47275162]]
current_min 0.472751617432
evald [[ 0.30546875  0.04140625  0.7517856 ]
 [ 0.2765625   0.0796875   0.52438625]
 [ 0.33125     0.09375     0.47275162]]
current_min 0.472751617432
evald [[ 0.2765625   0.0796875   0.52438625]
 [ 0.33125     0.09375     0.47275162]
 [ 0.303125    0.109375    0.5162256 ]]
current_min 0.379940643907
evald [[ 0.33125     0.09375     0.47275162]
 [ 0.303125    0.109375    0.5162256 ]
 [ 0.3984375   0.1453125   0.37994064]]
current_min 0.379940643907
evald [[ 0.33125     0.09375     0.47275162]
 [ 0.3984375   0.1453125   0.37994064]
 [ 0.39570313  0.12460938  0.46739296]]
current_min 0.379940643907
evald [[ 0.3984375   0.1453125   0.37994064]
 [ 0.39570313  0.12460938  0.46739296]
 [ 0.46289063  0.17617188  0.4336159 ]]
current_min 0.28031466068
evald [[ 0.3984375   0.1453125   0.37994064]
 [ 0.46289063  0.17617188  0.4336159 ]
 [ 0.50058594  0.23300781  0.28031466]]
current_min 0.28031466068
evald [[ 0.3984375   0.1453125   0.37994064]
 [ 0.50058594  0.23300781  0.28031466]
 [ 0.43613281  0.20214844  0.33219446]]
current_min 0.159591390903
evald [[ 0.50058594  0.23300781  0.28031466]
 [ 0.43613281  0.20214844  0.33219446]
 [ 0.60820313  0.36210938  0.15959139]]
current_min 0.159591390903
evald [[ 0.50058594  0.23300781  0.28031466]
 [ 0.60820313  0.36210938  0.15959139]
 [ 0.61352539  0.34526367  0.24639321]]
current_min 0.159591390903
evald [[ 0.60820313  0.36210938  0.15959139]
 [ 0.61352539  0.34526367  0.24639321]
 [ 0.66600342  0.41402588  0.19878341]]
current_min 0.102679103675
evald [[ 0.60820313  0.36210938  0.15959139]
 [ 0.66600342  0.41402588  0.19878341]
 [ 0.68425903  0.47367554  0.1026791 ]]
current_min 0.102679103675
evald [[ 0.60820313  0.36210938  0.15959139]
 [ 0.68425903  0.47367554  0.1026791 ]
 [ 0.63634491  0.41982574  0.15441892]]
current_min 0.102679103675
evald [[ 0.68425903  0.47367554  0.1026791 ]
 [ 0.63634491  0.41982574  0.15441892]
 [ 0.71240082  0.53139191  0.13972431]]
current_min 0.0383465519304
evald [[ 0.68425903  0.47367554  0.1026791 ]
 [ 0.71240082  0.53139191  0.13972431]
 [ 0.82229996  0.66794968  0.03834655]]
current_min 0.0383465519304
evald [[ 0.68425903  0.47367554  0.1026791 ]
 [ 0.82229996  0.66794968  0.03834655]
 [ 0.79415817  0.61023331  0.08420705]]
current_min 0.0383465519304
evald [[ 0.82229996  0.66794968  0.03834655]
 [ 0.79415817  0.61023331  0.08420705]
 [ 0.74624405  0.55638351  0.06441675]]
current_min 0.0383465519304
evald [[ 0.82229996  0.66794968  0.03834655]
 [ 0.74624405  0.55638351  0.06441675]
 [ 0.77932892  0.61313324  0.05203619]]
current_min 0.0257908943596
evald [[ 0.82229996  0.66794968  0.03834655]
 [ 0.77932892  0.61313324  0.05203619]
 [ 0.85538483  0.7246994   0.02579089]]
current_min 0.0257908943596
evald [[ 0.82229996  0.66794968  0.03834655]
 [ 0.85538483  0.7246994   0.02579089]
 [ 0.80908566  0.65472889  0.03644948]]
current_min 0.0254062843192
evald [[ 0.85538483  0.7246994   0.02579089]
 [ 0.80908566  0.65472889  0.03644948]
 [ 0.84217052  0.71147861  0.02540628]]
current_min 0.0187263466714
evald [[ 0.85538483  0.7246994   0.02579089]
 [ 0.84217052  0.71147861  0.02540628]
 [ 0.8884697   0.78144913  0.01872635]]
current_min 0.0160261949898
evald [[ 0.84217052  0.71147861  0.02540628]
 [ 0.8884697   0.78144913  0.01872635]
 [ 0.87525539  0.76822834  0.01602619]]
current_min 0.0160261949898
evald [[ 0.8884697   0.78144913  0.01872635]
 [ 0.87525539  0.76822834  0.01602619]
 [ 0.92155457  0.83819885  0.01839482]]
current_min 0.00791269003289
evald [[ 0.87525539  0.76822834  0.01602619]
 [ 0.92155457  0.83819885  0.01839482]
 [ 0.91827555  0.84674253  0.00791269]]
current_min 0.00791269003289
evald [[ 0.87525539  0.76822834  0.01602619]
 [ 0.91827555  0.84674253  0.00791269]
 [ 0.90916002  0.82284214  0.00964304]]
current_min 0.00508592755409
evald [[ 0.91827555  0.84674253  0.00791269]
 [ 0.90916002  0.82284214  0.00964304]
 [ 0.95218017  0.90135634  0.00508593]]
current_min 0.000408454644175
evald [[  9.18275547e-01   8.46742535e-01   7.91269003e-03]
 [  9.52180171e-01   9.01356339e-01   5.08592755e-03]
 [  9.87363541e-01   9.76464021e-01   4.08454644e-04]]
current_min 0.000408454644175
evald [[  9.52180171e-01   9.01356339e-01   5.08592755e-03]
 [  9.87363541e-01   9.76464021e-01   4.08454644e-04]
 [  9.95520011e-01   9.84994003e-01   3.69981304e-03]]
current_min 0.000408454644175
evald [[  9.87363541e-01   9.76464021e-01   4.08454644e-04]
 [  9.95520011e-01   9.84994003e-01   3.69981304e-03]
 [  1.03070338e+00   1.06010168e+00   1.44794641e-03]]
current_min 0.000408454644175
evald [[  9.87363541e-01   9.76464021e-01   4.08454644e-04]
 [  1.03070338e+00   1.06010168e+00   1.44794641e-03]
 [  1.01579019e+00   1.03492728e+00   1.20882761e-03]]
current_min 0.000273314174202
evald [[  9.87363541e-01   9.76464021e-01   4.08454644e-04]
 [  1.01579019e+00   1.03492728e+00   1.20882761e-03]
 [  1.01614012e+00   1.03289867e+00   2.73314174e-04]]
current_min 0.000273314174202
evald [[  9.87363541e-01   9.76464021e-01   4.08454644e-04]
 [  1.01614012e+00   1.03289867e+00   2.73314174e-04]
 [  9.87713477e-01   9.74435410e-01   2.81489858e-04]]
current_min 0.000236850050206
evald [[  1.01614012e+00   1.03289867e+00   2.73314174e-04]
 [  9.87713477e-01   9.74435410e-01   2.81489858e-04]
 [  1.00920843e+00   1.01726855e+00   2.36850050e-04]]
current_min 3.95113361171e-05
evald [[  1.01614012e+00   1.03289867e+00   2.73314174e-04]
 [  1.00920843e+00   1.01726855e+00   2.36850050e-04]
 [  1.00019388e+00   9.99759509e-01   3.95113361e-05]]
In [25]:
all_v_plot = np.vstack(all_v)
all_v_plot.shape

# plot the simplex
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)

x,y = np.meshgrid(x,y)

rosen_f = ((1 - x)**2) + 100.0*((y-(x**2))**2)

fig = plt.figure()
ax = fig.gca(projection='3d')

surf = ax.plot_surface(x, y, rosen_f, alpha=0.5)

r_vals = []
for i in range(all_v_plot.shape[0]):
    x = all_v_plot[i,0]
    y = all_v_plot[i,1]
    r_vals.append(rosen(x,y))
        
vertices = ax.plot3D(all_v_plot[:,0], all_v_plot[:,1], r_vals, color="black")
#print all_v_plot
#print r_vals
ax.view_init(elev=40., azim=150)
plt.show()
In [26]:
# line minimization / direction sets
gr = (1 + np.sqrt(5)) / 2.0

def get_spanning_set(x0, y0, direc):
    d0, d1 = direc
    point0 = [x0, y0, rosen(x0, y0)]
    point1 = [x0 + d0, y0 + d1, rosen(x0 + d0, y0 + d1)]
    
   
    # do these points span minimum?
    if point1[2] > point0[2]:
        # switch
        temp0 = point0[:]
        point0 = point1[:]
        point1 = temp0[:]
        
    # get point2
    point2 = [None, None, None]
    point2[:2] = point1[:2] + (np.subtract(point1[:2],point0[:2])) * gr
    point2[2] = (rosen(point2[0], point2[1]))
    
      
    while point2[2] < point1[2]:
        point0 = point1[:]
        point1 = point2[:]
        point2[:2] = point1[:2] + (np.subtract(point1[:2],point0[:2])) * gr
        point2[2] = rosen(point2[0], point2[1])
        
       
    return point0, point1, point2

def get_minimum(point0, point1, point2):
    t = 0.001
    point3 = [None, None, None]
    while np.linalg.norm(np.subtract(point2[:2], point0[:2])) > t:
        # compute new point
        point3[:2] = point0[:2] + np.subtract(point2[:2], point1[:2])
        point3[2] = rosen(point3[0], point3[1])
        if point3[2] > point1[2]:
            # switch 
            point2 = point0[:]
            point0 = point3[:]
        else:
            point0 = point1[:]
            point1 = point3[:]
            
    return point1
    
    
def powell():
    x_past = -1.0
    y_past = -0.5   
    ds = np.eye(2,2) * 0.1
    f_vals = np.zeros(2)
    
    x = x_past
    y = y_past
    
    count = 0 
    min_points = []
    while count <= 20:

        # for each direction saved
        for i in range(2):
            direc = ds[:,i]
            p0, p1, p2 = get_spanning_set(x,y,direc)

            point1 = get_minimum(p0, p1, p2)
            x,y = point1[:2]
            
        new_direc = np.subtract(point1[:2],[x_past, y_past])
        #print "dotted vec", np.dot(np.transpose(ds), new_direc)
        i = np.argmax(np.dot(np.transpose(ds), new_direc))
        #print "ds", ds
        #print "new direc", new_direc
        ds[:,i] = new_direc
        #print "ds with new direc", ds
        
        x_past = point1[0]
        y_past = point1[1]
        
        print "x,y", x, y
        min_points.append([x,y])
        count += 1
        
    return min_points
In [27]:
min_points = powell()
x,y 0.00983005625053 -1.11022302463e-16
x,y 0.504508909073 0.25445122794
x,y 0.52190923336 0.272418542015
x,y 0.638263769756 0.40715429583
x,y 0.690877562281 0.46356582269
x,y 0.682778116712 0.455345962925
x,y 0.683419300037 0.456268588581
x,y 0.79548193975 0.619333327882
x,y 0.819035319014 0.660936701757
x,y 0.956624038721 0.912167504367
x,y 0.972500443108 0.943440115713
x,y 0.999199123249 0.998122086314
x,y 0.999839715366 0.99972232754
x,y 0.99968849208 0.99934456183
x,y 0.999781953211 0.999578033879
x,y 0.999781953211 0.999578033879
x,y 0.999781953211 0.999578033879
x,y 0.999781953211 0.999578033879
x,y 0.999781953211 0.999578033879
x,y 0.999781953211 0.999578033879
x,y 0.999781953211 0.999578033879
In [28]:
min_points = np.vstack(min_points)
min_points.shape

# plot the powell method
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)

x,y = np.meshgrid(x,y)

rosen_f = ((1 - x)**2) + 100.0*((y-(x**2))**2)

fig = plt.figure()
ax = fig.gca(projection='3d')

surf = ax.plot_surface(x, y, rosen_f, alpha=0.5)

r_mp_vals = []
for i in range(min_points.shape[0]):
    x = min_points[i,0]
    y = min_points[i,1]
    r_mp_vals.append(rosen(x,y))
    
#print min_points
#print r_mp_vals
        
points = ax.plot3D(min_points[:,0], min_points[:,1], r_mp_vals, color="black")
ax.view_init(elev=40., azim=150)
plt.show()
In [29]:
# conjugate gradient descent

# rosen function gradient
def d_rosen(x,y):
    #list with del-x and del-y
    return [-2*(1-x) - 4*100*(y-x**2)*x,  2*100*(y-x**2)]

def conjugate_gradient_descent():
    x = -1.0
    y = -0.5
    r = rosen(x,y)
    g = d_rosen(x,y)
    print g
    direc = [-1.0 * elem for elem in g]
    print direc
    
    point = [x,y]
    
    count = 0
    t = 0.0000001
    
    all_points = []
    while count <= 500:
        # direction needs to be normalized?
        #print "direc_before", direc
        [x,y] = point
        p0, p1, p2 = get_spanning_set(x,y,direc / np.linalg.norm(direc))
        new_point = get_minimum(p0, p1, p2)
        
        if np.linalg.norm(np.subtract(new_point[:2], point)) < t:
            print "Finished!"
            break
        else:
            new_g = d_rosen(new_point[0], new_point[1])
            Y = np.dot(new_g, (np.subtract(new_g,g))) / np.dot(g,g)
            if Y < 0:
                Y = 0
            #print "Y", Y
            #print "direc", direc
            #print "new_g", new_g
            direc = np.add(new_g, [Y*d for d in direc])
            point = new_point[:2]
            
        count += 1
        
        all_points.append(new_point)
        
    print new_point
    return all_points
            
    
In [30]:
all_points = conjugate_gradient_descent()
[-604.0, -300.0]
[604.0, 300.0]
Finished!
[0.95742060288867181, 0.91664689519770892, 0.0018130104102201164]
In [31]:
# plot the congujate descent
x = np.arange(-1,1,0.01)
y = np.arange(-1,1,0.01)

x,y = np.meshgrid(x,y)

rosen_f = ((1 - x)**2) + 100.0*((y-(x**2))**2)

fig = plt.figure()
ax = fig.gca(projection='3d')

surf = ax.plot_surface(x, y, rosen_f, alpha=0.5)

all_points = np.vstack(all_points)
points = ax.plot3D(all_points[:,0], all_points[:,1], all_points[:,2], color="black")
ax.view_init(elev=40., azim=150)
plt.show()
In [ ]: