In [88]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

Rosenbrock Function Minimization

In [112]:
from ipywidgets import *
%matplotlib notebook

def rosenbrock(X, Y):
    return (1 - X)**2 + 100*(Y - X**2)**2

def show_rosenbrock(x_max, y_max, points=[], title='Rosenbrock function'):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    X = np.linspace(-x_max, x_max, 100)
    Y = np.linspace(-y_max, y_max, 100)
    X, Y = np.meshgrid(X, Y)

    Z = rosenbrock(X, Y)
    ax.plot_surface(X, Y, Z, label="wow")
    ax.set_title(title)
    ax.set_xlabel("x")
    ax.set_ylabel("y")

    if len(points)>0:
        for point in points:
            line = ax.plot(point[0][:,0],point[0][:,1], point[1])
            plt.setp(line, color='r', linewidth=2.0)
    plt.show()
        
def interactive_plot_rosenbrock(x_max, y_max):
    show_rosenbrock(x_max, y_max)
    
interact(interactive_plot_rosenbrock, x_max=(0,10,1), y_max=(0,10,1))
Out[112]:
<function __main__.interactive_plot_rosenbrock>
In [73]:
def nelder_mead_update(X, Z, a, l, p, s, debug=False):
    
    inds = np.argsort(Z)
    X = X[inds,:]
    Z = Z[inds]
    centroid = np.mean(X[:-1,:], axis=0)
    
    x_r = centroid + a*(centroid - X[-1,:])
    z_r = rosenbrock(x_r[0], x_r[1])


    if z_r < Z[-2]:
        if z_r > Z[0]:
            X[-1,:] = x_r
            Z[-1] = z_r
            if debug: print 'reflect'
            
        else:
            x_e = centroid + l*(x_r - centroid)
            z_e = rosenbrock(x_e[0], x_e[1])
            if z_e < z_r:
                X[-1,:] = x_e
                Z[-1] = z_e
                if debug: print 'use expand'
            else:
                X[-1,:] = x_r
                Z[-1] = z_r
                if debug: print 'use reflect'
    
    else:
        x_s  = centroid + p*(centroid - X[-1])
        z_s = rosenbrock(x_s[0], x_s[1])
        
        if z_s < Z[-1]:
            X[-1,:] = x_s
            Z[-1] = z_s
            if debug: print 'reflect and shrink'
        
        else: 
            x_c  = centroid - p*(centroid - X[-1])
            z_c = rosenbrock(x_c[0], x_c[1])
        
            if z_c < Z[-1]:
                X[-1,:] = x_c
                Z[-1] = z_c              
                if debug: print 'contract'   
                
            else:
                X[1:,:] =  X[1:,:] + s*(X[1:,:] - X[0,:])
                Z[1:] = rosenbrock(X[1:,0], X[1:,1])
                if debug: print 'shrink'

    return X, Z

def nelder_mead(X, a=1., l=2., p=0.5, s=0.5, max_iters=200, threshold=1e-7):
    Z = rosenbrock(X[:,0], X[:,1])
    points = [[X,Z]]
    
    for i in range(max_iters):
        X, Z = nelder_mead_update(X, Z, a, l, p, s)
        points.append([X, Z])
        if np.linalg.norm(points[-1][0]-points[-2][0])<threshold:
            return X, Z, points, i   
    return X, Z, points, max_iters
In [74]:
delta = 0.1

start_X = np.array([[-1,-1],[-1+delta,-1],[-1,-1+delta]])

X, Z, points1, i1 = nelder_mead(start_X)
In [77]:
print "Nelder Mead solution:", X[0]
print "iterations:", i1
Nelder Mead solution: [ 1.00000002  1.00000005]
itterations: 119
In [78]:
show_rosenbrock(2,2, points=points1, title="Nelder Mead Method on the Rosenbrock function")
In [80]:
#golden ratio
g_r = (1+5**0.5)/2

def line_min(point, direction, length=4., threshold=0.1):
    x0 = point
    x1 = point + length*direction
    x2 = point + length*direction*(g_r + 1)
    x0_val = rosenbrock(x0[0], x0[1])
    x1_val = rosenbrock(x1[0], x1[1])
    x2_val = rosenbrock(x2[0], x2[1])
    i=0
    
    while np.linalg.norm(x2 - x0) > threshold:
        x3 = x2 + (x1 - x2)*g_r
        x3_val = rosenbrock(x3[0], x3[1])
        
        if x3_val > x1_val:
            x0 = x2
            x0_val = x2_val
            x2 = x3
            x2_val = x3_val
        else:
            x2 = x1
            x2_val = x1_val
            x1 = x3
            x1_val = x3_val
        i += 1
    return x1

def direction_set(point, threshold=1e-7, max_iters=200):
    
    directions = np.array([[0.,1.],[1.,0.]])
    points = []
    costs = []
    i=0
    while i in range(max_iters):
        points.append([point[0],point[1]])
        costs.append(rosenbrock(point[0], point[1]))
        update_points = np.array([line_min(point, direction) for direction in directions])
        vals = update_points - point
        direction = np.mean(vals, axis=0)
        direction_norm = np.linalg.norm(direction)
        directions[np.argmax(np.linalg.norm(vals, axis=1))] = direction / direction_norm
        point += direction
        i+=1
        if(direction_norm < threshold):
            return point, [[np.array(points), np.array(costs)]], i
    print "hit max iterations"
    return point, [[np.array(points), np.array(costs)]], max_iters
In [81]:
point, points2, i2 = direction_set(np.array([-1.,-1.]))
print "Direction Set solution:", point
print "iterations", i2
Direction Set solution: [ 1.02758921  1.06404205]
itterations 11
In [82]:
show_rosenbrock(3,3, points=points2, title="Direction Set Method on the Rosenbrock function")
In [97]:
def rosenbrock_gradient(x, y):
    return np.array([-2*(1-x)-400*x*(y - x**2), 200*(y-x**2)])

def rosenbrock_second_partial(x, y):
    return np.array([[2-400*(y-3*x**2), -400*x], [-400*x, 200]])

def calc_direction(a, b, direction):
    g_a = rosenbrock_gradient(a[0], a[1])
    g_b = rosenbrock_gradient(b[0], b[1])
    
    l = g_b.dot(g_b-g_a)/(g_a.dot(g_a))
    
    direction2 = g_b + l*direction
    return direction2 / np.linalg.norm(direction2)

def conjugate_gradient(point, max_iters=100, threshold=1e-7):   
    direction = np.array([0., 1.])
    i = 0
    points = []
    costs = []

    while i < max_iters:
        points.append([point[0],point[1]])
        costs.append(rosenbrock(point[0], point[1]))
                
        point2 = line_min(point, direction)
        direction = calc_direction(point, point2, direction)
        
        if np.linalg.norm(point2-point) < threshold:
            return point2, [[np.array(points), np.array(costs)]], i
        point = point2
        i+=1
    
    print "hit max iters"
    return point, [[np.array(points), np.array(costs)]], max_iters
        
        
In [92]:
print "testing gradients"
print "numeric:", (rosenbrock(-1,-1)-rosenbrock(-1.0001,-1))/.0001, (rosenbrock(-1,-1)-rosenbrock(-1,-1.0001))/.0001
print "analytic:", rosenbrock_gradient(-1,-1)
testing gradients
numeric: -804.080104 -400.009999999
analytic: [-804 -400]
In [93]:
point3, points3, i3 = conjugate_gradient(np.array([-1.,-1.]))
print "Conjugate Gradient solution:", point3
print "iterations", i3
Conjugate Gradient solution: [ 1.32343025  1.81815787]
itterations 2
In [94]:
show_rosenbrock(3,3, points=points3, title="Conjugate Gradient Method on the Rosenbrock function")
In [154]:
def Levenberg(point, n=500, threshold=1e-7, max_iters=200, vary=True):
    oldDiff = float('inf')
    diff = rosenbrock(point[0], point[1])
    M = np.zeros((2,2))
    i = 0;
    i = 0
    points = [[point[0], point[1]]]
    costs = [diff]
    
    for i in range(max_iters):                
        gradient = rosenbrock_gradient(point[0], point[1])
        M = rosenbrock_second_partial(point[0], point[1])
        
        M[0,0] *= (1+n)
        M[1,1] *= (1+n)


        if vary:
            if diff>oldDiff:
               n*=1.5
            else:
               n/=1.4
                    
        dA = np.linalg.solve(-M,gradient)
        point += dA
        
        oldDiff = diff
        diff = rosenbrock(point[0], point[1])
        
        points.append([point[0],point[1]])
        costs.append(rosenbrock(point[0], point[1]))
        
        if abs(diff-oldDiff) < threshold:
            return point, [[np.array(points), np.array(costs)]], i

    print "hit max iters"
    return point, [[np.array(points), np.array(costs)]], max_iters
In [155]:
point4, points4, i4 = Levenberg(np.array([-1.,-1.]))
print "Levenberg Marquardt solution:", point4
print "iterations", i4
Levenberg Marquardt solution: [ 0.99998701  0.99997399]
iterations 44
In [156]:
show_rosenbrock(3,3, points=points4, title="Levenberg Marquardt Method on the Rosenbrock function")

Minimizing Spin Energy

In [116]:
N=100
iterations = 5000

J = np.random.normal(size=N)
S = np.random.randint(3, size=N) - 1


def cost(S, J):
    length = len(S)
    
    return sum(J[i]*S[i]*S[-length+i+1] for i in range(length))

def flip(S, J, t, a):
    cost1 = cost(S, J)
    s = np.random.randint(len(S))
    S[s] = -S[s]
    cost2 = cost(S, J)
    if(cost2 > cost1):
        p = np.random.rand()
        if p > np.exp(-a*t*(cost2-cost1)):
            S[s] = - S[s]
    
def anneal(S, J, a, iters=10):
    costs = []
    for i in range(iters):
        costs.append(cost(S, J))
        flip(S, J, i, a)
    return S, costs
            
plt.figure()

minVal = -sum(np.abs((J)))
plt.plot(range(iterations), np.ones(iterations)*minVal, label='min bound')


for a in [0.1, 0.01, 0.001]:
    S_flip = [x for x in S]
    plt.plot(range(iterations), anneal(S_flip, J, a, iterations)[1], label='a='+str(a))
    plt.legend()
    plt.title("Cost during Simulated Annealing for different values of A")

plt.show()
In [ ]:
plt.figure()
rands = np.random.rand(100)
plt.plot(range(100), rands)
plt.plot(range(100), rands < np.exp(-0.1*np.arange(100)))
plt.plot(range(100), np.exp(-0.1*np.arange(100)))
plt.ylim(0,2)
plt.show()
In [152]:
iterations2 = 50
spins = np.random.randint(3, size=(100,N))-1
                          
def probs(spins, b):
    probs = np.exp(-b*(np.array([cost(spins[i,:], J) for i in range(100)])-minVal))
    probs /= sum(probs)
    return probs
    
def evolution(spins, J, b, iters=20):
    costs = []
    for i in range(iters):
        probs_i = probs(spins, b)
        costs.append(cost(spins[np.argmax(probs_i),:], J))
        spins2 = np.zeros((100,N))
        rand1 = np.random.choice(100, 100, p=probs_i)
        rand2 = np.random.choice(100, 100, p=probs_i)
        rand3 = np.random.randint(N, size=100)
        rand4 = np.random.randint(N, size=100)
        
        for j in range(100):
            spins[j, :rand3[j]] = spins[rand1[j], :rand3[j]]
            spins[j, rand3[j]:] = spins[rand2[j], rand3[j]:]
            spins[j, rand4[j]] *= -1
    return costs

plt.figure()
plt.plot(range(iterations2), np.ones(iterations2)*minVal, label='min bound')


for b in [0.1, 0.01, 0.001]:
    spins_flip =  np.array([[x for x in y] for y in spins])
    plt.plot(range(iterations2), evolution(spins_flip, J, b, iterations2), label='b='+str(b))
    plt.legend()
    plt.title("Cost during Genetic Algorithm evolution for different values of b")

plt.show()