In [47]:
from __future__ import division
from numpy import *
from matplotlib import pyplot as plt
from matplotlib import animation
from mpl_toolkits.mplot3d import axes3d
%pylab inline

#from http://jakevdp.github.io/blog/2013/05/12/embedding-matplotlib-animations/
from tempfile import NamedTemporaryFile

VIDEO_TAG = """<video controls>
 <source src="data:video/x-m4v;base64,{0}" type="video/mp4">
 Your browser does not support the video tag.
</video>"""

def anim_to_html(anim):
    if not hasattr(anim, '_encoded_video'):
        with NamedTemporaryFile(suffix='.mp4') as f:
            anim.save(f.name, fps=20,extra_args=['-vcodec', 'libx264', '-pix_fmt', 'yuv420p'])
            video = open(f.name, "rb").read()
        anim._encoded_video = video.encode("base64")
    return VIDEO_TAG.format(anim._encoded_video)

from IPython.display import HTML
def display_animation(anim):
    plt.close(anim._fig)
    return HTML(anim_to_html(anim))
Populating the interactive namespace from numpy and matplotlib

In [483]:
def rosenbrock(v):
    return (1-v[...,0])**2 + 100*(v[...,0]-v[...,1]**2)**2
def d_rosenbrock(v):
    if len(shape(v))==1:
        return array([
            -2*(1-v[0]+200*(v[1]-v[0]**2)*v[0]),
            200*(v[1]-v[0]**2)
            ])
    else:
        return array([
            -2*(1-v[...,0]+200*(v[...,1]-v[...,0]**2)*v[...,0]),
            200*(v[...,1]-v[...,0]**2)
            ])
def plot_ros_simplices(simplices,n_frames = 20, figsize=(8, 6), dpi=80):
    n_simplices = shape(simplices)[0]
    fig = plt.figure(figsize=figsize, dpi=dpi)
    ax = fig.add_subplot(111, projection='3d')
    ax.axis('off')
    ax.set_xlim((-1, 1))
    ax.set_ylim((-1, 1))
    N = 100;
    x = linspace(-1,1.5,N); y = linspace(-1.,1.5,N);
    X,Y = meshgrid(x,y)
    Z = rosenbrock(dstack((X,Y)))
    simplex_plots = []
    for s in simplices: simplex_plots += ax.plot([], [], [], marker='.',linestyle='-', c='r')
        
    def init():
        ax.plot_wireframe(X, Y, Z, rstride=10, cstride=10)
        for s in simplex_plots:
            s.set_data([], [])
            s.set_3d_properties([])
    def animate(i):
        ax.azim = 360/n_frames*i
        ax.elev = 30
        si = ceil(i*n_simplices/n_frames)
        loop = array([0,1,2,0])
        for s,sp in zip(simplices[:si],simplex_plots):
            sp.set_data(s[loop,0],s[loop,1]); sp.set_3d_properties(rosenbrock(s[loop]))
    return animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=n_frames, interval=20, blit=True)
display_animation(plot_ros_simplices(array([[[-1,-1],[-1,-1],[-1,-1]]])))
Out[483]:
In [427]:
start = array([-1,-1]);
simplex = vstack((start+.1*eye(2),array(start)))
def nelder_mead(simplex,f,d):
    simplices = simplex[None,...] #keep track
    while(True):
        simplex = simplices[-1] #our working simplex
        simplex = simplex[argsort(-f(simplex))] #sort the vertices, descending by f value
        xmean = sum(simplex[1:],axis=0)/d
        if f(2*xmean-simplex[0]) < f(simplex[-1]):             #reflection is best
            if f(3*xmean-2*simplex[0]) < f(2*xmean-simplex[0]):
                simplex[0] = 3*xmean-2*simplex[0]              #grow works, keep it
            else:
                simplex[0] = 2*xmean-simplex[0]                #grow doesn't work, keep original reflection
        elif f(2*xmean-simplex[0]) < f(simplex[1]):            #reflection helped a bit, keep and iterate
            simplex[0] = 2*xmean-simplex[0]
        else:                                                  #reflection doesn't help at all
            if f(1.5*xmean+.5*simplex[0]) < f(simplex[1]):     #try reflect and shrink
                simplex[0] = 1.5*xmean+.5*simplex[0]
            elif f(.5*xmean+.5*simplex[0]) < f(simplex[1]):    #try just shrinking
                simplex[0] = .5*xmean+.5*simplex[0]
            else: simplex[:-1] = .5*(simplex[:-1]+simplex[-1]) #nothing is working, shrink all towards best
        simplices = vstack((simplices,simplex[None,...]))
        if abs( f(sum(simplices[-1],axis=0)/(d+1)) - f(sum(simplices[-2],axis=0)/(d+1)) ) < 1e-3:
            break
    return simplices
anim = plot_ros_simplices(nelder_mead(simplex,rosenbrock,2),n_frames=50,dpi=150,figsize=(10,8))
display_animation(anim)
#anim.save('nelder-mead.gif', writer='imagemagick', fps=10)
In [431]:
phi = .5*(1+sqrt(5))
def find_decrease(x0,d,f,alpha=1):
    #from a starting point, direction, and initial step length, return a step length that gives descent
    a = alpha
    while(True):
        x1 = x0+a*d
        if f(x1) < f(x0): break
        a /= phi
        if a<1e-4:
            a = -alpha #if we shrink all the way to the start, switch direction
    return a
def bracket(x0,d,f,alpha=1):
    #find a bracketing triple from starting point, direction, and intitial step length
    alpha = find_decrease(x0,d,f,alpha=alpha)
    while(True):
        x1,x2 = x0+alpha*d,x0+alpha*(1+1/phi)*d
        if (f(x1) < min(f(x0),f(x2))):
            break
        alpha *= phi
    return x0,x1,x2    
#test
plt.figure()
b = array(bracket(0,1,cos))
t = linspace(0,b[2],100)
plt.plot(t,cos(t)); plt.plot(b,cos(b),linestyle='',marker='.',color='r')
Out[431]:
[<matplotlib.lines.Line2D at 0x116902310>]
In [432]:
def linemin_old(x0,d,f,alpha=1,eps=1e-6,just_result=False):
    #golden section line minimization, starting from bracketing triple
    x0,x1,x2 = bracket(x0,d,f,alpha=alpha)
    x = array([[x0,x1,x2]])
    l = lambda x: sum(x**2,axis=0)
    while(l(x2-x0) > eps):
        if l(x1-x0) > l(x2-x1):
            x3 = x0 + (x1-x0)/phi*d
            if f(x3)<f(x1):
                x0,x1,x2 = x0,x3,x1
            else:
                x0,x1,x2 = x1,x3,x2
        else:
            x3 = x1 + (x2-x1)/phi*d
            if f(x3)<f(x1):
                x0,x1,x2 = x1,x3,x2
            else:
                x0,x1,x2 = x0,x1,x3
        x = vstack((x,array([[x0,x1,x2]])))
    if just_result:
        return x[-1,1] #just return x1 from last iteration
    else:
        return x

#test
x = linemin(0,1,cos)
t = linspace(0,2*pi,100)
fig = plt.figure(dpi=150,figsize=(10,8))
ax = plt.axes(xlim=(0, 2*pi), ylim=(-1.1, 1))
pts, = plt.plot([],[],linestyle='',marker='.',color='r')
def init():
    plt.plot(t,cos(t));
    pts.set_data([], [])
    return pts,
def animate(i):
    pts.set_data(x[:i+1], cos(x[:i+1]))
    return pts, 
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=shape(x)[0], interval=20, blit=True)
display_animation(anim)
#anim.save('line-min.gif', writer='imagemagick', fps=10)
Out[432]:
In [433]:
#sgn = lambda x: (1, -1)[x<0]
def mnbrak(ax,bx,f,glimit=1e2,eps=1e-6):
    #given distinct initial points ax and bx, search for a bracketing triple ax,bx,cx
    #from numerical recipes, 10.1
    if f(bx)>f(ax):
        tmp = ax; ax = bx; bx = tmp; #switch
    cx = bx + phi*(bx-ax);
    while True:
        r=(bx-ax)*(f(bx)-f(cx)) #do parabolic fit
        q=(bx-cx)*(f(bx)-f(ax))
        denom = q-r if q-r!=0 else eps*sgn(q-r)
        u=bx-((bx-cx)*q-(bx-ax)*r)/(2*denom)
        ulim=bx+glimit*(cx-bx)
        if (bx-u)*(u-cx) > 0:
            if f(u) < f(cx):
                return bx,u,cx
            elif f(u) > f(bx):
                return ax,bx,u
            u = cx+phi*(cx-bx) #parabolic fit didn't help, use default magnification
        elif (cx-u)*(u-ulim) > 0:
            if f(u) < f(cx):
                bx=cx
                cx=u
                u = cx+phi*(cx-bx)
        elif (u-ulim)*(ulim-cx) > 0:
            u = ulim
        else:
            u = cx+phi*(cx-bx)
        ax=bx; bx=cx; cx=u;
        if f(bx) < f(cx): break
    return ax,bx,cx
In [295]:
x = array([mnbrak(0,1,cos)])
t = linspace(0,2*pi,100)
fig = plt.figure()
#ax = plt.axes(xlim=(0, 2*pi), ylim=(-1.1, 1))
plt.plot(t,cos(t))
plt.plot(x,cos(x),linestyle='',marker='.',color='r')
print x
[[ 1.          2.61803399  5.23606798]]

In [466]:
def linemin(xi,d,f,alpha=1,eps=1e-3,just_result=False):
    #golden section line minimization
    fs = lambda a: f(xi+a*d) #scalar
    x0,x1,x2 = mnbrak(0,alpha,fs)
    #x0,x1,x2 = map(lambda a: x0+a*d,mnbrak(0,alpha,fs))
    x = array([[x0,x1,x2]])
    #l = lambda x: sum(x**2,axis=0)
    i=0
    while(x2-x0 > eps):
        #print 'linemin',l(x2-x0)
        i = i+1
        if x1-x0 > x2-x1:
            x3 = x0 + (x1-x0)/phi
            if fs(x3)<fs(x1):
                x0,x1,x2 = x0,x3,x1
            else:
                x0,x1,x2 = x1,x3,x2
        else:
            x3 = x1 + (x2-x1)/phi
            if fs(x3)<fs(x1):
                x0,x1,x2 = x1,x3,x2
            else:
                x0,x1,x2 = x0,x1,x3
        x = vstack((x,array([[x0,x1,x2]])))

    if just_result:
        return x[-1,1] #just return x1 from last iteration
    else:
        return x
In [384]:
#test
x = linemin(0,1.,cos)
t = linspace(0,2*pi,100)
fig = plt.figure()
ax = plt.axes(xlim=(0, 2*pi), ylim=(-1.1, 1))
pts, = plt.plot([],[],linestyle='',marker='.',color='r')
def init():
    plt.plot(t,cos(t));
    pts.set_data([], [])
    return pts,
def animate(i):
    pts.set_data(x[:i+1], cos(x[:i+1]))
    return pts, 
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               frames=shape(x)[0], interval=20, blit=True)
display_animation(anim)
Out[384]:
In [469]:
def powell(x0,f,alpha=1.,eps=1e-4,max_iter=100):
    #powell direction set method, start x0, function f
    x = array([x0])
    n_dim = shape(x)[-1]
    d = eye(n_dim) #take coordinate axes as initial directions
    #it is important to make sure these are descent directions, else linemin will fail
    i=0
    while(True):
        df = []    #changes in objective along each direction
        i = i+1
        for di in d:
            a_min = linemin(x[-1],di,f,alpha=alpha,just_result=True) #minimize in this direction
            xnew = x[-1]+a_min*di
            x = vstack((x,array([xnew])))
            df.append(f(x[-2])-f(x[-1])) #save change in objective
        if max(df) < eps or i>max_iter: break
        d[argmax(df)] = sum(d,axis=0)/n_dim #replace old direction with overall change
        #d[argmax(df)] /= linalg.norm(d[argmax(df)])
    return x
In [454]:
def plot_ros_powell(pts,n_frames = 20, figsize=(8, 6), dpi=80):
    n_pts = shape(pts)[0]
    fig = plt.figure(figsize=figsize, dpi=dpi)
    ax = fig.add_subplot(111, projection='3d')
    ax.axis('off')
    ax.set_xlim((-1, 1))
    ax.set_ylim((-1, 1))
    N = 100;
    x = linspace(-1.5,2,N); y = linspace(-1.5,2,N);
    X,Y = meshgrid(x,y)
    Z = rosenbrock(dstack((X,Y)))
    x_plot, = ax.plot([], [], [], marker='.',linestyle='-', c='r')
    def init():
        ax.plot_wireframe(X, Y, Z, rstride=10, cstride=10)
        x_plot.set_data([], [])
        x_plot.set_3d_properties([])
    def animate(i):
        ax.azim = 360/n_frames*i
        ax.elev = 30
        xi = ceil(i*n_pts/n_frames)
        x_plot.set_data(pts[:xi,0],pts[:xi,1]);
        x_plot.set_3d_properties(rosenbrock(pts[:xi]))
    return animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=n_frames, interval=20, blit=True)
In [473]:
x=powell([-1,-1],rosenbrock,alpha=.1,eps=1e-5,max_iter=100);
anim = plot_ros_powell(x,n_frames=50,dpi=150,figsize=(10,8))
anim.save('direction-set.gif', writer='imagemagick', fps=10)
#display_animation(anim)
In [488]:
def conj_grad(f,df,x0,eps=1e-6,max_iter=100):
    dnew = -df(x0) #take starting direction as steepest descent
    x1 = x0 + linemin(x0,dnew,rosenbrock,just_result=True)*dnew
    x = array([x0,x1])
    mag2 = lambda x: dot(x,x)
    i=0
    while(True):
        i=i+1
        gamma = mag2(df(x[-1]))/mag2(df(x[-2]))
        dnew = df(x[-1]) + gamma*dnew
        a = linemin(x[-1],dnew,f,alpha=alpha,just_result=True)
        x = vstack((x,array([x[-1]+a*dnew])))
        if abs(a)<eps or i>max_iter: break
    return x
def plot_ros_cg(pts,n_frames = 20, figsize=(8, 6), dpi=80):
    n_pts = shape(pts)[0]
    fig = plt.figure(figsize=figsize, dpi=dpi)
    ax = fig.add_subplot(111, projection='3d')
    ax.axis('off')
    ax.set_xlim((-1, 1))
    ax.set_ylim((-1, 1))
    N = 100;
    x = linspace(-1.5,2,N); y = linspace(-1.5,2,N);
    X,Y = meshgrid(x,y)
    Z = rosenbrock(dstack((X,Y)))
    x_plot, = ax.plot([], [], [], marker='.',linestyle='-', c='r')
    def init():
        ax.plot_wireframe(X, Y, Z, rstride=10, cstride=10)
        x_plot.set_data([], [])
        x_plot.set_3d_properties([])
    def animate(i):
        ax.azim = 360/n_frames*i
        ax.elev = 30
        xi = ceil(i*n_pts/n_frames)
        x_plot.set_data(pts[:xi,0],pts[:xi,1]);
        x_plot.set_3d_properties(rosenbrock(pts[:xi]))
    return animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=n_frames, interval=20, blit=True)
In [489]:
x = conj_grad(rosenbrock,d_rosenbrock,[-1,-1])
anim = plot_ros_cg(x,n_frames=50,dpi=150,figsize=(10,8))
-c:12: RuntimeWarning: invalid value encountered in double_scalars
-c:9: RuntimeWarning: invalid value encountered in double_scalars
-c:10: RuntimeWarning: invalid value encountered in double_scalars
-c:3: RuntimeWarning: overflow encountered in multiply
-c:13: RuntimeWarning: overflow encountered in double_scalars
-c:28: RuntimeWarning: overflow encountered in double_scalars
-c:25: RuntimeWarning: invalid value encountered in double_scalars
-c:13: RuntimeWarning: invalid value encountered in double_scalars
-c:28: RuntimeWarning: invalid value encountered in double_scalars

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-489-5ca3124d83f5> in <module>()
----> 1 x = conj_grad(rosenbrock,d_rosenbrock,[-1,-1])
      2 anim = plot_ros_cg(x,n_frames=50,dpi=150,figsize=(10,8))

<ipython-input-488-42756079793a> in conj_grad(f, df, x0, eps, max_iter)
      9         gamma = mag2(df(x[-1]))/mag2(df(x[-2]))
     10         dnew = df(x[-1]) + gamma*dnew
---> 11         a = linemin(x[-1],dnew,f,alpha=alpha,just_result=True)
     12         x = vstack((x,array([x[-1]+a*dnew])))
     13         if abs(a)<eps or i>max_iter: break

<ipython-input-466-9b8629d93906> in linemin(xi, d, f, alpha, eps, just_result)
      2     #golden section line minimization
      3     fs = lambda a: f(xi+a*d) #scalar
----> 4     x0,x1,x2 = mnbrak(0,alpha,fs)
      5     #x0,x1,x2 = map(lambda a: x0+a*d,mnbrak(0,alpha,fs))
      6     x = array([[x0,x1,x2]])

<ipython-input-433-69e674cd6457> in mnbrak(ax, bx, f, glimit, eps)
      8     while True:
      9         r=(bx-ax)*(f(bx)-f(cx)) #do parabolic fit
---> 10         q=(bx-cx)*(f(bx)-f(ax))
     11         denom = q-r if q-r!=0 else eps*sgn(q-r)
     12         u=bx-((bx-cx)*q-(bx-ax)*r)/(2*denom)

<ipython-input-466-9b8629d93906> in <lambda>(a)
      1 def linemin(xi,d,f,alpha=1,eps=1e-3,just_result=False):
      2     #golden section line minimization
----> 3     fs = lambda a: f(xi+a*d) #scalar
      4     x0,x1,x2 = mnbrak(0,alpha,fs)
      5     #x0,x1,x2 = map(lambda a: x0+a*d,mnbrak(0,alpha,fs))

KeyboardInterrupt: 
In []: