from __future__ import division
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D, proj3d
from matplotlib.colors import LogNorm
from matplotlib import rc
import matplotlib.animation as animation
import seaborn as sns

rc('animation', html='html5')
sns.set(style='whitegrid', rc={'figure.figsize': (10, 8)})
The general formula for the function is
$f(x, y) = (a - x)^2 + b(y - x^2)^2,$
which has a global minimum at $(a, a^2)$.
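To see why, set both partial derivatives to zero:
$\frac{\partial f}{\partial x} = -2(a - x) - 4bx(y - x^2) = 0, \qquad \frac{\partial f}{\partial y} = 2b(y - x^2) = 0.$
The second equation forces $y = x^2$, and substituting into the first gives $x = a$, so the unique critical point is $(a, a^2)$ with $f(a, a^2) = 0$.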
The function is nonconvex, and it has many points that are close to being a minimum, which makes it a good stress test for optimization routines. I graph the function below:
Neil has given us:
$f(x, y) = (1 - x)^2 + 100(y - x^2)^2$
and thus I expect a minimum at $(1,1)$.
# Define the Rosenbrock function
def rosenbrock(x, a=1, b=100):
    """Rosenbrock function of x = (x[0], x[1]) with minimum of 0 at (a, a**2)."""
    y = x[1]
    x = x[0]
    return (a - x)**2 + b*(y - x**2)**2
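As a quick sanity check, the function should be exactly zero at the minimum; a minimal snippet using the rosenbrock definition above:
print(rosenbrock([1.0, 1.0]))   # 0.0 at the minimum (1, 1)
print(rosenbrock([0.0, 0.0]))   # 1.0, since (1 - 0)^2 + 100*(0 - 0)^2 = 1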
x = np.linspace(-2, 2, 500)
y = np.linspace(-2, 2, 500)
X, Y = np.meshgrid(x, y)
Z = rosenbrock([X, Y])
fig = plt.figure(figsize=(16, 6))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122, projection="3d")
ax1.set_xlim([-2, 2])
ax1.set_ylim([-2, 2])
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.set_zlim([0, 2500])
# Color mesh
ax1.set_facecolor("white")
ax1.pcolormesh(X, Y, Z, cmap=matplotlib.cm.jet, norm=LogNorm())
ax1.scatter(1, 1, color="k")
ax1.annotate('Global Min', xy=(1, 1), xytext=(-0.5, 1.25),
             arrowprops=dict(facecolor='black', shrink=0.05))
# Surface plot
ax2.set_facecolor("white")
ax2.plot_surface(X, Y, Z, norm=LogNorm(), cmap=matplotlib.cm.jet, linewidth=0)
ax2.view_init(azim=65, elev=25)
ax2.scatter(1., 1., 0., color="k")
# Project the 3D minimum into 2D display coordinates so we can annotate it
xa, ya, _ = proj3d.proj_transform(1, 1, 0, ax2.get_proj())
ax2.annotate("Global Min", xy=(xa, ya), xytext=(-20, 30), textcoords='offset points',
             ha='right', va='bottom', arrowprops=dict(facecolor='black', shrink=0.05))
plt.tight_layout()
plt.show()
Here I discuss the downhill simplex (or Nelder-Mead) algorithm. The main idea is simple: maintain a simplex of $n+1$ points, and at each step replace the worst vertex with a better one by reflecting, expanding, or contracting it through the centroid of the remaining vertices, shrinking the whole simplex when nothing else helps.
Following Neil's book and this paper (Nice Nelder-Mead Paper!), I set up the algorithm's operations as the following steps (a short code sketch of the operations appears after the list):
1. There are two ways to obtain the initial simplex. The first is to simply pass in a simplex $\Delta$ as the guess. The second is to pass in a single point, $x_0$, and build the simplex around it (we use $x_0$ as one vertex and $x_i := x_0 + \varepsilon e_i$ for the $n$ other vertices).
2. Once we have the simplex, evaluate the function at each vertex and sort the vertices so that $f(x_1) \leq f(x_2) \leq \dots \leq f(x_{n+1})$, then compute the centroid of the best $n$ vertices, $\bar{x} := \frac{1}{n} \sum_{i=1}^n x_i$.
3. If $|f(\bar{x}) - f(x_1)|$ (or another convergence metric of your choosing) is below the tolerance, return $x_1$ as the minimum; otherwise proceed.
4. Create the reflected point $x_r := \bar{x} + \alpha(\bar{x} - x_{n+1})$.
4.1 If $f(x_1) \leq f(x_r) < f(x_n)$, replace $x_{n+1}$ with $x_r$.
4.2 Return to step 2.
5. If $f(x_r) < f(x_1)$, create the expanded point $x_e := \bar{x} + \beta(x_r - \bar{x})$.
5.1 If $f(x_e) < f(x_r)$, replace $x_{n+1}$ with $x_e$.
5.2 Otherwise, replace $x_{n+1}$ with $x_r$.
5.3 Return to step 2.
6. If $f(x_n) \leq f(x_r) \leq f(x_{n+1})$, create the outside contraction point $x_{oc} := \bar{x} + \gamma(x_r - \bar{x})$.
6.1 If $f(x_{oc}) \leq f(x_r)$, replace $x_{n+1}$ with $x_{oc}$.
6.2 Otherwise, shrink the points towards $x_1$ by setting $x_i := x_1 + \delta(x_i - x_1)$ for $i = 2, \dots, n+1$.
6.3 Return to step 2.
7. If $f(x_r) > f(x_{n+1})$, create the inside contraction point $x_{ic} := \bar{x} - \gamma(x_r - \bar{x})$.
7.1 If $f(x_{ic}) \leq f(x_r)$, replace $x_{n+1}$ with $x_{ic}$.
7.2 Otherwise, shrink the points towards $x_1$ as in step 6.2.
7.3 Return to step 2.
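Here is a minimal sketch of these operations in code; the helper names are my own, and the parameter defaults shown are the standard NMS values ($\alpha = 1$, $\beta = 2$, $\gamma = 1/2$, $\delta = 1/2$):
# A minimal sketch of the simplex operations; xbar is the centroid of the
# best n vertices and xworst is the current worst vertex
def reflect(xbar, xworst, alpha=1.):
    return xbar + alpha*(xbar - xworst)    # step 4: x_r

def expand(xbar, xr, beta=2.):
    return xbar + beta*(xr - xbar)         # step 5: x_e

def outside_contract(xbar, xr, gamma=.5):
    return xbar + gamma*(xr - xbar)        # step 6: x_oc

def inside_contract(xbar, xr, gamma=.5):
    return xbar - gamma*(xr - xbar)        # step 7: x_ic

def shrink(vertices, delta=.5):
    # steps 6.2/7.2: pull every vertex toward the best vertex, vertices[0]
    return [vertices[0] + delta*(v - vertices[0]) for v in vertices]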
That is the entire algorithm: as previously stated, it simply applies our main operations repeatedly until we converge. I have written a simple implementation below.
def nmd(f, x0, method="ANMS", tol=1e-8, maxit=1e4, iter_returns=None):
"""
Parameters
----------
f : callable
Function to minimize
x0 : scalar(float) or array_like(float, ndim=1)
The initial guess for minimizing
method : string or tuple(floats)
If a string, should specify ANMS or NMS then will use specific
parameter values, but also can pass in a tuple of parameters in
order (alpha, beta, gamma, delta), which are the reflection,
expansion, contraction, and contraction parameters
tol : scalar(float)
The tolerance level to achieve convergence
    maxit : scalar(int)
        The maximum number of iterations allowed
    iter_returns : list(int) or None
        If not None, the iterations at which the current simplex should be
        stored for later inspection
    """
#-----------------------------------------------------------------#
# Set some parameter values
#-----------------------------------------------------------------#
init_guess = x0
fx0 = f(x0)
dist = 10.
curr_it = 0
# Get the number of dimensions we are optimizing
n = np.size(x0)
    # Use the Adaptive Nelder-Mead Simplex parameters by default; note that
    # for n = 2 these coincide with the standard NMS values
    if method == "ANMS":
        alpha = 1.
        beta = 1. + (2./n)
        gamma = .75 - 1./(2.*n)
        delta = 1. - (1./n)
    # Otherwise can use standard parameters
    elif method == "NMS":
        alpha = 1.
        beta = 2.
        gamma = .5
        delta = .5
    elif isinstance(method, tuple):
        alpha, beta, gamma, delta = method
    else:
        raise ValueError("method must be 'ANMS', 'NMS', or a 4-tuple of parameters")
#-----------------------------------------------------------------#
# Create the simplex points and do the initial sort
#-----------------------------------------------------------------#
    simplex_points = np.empty((n+1, n))
    pt_fval = [(x0, fx0)]
    simplex_points[0, :] = x0
    for ind, elem in enumerate(x0):
        # Perturb the ind-th coordinate to create the remaining n vertices
        if np.abs(elem) < 1e-14:
            curr_tau = 0.00025
        else:
            curr_tau = 0.05
        curr_point = np.squeeze(np.eye(1, M=n, k=ind)*curr_tau + x0)
        simplex_points[ind+1, :] = curr_point
        pt_fval.append((curr_point, f(curr_point)))
if iter_returns is not None:
ret_points = []
else:
ret_points = None
#-----------------------------------------------------------------#
# The Core of The Nelder-Mead Algorithm
#-----------------------------------------------------------------#
    while dist > tol and curr_it < maxit:
        # 1: Sort and find new center point (excluding worst point)
        pt_fval = sorted(pt_fval, key=lambda v: v[1])
        xbar = np.zeros(n)
        for i in range(n):
            xbar = xbar + pt_fval[i][0]/n
if iter_returns is not None and curr_it in iter_returns:
ret_points.append(pt_fval)
# Define useful variables
x1, f1 = pt_fval[0]
xn, fn = pt_fval[n-1]
xnp1, fnp1 = pt_fval[n]
# 2: Reflect
xr = xbar + alpha*(xbar - pt_fval[-1][0])
fr = f(xr)
if f1 <= fr < fn:
# Replace the n+1 point
xnp1, fnp1 = (xr, fr)
pt_fval[n] = (xnp1, fnp1)
elif fr < f1:
# 3: expand
xe = xbar + beta*(xr - xbar)
fe = f(xe)
if fe < fr:
xnp1, fnp1 = (xe, fe)
pt_fval[n] = (xnp1, fnp1)
else:
xnp1, fnp1 = (xr, fr)
pt_fval[n] = (xnp1, fnp1)
elif fn <= fr <= fnp1:
# 4: outside contraction
xoc = xbar + gamma*(xr - xbar)
foc = f(xoc)
if foc <= fr:
xnp1, fnp1 = (xoc, foc)
pt_fval[n] = (xnp1, fnp1)
else:
# 6: Shrink
for i in range(1, n+1):
curr_pt, curr_f = pt_fval[i]
# Shrink the points
new_pt = x1 + delta*(curr_pt - x1)
new_f = f(new_pt)
# Replace
pt_fval[i] = new_pt, new_f
elif fr >= fnp1:
# 5: inside contraction
xic = xbar - gamma*(xr - xbar)
fic = f(xic)
if fic <= fr:
xnp1, fnp1 = (xic, fic)
pt_fval[n] = (xnp1, fnp1)
else:
# 6: Shrink
for i in range(1, n+1):
curr_pt, curr_f = pt_fval[i]
# Shrink the points
new_pt = x1 + delta*(curr_pt - x1)
new_f = f(new_pt)
# Replace
pt_fval[i] = new_pt, new_f
# Compute the distance and increase iteration counter
dist = abs(fn - f1)
curr_it = curr_it + 1
if curr_it == maxit:
raise ValueError("Max iterations; Convergence failed.")
    if iter_returns is not None:
        return x1, f1, curr_it, ret_points
else:
return x1, f1, curr_it
iterstosee = [0, 1, 2, 3, 4, 5, 10, 20, 50, 75, 90, 95]
x, fx, its, ret_tris = nmd(rosenbrock, x0=np.array([-1, -1.]), tol=1e-12, iter_returns=iterstosee)
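As an optional sanity check (assuming SciPy is available), we can compare against SciPy's own Nelder-Mead implementation:
from scipy.optimize import minimize

# SciPy's reference Nelder-Mead should land on (1, 1) as well
res = minimize(rosenbrock, np.array([-1., -1.]), method="Nelder-Mead")
print(res.x)   # should be close to [1., 1.]
print(x, fx)   # the result from our implementation above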
fig, axs = plt.subplots(nrows=6, ncols=2, figsize=(16, 24))
axs = axs.flatten()
# Color mesh with the current simplex overlaid at selected iterations
for i, curr_ax in enumerate(axs):
    curr_simplex = np.vstack([pt for pt, fval in ret_tris[i]])
    curr_ax.pcolormesh(X, Y, Z, cmap=matplotlib.cm.jet, norm=LogNorm())
    curr_ax.set_title("Simplex at iteration %i" % iterstosee[i])
    curr_ax.scatter(curr_simplex[:, 0], curr_simplex[:, 1])
plt.tight_layout()
plt.show()
print("Minimum found at x =", x)
print("Function value f(x) = %.2e" % fx)
print("Converged in %i iterations" % its)