using PyPlot

# 14.1 - A
function plot_rosen(f)
    X = [-1.5:0.01:1.5]
    Y = [-1.0:0.01:3]

    F = [f([x, y]) for x in X, y in Y]

    Xs = [x for x in X, y in Y]
    Ys = [y for x in X, y in Y]
    plot_surface(Xs, Ys, F, alpha=0.2)
end

const C = 100
f(x) = (1-x[1])^2 + C*(x[2]-x[1]^2)^2
# get the algebraic gradient
dfdx(x) = [-2*(1-x[1]) - 4C*(x[2]-x[1]^2)*x[1],
           2C*(x[2]-x[1]^2)]
plot_rosen(f)

PyObject <mpl_toolkits.mplot3d.art3d.Poly3DCollection object at 0x10bbd7050>

Warning: redefining constant C

# 14.1 - B, The Downhill Simplex Method

function simplex_mean(s, chosen)
    # takes the mean of the simplex, excluding the chosen point
    m = [0, 0]
    for c in 1:3
        if c != chosen
            m += s[c]
        end
    end
    return m ./ 2
end

reflect(x, mean) = 2mean - x
reflect_stretch(x, mean) = 3mean - 2x
reflect_shrink(x, mean) = 3mean/2 - x/2
shrink(x, mean) = (mean + x) / 2

function shrink_all!(s)
    # assume the simplex has already been sorted in best->worst order
    s[:,2] = (s[:,2] + s[:,1])/2
    s[:,3] = (s[:,3] + s[:,1])/2
    return s
end

feval_simplex(s) = reshape(mapslices(f, s, 1), 3)
@assert feval_simplex([0 0; 1 1; 2 2]') == [f([0, 0]), f([1, 1]), f([2, 2])]

function next_simplex!(s)
    # modifies the given simplex and returns the number of function evals in this iteration
    res = feval_simplex(simplex)
    # 3 evals
    sortidxs = sortperm(res)
    s[:] = s[:, sortidxs]
    res = res[sortidxs]
    
    mn = (s[:,1] + s[:,2]) / 2
    x_ref = reflect(s[:,3], mn)
    v_ref = f(x_ref)
    # 4 evals
    if v_ref < res[1]
        # reflecting got us the best point, how about stretching?
#        println("reflecting = great")
        x_ref_str = reflect_stretch(s[:,3], mn)
        v_ref_str = f(x_ref_str)
        # 5 evals
        if v_ref_str < v_ref
#            println("reflecting + stretching = good")
            # stretching got us an even better point, we've got our next simplex
            s[:,3] = x_ref_str
            return 5
        end
        # OK, reflecting helped but stretching didn't
        s[:,3] = x_ref
        return 5
    elseif v_ref < res[2]
#        println("reflecting = good")
        # after reflecting it's no longer the worst but not the best. good enough
        s[:,3] = x_ref
        return 4
    end
#    println("reflecting = bad")
    # after reflecting this is still the worst
    x_ref_shr = reflect_shrink(s[:,3], mn)
    v_ref_shr = f(x_ref_shr)
    # 5 evals
    if v_ref_shr < res[2]
#        println("reflecting + shrinking = good")
        s[:,3] = x_ref_shr
        return 5
    end
    # reflecting and shrinking didn't work, how about just shrinking
    x_shr = shrink(s[:,3], mn)
    v_shr = f(x_shr)
    # 6 evals
    if v_shr < res[2]
#        println("shrinking = good")
        s[:,3] = x_shr
        return 6
    end
#    println("shrinking everything")
    # wow, still the worst after all that. Let's just shrink everybody!
    shrink_all!(s)
    return 6
end

function plot_simplex(s)
    data = vcat(s, feval_simplex(s)')
    data = hcat(data, data[:,1])
    
    plot3D(data[1,:]', data[2,:]', zs=data[3,:]', color="black")
end

simplex = [-1 -1; -0.9 -1; -1 -0.9]'
last_val = maximum(feval_simplex(simplex))
static_count = 0
evals = 0
#simplex = [0 3; 0.1 3; 0 3.1]'
plot_rosen(f)
plot_simplex(simplex)
while true
    evals += next_simplex!(simplex)
    plot_simplex(simplex)
    val = maximum(feval_simplex(simplex))
    if abs(val - last_val) < 0.01
        static_count += 1
        if static_count > 10
            break
        end
    else
        static_count = 0
    end
    last_val = val
end
gca()[:view_init](elev=60, azim=20)
println("Final simplex after $(evals) evals:")
println(simplex)

Final simplex after 220 evals:
1.0161401221528772	.9873635411262629	.9877134772017717
1.0328986672684621	.976464021205909	.9744354104623267

# Lets try implementing a line search

# ye goldene ratio
const gold = (1+sqrt(5))/2

function find_enclosing(x_init, d)
    # finds an enclosing set of 2 points with one in the middle
    # takes a starting point and direction vector
    # returns a 6-tuple with (x1, y1, x2, y2, x3, y3)
    
    x1 = x_init
    y1 = f(x1)
    x2 = x1 + d
    y2 = f(x2)
    
    scatter3D(x1[1], x1[2], zs=f(x1), color="red", s=5)

    if y2 > y1
        # uh oh, we either overshot already or we're going in the wrong direction
        xtemp = x1
        ytemp = y1
        x1 = x2
        y1 = y2
        x2 = xtemp
        y2 = ytemp
    end
    x3 = x2 + (x2 - x1) * gold
    y3 = f(x3)
    plot3D([x1[1], x2[1]], [x1[2], x2[2]], zs=[y1, y2], color="red")
    scatter3D(x2[1], x2[2], zs=y2, color="red", s=5)
    plot3D([x2[1], x3[1]], [x2[2], x3[2]], zs=[y2, y3], color="red")
    scatter3D(x3[1], x3[2], zs=y3, color="red", s=5)
    while y3 < y2
        x1 = x2
        y1 = y2
        x2 = x3
        y2 = y3
        x3 = x2 + (x2 - x1) * gold
        y3 = f(x3)
        plot3D([x2[1], x3[1]], [x2[2], x3[2]], zs=[y2, y3], color="red")
        scatter3D(x3[1], x3[2], zs=y3, color="red", s=5)
    end
    (x1, y1, x2, y2, x3, y3)
end

function find_minimum(x1, y1, x2, y2, x3, y3, thresh)
    # assumes that x1 and x3 form a bracket that encloses the minimum, and x2 is an intermediate point
    # that forms a golden section between x1 and x3. It is also assumed that the smaller interval is
    # between x1 and x2. It is NOT assumed that x3 > x1.
    iterations = 0

    while norm(x3 - x1) > thresh && iterations < 10000
        #println("norm(x3-x1) = $(norm(x3 - x1))")
        #println("x1 = ($(x1[1]), $(x1[2])), y1 = $(y1)")
        #println("x2 = ($(x2[1]), $(x2[2])), y2 = $(y2)")
        #println("x3 = ($(x3[1]), $(x3[2])), y3 = $(y3)")
        for i in 1:2
            @assert (x1[i] <= x2[i] <= x3[i]) || (x1[i] >= x2[i] >= x3[i])
        end
        x4 = x1 + x3 - x2
        y4 = f(x4)
        if y4 > y2
            # here we also flip the order so that the small interval is first
            x3 = x1
            y3 = y1
            x1 = x4
            y1 = y4        
        else
            x1 = x2
            y1 = y2
            x2 = x4
            y2 = y4
        end
        iterations += 1
    end
#    println("Found minimum in $(iterations) iterations")
    (x2, y2)
end

line_minimize(x, d, thresh=1e-5) = find_minimum(find_enclosing(x, d)..., thresh)

hello

# use line minimization to find a minimum using Powell's Method

plot_rosen(f)
# set an initial value for x
x_0 = [-1.0, -0.5]
y_0 = f(x_0)
# set an initial set of directions to the axes
ds = eye(2, 2) .* 0.1
#ds = [0.0 1; 1 0]
ys = zeros(size(ds, 2))
iterations = 0
while iterations < 100
    x = x_0
    # search in each direction
    for i in 1:size(ds, 2)
        d = ds[:, i]
        #println("minimizing from ($(x[1]), $(x[2]))")
        # track the new y for each direction minimization
        x, ys[i] = line_minimize(x, d)
        #println("minimum in d=($(d[1]), $(d[2])) was ($(x[1]), $(x[2]))")
        scatter3D(x[1], x[2], zs=ys[i], color="purple", s=20)
    end
    d_new = x - x_0
    #d_new = d_new / norm(d_new)
    i = indmax(ds' * d_new)
    ds[:, i] = d_new
    #println("replacing direction $i with ($(d_new[1]), $(d_new[2]))")
    #println("---------")
    iterations += 1
    if norm(x - x_0) < 1e-7
        break
    end
    x_0 = x
end
println("Finished in $(iterations) iterations")

x = x_0
y = ys[end]
println("x = ($(x[1]), $(x[2]))")
println("y = $y")

gca()[:view_init](elev=60, azim=40)

Finished in 14 iterations
x = (0.9999948501361238, 0.9999902850064573)
y = 6.070940605450799e-11

# now we'll try to use Conjugate Gradient Descent

plot_rosen(f)
# set an initial value for x
x = [-1.0, -0.5]
y = f(x)
# set an initial direction
grad = dfdx(x)
d = -grad
iterations = 0
while iterations < 100 # emergency stop
    x_new, y_new = line_minimize(x, d / norm(d))
    iterations += 1
    if norm(x_new - x) < 1e-7
        break
    end
    grad_new = dfdx(x_new)
    gamma = dot(grad_new, (grad_new - grad)) / dot(grad, grad)
    if gamma < 0
        gamma = 0
    end
    d = grad_new + gamma * d
    x = x_new
    y = y_new
    scatter3D(x[1], x[2], zs=y, color="purple", s=20)

end
println("Finished in $(iterations) iterations")
println("x = ($(x[1]), $(x[2]))")
println("y = $y")

gca()[:view_init](elev=60, azim=40)

Finished in 100 iterations
x = (0.7055051820165372, 0.4968664005300224)
y = 0.08680309002403648

x = [1, 3, 4]
dot(x, x)

26

# This data set is shared between the following runs
J = randn(100);

min_energy(J) = sum([-abs(j) for j in J])
@assert min_energy([10]) == -10
@assert min_energy([-8, 2]) == -10

function energy(S, J)
    E = 0
    for i in 1:(length(S)-1)
        E -= J[i]*S[i]*S[i+1]
    end
    E -= J[end]*S[end]*S[1]
    return E
end

energy (generic function with 1 method)

# 14.2

using PyPlot

function sim_anneal(N, alpha)
    S = rand(0:1, 100) * 2 - 1
    energies = zeros(N)
    E_prev = energy(S, J)

    for t in 1:length(energies)
        beta = alpha * t
        i = rand(1:length(S))
        S[i] = -S[i]
        E_new = energy(S, J)
        if E_new < E_prev
            E_prev = E_new
            energies[t] = E_prev
            continue
        end
        prob = e^(-beta * (E_new - E_prev))
        if rand() < prob
            E_prev = E_new
            energies[t] = E_prev
            continue
        end
        # we're not accepting the change, so reverse it
        S[i] = -S[i]
        energies[t] = E_prev
    end
    return energies
end

println("Theoretical Minimum Energy: $(min_energy(J))")

N = 5000
axhline(min_energy(J), label="Theoretical Minimum", color="purple")
for i in 1:50
    energies001 = sim_anneal(N, 0.001)
    energies01 = sim_anneal(N, 0.01)
    energies1 = sim_anneal(N, 0.1)

    plot(energies1, color="red", alpha=0.2, label=L"$\alpha$ = 0.1")
    plot(energies01, color="green", alpha=0.2, label=L"$\alpha$ = 0.01")
    plot(energies001, color="blue", alpha=0.2, label=L"$\alpha$ = 0.001")
    if i == 1
        legend()
    end
end

Theoretical Minimum Energy: -84.35388391578405

# 14.3 Genetic Algorithms

using PyPlot

function get_multinomial(threshs)
    p = rand()
    # yeah, we could do binary rather than linear search here...
    for i in 1:length(threshs)
        if threshs[i] > p
            return i
        end
    end
    return length(threshs)
end

Emin = min_energy(J)

beta = 10
M = length(J) # number of spins
N = 100 # size of population
T = 200 # number of generations
energy_plot = zeros(T)

for beta in [13, 10, 1, 0.1, 0.01]
    S = rand(0:1, M, N) * 2 - 1
    for t in 1:T
        energies = mapslices(v -> energy(v, J), S, 1)
        energy_plot[t] = minimum(energies)
        fitness = e.^(-beta*(energies .- Emin))
        # normalize
        probs = fitness ./ sum(fitness)
        threshs = cumsum(probs)
        #println("HERE 1")
        S_new = zeros(M, N)
        for i in 1:N
            parent1 = get_multinomial(threshs)
            parent2 = get_multinomial(threshs)
            #println("HERE 2")
            xover = rand(1:M)
            S_new[:, i] = vcat(S[1:xover, parent1], S[xover+1:end, parent2])
            # now randomly flip a bit
            #println("HERE 3")
            mutation = rand(1:M)
            S_new[mutation, i] = -S_new[mutation, i]
        end
        S = S_new
    end
    plot(energy_plot, label="beta = $(beta)")
end


axhline(Emin, label="Theoretical Minimum")
legend()

PyObject <matplotlib.legend.Legend object at 0x11cf616d0>