import numpy as np
import matplotlib.pyplot as plt
noise_sd = .5
x = np.random.random(100)
y = 2 + 3*x + np.random.normal(0, noise_sd, 100)
X = np.ones((100, 2))
X[:, 1] = x
U, S, Vt = np.linalg.svd(X, full_matrices=False)
V = Vt.transpose()
error_sd = noise_sd*np.sum((V*V)/(S*S), axis=1)
print(error_sd)
def svd_inv(S):
    S = 1/S
    for i in range(len(S)):
        if S[i] == np.inf:
            S[i] = 0
            
    return S
a = []
for i in range(100):
    idx = np.random.choice(100, 100, replace=True)
    new_X = X[idx]
    new_y = y[idx]
    
    U, S, Vt = np.linalg.svd(new_X, full_matrices=False)
    V = Vt.transpose()
    
    S_inv = svd_inv(S)
    a.append(np.matmul(np.matmul(np.matmul(V, np.diag(S_inv)), 
                                  U.transpose()), new_y))
error_sd = np.std(a, axis=0)
print(error_sd)
a = []
for i in range(100):
    x = np.random.random(100)
    new_y = 2 + 3*x + np.random.normal(0, noise_sd, 100)
    new_X = np.ones((100, 2))
    new_X[:, 1] = x
    
    U, S, Vt = np.linalg.svd(new_X, full_matrices=False)
    V = Vt.transpose()
    
    S_inv = svd_inv(S)
    a.append(np.matmul(np.matmul(np.matmul(V, np.diag(S_inv)), 
                                  U.transpose()), new_y))
    
error_sd = np.std(a, axis=0)
print(error_sd)
noise_sd = .1
x = np.random.random(100)
y = np.sin(2 + 3*x) + np.random.normal(0, noise_sd, 100)
def LM(x, y, a, b, l, adapt=False):
    a = [a]
    b = [b]
    l = [l]
    error = np.sum((y-np.sin(a[-1]+b[-1]*x))**2)
    for step in range(10**3):
        grad = -2*np.array([np.sum((y - np.sin(a[-1]+b[-1]*x))*np.cos(a[-1]+b[-1]*x)),
                              np.sum((y - np.sin(a[-1]+b[-1]*x))*np.cos(a[-1]+b[-1]*x)*x)])
        
        H = 2*np.array([[np.sum(np.cos(a[-1]+b[-1]*x)**2), np.sum(x*(np.cos(a[-1]+b[-1]*x)**2))],
                        [np.sum(x*(np.cos(a[-1]+b[-1]*x)**2)), np.sum(x*x*(np.cos(a[-1]+b[-1]*x)**2))]])
        M = .5*H*np.array([[1+l[-1], 1], [1, 1+l[-1]]])
        D = -1 * np.matmul(np.linalg.inv(M), grad.transpose())
        a.append(a[-1] + D[0])
        b.append(b[-1] + D[1])
        new_error = np.sum((y-np.sin(a[-1]+b[-1]*x))**2)
    
        if adapt:
            if new_error < error:
                l.append(l[-1]*.8)
            else:
                l.append(l[-1]*1.3)
        error = new_error
    print('Estimation: a=%f, b=%f' % (a[-1], b[-1]))
    print('Mean Squered Error = %f' % error)
    
    print('Estimated function:')
#     fig, ax = plt.subplots(ncols=1, nrows=4)
#     ax[0] = plt.scatter(x, np.sin(a[-1]+b[-1]*x))
#     ax[0].set_label('Estimated function')
    plt.scatter(x, np.sin(a[-1]+b[-1]*x))
    plt.show()
    
    print('Convergence of a:')
    plt.plot(np.arange(len(a)), a)
    plt.show()
    
    print('Convergence of b:')
    plt.plot(np.arange(len(b)), b)
    plt.show()
    
    if adapt:
        print('Lambda:')
        plt.plot(np.arange(len(l)), l)
        plt.show()
LM(x, y, 1, 1, 100, adapt=False)
LM(x, y, 1, 1, 1, adapt=False)
LM(x, y, 1, 1, 100, adapt=True)
LM(x, y, 1, 1, 1, adapt=True)
 
 
An optimal unbiased estimator will be such that: $$ \sigma^2(f) = \frac{1}{J(\mu)} $$
Therefore, $$ \sigma^2(f) = \sigma^2 $$
And by the definition of unbiased estimator, $$ \langle f \rangle = \mu $$
Now, let's look at the estimator $$ f(X) = \frac{1}{N} \sum_{n=1}^{N} x_n = \bar{x} $$
$$ \langle f \rangle = \left \langle \frac{1}{N} \sum_{n=1}^{N} x_n \right \rangle = \frac{1}{N} \sum_{n=1}^{N} \langle x_n \rangle = \mu $$$$ \sigma^2(f) = \left \langle \left [ (\frac{1}{N} \sum_{n=1}^{N} x_n) - \mu \right ]^2 \right \rangle = \left \langle \left [ \frac{1}{N} (\sum_{n=1}^{N} x_n - N \mu) \right ]^2 \right \rangle = \left \langle \left [ \frac{1}{N} \sum_{n=1}^{N} (x_n - \mu) \right ]^2 \right \rangle = \left \langle \frac{1}{N} \sum_{n=1}^{N} (x_n - \mu)^2 \right \rangle = \sigma^2 $$Therefore, f meets the requirements for an optimal estimator!