import matplotlib.pyplot as plt
import numpy as np

12.1¶

n = 100
sd = 0.5

x = np.linspace(0,1,n)
y = np.zeros(n)
sol = np.zeros(n)
for i in range(n):
    y[i] = 3*x[i] + 2 + np.random.normal(0, sd)
    sol[i] = 3*x[i] + 2

plt.plot(x, sol)
plt.scatter(x, y) #[25:50], D[25:50]) # [from:to]

<matplotlib.collections.PathCollection at 0x26b7a9a2588>

def SVD(ex, why):
    # ok, we have to shape an A https://www.youtube.com/watch?v=N9uf0YGDaWk
    # such that [[a1, b1],
    #            [a2, b2], ... ]
    A = np.matrix(np.append(np.ones([n, 1]), ex.reshape(n,1), 1))
    # A # shape for ax + b coeff, 
    U, sigma, VT = np.linalg.svd(A) # np solves the svd,
    # to extract fit parameters for this two-d case, 
    # we can go line by line,
    b = why.reshape(n,1)
    r = [0.0, 0.0]
    r += (1 / sigma[0]) * (U[:,0].T*b) * (VT[:,0].T)
    r += (1 / sigma[1]) * (U[:,1].T*b) * (VT[:,1].T)
    return r, A, U, sigma, VT

To extract the parameters, our 'x-hat', we want something like:
$ \hat{x} = V \Sigma U^\intercal b $

Now this r should contain the parameters for the fit

r, A, U, sigma, VT = SVD(x,y)
print(r)
print("svd: ")
print("a = " + str(r[0,1]))
print("b = " + str(r[0,0]))
svdsol = np.zeros(n)
for i in range(n):
    svdsol[i] = r[0,1]*x[i] + r[0,0]
plt.plot(x, sol)
plt.plot(x, svdsol)
plt.scatter(x, y)

[[1.99579495 3.03570368]]
svd: 
a = 3.035703677267401
b = 1.995794946038664

<matplotlib.collections.PathCollection at 0x26b7b7a4e08>

a) Error by 12.34¶

$$ \frac{\sigma^2_{n,i}}{\sigma^2_\zeta} = \sum{j}\frac{V^2_{ij}}{w^2_j} $$

meaning...

VT[0,1]

-0.4723720409409031

e1 = np.sum((VT*VT)/(sigma*sigma), axis=1) # axis sums 'along' this axis...

e1

matrix([[0.00788664],
        [0.14914306]])

b) by bootstrap sampling to generate 100 sets¶

ok, do this 100 times w/ random sets of samples drawn from the original
ok, can plot, how do I define errors against these?

for s in range(100): ## 100 new sets 
    rint = np.random.randint(0,100,100)
    xn = np.zeros(100) # new x, 
    yn = np.zeros(100) # new y, 
    for i in range(100): ## of 100 pts 
        xn[i] = x[rint[i]]#x[np.random.randint(0,100)]
        yn[i] = y[rint[i]]#[np.random.randint(0,100)]
    r, A, U, sigma, VT = SVD(xn, yn)
    tempsol = np.zeros(100)
    for j in range(100):
        tempsol[j] = r[0,1]*x[j] + r[0,0]
    plt.plot(x, tempsol)

no idea what's up here

c) by generating 100 new data sets¶

for s in range(100):
    xs = np.linspace(0,1,n)
    ys = np.zeros(n)
    for i in range(n):
        ys[i] = 3*xs[i] + 2 + np.random.normal(0, sd)
    r, A, U, sigma, VT = SVD(xs, ys)
    tempsol = np.zeros(100)
    for j in range(100):
        tempsol[j] = r[0,1]*x[j] + r[0,0]
    plt.plot(xs, tempsol)

this appears roughly correct - but I'm still not sure how to calculate error against one another.