import matplotlib.pyplot as plt
import numpy as np
n = 100
sd = 0.5
x = np.linspace(0,1,n)
y = np.zeros(n)
sol = np.zeros(n)
for i in range(n):
y[i] = 3*x[i] + 2 + np.random.normal(0, sd)
sol[i] = 3*x[i] + 2
plt.plot(x, sol)
plt.scatter(x, y) #[25:50], D[25:50]) # [from:to]
def SVD(ex, why):
# ok, we have to shape an A https://www.youtube.com/watch?v=N9uf0YGDaWk
# such that [[a1, b1],
# [a2, b2], ... ]
A = np.matrix(np.append(np.ones([n, 1]), ex.reshape(n,1), 1))
# A # shape for ax + b coeff,
U, sigma, VT = np.linalg.svd(A) # np solves the svd,
# to extract fit parameters for this two-d case,
# we can go line by line,
b = why.reshape(n,1)
r = [0.0, 0.0]
r += (1 / sigma[0]) * (U[:,0].T*b) * (VT[:,0].T)
r += (1 / sigma[1]) * (U[:,1].T*b) * (VT[:,1].T)
return r, A, U, sigma, VT
To extract the parameters, our 'x-hat', we want something like:
$ \hat{x} = V \Sigma U^\intercal b $
Now this r should contain the parameters for the fit
r, A, U, sigma, VT = SVD(x,y)
print(r)
print("svd: ")
print("a = " + str(r[0,1]))
print("b = " + str(r[0,0]))
svdsol = np.zeros(n)
for i in range(n):
svdsol[i] = r[0,1]*x[i] + r[0,0]
plt.plot(x, sol)
plt.plot(x, svdsol)
plt.scatter(x, y)
meaning...
VT[0,1]
e1 = np.sum((VT*VT)/(sigma*sigma), axis=1) # axis sums 'along' this axis...
e1
ok, do this 100 times w/ random sets of samples drawn from the original
ok, can plot, how do I define errors against these?
for s in range(100): ## 100 new sets
rint = np.random.randint(0,100,100)
xn = np.zeros(100) # new x,
yn = np.zeros(100) # new y,
for i in range(100): ## of 100 pts
xn[i] = x[rint[i]]#x[np.random.randint(0,100)]
yn[i] = y[rint[i]]#[np.random.randint(0,100)]
r, A, U, sigma, VT = SVD(xn, yn)
tempsol = np.zeros(100)
for j in range(100):
tempsol[j] = r[0,1]*x[j] + r[0,0]
plt.plot(x, tempsol)
for s in range(100):
xs = np.linspace(0,1,n)
ys = np.zeros(n)
for i in range(n):
ys[i] = 3*xs[i] + 2 + np.random.normal(0, sd)
r, A, U, sigma, VT = SVD(xs, ys)
tempsol = np.zeros(100)
for j in range(100):
tempsol[j] = r[0,1]*x[j] + r[0,0]
plt.plot(xs, tempsol)
this appears roughly correct - but I'm still not sure how to calculate error against one another.