NMM 2020 Jake Read 10 Function Architectures: Fitting

In [1]:
import numpy as np 
import math
import matplotlib.pyplot as plt
import scipy as sp

14.2

Take as a data set $x = \{-10, -9, ..., 9, 10\}$ and $y(x) = 0$ if $x \leq 0$ and $y(x) = 1$ if $x > 0$

In [2]:
def y(x):
    if(x <= 0):
        return 0
    else:
        return 1

t = np.arange(-10, 11, 1)
d = np.zeros(len(t))
for i in range(len(t)):
    d[i] = y(t[i])
plt.plot(t,d)
Out[2]:
[<matplotlib.lines.Line2D at 0x145c625b2c8>]

a) fit the data with polynomials of 5, 10, and 15 terms, using a psuedo-inverse of the Vandermonde matrix.

In [3]:
def poly(t, p): # evaluate polynomial at t, with parameters p 
    sum = 0
    for i in range(len(p)):
        sum += p[i]*t**i
    return sum 

def polyseries(ts, p): # call the above for an array of t's
    res = np.zeros(len(ts))
    for i in range(len(ts)):
        res[i] = poly(ts[i], p)
    return res 

def polyfactors(t, p): # return coefficients for the parameters p, at time t
    res = np.zeros(len(p))
    for i in range(len(p)):
        res[i] = t**i
    return res

t0 = np.arange(-10, 10, 0.001)
d0 = polyseries(t0, np.ones(5))
plt.plot(t0, d0)
Out[3]:
[<matplotlib.lines.Line2D at 0x145c6325408>]

ok, now we need to find the p that best approximates the step function, with n+1 points we can exactly fit a polynomial with n components, so any less (5) will under-fit and (15) will over-fit, is my guess

In [4]:
#need to construct the vandermonde matrix
r0 = polyfactors(5, np.zeros(5))
print(r0)
#one of those, for each of our samples: first for 5, then 10, 15 
vm5 = np.zeros((len(t), 5))
for i in range(len(t)):
    #print(t[i])
    vm5[i] = polyfactors(t[i], np.zeros(5))
vm5 # matrix for 5 terms, 
[  1.   5.  25. 125. 625.]
Out[4]:
array([[ 1.000e+00, -1.000e+01,  1.000e+02, -1.000e+03,  1.000e+04],
       [ 1.000e+00, -9.000e+00,  8.100e+01, -7.290e+02,  6.561e+03],
       [ 1.000e+00, -8.000e+00,  6.400e+01, -5.120e+02,  4.096e+03],
       [ 1.000e+00, -7.000e+00,  4.900e+01, -3.430e+02,  2.401e+03],
       [ 1.000e+00, -6.000e+00,  3.600e+01, -2.160e+02,  1.296e+03],
       [ 1.000e+00, -5.000e+00,  2.500e+01, -1.250e+02,  6.250e+02],
       [ 1.000e+00, -4.000e+00,  1.600e+01, -6.400e+01,  2.560e+02],
       [ 1.000e+00, -3.000e+00,  9.000e+00, -2.700e+01,  8.100e+01],
       [ 1.000e+00, -2.000e+00,  4.000e+00, -8.000e+00,  1.600e+01],
       [ 1.000e+00, -1.000e+00,  1.000e+00, -1.000e+00,  1.000e+00],
       [ 1.000e+00,  0.000e+00,  0.000e+00,  0.000e+00,  0.000e+00],
       [ 1.000e+00,  1.000e+00,  1.000e+00,  1.000e+00,  1.000e+00],
       [ 1.000e+00,  2.000e+00,  4.000e+00,  8.000e+00,  1.600e+01],
       [ 1.000e+00,  3.000e+00,  9.000e+00,  2.700e+01,  8.100e+01],
       [ 1.000e+00,  4.000e+00,  1.600e+01,  6.400e+01,  2.560e+02],
       [ 1.000e+00,  5.000e+00,  2.500e+01,  1.250e+02,  6.250e+02],
       [ 1.000e+00,  6.000e+00,  3.600e+01,  2.160e+02,  1.296e+03],
       [ 1.000e+00,  7.000e+00,  4.900e+01,  3.430e+02,  2.401e+03],
       [ 1.000e+00,  8.000e+00,  6.400e+01,  5.120e+02,  4.096e+03],
       [ 1.000e+00,  9.000e+00,  8.100e+01,  7.290e+02,  6.561e+03],
       [ 1.000e+00,  1.000e+01,  1.000e+02,  1.000e+03,  1.000e+04]])
In [5]:
px5, r, rank, s = np.linalg.lstsq(vm5, d, rcond=None)
px5
Out[5]:
array([ 4.15383728e-01,  1.34166939e-01,  3.65123166e-03, -9.53470633e-04,
       -3.02867142e-05])
In [6]:
pd5 = polyseries(t0, px5)
plt.plot(t0, pd5)
plt.plot(t, d)
Out[6]:
[<matplotlib.lines.Line2D at 0x145c625b548>]

doesn't seem radically unlikely that that's a best fit with just 5 terms, I'll try 10 and 15, to see if they improve.

In [7]:
vm10 = np.zeros((len(t), 10))
for i in range(len(t)):
    vm10[i] = polyfactors(t[i], np.zeros(10))
# for reasons such as the above I will make no claim to code elegance 
px10, r, rank, s = np.linalg.lstsq(vm10, d, rcond=None)
In [34]:
pd10 = polyseries(t0, px10)
plt.plot(t0, pd10)
for i in range(21):
    plt.axvline(i-10) # visualize on-sample and off errors, 
plt.plot(t, d)
Out[34]:
[<matplotlib.lines.Line2D at 0x145c8d29a48>]
In [9]:
vm15 = np.zeros((len(t), 15))
for i in range(len(t)):
    vm15[i] = polyfactors(t[i], np.zeros(15))
px15, r, rank, s = np.linalg.lstsq(vm15, d, rcond=None)
pd15 = polyseries(t0, px15)
plt.plot(t0, pd15)
plt.plot(t, d)
Out[9]:
[<matplotlib.lines.Line2D at 0x145c644bcc8>]

so, that 15 term is a mystery to me. is this meant to be a discovery? the x^15 term is hard to fight off, if I manually clip it...

In [31]:
pd15 = polyseries(t0, px15[0:10])
plt.plot(t0, pd15)
plt.plot(t, d)
Out[31]:
[<matplotlib.lines.Line2D at 0x145c8b3fbc8>]

b) do the same, but using RBFs of $r^3$

In [11]:
def r3(t, offset): 
    return (t+offset)**3

def r3factors(t, offsets):
    res = np.zeros(len(offsets))
    for i in range(len(offsets)):
        res[i] = r3(t, offsets[i])
    return res

def r3seq(offsets, oa, t): # offsets are positions, oa are coefficients for this offset 
    sum = 0 
    for i in range(len(offsets)):
        sum += r3(t, offsets[i])*oa[i]
    return sum 

offsets5 = np.arange(-10, 10.1, 5)
oa5 = np.ones(5)
of5ex = np.zeros(len(t0))
for i in range(len(t0)):
    of5ex[i] = r3seq(offsets5, oa5, t0[i])

plt.plot(t0, of5ex)
Out[11]:
[<matplotlib.lines.Line2D at 0x145c6568648>]
In [12]:
# ahn matrix for 5, 
of5 = np.zeros((len(t), 5))
for i in range(len(t)):
    #print(t[i])
    of5[i] = r3factors(t[i], offsets5)
of5
Out[12]:
array([[-8.000e+03, -3.375e+03, -1.000e+03, -1.250e+02,  0.000e+00],
       [-6.859e+03, -2.744e+03, -7.290e+02, -6.400e+01,  1.000e+00],
       [-5.832e+03, -2.197e+03, -5.120e+02, -2.700e+01,  8.000e+00],
       [-4.913e+03, -1.728e+03, -3.430e+02, -8.000e+00,  2.700e+01],
       [-4.096e+03, -1.331e+03, -2.160e+02, -1.000e+00,  6.400e+01],
       [-3.375e+03, -1.000e+03, -1.250e+02,  0.000e+00,  1.250e+02],
       [-2.744e+03, -7.290e+02, -6.400e+01,  1.000e+00,  2.160e+02],
       [-2.197e+03, -5.120e+02, -2.700e+01,  8.000e+00,  3.430e+02],
       [-1.728e+03, -3.430e+02, -8.000e+00,  2.700e+01,  5.120e+02],
       [-1.331e+03, -2.160e+02, -1.000e+00,  6.400e+01,  7.290e+02],
       [-1.000e+03, -1.250e+02,  0.000e+00,  1.250e+02,  1.000e+03],
       [-7.290e+02, -6.400e+01,  1.000e+00,  2.160e+02,  1.331e+03],
       [-5.120e+02, -2.700e+01,  8.000e+00,  3.430e+02,  1.728e+03],
       [-3.430e+02, -8.000e+00,  2.700e+01,  5.120e+02,  2.197e+03],
       [-2.160e+02, -1.000e+00,  6.400e+01,  7.290e+02,  2.744e+03],
       [-1.250e+02,  0.000e+00,  1.250e+02,  1.000e+03,  3.375e+03],
       [-6.400e+01,  1.000e+00,  2.160e+02,  1.331e+03,  4.096e+03],
       [-2.700e+01,  8.000e+00,  3.430e+02,  1.728e+03,  4.913e+03],
       [-8.000e+00,  2.700e+01,  5.120e+02,  2.197e+03,  5.832e+03],
       [-1.000e+00,  6.400e+01,  7.290e+02,  2.744e+03,  6.859e+03],
       [ 0.000e+00,  1.250e+02,  1.000e+03,  3.375e+03,  8.000e+03]])
In [13]:
ox5, r, rank, s = np.linalg.lstsq(of5, d, rcond=None)
ox5
Out[13]:
array([ 4.43396275e-05,  1.03960730e-04, -7.18670382e-04, -1.01332524e-03,
        6.30224630e-04])
In [14]:
# reconstruct, 
oxd5 = np.zeros(len(t0))
for i in range(len(t0)):
    oxd5[i] = r3seq(offsets5, ox5, t0[i])
plt.plot(t0, oxd5)
plt.plot(t, d)
Out[14]:
[<matplotlib.lines.Line2D at 0x145c7583e48>]
In [15]:
# ok, 
offsets10 = np.linspace(-10, 10, 10)
of10 = np.zeros((len(t), 10))
for i in range(len(t)):
    #print(t[i])
    of10[i] = r3factors(t[i], offsets10)
ox10, r, rank, s = np.linalg.lstsq(of10, d, rcond=None)
oxd10 = np.zeros(len(t0))
for i in range(len(t0)):
    oxd10[i] = r3seq(offsets10, ox10, t0[i])
plt.plot(t0, oxd10)
plt.plot(t, d)
Out[15]:
[<matplotlib.lines.Line2D at 0x145c659a288>]
In [16]:
# ok, 
offsets15 = np.linspace(-10, 10, 15)
of15 = np.zeros((len(t), 15))
for i in range(len(t)):
    #print(t[i])
    of15[i] = r3factors(t[i], offsets15)
ox15, r, rank, s = np.linalg.lstsq(of15, d, rcond=None)
oxd15 = np.zeros(len(t0))
for i in range(len(t0)):
    oxd15[i] = r3seq(offsets15, ox15, t0[i])
plt.plot(t0, oxd15)
plt.plot(t, d)
Out[16]:
[<matplotlib.lines.Line2D at 0x145c7665708>]
In [17]:
ox15
Out[17]:
array([-5.59613518e-05,  5.92592533e-05,  1.01547025e-04,  8.62662308e-05,
        2.87811385e-05, -5.55439844e-05, -1.51344870e-04, -2.43257251e-04,
       -3.15916860e-04, -3.53959429e-04, -3.42020689e-04, -2.64736375e-04,
       -1.06742218e-04,  1.47326051e-04,  5.12832697e-04])
In [18]:
ox10
Out[18]:
array([-3.45243763e-05,  1.33349083e-04,  1.01080329e-04, -5.58328614e-05,
       -2.61892713e-04, -4.41601448e-04, -5.19461291e-04, -4.19974464e-04,
       -6.76431930e-05,  6.13030300e-04])
In [19]:
ox5
Out[19]:
array([ 4.43396275e-05,  1.03960730e-04, -7.18670382e-04, -1.01332524e-03,
        6.30224630e-04])

it's unclear to me why these all appear identical, but it feels like composing a step function with lots of little step functions is not an awesome idea, so maybe this is meant to be broken. let's see about off-axis errors

In [21]:
ts = np.linspace(-10.5, 10.5, 22)
ts
Out[21]:
array([-10.5,  -9.5,  -8.5,  -7.5,  -6.5,  -5.5,  -4.5,  -3.5,  -2.5,
        -1.5,  -0.5,   0.5,   1.5,   2.5,   3.5,   4.5,   5.5,   6.5,
         7.5,   8.5,   9.5,  10.5])
In [ ]: