import numpy as np
import matplotlib.pyplot as plt


%config InlineBackend.figure_format = 'svg'


x = np.arange(-10,11,1)
y = np.zeros_like(x)
for i in range(len(y)):
    if x[i] > 0:
        y[i] = 1


plt.plot(x,y,'o')
plt.show()


def vpoly(x,y,order):
    # Get Vandermonde matrix
    V = np.zeros((order,len(x)))
    for i in range(V.shape[0]):
        for j in range(V.shape[1]):
            V[i,j] = x[j]**i
            
    # Get pseudo-inverse
    b = np.matmul(np.linalg.pinv(V.T),y)
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
            
    y_fit = np.zeros_like(x)
    for n in range(order):
        y_fit += b[n]*x**(n)
        
    return b,x_range,y_fit


order = [5,10,15]
for o in order:
    b,x_range,y_fit=vpoly(x,y,o)
#     print(b)
    plt.plot(x_range,y_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,2)
plt.legend()
plt.title("polynomial fits")
plt.show()


def RBF(x,y,order,centered=True):
    # centers
    if centered:
        c = np.arange(-(10-int(20/order)/2),(10+int(20/order)/2),int(20/order))
    else:
        c = np.arange(-10,10,int(20/order))
    # RBF matrix
    F = np.zeros((order,len(x)))
    # f(r) = r**3
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x[j]-c[i])**3
    # solve
    b = np.linalg.pinv(F.T).dot(y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
    
    F = np.zeros((order,len(x_range)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_range[j]-c[i])**3
    y_fit = F.T.dot(b)    

    return b,x_range,y_fit


order = [5,10,15]
for o in order:
    br, xr_range, yr_fit = RBF(x,y,o)
    plt.plot(xr_range,yr_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,2)
plt.legend()
plt.title("RBF - centers centered at zero")
plt.show()
plt.show()


order = [5,10,15]
for o in order:
    br, xr_range, yr_fit = RBF(x,y,o,centered=False)
    plt.plot(xr_range,yr_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,2)
plt.legend()
plt.title("RBF - centers [-10,10]")
plt.show()
plt.show()


# generate the out of sample data
x_err = np.arange(-10.5,11.5,1)
y_true = np.zeros_like(x_err)
for i in range(len(y_true)):
    if x_err[i] > 0:
        y_true[i] = 1


def vpoly(x,y,x_err,y_true,order):
    # Get Vandermonde matrix
    V = np.zeros((order,len(x)))
    for i in range(V.shape[0]):
        for j in range(V.shape[1]):
            V[i,j] = x[j]**i
            
    # Get pseudo-inverse
    b = np.matmul(np.linalg.pinv(V.T),y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
            
    y_fit = np.zeros_like(x)
    for n in range(order):
        y_fit += b[n]*x**(n)
    
    # Get errors
    y_err = np.zeros_like(x_err)
    for n in range(order):
        y_err += b[n]*x_err**(n)
    err= np.mean(np.abs(y_true-y_err))
    return b,x_range,y_fit,err
order = [5,10,15]
poly_errs = []
for o in order:
    b, x_range, y_fit,poly_err = vpoly(x,y,x_err,y_true,o)
    poly_errs.append(poly_err)
poly_errs

[0.14501642333134343, 0.11465016641749635, 0.6509468017965738]


def RBF(x,y,x_err,y_true,order,centered=False):
    # centers
    if centered:
        c = np.arange(-(10-int(20/order)/2),(10+int(20/order)/2),int(20/order))
    else:
        c = np.arange(-10,10,int(20/order))
    # RBF matrix
    F = np.zeros((order,len(x)))
    # f(r) = r**3
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x[j]-c[i])**3
    # solve
    b = np.linalg.pinv(F.T).dot(y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
    
    F = np.zeros((order,len(x_range)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_range[j]-c[i])**3
    y_fit = F.T.dot(b)
    
    # Get errors
    F = np.zeros((order,len(x_err)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_err[j]-c[i])**3
    y_err = F.T.dot(b)
    
    err= np.mean(np.abs(y_true-y_err))
    
    return b,x_range,y_fit,err
order = [5,10,15]
rbf_errs = []
for o in order:
    b, x_range, y_fit,rbf_err = RBF(x,y,x_err,y_true,o)
    rbf_errs.append(rbf_err)
rbf_errs

[0.10672369166374591, 0.055404995107231535, 0.048208659405521506]


plt.plot(order,poly_errs,label="poly")
plt.plot(order,rbf_errs,label="rbf")
plt.legend()
plt.xlabel("order")
plt.ylabel("error")
plt.xticks(order)
plt.show()


def RBF(x,y,x_err,y_true,order,centered=False, rand=False):
    # centers
    if centered:
        c = np.arange(-(10-int(20/order)/2),(10+int(20/order)/2),int(20/order))
    else:
        c = np.arange(-10,10,int(20/order))
    if rand:
        c = np.random.uniform(-10,10,order)
    # RBF matrix
    F = np.zeros((order,len(x)))
    # f(r) = r**3
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x[j]-c[i])**3
    # solve
    b = np.linalg.pinv(F.T).dot(y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
    
    F = np.zeros((order,len(x_range)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_range[j]-c[i])**3
    y_fit = F.T.dot(b)
    
    # Get errors
    F = np.zeros((order,len(x_err)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_err[j]-c[i])**3
    y_err = F.T.dot(b)
    
    err= np.mean(np.abs(y_true-y_err))
    
    return b,x_range,y_fit,err


order = [5,10,15]
rbf_errs = []
for o in order:
    b, x_range, y_fit,rbf_err = RBF(x,y,x_err,y_true,o, rand = True)
    rbf_errs.append(rbf_err)
    plt.plot(x_range,y_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,3)
plt.legend()
plt.title("RBF - uniform random dist centers")
plt.show()
plt.show()


rbf_errs = []
for i in range(100):
    b, x_range, y_fit,rbf_err = RBF(x,y,x_err,y_true,15, rand = True)
    rbf_errs.append(rbf_err)
plt.plot(rbf_errs,'ko')
plt.title("out of sample errors for 100 random distributions, order=15")
plt.show()


print("maybe later")

maybe later


from sklearn.mixture import GaussianMixture
from sklearn.datasets import make_blobs


X, y_true = make_blobs(n_samples=400, centers=4,
                       cluster_std=0.60, random_state=0)
X = X[:, ::-1] # flip axes for better plotting
plt.scatter(X[:, 0], X[:, 1])
plt.title("raw data")
plt.show()


rng = np.random.RandomState(13)
X_stretched = np.dot(X, rng.randn(2, 2))
plt.scatter(X_stretched[:, 0], X_stretched[:, 1])
plt.title("raw stretched data")
plt.show()


gmm = GaussianMixture(n_components=4).fit(X)
labels = gmm.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='plasma')
plt.title("Gaussian mixture w/ 4 components")
plt.show()


from matplotlib.patches import Ellipse

def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw an ellipse with a given position and covariance"""
    ax = ax or plt.gca()
    
    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)
    
    # Draw the Ellipse
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle=angle, **kwargs))
def plot_gmm(gmm, X, label=True, ax=None):
    ax = ax or plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2)
    else:
        ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)
    ax.axis('equal')
    
    w_factor = 0.2 / gmm.weights_.max()
    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        draw_ellipse(pos, covar, alpha=w * w_factor)


gmm = GaussianMixture(n_components=4)
plot_gmm(gmm, X)
plt.title("Gaussian mixture with covariances")

Text(0.5, 1.0, 'Gaussian mixture with covariances')


gmm = GaussianMixture(n_components=4, covariance_type='full', random_state=42)
plot_gmm(gmm, X_stretched)
plt.title("stretched dataset")

Text(0.5, 1.0, 'stretched dataset')


x = np.random.uniform(-5,5,100) # our 100 points randomly distributed between -5 and 5
y = np.tanh(x) + np.random.normal(0,0.25,100) # tanh(x) + random noise w/ magnitude 0.25
plt.plot(x,y,'ko')
plt.title("our points")
plt.show()


from sklearn.linear_model import LinearRegression
X = np.stack((x,y)).T
gmm = GaussianMixture(n_components=1, covariance_type='full', random_state=0).fit(X)
# gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
# plt.plot(x,gmm.score_samples(X),'ro')
plt.plot(x,y,'ko')
# plt.show()
# plot_gmm(gmm, X)
# print(gmm.covariances_)
# print(gmm.means_)
# probs = gmm.predict_proba(X)
# print(probs.shape)
# print(probs)
# expanded = (X * probs.reshape(X.shape[0], 1, -1)).reshape(X.shape[0], -1)
lr = LinearRegression().fit(x.reshape(-1,1),y) # how to do linear fti. 
# # lr = LinearRegression().fit(probs, y)
plt.plot(x,lr.predict(x.reshape(-1,1)))
plt.show()


X = np.stack((x,y)).T
gmm = GaussianMixture(n_components=1, covariance_type='full', random_state=0).fit(X)
# gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
# plt.plot(x,gmm.score_samples(X),'ro')
plt.plot(x,y,'ko')
# plt.show()
# plot_gmm(gmm, X)
# print(gmm.covariances_)
# print(gmm.means_)
probs = gmm.predict_proba(X)
# print(probs.shape)
# print(probs)
# expanded = (X * probs.reshape(X.shape[0], 1, -1)).reshape(X.shape[0], -1)
# lr = LinearRegression().fit(x.reshape(-1,1),y) # how to do linear fti. 
lr = LinearRegression().fit(probs, y)
plt.plot(x,lr.predict(x.reshape(-1,1)))
plt.show()


gmm = GaussianMixture(n_components=1)
plot_gmm(gmm, X)
plt.title("sklearn output, 1")

Text(0.5, 1.0, 'sklearn output, 1')


gmm = GaussianMixture(n_components=2)
plot_gmm(gmm, X)
plt.title("sklearn output, 2")

Text(0.5, 1.0, 'sklearn output, 2')


gmm = GaussianMixture(n_components=4)
plot_gmm(gmm, X)
plt.title("sklearn output, 4")

Text(0.5, 1.0, 'sklearn output')


gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=0).fit(X)
print(gmm.means_.shape)
print(gmm.covariances_)
# plt.plot(x,gmm.score_samples(X),'ro')
plt.plot(x,y,'ko')
plt.show()

(2, 2)
[[[1.65453262 0.03101424]
  [0.03101424 0.05667506]]

 [[3.13595714 0.72071246]
  [0.72071246 0.26838154]]]


def cluster(x,y,N,iters=100):
    mu = np.random.uniform(min(x),max(x),len(x)) # initial mus
    pc = np.ones(N)/N# initial clusters probability (=1)
    var_x = np.ones(N)
    var_y = np.ones(N)
    beta = np.ones((2,N)) #2,N or N,2 ... good god
    # i don't know how to do this
    for i in range(iters):
        #probability of x in cluster "input space"
        pc_x = np.zeros((len(x),N))
        for d in range(len(x)):
            for m in range(N):
                pc_x[d,m] = (1/np.sqrt(2*np.pi*var_x[m]))*np.exp(-(x[d]-mu[m])**2/(2*var_x[m]))
        # probability y given c,x "output space"
        pcx_y = np.zeros_like(pc_x)
        for d in range(len(x)):
            for m in range(N):
                pcx_y = 0 #um.
cluster(x,y,2)

density estimation and functions¶

13.2¶

a)¶

b)¶

C)¶

D)¶

15.2¶

15.1¶