density estimation and functions¶

In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
%config InlineBackend.figure_format = 'svg'

13.2¶

In [21]:
x = np.arange(-10,11,1)
y = np.zeros_like(x)
for i in range(len(y)):
    if x[i] > 0:
        y[i] = 1
In [28]:
plt.plot(x,y,'o')
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-25T21:51:58.486489</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>

a)¶

In [67]:
def vpoly(x,y,order):
    # Get Vandermonde matrix
    V = np.zeros((order,len(x)))
    for i in range(V.shape[0]):
        for j in range(V.shape[1]):
            V[i,j] = x[j]**i
            
    # Get pseudo-inverse
    b = np.matmul(np.linalg.pinv(V.T),y)
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
            
    y_fit = np.zeros_like(x)
    for n in range(order):
        y_fit += b[n]*x**(n)
        
    return b,x_range,y_fit
In [71]:
order = [5,10,15]
for o in order:
    b,x_range,y_fit=vpoly(x,y,o)
#     print(b)
    plt.plot(x_range,y_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,2)
plt.legend()
plt.title("polynomial fits")
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-25T22:02:45.985026</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>

b)¶

In [108]:
def RBF(x,y,order,centered=True):
    # centers
    if centered:
        c = np.arange(-(10-int(20/order)/2),(10+int(20/order)/2),int(20/order))
    else:
        c = np.arange(-10,10,int(20/order))
    # RBF matrix
    F = np.zeros((order,len(x)))
    # f(r) = r**3
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x[j]-c[i])**3
    # solve
    b = np.linalg.pinv(F.T).dot(y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
    
    F = np.zeros((order,len(x_range)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_range[j]-c[i])**3
    y_fit = F.T.dot(b)    

    return b,x_range,y_fit
In [109]:
order = [5,10,15]
for o in order:
    br, xr_range, yr_fit = RBF(x,y,o)
    plt.plot(xr_range,yr_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,2)
plt.legend()
plt.title("RBF - centers centered at zero")
plt.show()
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-25T22:35:24.468749</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [110]:
order = [5,10,15]
for o in order:
    br, xr_range, yr_fit = RBF(x,y,o,centered=False)
    plt.plot(xr_range,yr_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,2)
plt.legend()
plt.title("RBF - centers [-10,10]")
plt.show()
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-25T22:36:05.145977</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>

C)¶

In [23]:
# generate the out of sample data
x_err = np.arange(-10.5,11.5,1)
y_true = np.zeros_like(x_err)
for i in range(len(y_true)):
    if x_err[i] > 0:
        y_true[i] = 1
In [122]:
def vpoly(x,y,x_err,y_true,order):
    # Get Vandermonde matrix
    V = np.zeros((order,len(x)))
    for i in range(V.shape[0]):
        for j in range(V.shape[1]):
            V[i,j] = x[j]**i
            
    # Get pseudo-inverse
    b = np.matmul(np.linalg.pinv(V.T),y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
            
    y_fit = np.zeros_like(x)
    for n in range(order):
        y_fit += b[n]*x**(n)
    
    # Get errors
    y_err = np.zeros_like(x_err)
    for n in range(order):
        y_err += b[n]*x_err**(n)
    err= np.mean(np.abs(y_true-y_err))
    return b,x_range,y_fit,err
order = [5,10,15]
poly_errs = []
for o in order:
    b, x_range, y_fit,poly_err = vpoly(x,y,x_err,y_true,o)
    poly_errs.append(poly_err)
poly_errs
Out[122]:
[0.14501642333134343, 0.11465016641749635, 0.6509468017965738]
In [123]:
def RBF(x,y,x_err,y_true,order,centered=False):
    # centers
    if centered:
        c = np.arange(-(10-int(20/order)/2),(10+int(20/order)/2),int(20/order))
    else:
        c = np.arange(-10,10,int(20/order))
    # RBF matrix
    F = np.zeros((order,len(x)))
    # f(r) = r**3
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x[j]-c[i])**3
    # solve
    b = np.linalg.pinv(F.T).dot(y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
    
    F = np.zeros((order,len(x_range)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_range[j]-c[i])**3
    y_fit = F.T.dot(b)
    
    # Get errors
    F = np.zeros((order,len(x_err)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_err[j]-c[i])**3
    y_err = F.T.dot(b)
    
    err= np.mean(np.abs(y_true-y_err))
    
    return b,x_range,y_fit,err
order = [5,10,15]
rbf_errs = []
for o in order:
    b, x_range, y_fit,rbf_err = RBF(x,y,x_err,y_true,o)
    rbf_errs.append(rbf_err)
rbf_errs
Out[123]:
[0.10672369166374591, 0.055404995107231535, 0.048208659405521506]
In [126]:
plt.plot(order,poly_errs,label="poly")
plt.plot(order,rbf_errs,label="rbf")
plt.legend()
plt.xlabel("order")
plt.ylabel("error")
plt.xticks(order)
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-25T23:00:38.752281</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [20]:
def RBF(x,y,x_err,y_true,order,centered=False, rand=False):
    # centers
    if centered:
        c = np.arange(-(10-int(20/order)/2),(10+int(20/order)/2),int(20/order))
    else:
        c = np.arange(-10,10,int(20/order))
    if rand:
        c = np.random.uniform(-10,10,order)
    # RBF matrix
    F = np.zeros((order,len(x)))
    # f(r) = r**3
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x[j]-c[i])**3
    # solve
    b = np.linalg.pinv(F.T).dot(y)
    
    # Generate plot points
    x_range = x = np.arange(-10,11,0.1)
    y_range = np.zeros_like(x)
    for i in range(len(y)):
        if x_range[i] > 0:
            y_range[i] = 1
    
    F = np.zeros((order,len(x_range)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_range[j]-c[i])**3
    y_fit = F.T.dot(b)
    
    # Get errors
    F = np.zeros((order,len(x_err)))
    for i in range(F.shape[0]):
        for j in range(F.shape[1]):
            F[i,j] = np.abs(x_err[j]-c[i])**3
    y_err = F.T.dot(b)
    
    err= np.mean(np.abs(y_true-y_err))
    
    return b,x_range,y_fit,err
In [34]:
order = [5,10,15]
rbf_errs = []
for o in order:
    b, x_range, y_fit,rbf_err = RBF(x,y,x_err,y_true,o, rand = True)
    rbf_errs.append(rbf_err)
    plt.plot(x_range,y_fit,label=o)
plt.plot(x,y,'ko',label='data')
plt.ylim(-1,3)
plt.legend()
plt.title("RBF - uniform random dist centers")
plt.show()
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T09:50:26.739141</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [39]:
rbf_errs = []
for i in range(100):
    b, x_range, y_fit,rbf_err = RBF(x,y,x_err,y_true,15, rand = True)
    rbf_errs.append(rbf_err)
plt.plot(rbf_errs,'ko')
plt.title("out of sample errors for 100 random distributions, order=15")
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T09:52:46.100726</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>

D)¶

In [127]:
print("maybe later")
maybe later

15.2¶

we're going to do this tutorial: https://jakevdp.github.io/PythonDataScienceHandbook/05.12-gaussian-mixtures.html instead. v below.

In [42]:
from sklearn.mixture import GaussianMixture
from sklearn.datasets import make_blobs
In [113]:
X, y_true = make_blobs(n_samples=400, centers=4,
                       cluster_std=0.60, random_state=0)
X = X[:, ::-1] # flip axes for better plotting
plt.scatter(X[:, 0], X[:, 1])
plt.title("raw data")
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:19:19.438498</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [114]:
rng = np.random.RandomState(13)
X_stretched = np.dot(X, rng.randn(2, 2))
plt.scatter(X_stretched[:, 0], X_stretched[:, 1])
plt.title("raw stretched data")
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:19:22.417619</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [115]:
gmm = GaussianMixture(n_components=4).fit(X)
labels = gmm.predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='plasma')
plt.title("Gaussian mixture w/ 4 components")
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:19:24.789058</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [116]:
from matplotlib.patches import Ellipse

def draw_ellipse(position, covariance, ax=None, **kwargs):
    """Draw an ellipse with a given position and covariance"""
    ax = ax or plt.gca()
    
    # Convert covariance to principal axes
    if covariance.shape == (2, 2):
        U, s, Vt = np.linalg.svd(covariance)
        angle = np.degrees(np.arctan2(U[1, 0], U[0, 0]))
        width, height = 2 * np.sqrt(s)
    else:
        angle = 0
        width, height = 2 * np.sqrt(covariance)
    
    # Draw the Ellipse
    for nsig in range(1, 4):
        ax.add_patch(Ellipse(position, nsig * width, nsig * height,
                             angle=angle, **kwargs))
def plot_gmm(gmm, X, label=True, ax=None):
    ax = ax or plt.gca()
    labels = gmm.fit(X).predict(X)
    if label:
        ax.scatter(X[:, 0], X[:, 1], c=labels, s=40, cmap='viridis', zorder=2)
    else:
        ax.scatter(X[:, 0], X[:, 1], s=40, zorder=2)
    ax.axis('equal')
    
    w_factor = 0.2 / gmm.weights_.max()
    for pos, covar, w in zip(gmm.means_, gmm.covariances_, gmm.weights_):
        draw_ellipse(pos, covar, alpha=w * w_factor)
In [117]:
gmm = GaussianMixture(n_components=4)
plot_gmm(gmm, X)
plt.title("Gaussian mixture with covariances")
Out[117]:
Text(0.5, 1.0, 'Gaussian mixture with covariances')
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:19:29.555439</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [118]:
gmm = GaussianMixture(n_components=4, covariance_type='full', random_state=42)
plot_gmm(gmm, X_stretched)
plt.title("stretched dataset")
Out[118]:
Text(0.5, 1.0, 'stretched dataset')
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:19:32.100231</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>

15.1¶

In [119]:
x = np.random.uniform(-5,5,100) # our 100 points randomly distributed between -5 and 5
y = np.tanh(x) + np.random.normal(0,0.25,100) # tanh(x) + random noise w/ magnitude 0.25
plt.plot(x,y,'ko')
plt.title("our points")
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:19:36.806182</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [121]:
from sklearn.linear_model import LinearRegression
X = np.stack((x,y)).T
gmm = GaussianMixture(n_components=1, covariance_type='full', random_state=0).fit(X)
# gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
# plt.plot(x,gmm.score_samples(X),'ro')
plt.plot(x,y,'ko')
# plt.show()
# plot_gmm(gmm, X)
# print(gmm.covariances_)
# print(gmm.means_)
# probs = gmm.predict_proba(X)
# print(probs.shape)
# print(probs)
# expanded = (X * probs.reshape(X.shape[0], 1, -1)).reshape(X.shape[0], -1)
lr = LinearRegression().fit(x.reshape(-1,1),y) # how to do linear fti. 
# # lr = LinearRegression().fit(probs, y)
plt.plot(x,lr.predict(x.reshape(-1,1)))
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:20:20.437177</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [129]:
X = np.stack((x,y)).T
gmm = GaussianMixture(n_components=1, covariance_type='full', random_state=0).fit(X)
# gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
# plt.plot(x,gmm.score_samples(X),'ro')
plt.plot(x,y,'ko')
# plt.show()
# plot_gmm(gmm, X)
# print(gmm.covariances_)
# print(gmm.means_)
probs = gmm.predict_proba(X)
# print(probs.shape)
# print(probs)
# expanded = (X * probs.reshape(X.shape[0], 1, -1)).reshape(X.shape[0], -1)
# lr = LinearRegression().fit(x.reshape(-1,1),y) # how to do linear fti. 
lr = LinearRegression().fit(probs, y)
plt.plot(x,lr.predict(x.reshape(-1,1)))
plt.show()
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:38:38.791110</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [123]:
gmm = GaussianMixture(n_components=1)
plot_gmm(gmm, X)
plt.title("sklearn output, 1")
Out[123]:
Text(0.5, 1.0, 'sklearn output, 1')
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:23:45.521253</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [124]:
gmm = GaussianMixture(n_components=2)
plot_gmm(gmm, X)
plt.title("sklearn output, 2")
Out[124]:
Text(0.5, 1.0, 'sklearn output, 2')
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:24:02.904573</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [122]:
gmm = GaussianMixture(n_components=4)
plot_gmm(gmm, X)
plt.title("sklearn output, 4")
Out[122]:
Text(0.5, 1.0, 'sklearn output')
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T13:20:22.913903</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
In [112]:
gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=0).fit(X)
print(gmm.means_.shape)
print(gmm.covariances_)
# plt.plot(x,gmm.score_samples(X),'ro')
plt.plot(x,y,'ko')
plt.show()
(2, 2)
[[[1.65453262 0.03101424]
  [0.03101424 0.05667506]]

 [[3.13595714 0.72071246]
  [0.72071246 0.26838154]]]
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <cc:Work> <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/> <dc:date>2023-04-27T11:12:45.422069</dc:date> <dc:format>image/svg+xml</dc:format> <dc:creator> <cc:Agent> <dc:title>Matplotlib v3.6.3, https://matplotlib.org/</dc:title> </cc:Agent> </dc:creator> </cc:Work> </rdf:RDF> <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>

this seems like a good resource: https://www.youtube.com/watch?v=JXdrq7--XV0

In [102]:
def cluster(x,y,N,iters=100):
    mu = np.random.uniform(min(x),max(x),len(x)) # initial mus
    pc = np.ones(N)/N# initial clusters probability (=1)
    var_x = np.ones(N)
    var_y = np.ones(N)
    beta = np.ones((2,N)) #2,N or N,2 ... good god
    # i don't know how to do this
    for i in range(iters):
        #probability of x in cluster "input space"
        pc_x = np.zeros((len(x),N))
        for d in range(len(x)):
            for m in range(N):
                pc_x[d,m] = (1/np.sqrt(2*np.pi*var_x[m]))*np.exp(-(x[d]-mu[m])**2/(2*var_x[m]))
        # probability y given c,x "output space"
        pcx_y = np.zeros_like(pc_x)
        for d in range(len(x)):
            for m in range(N):
                pcx_y = 0 #um.
cluster(x,y,2)