16.1)¶

Generate a data set by sampling 100 points from y = tanh(x) between x = −5 to 5, adding random noise with a magnitude of 0.25 to y. Fit it with cluster-weighted modeling (CWM), using a local linear model for the clusters, y = a + bx. Plot the data and the resulting forecast ⟨y|x⟩, uncertainty ⟨σy2 |x⟩, and cluster probabilities p(⃗x|cm)p(cm) as the number of clusters is increased (starting with one), and animate their evolution during the EM iterations.

import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import rc
rc('animation', html='html5')
import seaborn as sns
import numpy as np
sns.set(style="whitegrid")

# number of datapoints
num = 100
# just tanh
x = np.linspace(-5, 5, num)
y = np.tanh(x)

# style all graphs here later
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

plt.plot(x, y)
plt.show()

mag = 0.25
noise = np.random.normal(0, mag, num)
noiseUni = np.random.uniform(-mag/2, mag/2, num)
y2 = y + noise
y3 = y + noiseUni

plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

plt.plot(x, y2, label='norm')
plt.plot(x, y3, label='uni')
plt.plot(x, y, '|', label='tanh')
plt.legend()
plt.show()

1 Cluster¶

forecast, forecast_error, cluster_prob = cwm(40, x, y2, 1, 1)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y2)
ax.plot(x , forecast[0])

for i in range(40):
    foreLine = ax.plot(x, forecast[i], '.' ,color='orange')
    foreErrLine = ax2.plot(x, forecast_error[i],'.', color = 'blue')
    
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i], color='red')
plt.show()

2 Clusters¶

forecast, forecast_error, cluster_prob = cwm(40, x, y2, 2, 2)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y2)
ax.set_ylim(-1.7, 1.7)
ax2.set_xlim(min(x), max(x))

#for i in range(40):
    #foreLine = ax.plot(x, forecast[i], '.' ,color='orange')
    #foreErrLine = ax2.plot(x, forecast_error[i],'.', color = 'blue')
    
cluster_lines = []
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i])
    cluster_lines.append(line)

#plt.show()
def update_anim(i):
    ax.plot(x, forecast[i], '.' , alpha=0.5 , color='orange')
    # Update the cluster probs
    for m in range(cluster_prob.shape[1]):
        cluster_lines[m].set_data(x, cluster_prob[i, m])
animation.FuncAnimation(fig, update_anim, range(len(forecast)), blit=False, interval=350, repeat=True)

3 Clusters¶

forecast, forecast_error, cluster_prob = cwm(40, x, y2, 3, 2)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y3)
ax.set_ylim(-1.7, 1.7)
ax2.set_xlim(min(x), max(x))

#for i in range(40):
    #foreLine = ax.plot(x, forecast[i], '.' ,color='orange')
    #foreErrLine = ax2.plot(x, forecast_error[i],'.', color = 'blue')
    
cluster_lines = []
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i])
    cluster_lines.append(line)
#plt.show()

def update_anim(i):
    ax.plot(x, forecast[i], '.' , alpha=0.5 , color='orange')
    # Update the cluster probs
    for m in range(cluster_prob.shape[1]):
        cluster_lines[m].set_data(x, cluster_prob[i, m])
animation.FuncAnimation(fig, update_anim, range(len(forecast)), blit=False, interval=350, repeat=True)

How do 10 Clusters do?¶

forecast, forecast_error, cluster_prob = cwm(40, x, y2, 10, 2)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y2)
ax.set_ylim(-1.5, 1.5)
ax2.set_xlim(min(x), max(x))
    
cluster_lines = []
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i])
    cluster_lines.append(line)
#plt.show()

def update_anim(i):
    ax.plot(x, forecast[i], '.' , alpha=0.5 , color='orange')
    # Update the cluster probs
    for m in range(cluster_prob.shape[1]):
        cluster_lines[m].set_data(x, cluster_prob[i, m])
animation.FuncAnimation(fig, update_anim, range(len(forecast)), blit=False, interval=350, repeat=True)

# input: number of iterations, x, y, number of clusters, number of coefficients
def cwm(count, X, Y, m, nCoeff): 

    # hold forecasts
    forecast = np.zeros((count, num)) # num = number of datapoints
    foreErr = np.zeros((count, num))
    # cluster probability
    cProb = np.zeros((count, m, num))
    # initialize cluster mult uniformly
    mu = np.random.uniform(min(X), max(Y), m)
    
    # equal probability for all clusters
    pm = np.ones(m) / m
    
    # variance for in and output
    varX = np.ones(m)
    varY = np.ones(m)
    beta = np.ones((m, nCoeff))
      
    for all in range(count):
        # see 16.24
        pxc = np.zeros((num, m))
        for i in range(num):
            for j in range(m):
                pxc[i, j] = np.exp((- (X[i] - mu[j]) ** 2) / (2 * varX[j]) ) / np.sqrt(2 * np.pi * varX[j])
                
        for j in range(m):
            for i in range(num):
                cProb[all, j, i] = pxc[i, j] * pm[j]

        # see 16.26
        def f(x, coeff):
            s = 0
            for i in range(len(coeff)):
                s += coeff[i] * (x ** i)
            return s
        pyxc = np.zeros((num, m))
        for i in range(num):
            for j in range(m):
                pyxc[i, j] = np.exp((- (Y[i] - f(X[i], beta[j])) ** 2) / (2 * varY[j]) ) / np.sqrt(2 * np.pi * varY[j])

        # see 16.27
        for i in range(num):
            nom = 0
            denom = 0
            for j in range(m):
                nom += f(X[i], beta[j]) * pxc[i, j] * pm[j]
                denom += pxc[i, j] * pm[j]
            forecast[all, i] = nom / denom
            
            # 16.28 error
            errorNom = 0
            for j in range(m):
                errorNom += (varY[j] + f(X[i], beta[j]) ** 2) * pxc[i, j] * pm[j]
            foreErr[all, i] = errorNom / denom - forecast[all, i] ** 2
        
        # P(c_m | y, x) - equation (16.29)
        pcyx = np.zeros((num, m))
        for i in range(num):
            for j in range(m):
                pcyx[i, j] = pyxc[i, j] * pxc[i, j] * pm[j]
            pcyx[i] /= np.sum(pcyx[i])

        # 16.30 update pm
        for i in range(m):
            pm[i] = np.mean(pcyx[:, i])

        # 16.32 input var update
        for i in range(m):
            varX[i] = np.sum(np.multiply((X - mu[i])**2, pcyx[:, i])) / np.sum(pcyx[:, i])
            # add a small constant
            varX[i] += 0.1

        # 16.31 mu update
        for i in range(m):
            mu[i] = np.sum(np.multiply(X, pcyx[:, i])) / np.sum(pcyx[:, i])

        # local linear    
        for i in range(m):
            a = np.zeros(nCoeff)
            for j in range(nCoeff):
                a[j] = np.sum(np.multiply(np.multiply(Y, X**j), pcyx[:, i])) / np.sum(pcyx[:, i])
            B = np.zeros((nCoeff, nCoeff))
            for j in range(nCoeff):
                for k in range(nCoeff):
                    B[j, k] = np.sum(np.multiply(np.multiply(X**j, X**k), pcyx[:, i])) / np.sum(pcyx[:, i])
            ## matrix invert    
            Binv = np.linalg.inv(B)
            beta[i] = Binv.dot(a)

        # 16.37 update varY for output
        for i in range(m):
            varY[i] = np.sum(np.multiply((Y - f(X, beta[i]))**2, pcyx[:, i])) / np.sum(pcyx[:, i])
            # add a small constant
            varY[i] += 0.1
    # give us a forecast, a forecast error and the cluster probability to work with
    return forecast, foreErr, cProb