16.1)

Generate a data set by sampling 100 points from y = tanh(x) between x = −5 to 5, adding random noise with a magnitude of 0.25 to y. Fit it with cluster-weighted modeling (CWM), using a local linear model for the clusters, y = a + bx. Plot the data and the resulting forecast ⟨y|x⟩, uncertainty ⟨σy2 |x⟩, and cluster probabilities p(⃗x|cm)p(cm) as the number of clusters is increased (starting with one), and animate their evolution during the EM iterations.

In [198]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import rc
rc('animation', html='html5')
import seaborn as sns
import numpy as np
sns.set(style="whitegrid") 
In [199]:
# number of datapoints
num = 100
# just tanh
x = np.linspace(-5, 5, num)
y = np.tanh(x)

# style all graphs here later
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

plt.plot(x, y)
plt.show()
In [200]:
mag = 0.25
noise = np.random.normal(0, mag, num)
noiseUni = np.random.uniform(-mag/2, mag/2, num)
y2 = y + noise
y3 = y + noiseUni

plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)

plt.plot(x, y2, label='norm')
plt.plot(x, y3, label='uni')
plt.plot(x, y, '|', label='tanh')
plt.legend()
plt.show()

1 Cluster

In [209]:
forecast, forecast_error, cluster_prob = cwm(40, x, y2, 1, 1)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y2)
ax.plot(x , forecast[0])

for i in range(40):
    foreLine = ax.plot(x, forecast[i], '.' ,color='orange')
    foreErrLine = ax2.plot(x, forecast_error[i],'.', color = 'blue')
    
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i], color='red')
plt.show()

2 Clusters

In [202]:
forecast, forecast_error, cluster_prob = cwm(40, x, y2, 2, 2)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y2)
ax.set_ylim(-1.7, 1.7)
ax2.set_xlim(min(x), max(x))

#for i in range(40):
    #foreLine = ax.plot(x, forecast[i], '.' ,color='orange')
    #foreErrLine = ax2.plot(x, forecast_error[i],'.', color = 'blue')
    
cluster_lines = []
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i])
    cluster_lines.append(line)

#plt.show()
def update_anim(i):
    ax.plot(x, forecast[i], '.' , alpha=0.5 , color='orange')
    # Update the cluster probs
    for m in range(cluster_prob.shape[1]):
        cluster_lines[m].set_data(x, cluster_prob[i, m])
animation.FuncAnimation(fig, update_anim, range(len(forecast)), blit=False, interval=350, repeat=True)
Out[202]:

3 Clusters

In [203]:
forecast, forecast_error, cluster_prob = cwm(40, x, y2, 3, 2)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y3)
ax.set_ylim(-1.7, 1.7)
ax2.set_xlim(min(x), max(x))

#for i in range(40):
    #foreLine = ax.plot(x, forecast[i], '.' ,color='orange')
    #foreErrLine = ax2.plot(x, forecast_error[i],'.', color = 'blue')
    
cluster_lines = []
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i])
    cluster_lines.append(line)
#plt.show()

def update_anim(i):
    ax.plot(x, forecast[i], '.' , alpha=0.5 , color='orange')
    # Update the cluster probs
    for m in range(cluster_prob.shape[1]):
        cluster_lines[m].set_data(x, cluster_prob[i, m])
animation.FuncAnimation(fig, update_anim, range(len(forecast)), blit=False, interval=350, repeat=True)
Out[203]:

How do 10 Clusters do?

In [208]:
forecast, forecast_error, cluster_prob = cwm(40, x, y2, 10, 2)

fig, (ax, ax2) = plt.subplots(2, 1, figsize=(12,12))
ax.plot(x, y2)
ax.set_ylim(-1.5, 1.5)
ax2.set_xlim(min(x), max(x))
    
cluster_lines = []
for i in range(cluster_prob.shape[1]):
    line, = ax2.plot(x, cluster_prob[0, i])
    cluster_lines.append(line)
#plt.show()

def update_anim(i):
    ax.plot(x, forecast[i], '.' , alpha=0.5 , color='orange')
    # Update the cluster probs
    for m in range(cluster_prob.shape[1]):
        cluster_lines[m].set_data(x, cluster_prob[i, m])
animation.FuncAnimation(fig, update_anim, range(len(forecast)), blit=False, interval=350, repeat=True)
Out[208]:
In [206]:
# input: number of iterations, x, y, number of clusters, number of coefficients
def cwm(count, X, Y, m, nCoeff): 

    # hold forecasts
    forecast = np.zeros((count, num)) # num = number of datapoints
    foreErr = np.zeros((count, num))
    # cluster probability
    cProb = np.zeros((count, m, num))
    # initialize cluster mult uniformly
    mu = np.random.uniform(min(X), max(Y), m)
    
    # equal probability for all clusters
    pm = np.ones(m) / m
    
    # variance for in and output
    varX = np.ones(m)
    varY = np.ones(m)
    beta = np.ones((m, nCoeff))
      
    for all in range(count):
        # see 16.24
        pxc = np.zeros((num, m))
        for i in range(num):
            for j in range(m):
                pxc[i, j] = np.exp((- (X[i] - mu[j]) ** 2) / (2 * varX[j]) ) / np.sqrt(2 * np.pi * varX[j])
                
        for j in range(m):
            for i in range(num):
                cProb[all, j, i] = pxc[i, j] * pm[j]

        # see 16.26
        def f(x, coeff):
            s = 0
            for i in range(len(coeff)):
                s += coeff[i] * (x ** i)
            return s
        pyxc = np.zeros((num, m))
        for i in range(num):
            for j in range(m):
                pyxc[i, j] = np.exp((- (Y[i] - f(X[i], beta[j])) ** 2) / (2 * varY[j]) ) / np.sqrt(2 * np.pi * varY[j])

        # see 16.27
        for i in range(num):
            nom = 0
            denom = 0
            for j in range(m):
                nom += f(X[i], beta[j]) * pxc[i, j] * pm[j]
                denom += pxc[i, j] * pm[j]
            forecast[all, i] = nom / denom
            
            # 16.28 error
            errorNom = 0
            for j in range(m):
                errorNom += (varY[j] + f(X[i], beta[j]) ** 2) * pxc[i, j] * pm[j]
            foreErr[all, i] = errorNom / denom - forecast[all, i] ** 2
        
        # P(c_m | y, x) - equation (16.29)
        pcyx = np.zeros((num, m))
        for i in range(num):
            for j in range(m):
                pcyx[i, j] = pyxc[i, j] * pxc[i, j] * pm[j]
            pcyx[i] /= np.sum(pcyx[i])

        # 16.30 update pm
        for i in range(m):
            pm[i] = np.mean(pcyx[:, i])

        # 16.32 input var update
        for i in range(m):
            varX[i] = np.sum(np.multiply((X - mu[i])**2, pcyx[:, i])) / np.sum(pcyx[:, i])
            # add a small constant
            varX[i] += 0.1

        # 16.31 mu update
        for i in range(m):
            mu[i] = np.sum(np.multiply(X, pcyx[:, i])) / np.sum(pcyx[:, i])

        # local linear    
        for i in range(m):
            a = np.zeros(nCoeff)
            for j in range(nCoeff):
                a[j] = np.sum(np.multiply(np.multiply(Y, X**j), pcyx[:, i])) / np.sum(pcyx[:, i])
            B = np.zeros((nCoeff, nCoeff))
            for j in range(nCoeff):
                for k in range(nCoeff):
                    B[j, k] = np.sum(np.multiply(np.multiply(X**j, X**k), pcyx[:, i])) / np.sum(pcyx[:, i])
            ## matrix invert    
            Binv = np.linalg.inv(B)
            beta[i] = Binv.dot(a)

        # 16.37 update varY for output
        for i in range(m):
            varY[i] = np.sum(np.multiply((Y - f(X, beta[i]))**2, pcyx[:, i])) / np.sum(pcyx[:, i])
            # add a small constant
            varY[i] += 0.1
    # give us a forecast, a forecast error and the cluster probability to work with
    return forecast, foreErr, cProb