Prove that the DFT is unitary.
In order to prove that the Discrete Fourier Transform is unitary, we first need the DFT matrix, $W$:
$W = \frac{1}{\sqrt{N}} \left[\begin{array}{cccccc} 1 & 1 & 1 & 1 & \cdots & 1 \\ 1 & \omega & \omega^2 & \omega^3 & \cdots & \omega^{N-1} \\ 1 & \omega^2 & \omega^4 & \omega^6 & \cdots & \omega^{2(N-1)} \\ 1 & \omega^3 & \omega^6 & \omega^9 & \cdots & \omega^{3(N-1)} \\ \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ 1 & \omega^{N-1} & \omega^{2(N-1)} & \omega^{3(N-1)} & \cdots & \omega^{(N-1)(N-1)} \end{array}\right], \quad \omega = e^{\frac{2 \pi i}{N}}$
We must prove that
$WW^* = W^*W = I$
We examine each element of the product. Since $|\omega| = 1$, conjugation gives $\overline{\omega^{jk}} = \omega^{-jk}$, so
$(WW^*)_{ij} = \sum\limits_{k=0}^{N-1} W_{ik}\,\overline{W_{jk}} = \left(\frac{1}{\sqrt{N}}\right)^2 \sum\limits_{k=0}^{N-1} \omega^{(i-j)k} = \frac{1}{N}\sum\limits_{k=0}^{N-1} \omega^{(i-j)k}$
Along the diagonal, $i - j = 0$, so every term in the sum is $1$ and the sum is $N$, giving
$\frac{1}{N}(N) = 1$
So all values along the diagonal are $1$. For the off-diagonal values, $0 < |i - j| < N$, so $\omega^{i-j} \ne 1$ and the sum is a finite geometric series:
$(WW^*)_{ij} = \frac{1}{N}\sum\limits_{k=0}^{N-1} \left(\omega^{i-j}\right)^k = \frac{1}{N} \cdot \frac{1 - \omega^{(i-j)N}}{1 - \omega^{i-j}} = 0$
This is because
$\omega^N = \left(e^{\frac{2 \pi i}{N}}\right)^N = e^{2 \pi i} = 1, \quad \text{so} \quad \omega^{(i-j)N} = 1^{\,i-j} = 1$
Therefore, all values along the diagonal are $1$ and all others are $0$, so $WW^* = I$. Since $W$ is square, $W^*$ is then both a right and a left inverse, so $W^*W = I$ as well, proving that the DFT is unitary.
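As a quick numerical sanity check (not part of the proof), we can build $W$ for a small $N$ and confirm that both products give the identity to machine precision:
import numpy as np
N = 8
n = np.arange(N)
W = np.exp(2j*np.pi*np.outer(n, n)/N)/np.sqrt(N)   # DFT matrix as defined above
print(np.allclose(W @ W.conj().T, np.eye(N)))      # expect True
print(np.allclose(W.conj().T @ W, np.eye(N)))      # expect True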
(a) Generate and plot a time series $\{t_j\}$ for the sum of two sine waves at 697 and 1209 Hz (the DTMF tone for the number 1 key), sampling it at 10,000 samples per second for 0.25 second ($N = 2500 \text{ points}$).
# A blend of my own code and Neil's cs.py
import matplotlib.pyplot as plt
import numpy as np
f1 = 697
f2 = 1209
f_s = 10000
dt = 1/f_s
t_tot = 0.25
t = np.arange(0, t_tot, dt)
N = len(t)
x1 = np.sin(2*np.pi*f1*t)
x2 = np.sin(2*np.pi*f2*t)
x_sum = x1 + x2
plt.plot(t, x_sum)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Summed Signal")
plt.show()
plt.plot(t, x_sum)
plt.xlim(0.01, 0.02)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Summed Signal (Zoomed)")
plt.show()
(b) Calculate and plot the Discrete Cosine Transform (DCT) coefficients $\{f_i\}$ for these data, defined by their multiplication by the matrix $f_i = \sum_{j=0}^{N-1} D_{ij}t_j$, where
$D_{ij} = \left\{ \begin{array}{ll} \sqrt{\frac{1}{N}} & (i = 0) \\ \sqrt{\frac{2}{N}}\cos\!\left(\frac{\pi (2j + 1) i}{2N}\right) & (1 \le i \le N - 1) \end{array} \right.$
D = np.zeros((N,N))
# build the orthonormal DCT-II matrix defined above, one column at a time
for i in range(N):
    D[:,i] = np.sqrt(2/N)*np.cos(np.pi*(2*i+1)*np.arange(N)/(2*N))
D[0,:] *= 1/np.sqrt(2)   # the i = 0 row gets sqrt(1/N) instead of sqrt(2/N)
transform = D@x_sum
freq = np.arange(N)/(2*t_tot)
plt.plot(freq, transform)
plt.xlim(400, 1500)
plt.xlabel("Frequency (Hz)")
plt.ylabel("Amplitude")
plt.title("DCT")
plt.show()
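As a sanity check (assuming SciPy is available), this orthonormal DCT-II definition should agree with scipy.fft.dct using norm="ortho":
from scipy.fft import dct
print(np.allclose(transform, dct(x_sum, norm="ortho")))   # expect True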
(c) Plot the inverse transform of the $\{f_i\}$ by multiplying them by the inverse of the DCT matrix (which is equal to its transpose) and verify that it matches the time series.
inverse = np.transpose(D)@transform
plt.plot(t, inverse)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Inverse DCT")
plt.show()
plt.plot(t, inverse)
plt.xlim(0.01, 0.02)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Inverse DCT (Zoomed)")
plt.show()
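Beyond comparing the plots, the round trip can be checked numerically: since $D$ is orthogonal, $D^T D = I$ and the reconstruction should match the original time series to machine precision.
print(np.allclose(inverse, x_sum))   # expect True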
(d) Randomly sample and plot a subset $\{t_k'\}$ of 5% of the $\{t_j\}$ ($M = 125$ points).
M = int(N*0.05)
index = (np.random.rand(M)*N).astype(int)
rand = x_sum[index]
plt.plot(t, x_sum)
plt.scatter(t[index], rand)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Random Samples")
plt.show()
plt.plot(t, x_sum)
plt.scatter(t[index], rand)
plt.xlim(0.01, 0.02)
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.title("Random Samples (Zoomed)")
plt.show()
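Note that drawing indices with np.random.rand can repeat points, so slightly fewer than $M$ distinct samples may end up being used; a sketch of an alternative that samples without replacement is:
index = np.random.choice(N, M, replace=False)
rand = x_sum[index]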
(e) Starting with a random guess for the DCT coefficients $\{f_i'\}$, use gradient descent to minimize the error at the sample points
$\min\limits_{\{f_i'\}} \sum\limits_{k=0}^{M-1} \left(t_k' - \sum\limits_{i = 0}^{N-1} D_{ik}f_i'\right)^2$
and plot the resulting estimated coefficients.
def gradmin(f, df, start, scale, steps):
    x = np.copy(start)
    for i in range(steps):
        x -= scale*df(x)
    return x
def f(x):
    global rand, index, D
    inv = np.transpose(D)@x
    fn = np.sum((inv[index] - rand)**2)
    return fn
def df(x):
    global rand, index, D
    inv = np.transpose(D)@x
    grad = np.zeros(N)
    for i in range(N):
        grad[i] = np.sum(2*(inv[index] - rand)*D[i,index])
    return grad
start = np.random.rand(N)
scale = 0.1
steps = 100
fit = gradmin(f, df, start, scale, steps)
plt.plot(freq, fit)
plt.xlim(400, 1500)
plt.xlabel("Frequency (Hz)")
plt.ylabel("Amplitude")
plt.title("Minimize Error")
plt.show()
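The explicit loop over $N$ in df is slow for $N = 2500$; an equivalent vectorized gradient (a sketch, computing the same quantity from the same rand, index, and D; the name df_vec is mine) is:
def df_vec(x):
    # gradient of the squared error at the sampled points, without the Python loop
    inv = np.transpose(D)@x
    return 2*D[:,index]@(inv[index] - rand)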
(f) The preceding minimization is under-constrained; it becomes well-posed if a norm of the DCT coefficients is minimized subject to a constraint of agreeing with the sampled points. One of the simplest (but not best, see Chapter 17) ways to do this is by adding a penalty term to the minimization. Repeat the gradient descent minimization using the L2 norm:
$\min\limits_{\{f_i'\}} \sum\limits_{k=0}^{M-1} \left(t_k' - \sum\limits_{i=0}^{N-1} D_{ik}f_i'\right)^2 + \sum\limits_{i=0}^{N-1} f_i^{\prime 2}$
and plot the resulting estimated coefficients.
def f(x):
    global rand, index, D
    inv = np.transpose(D)@x
    fn = np.sum((inv[index] - rand)**2) + np.sum(x**2)
    return fn
def df(x):
    global rand, index, D
    inv = np.transpose(D)@x
    grad = np.zeros(N)
    for i in range(N):
        grad[i] = np.sum(2*(inv[index] - rand)*D[i,index]) + 2*x[i]
    return grad
start = np.random.rand(N)
scale = 0.1
steps = 100
fit = gradmin(f, df, start, scale, steps)
plt.plot(freq, fit)
plt.xlim(400, 1500)
plt.xlabel("Frequency (Hz)")
plt.ylabel("Amplitude")
plt.title("Minimize Error + L2")
plt.show()
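Because the L2-penalized objective is quadratic, it also has a closed-form ridge-regression solution that can be used to check the gradient-descent result. A sketch, where Ds denotes the sampled columns of D (my notation):
Ds = D[:,index]                                            # N x M, columns at the sampled times
f_ridge = Ds@np.linalg.solve(Ds.T@Ds + np.eye(M), rand)    # ridge solution of the penalized least squares
The system solved here is only M x M, so the direct solution is cheap.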
(g) Repeat the gradient descent minimization using the L1 norm:
$\min\limits_{\{f_i'\}} \sum\limits_{k=0}^{M-1} \left(t_k' - \sum\limits_{i=0}^{N-1} D_{ik}f_i'\right)^2 + \sum\limits_{i=0}^{N-1} |f_i^{\prime}|$
and plot the resulting estimated coefficients, compare to the L2 norm estimate, and compare $M$ to the Nyquist sampling limit of twice the highest frequency.
def f(x):
    global rand, index, D
    inv = np.transpose(D)@x
    fn = np.sum((inv[index] - rand)**2) + np.sum(np.abs(x))
    return fn
def df(x):
    global rand, index, D
    inv = np.transpose(D)@x
    grad = np.zeros(N)
    for i in range(N):
        grad[i] = np.sum(2*(inv[index] - rand)*D[i,index]) + np.sign(x[i])
    return grad
start = np.random.rand(N)
scale = 0.1
steps = 100
fit = gradmin(f, df, start, scale, steps)
plt.plot(freq, fit)
plt.xlim(400, 1500)
plt.xlabel("Frequency (Hz)")
plt.ylabel("Amplitude")
plt.title("Minimize Error + L1")
plt.show()
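For the Nyquist comparison: the highest frequency present is 1209 Hz, so uniform sampling would require at least $2 \times 1209 = 2418$ samples per second, or about $2418 \times 0.25 \approx 605$ points over the 0.25 s record. The $M = 125$ random samples used here are well below that limit. The L1 penalty favors sparse coefficient vectors, so it should isolate the two tones more cleanly than the L2 penalty, which shrinks all coefficients toward zero without driving most of them to zero.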
Calculate the inverse wavelet transform, using Daubechies fourth-order coefficients, of a vector of length $2^{12}$, with a 1 in the 5th and 30th places and zeros elsewhere.
# From Neil's invwave.py
import matplotlib.pyplot as plt
import numpy as np
n_order = 12
n_pts = 2**n_order
c0 = (1 + np.sqrt(3))/(4*np.sqrt(2))
c1 = (3 + np.sqrt(3))/(4*np.sqrt(2))
c2 = (3 - np.sqrt(3))/(4*np.sqrt(2))
c3 = (1 - np.sqrt(3))/(4*np.sqrt(2))
w = np.zeros(n_pts)
w_new = np.zeros(n_pts)
w[4] = 1    # 5th place (0-indexed)
w[29] = 1   # 30th place
for order in range(1, n_order):
    N = 2**(order + 1)
    # un-interleave: smooth coefficients go to even positions, details to odd
    w_new[0:N] = w[0:N]
    w[0:N:2] = w_new[0:N//2]
    w[1:N:2] = w_new[N//2:N]
    # inverse D4 step; the first pair wraps around within the current block of length N
    w_new[0] = c0*w[0] + c3*w[1] + c2*w[N - 2] + c1*w[N - 1]
    w_new[1] = c1*w[0] - c2*w[1] + c3*w[N - 2] - c0*w[N - 1]
    for i in range(2, N, 2):
        w_new[i] = c2*w[i-2] + c1*w[i-1] + c0*w[i] + c3*w[i+1]
        w_new[i+1] = c3*w[i-2] - c0*w[i-1] + c1*w[i] - c2*w[i+1]
    w[0:N] = w_new[0:N]
plt.plot(w)
plt.show()
Consider a measurement of a three-component vector $\vec{x}$, with $x_1$ and $x_2$ being drawn independently from a Gaussian distribution with zero mean and unit variance, and $x_3 = x_1 + x_2$.
(a) Analytically calculate the covariance matrix of $\vec{x}$.
$\langle x_1^2 \rangle = \langle x_2^2 \rangle = 1, \; \langle x_1 x_2 \rangle = 0, \; \langle (x_1 + x_2)^2 \rangle = \langle x_1^2 \rangle + \langle x_2^2 \rangle = 2$
$C_x = \left[ \begin{array}{ccc} 1 & 0 & 1 \\ 0 & 1 & 1 \\ 1 & 1 & 2 \end{array} \right]$
(b) What are the eigenvalues?
$|C_x - \lambda I| = 0$
$(1 - \lambda)[(1 - \lambda)(2 - \lambda) - 1] - (1 - \lambda) = 0$
One root is $\lambda = 1$, since $(1 - \lambda)$ is a common factor; dividing it out leaves
$(1 - \lambda)(2 - \lambda) - 2 = \lambda^2 - 3\lambda = 0$
which gives the other two roots, $\lambda = 0$ and $\lambda = 3$.
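A one-line check of the analytic matrix itself (separate from the data-based verification in part (c)):
import numpy as np
print(np.linalg.eigvals(np.array([[1., 0., 1.], [0., 1., 1.], [1., 1., 2.]])))   # ~ [3, 1, 0] in some order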
(c) Numerically verify these results by drawing a data set from the distribution and computing the covariance matrix and eigenvalues.
# From Neil's pca.py code
import numpy as np
eps = 1e-10
N = 1000000
np.set_printoptions(precision=3,suppress=True)
x = np.random.randn(1, N)
x = np.concatenate((x, np.random.randn(1, N)))
x = np.concatenate((x, np.reshape(x[0,:] + x[1,:], (1, N))))
Cx = np.cov(x)
print('covariance of x:')
print(Cx)
v, Mt = np.linalg.eig(Cx)
print('eigenvalues of covariance of x:')
print(v)
(d) Numerically find the eigenvectors of the covariance matrix, and use them to construct a transformation to a new set of variables $\vec{y}$ that have a diagonal covariance matrix with no zero eigenvalues. Verify this on the data set.
M = np.transpose(Mt)
index = (v > eps)
M = M[index,:]
y = np.matmul(M,x)
Cy = np.cov(y)
print('covariance of y:')
print(Cy)
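The printed Cy should be diagonal, with the two nonzero eigenvalues (1 and 3) on the diagonal; this can be checked directly:
print(np.allclose(Cy, np.diag(np.diag(Cy)), atol=1e-2))   # expect True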
Generate pairs of uniform random variables $\{s_1, s_2\}$ with each component contained in [0, 1].
(a) Plot these data.
# Adapted from Neil's ica.r code
import matplotlib.pyplot as plt
import numpy as np
N = 1000
eps = 1e-6
s1 = np.random.uniform(0, 1, N)
s2 = np.random.uniform(0, 1, N)
plt.scatter(s1, s2)
plt.title("data")
plt.show()
(b) Mix them ($\vec{x} = A \cdot \vec{s}$) with a square matrix $A = \left[ \begin{array}{ll} 1 & 2 \\ 3 & 1 \\ \end{array} \right]$ and plot.
s = np.array([s1, s2])
A = np.array([[1, 2], [3, 1]])
x = A.dot(s)
plt.scatter(x[0], x[1])
plt.title("mixed with A")
plt.show()
(c) Make $\vec{x}$ zero mean, diagonalize with unit variance, and plot.
x_avg_0 = np.sum(x[0])/N
x_avg_1 = np.sum(x[1])/N
for i in range(N):
    x[0][i] = x[0][i] - x_avg_0
    x[1][i] = x[1][i] - x_avg_1
plt.scatter(x[0], x[1])
plt.title("zero mean")
plt.show()
C = np.cov(x)
M = np.transpose(np.linalg.eig(C)[1])
x = np.matmul(M, x)
plt.scatter(x[0], x[1])
plt.title("diagonalized")
plt.show()
v = np.linalg.eig(C)[0]
x = np.matmul(np.diag(1/np.sqrt(v)), x)
plt.scatter(x[0], x[1])
plt.title("normalized")
plt.show()
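After this whitening step the covariance of the transformed data should be the identity, which can be confirmed numerically:
print(np.allclose(np.cov(x), np.eye(2), atol=1e-2))   # expect True: unit variance, no correlation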
(d) Find the independent components of $\vec{x}$ with the $\text{log}\ \text{cosh}$ contrast function, and plot.
# log cosh contrast function and its first two derivatives: g = tanh, g' = 1 - tanh^2
def f(x):
    return np.log(np.cosh(x))
def df(x):
    return np.tanh(x)
def ddf(x):
    return 1 - np.tanh(x)**2
w1 = np.random.uniform(0, 1, 2)
w1 = np.array([[w1[0]],[w1[1]]])
w1 = w1 / np.sqrt(np.sum(w1**2))
err = 1
i = 0
while (err > eps):
    i += 1
    temp0 = np.matmul(np.transpose(w1), x)     # w1^T x, shape (1, N)
    df_temp0 = df(temp0)
    ddf_temp0 = ddf(temp0)
    temp1 = np.zeros((2,N))
    for j in range(N):
        temp1[0][j] = df_temp0[0][j]
        temp1[1][j] = df_temp0[0][j]
    temp2 = x*temp1                            # x g(w1^T x)
    temp2_avg_0 = np.sum(temp2[0])/N
    temp2_avg_1 = np.sum(temp2[1])/N
    temp2_avg = np.array([[temp2_avg_0], [temp2_avg_1]])
    ddf_temp0_avg = np.sum(ddf_temp0)/N
    # fixed-point update: w <- E[x g(w^T x)] - E[g'(w^T x)] w, then renormalize
    w1_new = temp2_avg - ddf_temp0_avg*w1
    w1_new = w1_new / np.sqrt(np.sum(w1_new**2))
    err = np.sum(np.abs(w1 - w1_new))
    w1 = w1_new
print(i, "iterations, error: ", err)
# second unmixing vector: orthogonal to w1 in two dimensions
w2 = np.zeros((1,2))
w2[0][0] = w1[1,0]
w2[0][1] = -w1[0,0]
W = np.zeros((2,2))
W[0][0] = w1[0,0]
W[0][1] = w1[1,0]
W[1][0] = w2[0][0]
W[1][1] = w2[0][1]
y = np.matmul(W, x)
plt.scatter(y[0], y[1])
plt.title("ICA")
plt.show()
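As a cross-check (assuming scikit-learn is available, and noting that its FastICA expects samples in rows), the separation can be compared against a library implementation:
from sklearn.decomposition import FastICA
ica = FastICA(n_components=2)
y_sk = ica.fit_transform(x.T).T    # recovered sources, up to order and scaling
plt.scatter(y_sk[0], y_sk[1])
plt.title("ICA (sklearn cross-check)")
plt.show()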