Compressed Sensing: Touch tone phone example

Plumbing for sound in IPython

In [1]:
from __future__ import division, absolute_import
from numpy import *
#from sound import *
import struct
import sys
import warnings
from IPython.core.display import HTML, display
%pylab inline
# wavPlayer is a wrapper that takes an array of samples and publishes an HTML5 <audio> tag to play it


import StringIO
import base64

wav_rate = 44100 #44.1 khz
#http://nbviewer.ipython.org/github/Carreau/posts/blob/master/07-the-sound-of-hydrogen.ipynb
def wavPlayer(data_in,max_amp=1): 
    """ will display html 5 player for compatible browser
    The browser need to know how to play wav through html5.
    there is no autoplay to prevent file playing when the browser opens
    Adapted from SciPy.io.
    
    SEC: added max_amp to determine mapping
    """
    data = 2**13/max_amp*data_in #scale so max_amp maps to 2**13 (int16 range with headroom)
    data = data.astype(np.int16)
    
    buffer = StringIO.StringIO()
    buffer.write(b'RIFF')
    buffer.write(b'\x00\x00\x00\x00')
    buffer.write(b'WAVE')

    buffer.write(b'fmt ')
    if data.ndim == 1:
        noc = 1
    else:
        noc = data.shape[1]
    bits = data.dtype.itemsize * 8
    sbytes = wav_rate*(bits // 8)*noc
    ba = noc * (bits // 8)
    buffer.write(struct.pack('<ihHIIHH', 16, 1, noc, wav_rate, sbytes, ba, bits))

    # data chunk
    buffer.write(b'data')
    buffer.write(struct.pack('<i', data.nbytes))

    if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
        data = data.byteswap()

    buffer.write(data.tostring())
#    return buffer.getvalue()
    # Determine file size and place it in correct
    #  position at start of the file.
    size = buffer.tell()
    buffer.seek(4)
    buffer.write(struct.pack('<i', size-8))
    
    val = buffer.getvalue()
    
    src = """
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <title>Simple Test</title>
    </head>
    
    <body>
    <audio controls="controls" style="width:600px" >
      <source controls src="data:audio/wav;base64,{base64}" type="audio/wav" />
      Your browser does not support the audio element.
    </audio>
    </body>
    """.format(base64=base64.encodestring(val))
    display(HTML(src))
Populating the interactive namespace from numpy and matplotlib

In [2]:
#test sound
time = linspace(0,1.,wav_rate) #one second of samples at 44.1 kHz
wavPlayer(sin(2*pi*440*time), max_amp=1.);

Implementing a touch tone phone

In [3]:
#DTMF (touch tone) frequencies: each key is the sum of one row tone and one column tone
phone = {'1': [697,1209],'2': [697,1336],'3': [697,1477],
         '4': [770,1209],'5': [770,1336],'6': [770,1477],
         '7': [852,1209],'8': [852,1336],'9': [852,1477],
         '*': [941,1209],'0': [941,1336],'#': [941,1477]}
touch = lambda s,t: sin(phone[s][0]*2*pi*t) + sin(phone[s][1]*2*pi*t)
In [4]:
key = '9'
dur = .2; N = int(wav_rate*dur); dt = dur/N
t = linspace(0,dur,N)
plt.xlim(0,.01)
signal = touch(key,t)
plot(t,signal)
wavPlayer(signal, max_amp=1.);

n_samples = int(.035*N) #pick a small fraction of the samples
K = sort(random.choice(N,size=n_samples,replace=False)) #random subset of sample indices, without repeats
sampled = touch(key,t[K])
plot(t[K],sampled,linestyle='',marker='o')
Out[4]:
[<matplotlib.lines.Line2D at 0x10a6be550>]
In [76]:
#test round trip
#plot(t,signal,c='g')
#X =fft.fft(signal)
#xx = fft.ifft(X)
#plot(t,xx,c='b')
#plt.xlim([0,.01])
#plot(t[K],sampled,linestyle='',marker='o',c='g')
In [5]:
def plot_fft(s):
    sp = 1/sqrt(N)*fft.fft(s).real #take amplitudes from real part
    freq = arange(N)/(dt*N)
    plot(freq, sp)
    plt.xlim([0,2000])
    for f in phone[key]:
        y = max(-amin(sp),amax(sp))
        plot([f,f],[-1.5*y,1.5*y],c='g',linestyle='--')
plot_fft(signal)

Setting up the compressed sensing problem

In [6]:
#inverse DFT (synthesis) matrix
omega = lambda f: exp(2j*pi*f)
def idft(n,k=None):
    if k is not None:
        #only build the rows of the inverse DFT corresponding to the sample indices k
        return 1/sqrt(n)*omega(arange(n)*k.reshape(-1,1)/n)
    else:
        return 1/sqrt(n)*omega(arange(n)*arange(n).reshape(-1,1)/n)
A = idft(N,K)

Now we can phrase the linear constraint \(Ax=b\), which guarantees that our reconstruction reproduces the sampled values at the sample points. The first step is to use the SVD to find the minimum \(L_2\)-norm solution and show that it is not the answer we want.

A Bad Idea: Minimize \(L_2\) norm with SVD
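To be concrete about what the next cell computes: among all \(x\) satisfying \(Ax=b\), the minimum \(L_2\)-norm solution is given by the SVD-based pseudoinverse. Writing \(A = U\Sigma V^H\),

\[ x_{L_2} = A^+ b = V\,\Sigma^+ U^H b, \]

where \(\Sigma^+\) inverts the nonzero singular values and zeros out the rest. The cell below assembles exactly these pieces by hand.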

In [5]:
H = lambda x: conjugate(x.T)
U,w,V = linalg.svd(A)
V = H(V) #linalg.svd returns V^H; take the conjugate transpose to recover V
wz = where(logical_not(isclose(w,0)))
w_inv = zeros_like(w); w_inv[wz] = 1./(w[wz])
n = shape(V)[0]; m = shape(U)[1]
W_inv = zeros((n,m)); W_inv[:m,:m] = diag(w_inv)
c = dot(dot(V,dot(W_inv,H(U))), sampled) #A^+ b = V W^+ U^H b (A is complex, so use U^H rather than U^T)
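As a quick sanity check, the hand-assembled pseudoinverse should agree with numpy's built-in, SVD-based linalg.pinv (a minimal check, assuming A, sampled, and c from the cells above are still in the namespace):

#sanity check: numpy's pseudoinverse should give the same minimum-L2-norm solution
c_pinv = dot(linalg.pinv(A), sampled)
print(abs(c_pinv - c).max()) #if the pieces above are assembled correctly, this should be near machine precision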
In [6]:
freq = arange(shape(c)[-1])/(dt*N)
plot(freq,c)
plt.xlim([0,3000])
for f in phone[key]: plot([f,f],[1.5*amin(c),1.5*amax(c)],c='g',linestyle='--')
/usr/local/lib/python2.7/site-packages/numpy/core/numeric.py:460: ComplexWarning: Casting complex values to real discards the imaginary part
  return array(a, dtype, copy=False, order=order)

In [7]:
#f = sqrt(N)*irfft(c[:N/2+1])
f = dot(idft(N),c)
plot(t,f)
plt.xlim([0,.005])
plt.ylim([-4,4])
plot(t[K],sampled,linestyle='',marker='o')
Out[7]:
[<matplotlib.lines.Line2D at 0x10ba53e50>]
In [13]:
wavPlayer(f, max_amp=1.);
-c:25: ComplexWarning: Casting complex values to real discards the imaginary part


A better idea: Sparse Reconstruction with a smoothed \(L_1\) norm approximation
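The smoothed norm implemented below replaces each \(|x_i|\) with a softplus-style approximation,

\[ |x|_{1,\alpha} = \sum_i \frac{1}{\alpha}\Big[\log\big(1+e^{-\alpha x_i}\big) + \log\big(1+e^{\alpha x_i}\big)\Big], \]

which is smooth everywhere and converges to \(\|x\|_1\) as \(\alpha \to \infty\): for a single coordinate, \(\frac{1}{\alpha}\log(1+e^{\alpha x}) \to \max(x,0)\), so the bracket tends to \(\max(-x,0)+\max(x,0)=|x|\).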

In [15]:
def sl1(x,alpha):
    return 1/alpha*( log(1+exp(-alpha*x)) + log(1+exp(alpha*x)) )
def dsl1(x,alpha):
    return 1/(1+exp(-alpha*x)) - 1/(1+exp(alpha*x))
def ddsl1(x,alpha):
    return 2*alpha*exp(alpha*x)/(1+exp(alpha*x))**2
x = linspace(-2,2,200)
plt.figure(figsize=(12,3.3))
plt.subplot(131)
gca().set_title('smooth l_1 norm')
for a in [1,1.5,3,10,100]:
    plot(x,sl1(x,a),label='a=%.1f'%a)
plt.legend(loc='lower right',prop={'size':8})
plt.subplot(132)
gca().set_title('smooth l_1 norm first derivative')
for a in [1,1.5,3,10,100]:
    plot(x,dsl1(x,a),label='a=%.1f'%a)
plt.legend(loc='lower right',prop={'size':8})
plt.subplot(133)
gca().set_title('smooth l_1 norm second derivative')
for a in [1,1.5,3,10,100]:
    plot(x,ddsl1(x,a),label='a=%.1f'%a)
plt.legend(loc='upper right',prop={'size':8})
Out[15]:
<matplotlib.legend.Legend at 0x1132baa10>

We want to minimize this smoothed \(L_1\) norm subject to the constraint \(Ax-b=0\). To do this, we'll use a penalty method, minimizing \(f=|x|_{1,\alpha} + \frac{\mu}{2}(Ax-b)'(Ax-b)\). We have the derivatives above, so we need to decide how many of them to use.

\(\nabla f = \nabla |x|_{1,\alpha} + \mu A^\top(Ax-b)\)

I'm going to use just the gradient and run conjugate gradient. I have a bad feeling about trying any second-order methods, given that the penalty term has constant curvature and the second derivative of the smoothed \(l_1\) approaches a delta function...
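One way to see the concern: the Hessian of the penalized objective is

\[ \nabla^2 f = \nabla^2 |x|_{1,\alpha} + \mu A^\top A, \]

where \(\nabla^2 |x|_{1,\alpha}\) is diagonal with entries given by the second derivative plotted above. That term collapses into an ever taller, narrower spike at zero as \(\alpha\) grows (it integrates to 2, so it behaves like \(2\delta(x)\)), while the penalty contributes a fixed matrix. Near a sparse solution a few coordinates sit on the spike and the rest sit in the flat region, so the Hessian becomes badly conditioned and a pure Newton step looks unattractive.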

In [27]:
from opt import * #my line search and cg routines
normsq = lambda x: dot(x,x)
mu = 12.
alpha = 20.
F = lambda x: sum(sl1(x,alpha),axis=0) + .5*mu*normsq(dot(A,x)-sampled)
dF = lambda x: dsl1(x,alpha) + mu*dot(A.T,dot(A,x)-sampled)
x0 = -.1+.2*random.rand(shape(A)[1]) #random start in [-.1,.1]
#for mu,alpha in zip( [1,1,1,5,10,10,20], [1,2,3,3,3,3,5,5] ):
for mu in [1,10,30,40]:
    for alpha in [5,40]:
        x0 = conj_grad(F,dF,x0,max_iter=10)
Conjugate Gradient terminating after 3 iterations
Conjugate Gradient terminating after 5 iterations
Conjugate Gradient terminating after 2 iterations
Conjugate Gradient terminating after 3 iterations
Conjugate Gradient terminating after 8 iterations
Conjugate Gradient terminating after 6 iterations
Conjugate Gradient terminating after 10 iterations
Conjugate Gradient terminating after 5 iterations

In [28]:
freq = arange(shape(x0)[-1])/(dt*N)
plot(freq,x0)
plt.xlim([0,3000])
for f in phone[key]: plot([f,f],[1.5*amin(x0),1.5*amax(x0)],c='g',linestyle='--')

Two peaks!

In [29]:
X = x0[:N//2] #keep the first half of the spectrum (positive frequencies)
fs = freq[where(X>.5*amax(X))[0]].reshape(-1,1)
print "Signal appears %d-sparse."%(shape(fs)[0])
plot(t,sum(sin(2*pi*fs*t),axis=0))
plt.xlim([0,.005])
plot(t[K],sampled,linestyle='',marker='o')
wavPlayer(sum(sin(2*pi*fs*t),axis=0), max_amp=1.);
Signal appears 2-sparse.

In [72]:
#f = dot(idft(N),x0)
#plt.xlim([0,.005])
#plot(t,f)
#plt.ylim([-4,4])
#plot(t[K],sampled,linestyle='',marker='o')

\(L_1\) minimization with equality constraints can be recast as a linear program (if \(x\), \(A\), and \(b\) are real... so maybe this is why Cleve used the DCT instead of the DFT): http://users.ece.gatech.edu/~justin/l1magic/downloads/l1magic.pdf
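For reference, the standard recast (the formulation used in the l1-magic notes linked above) introduces auxiliary variables \(u\) that bound \(|x|\) coordinatewise:

\[ \min_x \|x\|_1 \;\text{s.t.}\; Ax=b \quad\Longleftrightarrow\quad \min_{x,u} \sum_i u_i \;\text{s.t.}\; -u_i \le x_i \le u_i,\; Ax=b, \]

which is a linear program in \((x,u)\) when everything is real.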

Just for comparison, let's look at the sampling rate the Nyquist-Shannon theorem says we would need.

In [23]:
f_max = max(phone[key])
nyquist = 2*f_max
print "Nyquist says we should have %d samples (%d Hz for %.2f s)"%(nyquist*dur,f_max,dur)
print "We have %d."%n_samples
Nyquist says we should have 590 samples (2 x 1477 Hz for 0.20 s)
We have 308.

Debugging: a quick test of the CG routine on a quadratic

In [2]:
a = [1,10]
def quad(v):
    if len(shape(v))==1:
        return a[0]*v[0]**2 + a[1]*v[1]**2
    else:
        return a[0]*v[...,0]**2 + a[1]*v[...,1]**2
def d_quad(v): 
    if len(shape(v))==1:
        return array([2*a[0]*v[0],2*a[1]*v[1]])
    else:
        return array([2*a[0]*v[...,0],2*a[1]*v[...,1]])
In [3]:
x0 = array([-1.1,-1.])
n_it = 5
x = empty((n_it+1,2))
x[0] = x0
for mi in range(n_it):
    x[mi+1] = conj_grad(quad,d_quad,x0,max_iter=mi)
#x = conj_grad(quad,d_quad,x0,max_iter=20)
plot(x[:,0],x[:,1],c='g',marker='.')
n_grid=100 #grid resolution for the contour plot (avoid clobbering the global N above)
x = linspace(-1.5,2,n_grid); y = linspace(-1.5,2,n_grid);
X,Y = meshgrid(x,y)
plt.contour(X, Y, quad(dstack((X,Y))), levels=[.3,1.,2.,10.,50])
Out[3]:
<matplotlib.contour.QuadContourSet instance at 0x10b4d9710>