In [1]:
from __future__ import division

"""
Independent component analysis (ICA) is used to estimate 
sources given noisy measurements. Imagine 2 persons speaking 
simultaneously and 2 microphones recording the mixed signals. 
ICA is used to recover the sources ie. what is said by each person.
"""
import sys
from io import BytesIO
import base64
import struct  

from IPython.display import display
from IPython.core.display import HTML
import time
from scipy.io import wavfile
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import FastICA
from IPython.display import Audio
In [2]:
def wavPlayer(data, rate):
    """ will display html 5 player for compatible browser
    The browser need to know how to play wav through html5.
    there is no autoplay to prevent file playing when the browser opens
    Adapted from SciPy.io. and
    github.com/Carreau/posts/blob/master/07-the-sound-of-hydrogen.ipynb
    """
    
    buffer = BytesIO()
    buffer.write(b'RIFF')
    buffer.write(b'\x00\x00\x00\x00')
    buffer.write(b'WAVE')

    buffer.write(b'fmt ')
    if data.ndim == 1:
        noc = 1
    else:
        noc = data.shape[1]
    bits = data.dtype.itemsize * 8
    sbytes = rate*(bits // 8)*noc
    ba = noc * (bits // 8)
    buffer.write(struct.pack('<ihHIIHH', 16, 1, noc, rate, sbytes, ba, bits))

    # data chunk
    buffer.write(b'data')
    buffer.write(struct.pack('<i', data.nbytes))

    if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
        data = data.byteswap()

    buffer.write(data.tostring())
    # return buffer.getvalue()
    # Determine file size and place it in correct
    # position at start of the file.
    size = buffer.tell()
    buffer.seek(4)
    buffer.write(struct.pack('<i', size-8))
    
    val = buffer.getvalue()
    
    src = """
    <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <title>Simple Test</title>
    </head>
    
    <body>
    <audio controls="controls" style="width:600px" >
      <source controls src="data:audio/wav;base64,{base64}" type="audio/wav" />
      Your browser does not support the audio element.
    </audio>
    </body>
    """.format(base64=base64.encodebytes(val))
    display(HTML(src))
In [3]:
# loading wav files
fs_1, voice_1, new_1 = wavfile.read(
                    "tbawht02.wav")
fs_2, voice_2, new_2= wavfile.read(
                    "poem.wav")
# reshaping the files to have same size

print(new_1)
print(new_2)
m, = voice_1.shape 
voice_2 = voice_2[:m]

# plotting time domain representation of signal
figure_1 = plt.figure("Original Signal")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of voice 1")
plt.xlabel("Time")
plt.ylabel("Signal")
plt.plot(np.arange(m)/fs_1, voice_1)
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of voice 2")
plt.plot(np.arange(m)/fs_2, voice_2)
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(voice_1, rate=fs_1)
16
16
Out[3]:
In [4]:
figure_1 = plt.figure("Mixed audio")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of mixed audio source 1")
plt.plot(np.arange(m)/fs_2, voice_2)
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(voice_2, rate=fs_2)
Out[4]:
In [5]:
# concatenate
voice = np.c_[voice_1, voice_2]
print(voice.shape)
A = np.array([[3, 0.5], [0.5, 3]])
X = np.dot(A, voice.T)
figure_1 = plt.figure("Mixed audio 2")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of mixed audio source 2")
print(X.shape)
print(X[1,:].shape)
plt.plot(np.arange(m)/fs_2, X[0,:])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(X[1,:], rate=fs_1)
(37376, 2)
(2, 37376)
(37376,)
Out[5]:
In [6]:
Audio(X[0,:], rate=fs_1)
Out[6]:
In [7]:
def make_mean_zero(x, N):
    mean = np.sum(x[0,])/N
    x[0, ] = x[0, ] - mean
    mean = np.sum(x[1,])/N
    x[1, ] = x[1, ] - mean
    return x
def mean(x,N):
    mean = np.sum(x[0,])/N
    x = x[:,:] - mean
    return x

df = lambda x: np.tanh(x)
ddf = lambda x: 1/np.cosh(x)**2

def gen_expect(x, N):
    mean1 = np.sum(x[0,])/N
    mean2 = np.sum(x[1,])/N
    return np.array([[mean1],[mean2]])
In [8]:
def ica(w, x):
    N = len(x[0])
    make_mean_zero(x, N) #make zero mean
    w,v = np.linalg.eig(np.cov(x))
    M = (v/np.sqrt(w)).T
    unit_x = np.dot(M,x) #diagonalize with unit variance
    i=0
    while True:
        i+=1  
        g1 = gen_expect(df(np.dot(w,unit_x))*unit_x, N)
        g2 = gen_expect(ddf(np.dot(w,unit_x)), N)
        g1 = np.array([g1[0, 0], g1[1,0]])
        g2 = np.array([g2[0, 0], g2[1,0]])
        w_new = g1 - g2
        w_new /= np.linalg.norm(w_new)
        if abs(np.linalg.norm(w_new-w))<0.000001: 
            break
        w = w_new
    return w

w1 = ica(np.random.rand(2), X) #random seed for weights
#second ica component must be orthogonal to first, since we're in 2d, this
#is unique, up to sign.
w2 = np.array([-w1[1],w1[0]])

ica_output = np.array([w1, w2])
print(A)
print(ica_output)
print(A)
new_audio = np.dot(ica_output, X)
print(new_audio.shape)
# Here, now you want to seaprate them into orthogonal values. 
[[ 3.   0.5]
 [ 0.5  3. ]]
[[ 0.98377575  0.17940253]
 [-0.17940253  0.98377575]]
[[ 3.   0.5]
 [ 0.5  3. ]]
(2, 37376)
/Users/yadapruksachatkun/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py:14: RuntimeWarning: overflow encountered in cosh
In [9]:
"""For reference here are mixed"""
figure_1 = plt.figure("Mixed audio")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of mixed audio source 1")
plt.plot(np.arange(m)/fs_2, X[0])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of mixed audio source 2")
plt.plot(np.arange(m)/fs_2, X[1])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
In [10]:
figure_1 = plt.figure("Actual and estimated sources")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of estimated separated audio source 2")
plt.plot(np.arange(m)/fs_2, new_audio[0,:])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of actual voice 2")
plt.plot(np.arange(m)/fs_2, voice_1)
plt.xlabel("Time")
plt.ylabel("Signal")

figure_2 = plt.figure("Mixed source")
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of mixed source 2")
plt.plot(np.arange(m)/fs_2, X[0])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
In [11]:
print(m)
print(new_audio.shape)
figure_1 = plt.figure("Actual and estimated sources")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of estimated separated audio source 2")
plt.plot(np.arange(m)/fs_2, new_audio[1,:])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of actual voice 2")
plt.plot(np.arange(m)/fs_2, voice_2)
plt.xlabel("Time")
plt.ylabel("Signal")

figure_2 = plt.figure("Mixed source")
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of mixed source 2")
plt.plot(np.arange(m)/fs_2, X[1])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
37376
(2, 37376)
In [12]:
Audio(new_audio[1], rate=fs_1)
Out[12]:
In [15]:
Audio(X[1], rate=fs_1)
Out[15]: