from __future__ import division
"""
Independent component analysis (ICA) is used to estimate
sources given noisy measurements. Imagine 2 persons speaking
simultaneously and 2 microphones recording the mixed signals.
ICA is used to recover the sources ie. what is said by each person.
"""
import sys
from io import BytesIO
import base64
import struct
from IPython.display import display
from IPython.core.display import HTML
import time
from scipy.io import wavfile
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import FastICA
from IPython.display import Audio
def wavPlayer(data, rate):
""" will display html 5 player for compatible browser
The browser need to know how to play wav through html5.
there is no autoplay to prevent file playing when the browser opens
Adapted from SciPy.io. and
github.com/Carreau/posts/blob/master/07-the-sound-of-hydrogen.ipynb
"""
buffer = BytesIO()
buffer.write(b'RIFF')
buffer.write(b'\x00\x00\x00\x00')
buffer.write(b'WAVE')
buffer.write(b'fmt ')
if data.ndim == 1:
noc = 1
else:
noc = data.shape[1]
bits = data.dtype.itemsize * 8
sbytes = rate*(bits // 8)*noc
ba = noc * (bits // 8)
buffer.write(struct.pack('<ihHIIHH', 16, 1, noc, rate, sbytes, ba, bits))
# data chunk
buffer.write(b'data')
buffer.write(struct.pack('<i', data.nbytes))
if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and sys.byteorder == 'big'):
data = data.byteswap()
buffer.write(data.tostring())
# return buffer.getvalue()
# Determine file size and place it in correct
# position at start of the file.
size = buffer.tell()
buffer.seek(4)
buffer.write(struct.pack('<i', size-8))
val = buffer.getvalue()
src = """
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Simple Test</title>
</head>
<body>
<audio controls="controls" style="width:600px" >
<source controls src="data:audio/wav;base64,{base64}" type="audio/wav" />
Your browser does not support the audio element.
</audio>
</body>
""".format(base64=base64.encodebytes(val))
display(HTML(src))
# loading wav files
fs_1, voice_1, new_1 = wavfile.read(
"tbawht02.wav")
fs_2, voice_2, new_2= wavfile.read(
"poem.wav")
# reshaping the files to have same size
print(new_1)
print(new_2)
m, = voice_1.shape
voice_2 = voice_2[:m]
# plotting time domain representation of signal
figure_1 = plt.figure("Original Signal")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of voice 1")
plt.xlabel("Time")
plt.ylabel("Signal")
plt.plot(np.arange(m)/fs_1, voice_1)
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of voice 2")
plt.plot(np.arange(m)/fs_2, voice_2)
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(voice_1, rate=fs_1)
figure_1 = plt.figure("Mixed audio")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of mixed audio source 1")
plt.plot(np.arange(m)/fs_2, voice_2)
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(voice_2, rate=fs_2)
# concatenate
voice = np.c_[voice_1, voice_2]
print(voice.shape)
A = np.array([[3, 0.5], [0.5, 3]])
X = np.dot(A, voice.T)
figure_1 = plt.figure("Mixed audio 2")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of mixed audio source 2")
print(X.shape)
print(X[1,:].shape)
plt.plot(np.arange(m)/fs_2, X[0,:])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(X[1,:], rate=fs_1)
Audio(X[0,:], rate=fs_1)
def make_mean_zero(x, N):
mean = np.sum(x[0,])/N
x[0, ] = x[0, ] - mean
mean = np.sum(x[1,])/N
x[1, ] = x[1, ] - mean
return x
def mean(x,N):
mean = np.sum(x[0,])/N
x = x[:,:] - mean
return x
df = lambda x: np.tanh(x)
ddf = lambda x: 1/np.cosh(x)**2
def gen_expect(x, N):
mean1 = np.sum(x[0,])/N
mean2 = np.sum(x[1,])/N
return np.array([[mean1],[mean2]])
def ica(w, x):
N = len(x[0])
make_mean_zero(x, N) #make zero mean
w,v = np.linalg.eig(np.cov(x))
M = (v/np.sqrt(w)).T
unit_x = np.dot(M,x) #diagonalize with unit variance
i=0
while True:
i+=1
g1 = gen_expect(df(np.dot(w,unit_x))*unit_x, N)
g2 = gen_expect(ddf(np.dot(w,unit_x)), N)
g1 = np.array([g1[0, 0], g1[1,0]])
g2 = np.array([g2[0, 0], g2[1,0]])
w_new = g1 - g2
w_new /= np.linalg.norm(w_new)
if abs(np.linalg.norm(w_new-w))<0.000001:
break
w = w_new
return w
w1 = ica(np.random.rand(2), X) #random seed for weights
#second ica component must be orthogonal to first, since we're in 2d, this
#is unique, up to sign.
w2 = np.array([-w1[1],w1[0]])
ica_output = np.array([w1, w2])
print(A)
print(ica_output)
print(A)
new_audio = np.dot(ica_output, X)
print(new_audio.shape)
# Here, now you want to seaprate them into orthogonal values.
"""For reference here are mixed"""
figure_1 = plt.figure("Mixed audio")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of mixed audio source 1")
plt.plot(np.arange(m)/fs_2, X[0])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of mixed audio source 2")
plt.plot(np.arange(m)/fs_2, X[1])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
figure_1 = plt.figure("Actual and estimated sources")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of estimated separated audio source 2")
plt.plot(np.arange(m)/fs_2, new_audio[0,:])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of actual voice 2")
plt.plot(np.arange(m)/fs_2, voice_1)
plt.xlabel("Time")
plt.ylabel("Signal")
figure_2 = plt.figure("Mixed source")
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of mixed source 2")
plt.plot(np.arange(m)/fs_2, X[0])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
print(m)
print(new_audio.shape)
figure_1 = plt.figure("Actual and estimated sources")
plt.subplot(2, 1, 1)
plt.title("Time Domain Representation of estimated separated audio source 2")
plt.plot(np.arange(m)/fs_2, new_audio[1,:])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of actual voice 2")
plt.plot(np.arange(m)/fs_2, voice_2)
plt.xlabel("Time")
plt.ylabel("Signal")
figure_2 = plt.figure("Mixed source")
plt.subplot(2, 1, 2)
plt.title("Time Domain Representation of mixed source 2")
plt.plot(np.arange(m)/fs_2, X[1])
plt.xlabel("Time")
plt.ylabel("Signal")
plt.show()
Audio(new_audio[1], rate=fs_1)
Audio(X[1], rate=fs_1)