%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%Biomechanicaly
informaed voice codec
%
%Rebecca Kleinberger
%
%(using
lib from UCL phonetic classes)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% ORGANISATION
%
% Different mechanisms
:
% - air pressure (signal enveloppe)
% - glottal source (fondamental
frequency F0)
% - filtering by the cavities (formants)
%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% PREPARE THE FILE AND
SAMPLING FREQUENCY
%
%
[sig fssig nb]=wavread('sound/rec/recA2N.wav');
%signal
in mono
sig = sig(:,1);
%take
a short time sample at a fix posiiton
size = length(sig);
mid = floor(size/2);
z = sig(mid: mid+floor(size/25));
%take
a chunk of 30ms (because 30m is a generaly a good
window size)
ms30=floor(fssig*0.03);
y = sig(mid: mid+ms30);
x = y;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% ENVELOPE EXTRACTION
% use
of a detection function
%
[DetectionFunction,nFrames] = ComplexDomainDF(sig,fssig);
time = (1:length(sig))/44100.0;
f1 = figure('name','ENVELOPE
EXTRACTION');
set(f1, 'OuterPosition', [0 0 1100 650]);
subplot(2,1,1);
plot(time,sig);
legend('Original
waveform');
xlabel('time (s)');
ylabel('amplitude');
subplot(2,1,2);
plot(((1:nFrames)/nFrames)*length(sig)/44100.0,
DetectionFunction);
hold on;
plot(((1:nFrames)/nFrames)*length(sig)/44100.0,
- DetectionFunction);
title('envelope
extraction')
hold off;
legend('Complex-domain');
xlabel('time (s)');
ylabel('Detection function');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% USING
SMOOTH FUNCTION?????
% NOT GREAT IDEA...
%
%
% abs_sig = abs(sig);
% abs_sig_smoothed = smooth(abs_sig,2000);
%
% Ts
= 1/fssig;
%
% f2 =
figure('name','ENVELOPE EXTRACTION USING SMOOTH
FUNCTION');
%set(f2,
'OuterPosition', [0 0 1100
650]);
%
% subplot(3,1,1);
% plot([Ts:Ts:Ts*length(sig)],sig);
% legend('Signal');
% subplot(3,1,2);
% plot([Ts:Ts:Ts*length(abs_sig)],abs_sig);
% legend('Signal
Magnitude');
% subplot(3,1,3);
% plot([Ts:Ts:Ts*length(abs_sig_smoothed)],abs_sig_smoothed);
% hold
on;
% plot([Ts:Ts:Ts*length(abs_sig_smoothed)],0.03,'r');
% hold
off;
% legend('Signal
Contour');
% xlabel('time (s)');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% ESTIMATION OF F0 GLOTTAL
SOURCE FREQUENCY METHODE 1: BY ANALYSE OF THE CEPSTRUM
%
%
% using
the section z (or y) of our signal sig
ms1=fssig/1000;
% maximum speech Fx at 1000Hz
ms20=fssig/50;
% minimum speech Fx at 50Hz
%
% plot
waveform
t=(0:length(x)-1)/fssig; % times of sampling instants
f3 = figure('name','ESTIMATION OF F0
GLOTTAL SOURCE FREQUENCY METHODE 1: BY ANALYSE OF THE CEPSTRUM');
set(f3, 'OuterPosition', [0 0 1100 650]);
subplot(3,1,1);
plot(t,x);
legend('Waveform');
xlabel('Time (s)');
ylabel('Amplitude');
%
% do fourier transform of windowed signal
Y=fft(x.*hamming(length(x)));
%
% plot
spectrum of bottom 5000Hz
hz5000=5000*length(Y)/fssig;
f=(0:hz5000)*fssig/length(Y);
subplot(3,1,2);
plot(f,20*log10(abs(Y(1:length(f)))+eps));
legend('Spectrum');
xlabel('Frequency (Hz)');
ylabel('Magnitude (dB)');
%
% cepstrum is DFT of log spectrum
C=fft(log(abs(Y)+eps));
%
% plot
between 1ms (=1000Hz) and 20ms (=50Hz)
q=(ms1:ms20)/fssig;
subplot(3,1,3);
plot(q,abs(C(ms1:ms20)));
legend('Cepstrum');
xlabel('Quefrency (s)');
ylabel('Amplitude');
%find
the F0 frequency
[c,fx]=max(abs(C(ms1:ms20)));
F0 = fssig/(ms1+fx-1);
fprintf('F0=%gHz\n',F0);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% ESTIMATION OF F0 GLOTTAL SOURCE
FREQUENCY : METHODE 2: BY USE OF AUTOCORRELATION FUNCTION
%
%
%
% using
the section x of our signal sig
ms20=fssig/50;
% minimum speech Fx at 50Hz
%
% plot
waveform
f4 = figure('name','ESTIMATION OF F0
GLOTTAL SOURCE FREQUENCY : METHODE 2: BY USE OF AUTOCORRELATION FUNCTION');
set(f4, 'OuterPosition', [0 0 1100 650]);
t=(0:length(x)-1)/fssig; % times of sampling instants
subplot(2,1,1);
plot(t,x);
legend('Waveform');
xlabel('Time (s)');
ylabel('Amplitude');
%
% calculate
autocorrelation
r=xcorr(x,ms20,'coeff');
%
% plot
autocorrelation
d=(-ms20:ms20)/fssig;
% times of delays
subplot(2,1,2);
plot(d,r);
legend('Autocorrelation');
xlabel('Delay (s)');
ylabel('Correlation coeff.');
ms2=fssig/500;
% maximum speech Fx at 500Hz
ms20=fssig/50;
% minimum speech Fx at 50Hz
% just
look at region corresponding to positive delays
r=r(ms20+1:2*ms20+1);
[rmax,tx]=max(r(ms2:ms20));
F0 = fssig/(ms2+tx-1);
fprintf('rmax=%g Fx=%gHz\n',rmax,F0);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% FIND F0 AGAINST TIME
%
%
%signal
in mono
sig = sig(:,1);
fs=fssig;
ms2=floor(fs*0.002);
ms10=floor(fs*0.01);
ms20=floor(fs*0.02);
ms30=floor(fs*0.03);
% plot
waveform
f5 = figure('name','F0 OVER TIME');
set(f5, 'OuterPosition', [0 0 1100 650]);
t=(0:length(sig)-1)/fs;
subplot(2,1,1);
plot(t,sig);
legend('Waveform');
xlabel('Time (s)');
ylabel('Amplitude');
% process
in chunks
pos=1;
fx=[];
while (pos+ms30) <= length(sig)
sigchunk=sig(pos:pos+ms30-1);
sigchunk=sigchunk-mean(sigchunk);
% calculate autocorrelation
r=xcorr(sigchunk,ms20,'coeff');
r=r(ms20+1:2*ms20+1);
% search for maximum between 2ms (=500Hz) and 20ms (=50Hz)
[rmax,fxval]=max(abs(r(ms2:ms20)));
if (rmax > 0.5)
fx=[fx fs/(ms2+fxval-1)];
else
fx=[fx
0];
end;
pos=pos+ms10;
end;
% plot
F0 trace
t=(0:length(fx)-1)*0.01;
subplot(2,1,2);
plot(t,fx);
legend('F0 Trace');
xlabel('Time (s)');
ylabel('Frequency (Hz)');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% FIND THE FORMANT PATTERN
%
%
% using
the section x of our signal sig
% resample
to 10,000Hz (optional)
x=resample(x,10000,fssig);
fs=10000;
%
% plot
waveform
f6 = figure('name','FIND THE FORMANT
PATTERN');
set(f6, 'OuterPosition', [0 0 1100 650]);
t=(0:length(x)-1)/fs; % times of sampling instants
%
% get
Linear prediction filter
ncoeff=2+fs/1000;
% rule of thumb for formant
estimation
a=lpc(x,ncoeff);
%
% plot
frequency response
[h,f]=freqz(1,a,512,fs);
g = 20*log10(abs(h)+eps);
plot(f,g);
legend('LP Filter');
xlabel('Frequency (Hz)');
ylabel('Gain (dB)');
% find
frequencies by root-solving
r=roots(a);
% find roots of polynomial a
r=r(imag(r)>0.01);
% only look for roots >0Hz
up to fs/2
ffreq=sort(atan2(imag(r),real(r))*fs/(2*pi));
% convert
to Hz and sort
%for i=1:length(ffreq)
% fprintf('Formant %d Frequency %.1f\n',i,ffreq(i));
%end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% FIND THE FORMANT PATTERN
AGAINST TIME
%
%
fs = fssig;
x = sig(:,1);
%number
of samples in 10ms
ms10=floor(fs*.01);
%number
of samples in 30ms
ms30=floor(fs*0.03);
ncoeff=2+fs/1000;
% rule of thumb for formant
estimation
% process
in chunks of 30ms
pos=1;
fm=[]; % formant peaks
ft=[]; % formant times
H=[];
nbrChunk =
floor(length(x)/ms30)+1;
gG =
zeros(512,nbrChunk);
ind = 1;
while (pos+ms30) <= length(x)
y=x(pos:pos+ms30-1);
y=y-mean(y);
% find LP filter
a=lpc(y,ncoeff);
[h,f]=freqz(1,a,512,fs);
g =
20*log10(abs(h)+eps);
gG(:,ind) = g;
ind = ind+1;
% find roots
r=roots(a);
r=r(imag(r)>0.01);
pos=pos+ms10;
end;
% plot
formant surface over time
f7 = figure('name','FORMANT PATTERN
OVER TIME');
set(f7, 'OuterPosition', [0 0 1100 650]);
meshz(gG);
legend('Formants');
xlabel('Time (s)');
ylabel('Frequency (Hz)');
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%