%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%Biomechanicaly informaed voice codec

%

%Rebecca Kleinberger

%

%(using lib from UCL phonetic classes)

%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% ORGANISATION

%

%   Different mechanisms :

%       - air pressure (signal enveloppe)

%       - glottal source (fondamental frequency F0)

%       - filtering by the cavities (formants)

%

%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% PREPARE THE FILE AND SAMPLING FREQUENCY

%  

%

 

[sig fssig nb]=wavread('sound/rec/recA2N.wav');

%signal in mono

sig = sig(:,1);

 

%take a short time sample at a fix posiiton

size = length(sig);

mid = floor(size/2);

z = sig(mid: mid+floor(size/25));

 

%take a chunk of 30ms (because 30m is a generaly a good window size)

ms30=floor(fssig*0.03);

y = sig(mid: mid+ms30);

 

x = y;

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% ENVELOPE EXTRACTION

% use of a detection function

%

 

[DetectionFunction,nFrames] = ComplexDomainDF(sig,fssig);

 

time = (1:length(sig))/44100.0;

f1 = figure('name','ENVELOPE EXTRACTION');

set(f1, 'OuterPosition', [0 0 1100 650]);

subplot(2,1,1);

plot(time,sig);

legend('Original waveform');

xlabel('time (s)');

ylabel('amplitude');

subplot(2,1,2);

plot(((1:nFrames)/nFrames)*length(sig)/44100.0, DetectionFunction);

hold on;

plot(((1:nFrames)/nFrames)*length(sig)/44100.0, - DetectionFunction);

title('envelope extraction')

hold off;

legend('Complex-domain');

xlabel('time (s)');

ylabel('Detection function');

 

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% USING SMOOTH FUNCTION?????

% NOT GREAT IDEA...

%

%

% abs_sig = abs(sig);

% abs_sig_smoothed = smooth(abs_sig,2000);

%

% Ts = 1/fssig;

%

% f2 = figure('name','ENVELOPE EXTRACTION USING SMOOTH FUNCTION');

%set(f2, 'OuterPosition', [0 0 1100 650]);

%

% subplot(3,1,1);

% plot([Ts:Ts:Ts*length(sig)],sig);

% legend('Signal');

% subplot(3,1,2);

% plot([Ts:Ts:Ts*length(abs_sig)],abs_sig);

% legend('Signal Magnitude');

% subplot(3,1,3);

% plot([Ts:Ts:Ts*length(abs_sig_smoothed)],abs_sig_smoothed);

% hold on;

% plot([Ts:Ts:Ts*length(abs_sig_smoothed)],0.03,'r');

% hold off;

% legend('Signal Contour');

% xlabel('time (s)');

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% ESTIMATION OF F0 GLOTTAL SOURCE FREQUENCY METHODE 1: BY ANALYSE OF THE CEPSTRUM

%

%  

 

% using the section z (or y) of our signal sig

 

ms1=fssig/1000;                 % maximum speech Fx at 1000Hz

ms20=fssig/50;                  % minimum speech Fx at 50Hz

%

% plot waveform

t=(0:length(x)-1)/fssig;        % times of sampling instants

f3 = figure('name','ESTIMATION OF F0 GLOTTAL SOURCE FREQUENCY METHODE 1: BY ANALYSE OF THE CEPSTRUM');

set(f3, 'OuterPosition', [0 0 1100 650]);

subplot(3,1,1);

plot(t,x);

legend('Waveform');

xlabel('Time (s)');

ylabel('Amplitude');

%

% do fourier transform of windowed signal

Y=fft(x.*hamming(length(x)));

%

% plot spectrum of bottom 5000Hz

hz5000=5000*length(Y)/fssig;

f=(0:hz5000)*fssig/length(Y);

subplot(3,1,2);

plot(f,20*log10(abs(Y(1:length(f)))+eps));

legend('Spectrum');

xlabel('Frequency (Hz)');

ylabel('Magnitude (dB)');

%

% cepstrum is DFT of log spectrum

C=fft(log(abs(Y)+eps));

%

% plot between 1ms (=1000Hz) and 20ms (=50Hz)

q=(ms1:ms20)/fssig;

subplot(3,1,3);

plot(q,abs(C(ms1:ms20)));

legend('Cepstrum');

xlabel('Quefrency (s)');

ylabel('Amplitude');

 

%find the F0 frequency

[c,fx]=max(abs(C(ms1:ms20)));

F0 = fssig/(ms1+fx-1);

fprintf('F0=%gHz\n',F0);

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%  ESTIMATION OF F0 GLOTTAL SOURCE FREQUENCY : METHODE 2: BY USE OF AUTOCORRELATION FUNCTION

%

% 

%

% using the section x of our signal sig

 

ms20=fssig/50;                 % minimum speech Fx at 50Hz

%

% plot waveform

f4 = figure('name','ESTIMATION OF F0 GLOTTAL SOURCE FREQUENCY : METHODE 2: BY USE OF AUTOCORRELATION FUNCTION');

set(f4, 'OuterPosition', [0 0 1100 650]);

t=(0:length(x)-1)/fssig;        % times of sampling instants

subplot(2,1,1);

plot(t,x);

legend('Waveform');

xlabel('Time (s)');

ylabel('Amplitude');

%

% calculate autocorrelation

r=xcorr(x,ms20,'coeff');  

%

% plot autocorrelation

d=(-ms20:ms20)/fssig;          % times of delays

subplot(2,1,2);

plot(d,r);

legend('Autocorrelation');

xlabel('Delay (s)');

ylabel('Correlation coeff.');

 

 

ms2=fssig/500;                 % maximum speech Fx at 500Hz

ms20=fssig/50;                 % minimum speech Fx at 50Hz

 

% just look at region corresponding to positive delays

r=r(ms20+1:2*ms20+1);

[rmax,tx]=max(r(ms2:ms20));

F0 = fssig/(ms2+tx-1);

fprintf('rmax=%g Fx=%gHz\n',rmax,F0);

 

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% FIND F0 AGAINST TIME

%

%

 

 

%signal in mono

sig = sig(:,1);

fs=fssig;

 

ms2=floor(fs*0.002);

ms10=floor(fs*0.01);

ms20=floor(fs*0.02);

ms30=floor(fs*0.03);

% plot waveform

f5 = figure('name','F0 OVER TIME');

set(f5, 'OuterPosition', [0 0 1100 650]);

 

t=(0:length(sig)-1)/fs;

subplot(2,1,1);

plot(t,sig);

legend('Waveform');

xlabel('Time (s)');

ylabel('Amplitude');

% process in chunks

pos=1;

fx=[];

while (pos+ms30) <= length(sig)

    sigchunk=sig(pos:pos+ms30-1);

    sigchunk=sigchunk-mean(sigchunk);

    % calculate autocorrelation

    r=xcorr(sigchunk,ms20,'coeff');   

    r=r(ms20+1:2*ms20+1);

    % search for maximum  between 2ms (=500Hz) and 20ms (=50Hz)

    [rmax,fxval]=max(abs(r(ms2:ms20)));

    if (rmax > 0.5)

        fx=[fx fs/(ms2+fxval-1)];

    else

        fx=[fx 0];

    end;

    pos=pos+ms10;

end;

% plot F0 trace

t=(0:length(fx)-1)*0.01;

subplot(2,1,2);

plot(t,fx);

legend('F0 Trace');

xlabel('Time (s)');

ylabel('Frequency (Hz)');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% FIND THE FORMANT PATTERN

%

%

% using the section x of our signal sig

 

% resample to 10,000Hz (optional)

x=resample(x,10000,fssig);

fs=10000;

%

% plot waveform

f6 = figure('name','FIND THE FORMANT PATTERN');

set(f6, 'OuterPosition', [0 0 1100 650]);

t=(0:length(x)-1)/fs;        % times of sampling instants

 

%

% get Linear prediction filter

ncoeff=2+fs/1000;           % rule of thumb for formant estimation

a=lpc(x,ncoeff);

%

% plot frequency response

[h,f]=freqz(1,a,512,fs);

g = 20*log10(abs(h)+eps);

plot(f,g);

legend('LP Filter');

xlabel('Frequency (Hz)');

ylabel('Gain (dB)');

 

 

% find frequencies by root-solving

r=roots(a);                  % find roots of polynomial a

r=r(imag(r)>0.01);           % only look for roots >0Hz up to fs/2

ffreq=sort(atan2(imag(r),real(r))*fs/(2*pi));

                             % convert to Hz and sort

%for i=1:length(ffreq)

%    fprintf('Formant %d Frequency %.1f\n',i,ffreq(i));

%end

 

 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% FIND THE FORMANT PATTERN AGAINST TIME

%

%

 

fs = fssig;

x = sig(:,1);

 

%number of samples in 10ms

ms10=floor(fs*.01);

%number of samples in 30ms

ms30=floor(fs*0.03);

ncoeff=2+fs/1000;           % rule of thumb for formant estimation

 

 

% process in chunks of 30ms

pos=1;

fm=[];      % formant peaks

ft=[];      % formant times

H=[];

nbrChunk = floor(length(x)/ms30)+1;

gG = zeros(512,nbrChunk);

ind = 1;

while (pos+ms30) <= length(x)

    y=x(pos:pos+ms30-1);

    y=y-mean(y);

    % find LP filter

    a=lpc(y,ncoeff);

   

    [h,f]=freqz(1,a,512,fs);

    g = 20*log10(abs(h)+eps);

    gG(:,ind) = g;

    ind = ind+1;

   

    % find roots

    r=roots(a);

    r=r(imag(r)>0.01);

   

    pos=pos+ms10;

end;

% plot formant surface over time

 

 

 

f7 = figure('name','FORMANT PATTERN OVER TIME');

set(f7, 'OuterPosition', [0 0 1100 650]);

meshz(gG);

legend('Formants');

xlabel('Time (s)');

ylabel('Frequency (Hz)');

 

 

%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%