function [finalTimeIndex, weights, soundLeft, soundRight] = nasty(filename, noiseFloorLevel) % segment into chunks -> do the crazy % jeff lieberman, 2005 % this function opens a wave file [handel for now] % and applies segmentation based loosely on jehan's segmentation algorithm, % with changes made to specifically highlight rhythmic activity. % bring in test sound: [y,fs,nbits] = wavread(filename); %y = sound file % fs = sample rate % bits = bitrate t = (1:length(y))/44100; % convert to mono: z = 0.5*(y(:,1)+y(:,2)); soundLeft = y(:,1); soundRight = y(:,2); % STFT on the data N = 2048; % length of the fft window - ~46ms, power of two. R = 512; % 12ms window [44100*0.012 = ~512 samples] win = hamming(R); % window shape L = 2000; % shift each time overlap = 512-128; % compute every 3ms % spectrogram: [B,f,tspec] = specgram(z,N,fs,win,overlap); timeRatio = floor(length(t)/length(tspec)); % generate the Bark scale, for separation of the spectrogram: barkIndeces = ceil(13*atan(0.00076*f)+3.5*atan((f/7500).^2)); % generate the energies for each bark thing, by taking the maximal % frequency within that bark range: for i=1:25, barkEnergy(i) = max(find(barkIndeces == i)); end % generate the new Bark spectrogram, with 25 divisions for i = 1:25, currIndeces = find(barkIndeces == i); % find current bark members Bnew(i,:) = sum(B(currIndeces,:))/length(find(barkIndeces == i)); end % smooth the Bark Spectrogram by convoluting every band with a 200ms half % hanning [raised cosine] window: %%%%%%%%%%%%%%%%%% % future version: when detecting grain size, change this blending size %%%%%%%%%%%%%%%%%% windowSecs = 0.05 ; %0.2 - 0.4 sec, with 0.003 sec per frame N = floor(windowSecs*44100/timeRatio); % 200ms half-window so we make a .4s window first raisedWindowFull = window(@hamming,N); raisedWindow = raisedWindowFull(floor(N/2):N); % now that we have the window, convolve: for i = 1:25, BnewSmooth(i,:) = conv( abs(Bnew(i,:)), raisedWindow ); end windowWidth = N*0.5*timeRatio/44100; % time of window = 0.2 tspecStretched = tspec*(tspec(end)+windowWidth)/tspec(end); % compute the loudness function on the raw data: % for each time t, the loudness is the sum of energies in each band loudnessReg = log10(sum(barkEnergy*abs(Bnew)+1,1)); % +1 allows zero volume activity to not hit -inf pad = ones(1,length(raisedWindow)-1); % compute the loudness function on the smooth data: % for each time t, the loudness is the sum of energies in each band loudness = log10(sum(barkEnergy*BnewSmooth+1,1)); pad = ones(1,length(raisedWindow)-1); % compute the event detection function [consecutive differences] eventRaw = diff(loudness); % smooth this function by convolution w/ Hamming window newWind = 0.1; convWindow = hamming(floor(newWind*44100/timeRatio)); % 150ms hamming window pad = [pad ones(1,length(convWindow)-1)]; smootherEvents = conv(convWindow, eventRaw); windowWidth = 0.5*newWind; % half window time windowWidthFrames = floor(windowWidth*44100/timeRatio); % number of frames for the window % find local maxima for this smooth function for onset transients: [min maxes] = localmaxmin(smootherEvents); % find those that aren't at the way end: newMax = maxes(find(maxes < length(tspec))); % fix those that are too low amplitude: % FUTURE REVISION: % make this cutoff frequency by looking at sound data and variance within, % so that is auto-calculates the amount of noise present to get rid of the % floor newMax = newMax(find( (smootherEvents(newMax)/max(smootherEvents)) > noiseFloorLevel )); % for each time of max, get the weight as the actual value of the max event % loudness: weights = smootherEvents(newMax); % max signifies the indeces of events. so, look in the previous 20ms [how % many indeces?] for a minimum in the loudness function, and that is the % trigger index: maxTimes = tspec(newMax) - windowWidth; tspecMaxFrames = newMax - windowWidthFrames; % approx frames for maxes in tspec % find indeces of original times: approxIndeces = floor(newMax*timeRatio-44100*windowWidth); % second half is number of frames for windowWidth % subplot(8,1,1); % hold on; % errorbar(t(approxIndeces),zeros(size(approxIndeces)),zeros(size(approxIndeces))+0.5,'rx'); % now search the previous 0.03 seconds to try to find the minimum of % loudness: % tspecMaxFrames are where to start, and 0.03 seconds is how many frames: minFrames = ceil(0.01*(44100/timeRatio)); % how many frames back to check: for i = 1:length(maxTimes), % for each time of the maximum smoothed loudness. search for nearby minima in the loudness function [trash minimumLoudnessIndeces(i)] = min( loudness( max(tspecMaxFrames(i)-minFrames,1) : tspecMaxFrames(i) ) ); minimumLoudnessIndeces(i) = max(1,minimumLoudnessIndeces(i) + tspecMaxFrames(i) - 1 - minFrames); end % and -finally-... search for the nearest place where the sample has a zero % crossing and is heading positive, to that minimum in loudness. this way % we can cleanly cut up the samples into constituent parts. % search forward: %finalTimes: possibleTimes = tspec(minimumLoudnessIndeces); % ones to test for i=1:length(possibleTimes), % find nearest time index to start with: can be optimized later: real = find(t > possibleTimes(i)); startIndex = real(1); % first time_index that occurs bigger than the other spec; % now search for the nearest negative to positive zero crossing: while(z(startIndex) > 0) % wait to go negative startIndex = startIndex + 1; end while(z(startIndex) < 0) % wait to go positive startIndex = startIndex + 1; end % now we just crossed from negative to positive finalTimeIndex(i) = startIndex; end % output is finalTimeIndex finalTimeIndex = [finalTimeIndex length(t)]; %%%%%%%%%%%%%%%%%%%%%%%%%%%% % plotting: %%%%%%%%%%%%%%%%%%%%%%%%%%%% figure(1), clf subplot(8,1,1); plot(t,z); axis tight; title('WAV File Data, temporary mono conversion'); % plot it: subplot(8,1,2); imagesc(tspec,f,log10(abs(B))); axis xy; colormap('jet'); title('Spectrogram'); % plot improved bark version: subplot(8,1,3); imagesc(tspec,barkIndeces,log10(abs(Bnew))); axis xy; colormap('jet'); title('Improved Bark Spectrogram'); cc = axis; % plot: subplot(8,1,4); imagesc(tspecStretched,barkIndeces,log10(abs(BnewSmooth))); axis xy; colormap('jet') title('Smoothed Bark Spectrogram'); axis(cc); % match previous axis subplot(8,1,5); plot(tspec,loudnessReg/max(loudnessReg)); title('Loudness from Regular Bark Scale'); axis tight; subplot(8,1,6); plot(tspec,loudness(1:length(tspec))/max(loudness)); title('Loudness from Smoothed Bark Scale'); axis tight; subplot(8,1,7); plot(tspec,eventRaw(1:length(tspec))/max(eventRaw)); title('Event Detection'); axis tight; cc = axis; % plot normalized: subtract out the added stuff at the end subplot(8,1,8); plot(tspec-windowWidth, smootherEvents(1:length(tspec))/max(smootherEvents)); title('Smoothed Event Detection Function w/ Labeled Maxima') axis tight; hold on; errorbar(tspec(newMax)-windowWidth,zeros(1,length(newMax)),1*ones(1,length(newMax)),'k'); axis(cc); % add bar markers subplot(8,1,6); hold on; errorbar(tspec(minimumLoudnessIndeces),zeros(size(minimumLoudnessIndeces)),... ones(size(minimumLoudnessIndeces))); % plot these on the original plot: subplot(8,1,1); hold on; errorbar(tspec(minimumLoudnessIndeces),0*ones(size(minimumLoudnessIndeces)),... ones(size(minimumLoudnessIndeces)),'r');