Added all starting files. Developed speech_analysis until signal selection of phonemes.

2025-04-14 10:03:38 +02:00 · 2025-04-14 10:03:38 +02:00 · f7e1e60f7c
parent 13ee04700c
commit f7e1e60f7c
11 changed files with 281 additions and 0 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/chanvocoder.m
+++ b/chanvocoder.m
@ -0,0 +1,57 @@
+function y = chanvocoder(carrier, modul, chan, numband, overlap)
+% y = chanvocoder(carrier, modul, chan, numband, overlap)
+% The Channel Vocoder modulates the carrier signal with the modulation signal
+%
+% Inputs:
+%   carrier = carrier signal, one track per column (a wide input is transposed)
+%   modul   = modulation signal, same number of tracks as the carrier
+%   chan    = number of channels, i.e. half the FFT length   (e.g., 512)
+%   numband = number of bands (<chan)                         (e.g., 32)
+%   overlap = hop between frames as a fraction of the window  (e.g., 1/4)
+%
+% Output:
+%   y = len-by-tracks signal scaled to a peak of 0.8; left at zero when no
+%       frame could be synthesised (input shorter than 2*chan, or silence)
+
+if numband>chan
+    error('# bands must be < # channels')
+end
+if ~(overlap > 0)
+    % a hop of zero (or less) would never advance the frame loop below
+    error('overlap must be > 0');
+end
+
+[rc, cc]   = size(carrier);
+if cc>rc
+    carrier = carrier';
+end
+
+[rm, cm]   = size(modul);
+if cm>rm
+    modul = modul';
+end
+
+st         = min(rc,cc);                         % number of tracks (1 = mono, 2 = stereo)
+if st~= min(rm,cm)
+    error('carrier and modulator must have same number of tracks');
+end
+
+len        = min(length(carrier),length(modul)); % find shortest length
+carrier    = carrier(1:len,1:st);                % shorten carrier if needed
+modul      = modul(1:len,1:st);                  % shorten modulator if needed
+L          = 2*chan;                             % window length/FFT length
+w          = repmat(hanning(L), 1, st);          % one window column per track
+
+bands      = 1:round(chan/numband):chan;         % lower edges of the frequency bands
+bands(end) = chan;
+nedges     = numel(bands);                       % rounding can make this differ from numband
+y          = zeros(len,st);                      % output vector
+hop        = L*overlap;                          % frame advance in samples
+
+ii         = 0;
+while ii*hop+L <= len
+    first  = round(1+ii*hop);
+    ind    = first:first+L-1;                     % always exactly L samples
+    FFTmod = fft( modul(ind,:) .* w );            % window & take FFT of modulator
+    FFTcar = fft( carrier(ind,:) .* w );          % window & take FFT of carrier
+    syn    = zeros(chan,st);                      % place for synthesized output
+    for jj = 1:nedges-1                           % for each band actually generated
+        b        = bands(jj):bands(jj+1)-1;       % current band
+        % average over the bins of the band (dim 1); without the explicit
+        % dimension a one-bin band would be averaged across the tracks
+        env      = mean(abs(FFTmod(b,:)), 1);
+        syn(b,:) = FFTcar(b,:)*diag(env);         % carrier scaled by modulator envelope
+    end
+    midval   = FFTmod(1+L/2,:).*FFTcar(1+L/2,:);  % midpoint is special
+    synfull  = [syn; midval; flipud( conj( syn(2:end,:) ) )]; % + and - frequencies
+    timsig   = real( ifft(synfull) );             % invert back to time
+    y(ind,:) = y(ind,:) + timsig;                 % add back into time waveform
+    ii       = ii+1;
+end
+
+peak = max(abs(y(:)));
+if peak > 0                                       % avoid 0/0 -> NaN on silent output
+    y = 0.8*y/peak;                               % normalize output
+end
+
--- a/frequencySpectrum.m
+++ b/frequencySpectrum.m
@ -0,0 +1,66 @@
+function [power, duration] = frequencySpectrum(signal, fs, pad)
+%%%%%%%%%%%%%%%%%%
+%function [power, duration] = frequencySpectrum(signal, fs, pad)
+%
+% Task: Display the power spectrum (lin and log scale) of a given signal
+%
+% Input:
+%	- signal: the input signal to process (row or column vector)
+%	- fs: the sampling rate
+%	- pad: boolean if true, signal is padded with 0 to the next power of 2 -> FFT instead of DFT
+%
+% Output:
+%	- power: the power spectrum (same orientation as signal)
+%	- duration: elapsed time of the fft call in seconds, measured with tic/toc
+%
+% Side effect: opens two figures (time/linear/dB overview, then dB alone).
+%
+% Guillaume Gibert, guillaume.gibert@ecam.fr
+% 25/04/2022
+%%%%%%%%%%%%%%%%%%
+
+n = length(signal);        % number of samples
+
+if (pad)
+	n = 2^nextpow2(n);
+end
+
+tic
+y = fft(signal, n);% compute DFT of input signal (zero-padded to n by fft itself)
+duration = toc;
+
+power = abs(y).^2/n;    % power of the DFT
+peak = max(power(:));   % reference level for the dB plots
+
+% signal as drawn in the time plot: always a column, so padding also works
+% for row vectors
+shown = signal(:);
+if (pad)
+	shown = [ shown; zeros( n-length(shown), 1)];
+end
+
+t = (0:n-1)/fs;         % time range in seconds, exactly n points
+f = (0:n-1)*(fs/n);     % frequency range
+powerdB = 10*log10(power/peak);
+
+% plots
+figure;
+
+subplot(1,3,1) % time plot
+plot(t, shown)
+% the x data are already in seconds, so the default ticks are correct
+xlabel('Time (s)');
+ylabel('Amplitude (a.u.)');
+
+subplot(1,3,2) % linear frequency plot
+plot(f,power, 'b*'); hold on;
+plot(f,power, 'r');
+hold off
+xlabel('Frequency (Hz)')
+ylabel('Power (a.u.)')
+
+subplot(1,3,3) % log frequency plot
+plot(f,powerdB);
+xlabel('Frequency (Hz)')
+ylabel('Power (dB)')
+
+figure;
+plot(f,powerdB);
+xlabel('Frequency (Hz)')
+ylabel('Power (dB)')
+
--- a/sound/.DS_Store
+++ b/sound/.DS_Store
--- a/sound/carrier22.wav
+++ b/sound/carrier22.wav
--- a/sound/modulator22.wav
+++ b/sound/modulator22.wav
--- a/sound/white.wav
+++ b/sound/white.wav
--- a/sound/white_periodic.wav
+++ b/sound/white_periodic.wav
--- a/spectrogram.m
+++ b/spectrogram.m
@ -0,0 +1,38 @@
+function spectrogram(signal, samplingFreq, step_size, window_size)
+%%%%%%%%%%%%%%%%%%%%%%%
+%function spectrogram(signal, samplingFreq, step_size, window_size)
+% ex.:  spectrogram(signal, samplingFreq, 5, 50)
+%
+% Task: Plot the spectrogram of a given signal
+%
+% Inputs:
+%	-signal: temporal signal to analyse
+%	-samplingFreq: sampling frequency of the temporal signal
+% 	-step_size: how often the power spectrum will be computed in ms
+%	-window_size: size of the analysing window in ms
+%
+% Output: None (opens a figure: waveform on top, spectrogram below)
+%
+% author: Guillaume Gibert (guillaume.gibert@ecam.fr)
+% date: 14/03/2023
+%%%%%%%%%%%%%%%%%%%%%%%
+
+figure;
+
+% --- top panel: waveform
+subplot(2,1,1);
+lastTime = length(signal)/samplingFreq-1/samplingFreq;
+tsig = 0:1/samplingFreq:lastTime;
+plot(tsig, signal');
+xlim([0 lastTime]);
+ylabel('amplitude (norm. unit)');
+
+% --- bottom panel: spectrogram
+subplot(2,1,2);
+step = fix(step_size*samplingFreq/1000);     % one spectral slice every step_size ms
+window = fix(window_size*samplingFreq/1000);  % window_size ms data window
+fftn = 2^nextpow2(window); % next highest power of 2
+[S, f, t] = specgram(signal, fftn, samplingFreq, window, window-step);
+
+% keep the bins with 0 < f <= 4000 Hz; clamp to the rows specgram returned
+% so that sampling rates below 8 kHz do not index past the end of S
+kmax = min(floor(fftn*4000/samplingFreq), size(S,1));
+S = abs(S(2:kmax,:));      % magnitude in range 0<f<=4000 Hz.
+f = f(2:kmax);             % crop f with S so the y axis shows the plotted range
+S = S/max(S(:));           % normalize magnitude so that max is 0 dB.
+S = max(S, 10^(-40/10));   % clip below -40 dB.
+S = min(S, 10^(-3/10));    % clip above -3 dB.
+imagesc (t, f, log(S));    % display in log scale
+set (gca, "ydir", "normal"); % put the 'y' direction in the correct direction
+xlabel('time (s)');
+ylabel('frequency (Hz)');
--- a/speech_analysis.asv
+++ b/speech_analysis.asv
@ -0,0 +1,51 @@
+function speech_analysis()
+% speech_analysis()
+% Loads ./sound/modulator22.wav, plays it at the original and at twice the
+% sampling rate, then plots its waveform, its power spectrum (without and
+% with zero padding) and its spectrogram.
+
+% Construct the full file path
+filepath = './sound/modulator22.wav';
+% Read the audio file
+[y, Fs] = audioread(filepath);
+disp(['Successfully read the audio file: ', filepath]);
+disp(['Sampling frequency (Fs): ', num2str(Fs), ' Hz']);
+disp(['Number of samples: ', num2str(length(y))]);
+
+% Construct the output filename correctly
+%[~, name, ~] = fileparts(filepath); % Get the filename without extension
+%outputFilename = fullfile('.', ['processed_', name, '.wav']); % Create the new filename
+
+% Write the audio to a new file with double the sampling rate
+%audiowrite(outputFilename, y, Fs*2);
+%disp(['Successfully wrote the processed audio to: ', outputFilename, ' with double the sampling rate.']);
+
+% Play the original audio (using the audio data 'y' and its original sampling rate 'Fs')
+sound(y, Fs); % Play the original sound
+sound(y, Fs*2);
+disp('Playing the audio with double the sampling rate.');
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%Plot
+t = (0:length(y)-1) / Fs; % Time in seconds
+
+figure;
+plot(t, y);
+xlabel('Time (seconds)');
+ylabel('Amplitude');
+title(['Temporal Variation of ', filepath]);
+grid on;
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%Frequency Spectrum
+%DFT (pad = 0: transform at the original length)
+frequencySpectrum(y,Fs, 0);
+%FFT (pad = 1: zero-padded to the next power of 2)
+frequencySpectrum(y,Fs, 1);
+
+%Modify the padding to make the change.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% spectrogram settings; these were previously used without being defined
+step_size = 5;      % ms between two spectral slices
+window_size = 50;   % ms analysis window
+spectrogram(y, Fs, step_size, window_size)
+
+end
--- a/speech_analysis.m
+++ b/speech_analysis.m
@ -0,0 +1,69 @@
+function speech_analysis()
+% speech_analysis()
+% Loads ./sound/modulator22.wav, plays it, plots its waveform, its power
+% spectrum (with and without zero padding, printing the time each transform
+% took) and its spectrogram, then cuts three vowel segments out of the signal.
+
+% "clear all" was dropped: a function starts with an empty workspace anyway
+close all
+clc
+
+
+% Construct the full file path
+filepath = './sound/modulator22.wav';
+% Read the audio file
+[y, Fs] = audioread(filepath);
+disp(['Successfully read the audio file: ', filepath]);
+disp(['Sampling frequency (Fs): ', num2str(Fs), ' Hz']);
+disp(['Number of samples: ', num2str(length(y))]);
+
+% Construct the output filename correctly
+%[~, name, ~] = fileparts(filepath); % Get the filename without extension
+%outputFilename = fullfile('.', ['processed_', name, '.wav']); % Create the new filename
+
+% Write the audio to a new file with double the sampling rate
+%audiowrite(outputFilename, y, Fs*2);
+%disp(['Successfully wrote the processed audio to: ', outputFilename, ' with double the sampling rate.']);
+
+% Play the original audio (using the audio data 'y' and its original sampling rate 'Fs')
+sound(y, Fs); % Play the original sound
+%sound(y, Fs*2);
+% message corrected: the double-rate playback above is commented out
+disp('Playing the audio at the original sampling rate.');
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%Plot
+t = (0:length(y)-1) / Fs; % Time in seconds
+
+figure;
+plot(t, y);
+xlabel('Time (seconds)');
+ylabel('Amplitude');
+title(['Temporal Variation of ', filepath]);
+grid on;
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%Frequency Spectrum
+% frequencySpectrum times its own fft call and returns that duration as its
+% second output, so no tic is needed here.
+%FFT (pad = 1)
+[yFFT, FFT_Time]=frequencySpectrum(y,Fs, 1);
+disp(FFT_Time);
+%DFT (pad = 0)
+[yDFT, DFT_Time]=frequencySpectrum(y,Fs, 0);
+disp(DFT_Time);
+
+%Modify the padding to make the change.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+spectrogram(y, Fs, 5,50)
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Start and end indices (TO DETERMINE)
+vowel_one_start = 10000;
+vowel_one_end = 15000;
+vowel_two_start = 12000;
+vowel_two_end = 18000;
+vowel_three_start = 15000;
+vowel_three_end = 22000;
+
+% Fail with a clear message when the placeholder indices do not fit the file
+limits = [vowel_one_start, vowel_one_end, vowel_two_start, ...
+          vowel_two_end, vowel_three_start, vowel_three_end];
+if any(limits < 1) || any(limits > size(y,1))
+    error('vowel segment indices must lie within 1..%d', size(y,1));
+end
+
+% Extract the vowel segments.
+% NOTE(review): the original read from y_one/y_two/y_three, which are never
+% defined; y is the only signal loaded here, so all segments are cut from it.
+% Confirm whether three separate recordings were intended.
+vowel_one = y(vowel_one_start:vowel_one_end, :);
+vowel_two = y(vowel_two_start:vowel_two_end, :);
+vowel_three = y(vowel_three_start:vowel_three_end, :);
+end