Added downsampling capabilities.

2025-04-14 11:12:28 +02:00 · 2025-04-14 11:12:28 +02:00 · b2224152f4
parent f7e1e60f7c
commit b2224152f4
3 changed files with 88 additions and 66 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/speech_analysis.asv
+++ b/speech_analysis.asv
@@ -1,51 +0,0 @@
-function speech_analysis()
-
-
-
-
-% Construct the full file path
-filepath = './sound/modulator22.wav';
-% Read the audio file
-[y, Fs] = audioread(filepath);
-disp(['Successfully read the audio file: ', filepath]);
-disp(['Sampling frequency (Fs): ', num2str(Fs), ' Hz']);
-disp(['Number of samples: ', num2str(length(y))]);
-
-% Construct the output filename correctly
-%[~, name, ~] = fileparts(filepath); % Get the filename without extension
-%outputFilename = fullfile('.', ['processed_', name, '.wav']); % Create the new filename
-
-% Write the audio to a new file with double the sampling rate
-%audiowrite(outputFilename, y, Fs*2);
-%disp(['Successfully wrote the processed audio to: ', outputFilename, ' with double the sampling rate.']);
-
-% Play the original audio (using the audio data 'y' and its original sampling rate 'Fs')
-sound(y, Fs); % Play the original sound
-sound(y, Fs*2);
-disp('Playing the audio with double the sampling rate.');
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%Plot
-t = (0:length(y)-1) / Fs; % Time in seconds
-
-figure;
-plot(t, y);
-xlabel('Time (seconds)');
-ylabel('Amplitude');
-title(['Temporal Variation of ', filepath]);
-grid on;
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%Frequency Spectrum
-%FFT
-frequencySpectrum(y,Fs, 0);
-%DFT
-frequencySpectrum(y,Fs, 1);
-
-%Modify the padding to make the change.
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-spectrogram(y, Fs, step_size, window_size)
-
-end
--- a/speech_analysis.m
+++ b/speech_analysis.m
@@ -20,9 +20,6 @@ disp(['Number of samples: ', num2str(length(y))]);
 %audiowrite(outputFilename, y, Fs*2);
 %disp(['Successfully wrote the processed audio to: ', outputFilename, ' with double the sampling rate.']);

-% Play the original audio (using the audio data 'y' and its original sampling rate 'Fs')
-sound(y, Fs); % Play the original sound
-%sound(y, Fs*2);
 disp('Playing the audio with double the sampling rate.');


@@ -52,18 +49,94 @@ disp(DFT_Time);
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 spectrogram(y, Fs, 5,50)
-
+title('Spectrogram of modulator22.wav');
+colorbar;
+ylabel('Frequency (Hz)');
+xlabel('Time (s)');
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Start and end indices (TO DETERMINE)
-vowel_one_start = 10000;
-vowel_one_end = 15000;
-vowel_two_start = 12000;
-vowel_two_end = 18000;
-vowel_three_start = 15000;
-vowel_three_end = 22000;
+% Measurements read from Praat for this recording:
+%F0 : (100 + 130 +  100 + 120 + 100 + 90) / 6
+%F1 : 578.3725189859462, 418.70239431349677, 552.8090680139439,
+%308.88658136343446, 314.17710770594937, 363.8180262223959
+%F2 : 1695.8136433413672, 1550.9109531347972, 566.7831612330604,
+%1721.8044733141373, 1802.7920754749957, 1891.9059418088873
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+desiredFreq = 4000; %in Hz
+
+% --- Downsampling using downsample() ---
+downsample_factor_ds = round(Fs / desiredFreq);
+y_downsampled_ds = downsample(y, downsample_factor_ds);
+Fs_downsampled_ds = Fs / downsample_factor_ds;
+disp(['--- Downsampling using downsample() ---']);
+disp(['New sampling frequency (downsample): ', num2str(Fs_downsampled_ds), ' Hz']);
+disp(['Number of samples (downsample): ', num2str(length(y_downsampled_ds))]);
+
+% --- Downsampling using decimate() ---
+downsample_factor_dec = round(Fs / desiredFreq);
+y_decimated = decimate(y, downsample_factor_dec);
+Fs_decimated = Fs / downsample_factor_dec;
+disp(['--- Downsampling using decimate() ---']);
+disp(['New sampling frequency (decimate): ', num2str(Fs_decimated), ' Hz']);
+disp(['Number of samples (decimate): ', num2str(length(y_decimated))]);
+
+% --- Plotting Downsampled Signals ---
+t_ds = (0:length(y_downsampled_ds)-1) / Fs_downsampled_ds;
+subplot(3,1,2);
+plot(t_ds, y_downsampled_ds);
+xlabel('Time (seconds)');
+ylabel('Amplitude');
+title(['Downsampled Signal (downsample, Fs = ', num2str(Fs_downsampled_ds), ' Hz)']);
+grid on;
+
+t_dec = (0:length(y_decimated)-1) / Fs_decimated;
+subplot(3,1,3);
+plot(t_dec, y_decimated);
+xlabel('Time (seconds)');
+ylabel('Amplitude');
+title(['Decimated Signal (decimate, Fs = ', num2str(Fs_decimated), ' Hz)']);
+grid on;
+
+%{
+% --- Frequency Spectrum of Downsampled Signals ---
+figure;
+subplot(2,1,1);
+[yFFT_ds, FFT_Time_ds]=frequencySpectrum(y_downsampled_ds,Fs_downsampled_ds, 1);
+disp(['FFT Time (downsampled): ', num2str(FFT_Time_ds)]);
+plot(yFFT_ds, Fs_downsampled_ds);
+title('FFT of Downsampled Signal (downsample)');
+
+subplot(2,1,2);
+[yFFT_dec, FFT_Time_dec]=frequencySpectrum(y_decimated,Fs_decimated, 1);
+disp(['FFT Time (decimated): ', num2str(FFT_Time_dec)]);
+plot(yFFT_dec, Fs_decimated) 
+title('FFT of Decimated Signal (decimate)');
+%}
+%{
+% --- Spectrograms of Downsampled Signals ---
+figure;
+subplot(2,1,1);
+spectrogram(y_downsampled_ds, round(0.02*Fs_downsampled_ds), round(0.01*Fs_downsampled_ds), 512, Fs_downsampled_ds, 'yaxis');
+title(['Spectrogram of Downsampled Signal (downsample, Fs = ', num2str(Fs_downsampled_ds), ' Hz)']);
+colorbar;
+ylabel('Frequency (Hz)');
+xlabel('Time (s)');
+
+subplot(2,1,2);
+spectrogram(y_decimated, round(0.02*Fs_decimated), round(0.01*Fs_decimated), 512, Fs_decimated, 'yaxis');
+title(['Spectrogram of Decimated Signal (decimate, Fs = ', num2str(Fs_decimated), ' Hz)']);
+colorbar;
+ylabel('Frequency (Hz)');
+xlabel('Time (s)');
+%}
+
+
+
+
+
+% Play the audio (pass each signal together with its own sampling rate)
+%sound(y, Fs); % Play the original sound
+%sound(y, Fs*2);
+%sound(y_decimated,Fs_decimated)
+sound(y_downsampled_ds,Fs_downsampled_ds) % Sounds distorted (aliasing): the Nyquist-Shannon criterion is violated because downsample() only keeps every n-th sample without low-pass filtering first. decimate() applies an anti-aliasing low-pass filter before downsampling, so prefer decimate().

-% Extract the vowel segments
-vowel_one = y_one(vowel_one_start:vowel_one_end);
-vowel_two = y_two(vowel_two_start:vowel_two_end);
-vowel_three = y_three(vowel_three_start:vowel_three_end);
 end