Added all starting files. Developed speech_analysis up to the signal selection of phonemes.

This commit is contained in:
Charles STELANDRE 2025-04-14 10:03:38 +02:00
parent 13ee04700c
commit f7e1e60f7c
11 changed files with 281 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

57
chanvocoder.m Normal file
View File

@ -0,0 +1,57 @@
function y = chanvocoder(carrier, modul, chan, numband, overlap)
% y = chanvocoder(carrier, modul, chan, numband, overlap)
% The Channel Vocoder modulates the carrier signal with the modulation signal.
% Each windowed frame of the carrier is scaled, band by band, by the mean
% spectral magnitude of the modulator in that band; the frames are then
% inverted and overlap-added.
%
% Inputs:
%   carrier - carrier signal; an input with more columns than rows is transposed
%   modul   - modulation signal, same number of tracks as the carrier
%   chan    - number of channels (e.g., 512); the FFT length is 2*chan
%   numband - number of bands (<= chan) (e.g., 32)
%   overlap - hop between frames as a fraction of the window (e.g., 1/4)
%
% Output:
%   y - vocoded signal, one column per track, scaled so that its peak
%       magnitude is 0.8 (left as all zeros when nothing was synthesized)
%
% Raises an error when numband > chan or when the track counts differ.

if numband > chan
    error('# bands must be < # channels')
end

% Work with one track per column.
[rc, cc] = size(carrier);
if cc > rc
    carrier = carrier';
end
[rm, cm] = size(modul);
if cm > rm
    modul = modul';
end

st = min(rc, cc);                          % number of tracks (stereo or mono?)
if st ~= min(rm, cm)
    error('carrier and modulator must have same number of tracks');
end

len = min(length(carrier), length(modul)); % find shortest length
carrier = carrier(1:len, 1:st);            % shorten carrier if needed
modul = modul(1:len, 1:st);                % shorten modulator if needed

L = 2*chan;                                % window length / FFT length
w = repmat(hanning(L), 1, st);             % one window column per track

bands = 1:round(chan/numband):chan;        % indices for frequency band edges
bands(end) = chan;
% The number of edges produced by the rounded step is not always numband,
% so iterate over the edges that actually exist instead of assuming it.
nedges = numel(bands);

y = zeros(len, st);                        % output vector
ii = 0;
while ii*L*overlap + L <= len
    ind = round(1+ii*L*overlap : ii*L*overlap+L);
    FFTmod = fft(modul(ind,:) .* w);       % window & take FFT of modulator
    FFTcar = fft(carrier(ind,:) .* w);     % window & take FFT of carrier
    syn = zeros(chan, st);                 % place for synthesized output
    for jj = 1:nedges-1                    % for each frequency band
        b = bands(jj):bands(jj+1)-1;       % current band
        % Average along the frequency dimension explicitly: without the
        % dimension argument a single-bin band on stereo data would be
        % averaged across the two tracks instead.
        env = mean(abs(FFTmod(b,:)), 1);
        syn(b,:) = FFTcar(b,:) * diag(env); % take product of spectra
    end
    midval = FFTmod(1+L/2,:) .* FFTcar(1+L/2,:);           % midpoint is special
    synfull = [syn; midval; flipud(conj(syn(2:end,:)))];   % + and - frequencies
    timsig = real(ifft(synfull));          % invert back to time
    y(ind,:) = y(ind,:) + timsig;          % add back into time waveform
    ii = ii + 1;
end

% Normalize output; skip when the output is silent (or no full frame fit in
% the input) so that the result is zeros rather than NaN from 0/0.
peak = max(abs(y(:)));
if peak > 0
    y = 0.8*y/peak;
end
end

66
frequencySpectrum.m Normal file
View File

@ -0,0 +1,66 @@
function [power, duration] = frequencySpectrum(signal, fs, pad)
%%%%%%%%%%%%%%%%%%
%function [power, duration] = frequencySpectrum(signal, fs, pad)
%
% Task: Display the power spectrum (lin and log scale) of a given signal
%
% Input:
%   - signal: the input signal to process (row or column vector)
%   - fs: the sampling rate
%   - pad: boolean, if true the signal is padded with 0 to the next power
%          of 2 -> FFT instead of DFT
%
% Output:
%   - power: the power spectrum, abs(fft(signal, n)).^2 / n
%   - duration: elapsed time of the fft call as measured by tic/toc
%
% Side effects: opens two figures (time / linear / dB panels, then a
% standalone dB plot).
%
% Guillaume Gibert, guillaume.gibert@ecam.fr
% 25/04/2022
%%%%%%%%%%%%%%%%%%

n = length(signal); % number of samples
if (pad)
    n = 2^nextpow2(n);
end

tic
y = fft(signal, n); % compute DFT of input signal
duration = toc;

power = abs(y).^2/n; % power of the DFT

% Reference level for the dB plots: the largest power value. Fall back to 1
% for an empty or all-zero spectrum so the division below stays defined.
peak = max(power(:));
if isempty(peak) || peak == 0
    peak = 1;
end
powerdB = 10*log10(power/peak);

% plots
figure;

subplot(1,3,1) % time plot
t = 0:1/fs:(n-1)/fs; % time range
% pad signal with zeros along its own orientation so that it matches t
if (pad)
    if size(signal, 1) == 1
        signal = [signal, zeros(1, n - size(signal, 2))];
    else
        signal = [signal; zeros(n - size(signal, 1), size(signal, 2))];
    end
end
plot(t, signal)
% t is already expressed in seconds, so the default ticks are in seconds;
% ticks computed in samples would fall outside the plotted range.
xlabel('Time (s)');
ylabel('Amplitude (a.u.)');

subplot(1,3,2) % linear frequency plot
f = (0:n-1)*(fs/n); % frequency range
plot(f, power, 'b*'); hold on;
plot(f, power, 'r');
hold off
xlabel('Frequency (Hz)')
ylabel('Power (a.u.)')

subplot(1,3,3) % log frequency plot
plot(f, powerdB);
xlabel('Frequency (Hz)')
ylabel('Power (dB)')

figure; % same dB plot on its own figure
plot(f, powerdB);
xlabel('Frequency (Hz)')
ylabel('Power (dB)')
end

BIN
sound/.DS_Store vendored Normal file

Binary file not shown.

BIN
sound/carrier22.wav Normal file

Binary file not shown.

BIN
sound/modulator22.wav Normal file

Binary file not shown.

BIN
sound/white.wav Normal file

Binary file not shown.

BIN
sound/white_periodic.wav Normal file

Binary file not shown.

38
spectrogram.m Normal file
View File

@ -0,0 +1,38 @@
function spectrogram(signal, samplingFreq, step_size, window_size)
%%%%%%%%%%%%%%%%%%%%%%%
%function spectrogram(signal, samplingFreq, step_size, window_size)
% ex.: spectrogram(signal, samplingFreq, step_size, window_size)
%
% Task: Plot the spectrogram of a given signal
%
% Inputs:
%   -signal: temporal signal to analyse
%   -samplingFreq: sampling frequency of the temporal signal
%   -step_size: how often the power spectrum will be computed in ms
%   -window_size: size of the analysing window in ms
%
% Output: None (opens a figure: waveform on top, spectrogram below)
%
% author: Guillaume Gibert (guillaume.gibert@ecam.fr)
% date: 14/03/2023
%%%%%%%%%%%%%%%%%%%%%%%

figure;

% --- top panel: waveform ---
subplot(2,1,1);
nSamples = length(signal);
% Build the time axis from integer sample indices so that it always has
% exactly one entry per sample (a fractional-step colon range may not).
tAxis = (0:nSamples-1)/samplingFreq;
plot(tAxis, signal');
if nSamples > 1
    xlim([0 tAxis(end)]);
end
ylabel('amplitude (norm. unit)');

% --- bottom panel: spectrogram ---
subplot(2,1,2);
step = fix(step_size*samplingFreq/1000);     % one spectral slice every step_size ms
window = fix(window_size*samplingFreq/1000); % window_size ms data window
fftn = 2^nextpow2(window);                   % next highest power of 2
[S, f, t] = specgram(signal, fftn, samplingFreq, window, window-step);

% Keep bins 2..K, where K is the bin index for 4000 Hz, clamped to the rows
% that specgram actually returned (matters when samplingFreq < 8000).
lastBin = min(floor(fftn*4000/samplingFreq), size(S, 1));
keep = 2:lastBin;
S = abs(S(keep,:));  % magnitude in range 0<f<=4000 Hz.
f = f(keep);         % crop the frequency axis with S, otherwise imagesc
                     % stretches the cropped image over the full range of f

peak = max(S(:));
if peak > 0
    S = S/peak;      % normalize magnitude so that the maximum is 1
end
S = max(S, 10^(-40/10)); % floor the normalized magnitude at 10^(-40/10)
S = min(S, 10^(-3/10));  % cap the normalized magnitude at 10^(-3/10)
imagesc (t, f, log(S));  % display in log scale
set (gca, "ydir", "normal"); % put the 'y' direction in the correct direction
xlabel('time (s)');
ylabel('frequency (Hz)');
end

51
speech_analysis.asv Normal file
View File

@ -0,0 +1,51 @@
function speech_analysis()
% speech_analysis  Load the modulator recording, play it, and plot it in the
% time domain, the frequency domain (DFT and FFT) and as a spectrogram.
% Takes no arguments and returns nothing; the audio path is hard-coded.

% Construct the full file path
filepath = './sound/modulator22.wav';

% Read the audio file
[y, Fs] = audioread(filepath);
disp(['Successfully read the audio file: ', filepath]);
disp(['Sampling frequency (Fs): ', num2str(Fs), ' Hz']);
disp(['Number of samples: ', num2str(length(y))]);

% Construct the output filename correctly
%[~, name, ~] = fileparts(filepath); % Get the filename without extension
%outputFilename = fullfile('.', ['processed_', name, '.wav']); % Create the new filename
% Write the audio to a new file with double the sampling rate
%audiowrite(outputFilename, y, Fs*2);
%disp(['Successfully wrote the processed audio to: ', outputFilename, ' with double the sampling rate.']);

% Play the original audio (using the audio data 'y' and its original sampling rate 'Fs')
sound(y, Fs); % Play the original sound
sound(y, Fs*2); % Same samples played back at twice the rate
disp('Playing the audio with double the sampling rate.');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Plot
t = (0:length(y)-1) / Fs; % Time in seconds
figure;
plot(t, y);
xlabel('Time (seconds)');
ylabel('Amplitude');
title(['Temporal Variation of ', filepath]);
grid on;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Frequency Spectrum
%DFT (pad = 0: length left unchanged)
frequencySpectrum(y,Fs, 0);
%FFT (pad = 1: zero-padded to the next power of 2)
frequencySpectrum(y,Fs, 1);
%Modify the padding to make the change.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Spectrogram parameters, both in milliseconds. They were previously
% referenced without ever being defined, which aborted the function here.
step_size = 5;
window_size = 50;
spectrogram(y, Fs, step_size, window_size)
end

69
speech_analysis.m Normal file
View File

@ -0,0 +1,69 @@
function speech_analysis()
% speech_analysis  Load the modulator recording, play it, plot it in the time
% domain, the frequency domain (FFT and DFT) and as a spectrogram, then cut
% out three vowel segments.
% Takes no arguments and returns nothing; the audio path is hard-coded.

% 'clear all' is intentionally not called: inside a function there is no
% caller workspace to clean up.
close all
clc

% Construct the full file path
filepath = './sound/modulator22.wav';

% Read the audio file
[y, Fs] = audioread(filepath);
disp(['Successfully read the audio file: ', filepath]);
disp(['Sampling frequency (Fs): ', num2str(Fs), ' Hz']);
disp(['Number of samples: ', num2str(length(y))]);

% Construct the output filename correctly
%[~, name, ~] = fileparts(filepath); % Get the filename without extension
%outputFilename = fullfile('.', ['processed_', name, '.wav']); % Create the new filename
% Write the audio to a new file with double the sampling rate
%audiowrite(outputFilename, y, Fs*2);
%disp(['Successfully wrote the processed audio to: ', outputFilename, ' with double the sampling rate.']);

% Play the original audio (using the audio data 'y' and its original sampling rate 'Fs')
sound(y, Fs); % Play the original sound
%sound(y, Fs*2);
% Only the original-rate playback is active, so report that.
disp('Playing the original audio.');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Plot
t = (0:length(y)-1) / Fs; % Time in seconds
figure;
plot(t, y);
xlabel('Time (seconds)');
ylabel('Amplitude');
title(['Temporal Variation of ', filepath]);
grid on;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Frequency Spectrum
% The timing is taken from the second output of frequencySpectrum.
%FFT (pad = 1: zero-padded to the next power of 2)
[yFFT, FFT_Time] = frequencySpectrum(y,Fs, 1);
disp(FFT_Time);
%DFT (pad = 0: length left unchanged)
[yDFT, DFT_Time] = frequencySpectrum(y,Fs, 0);
disp(DFT_Time);
%Modify the padding to make the change.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
spectrogram(y, Fs, 5,50)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Start and end indices (TO DETERMINE)
vowel_one_start = 10000;
vowel_one_end = 15000;
vowel_two_start = 12000;
vowel_two_end = 18000;
vowel_three_start = 15000;
vowel_three_end = 22000;

% Extract the vowel segments from the loaded signal y (the only signal
% defined in this function). The end index is clamped to the number of
% samples so that placeholder indices cannot run past the recording.
nSamples = size(y, 1);
vowel_one = y(vowel_one_start:min(vowel_one_end, nSamples), :);
vowel_two = y(vowel_two_start:min(vowel_two_end, nSamples), :);
vowel_three = y(vowel_three_start:min(vowel_three_end, nSamples), :);
end