"""Convert a folder tree of audio files into saved mel spectrograms.

Run as a script with the dataset root as the only argument. The root is
expected to contain one sub-folder per participant (e.g. ``p274``); every
file inside those folders except ``log.txt`` is loaded, transformed, and
written with ``torch.save`` under the output root.
"""
import os
import sys
from locale import normalize  # NOTE(review): appears unused here; kept in case other code relies on it.
from typing import Callable, Optional, Tuple

import torch
import torchaudio as ta

# Default output root used when the caller does not supply ``save_root``.
DEFAULT_SAVE_ROOT = "./Datasets/vctk/wav48_mels/"

# File names inside a participant folder that are not audio and are skipped.
SKIPPED_FILENAMES = frozenset({"log.txt"})

# Sample rate handed to the transform when the module is run as a script.
DEFAULT_SAMPLE_RATE = 48000


def load_data(path: str, backend: Optional[str] = None) -> Tuple[torch.Tensor, int]:
    """Load an audio file with torchaudio.

    Args:
        path: Location of the audio file.
        backend: Optional torchaudio I/O backend name, forwarded to
            ``ta.load``. ``None`` lets torchaudio choose.

    Returns:
        The ``(waveform, samplerate)`` pair returned by ``ta.load``.
    """
    waveform, samplerate = ta.load(path, normalize=True, backend=backend)
    return waveform, samplerate


def make_transform(samplerate: int, n_fft: int = 2048, n_mels: int = 128):
    """Build the mel-spectrogram transform.

    Args:
        samplerate: Sample rate passed to ``MelSpectrogram``.
        n_fft: FFT size (defaults to the previously hard-coded 2048).
        n_mels: Number of mel bins (defaults to the previously hard-coded 128).

    Returns:
        A ``ta.transforms.MelSpectrogram`` instance.
    """
    return ta.transforms.MelSpectrogram(samplerate, n_fft=n_fft, n_mels=n_mels)


def apply_transform(waveform, transform):
    """Return ``transform(waveform)``."""
    return transform(waveform)


def loop_through_whole_dataset(
    path: str,
    transform: Callable,
    save_root: str = DEFAULT_SAVE_ROOT,
) -> None:
    """Transform every audio file under ``path`` and save the results.

    For each participant folder ``<path>/<folder>`` and each file in it
    (other than ``log.txt``), the result is written to
    ``<save_root>/<folder>/<file stem>.mel``. Files whose output already
    exists are skipped, so an interrupted run can be resumed.

    Args:
        path: Dataset root containing one sub-folder per participant. A
            trailing path separator is accepted but no longer required.
        transform: Callable applied to each loaded waveform.
        save_root: Directory under which the output folders are created.
            Missing parent directories are created as needed.
    """
    # Sorted so progress output and processing order are deterministic.
    for folder in sorted(os.listdir(path)):
        folder_path = os.path.join(path, folder)
        if not os.path.isdir(folder_path):
            # A stray file in the root would make os.listdir() below raise.
            continue
        print(folder)

        out_dir = os.path.join(save_root, folder)
        for filename in sorted(os.listdir(folder_path)):
            if filename in SKIPPED_FILENAMES:
                continue

            # makedirs (not mkdir) so a missing save_root does not abort the run.
            os.makedirs(out_dir, exist_ok=True)

            # splitext drops only the final extension, so names that contain
            # additional dots keep distinct output files.
            stem = os.path.splitext(filename)[0]
            savepath = os.path.join(out_dir, stem + ".mel")
            if os.path.isfile(savepath):
                continue

            # NOTE(review): the file's own sample rate is not compared with
            # the rate the transform was built for.
            waveform, _samplerate = load_data(
                os.path.join(folder_path, filename), backend="ffmpeg"
            )
            mel_spec = apply_transform(waveform, transform)

            # Write to a temporary name first: the resume check above only
            # tests for existence, so a half-written file must never appear
            # under the final name.
            tmp_path = savepath + ".tmp"
            torch.save(mel_spec, tmp_path)
            os.replace(tmp_path, savepath)
    print("done")


def main(argv=None) -> None:
    """Script entry point: ``<script> <dataset_root>``."""
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        script = args[0] if args else "script"
        raise SystemExit(f"usage: {script} <dataset_root>")

    # Result is unused; presumably kept as an early check that torchaudio's
    # FFmpeg integration is usable before the long loop starts (confirm).
    ta.utils.ffmpeg_utils.get_audio_decoders()

    transform = make_transform(DEFAULT_SAMPLE_RATE)
    loop_through_whole_dataset(args[1], transform)


if __name__ == "__main__":
    main()