import os
import sys
from locale import normalize
import torch
import torchaudio as ta
def load_data(path):
    """Load the audio file at ``path`` with torchaudio.

    The file is read through ``ta.load`` with ``normalize=True``.

    Args:
        path: Location of the audio file, forwarded unchanged to ``ta.load``.

    Returns:
        A ``(waveform, samplerate)`` pair exactly as produced by ``ta.load``.
    """
    audio, rate = ta.load(path, normalize=True)
    return audio, rate
def make_transform(samplerate, n_fft=2048, n_mels=128):
    """Build a torchaudio ``MelSpectrogram`` transform.

    Args:
        samplerate: Passed as the first positional argument of
            ``ta.transforms.MelSpectrogram`` (the sample rate of the audio
            the transform will be applied to).
        n_fft: Forwarded as ``n_fft``. Defaults to 2048, the value that was
            previously hard-coded here.
        n_mels: Forwarded as ``n_mels``. Defaults to 128, the value that was
            previously hard-coded here.

    Returns:
        The constructed ``ta.transforms.MelSpectrogram`` instance.
    """
    return ta.transforms.MelSpectrogram(samplerate, n_fft=n_fft, n_mels=n_mels)
def apply_transform(waveform, transform):
    """Run ``transform`` on ``waveform`` and hand back the result.

    Args:
        waveform: The value passed as the sole argument to ``transform``.
        transform: Any callable taking one argument.

    Returns:
        Whatever ``transform(waveform)`` returns.
    """
    return transform(waveform)
def loop_through_whole_dataset(path, transform,
                               save_root="./Datasets/vctk/wav48_mels/"):
    """Transform every audio file under ``path`` and save the results.

    ``path`` is scanned for participant folders (e.g. ``p274``). Every file
    inside such a folder, except ``log.txt``, is loaded with torchaudio,
    passed through ``transform`` and written with ``torch.save`` to
    ``<save_root>/<folder>/<file stem>.mel``. Files whose output already
    exists are skipped, so an interrupted run can be resumed.

    Args:
        path: Root directory holding one sub-folder per participant. A
            trailing path separator is not required.
        transform: Callable applied to each loaded waveform.
        save_root: Directory under which the per-participant output folders
            are created. Defaults to the location that was previously
            hard-coded here.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Search through all audio folders.
    for speaker in os.listdir(path):
        speaker_dir = os.path.join(path, speaker)
        # Stray files next to the participant folders cannot be listed.
        if not os.path.isdir(speaker_dir):
            continue
        print(speaker)
        out_dir = os.path.join(save_root, speaker)
        # For each file in the participant folder (e.g. p274).
        for filename in os.listdir(speaker_dir):
            if filename == "log.txt":
                continue
            # makedirs also creates missing parents of save_root.
            os.makedirs(out_dir, exist_ok=True)
            # splitext drops only the final extension, so names containing
            # extra dots do not collapse onto the same output file.
            stem = os.path.splitext(filename)[0]
            target = os.path.join(out_dir, stem + ".mel")
            if os.path.isfile(target):
                continue
            waveform, _ = ta.load(
                os.path.join(speaker_dir, filename),
                normalize=True,
                backend="ffmpeg",
            )
            torch.save(transform(waveform), target)
    print("done")
if __name__ == "__main__":
    # Script entry point: the dataset root is the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError on sys.argv[1].
        sys.exit(f"usage: {sys.argv[0]} <dataset_root>")
    # The decoder list itself is not used. NOTE(review): presumably this call
    # is kept as an early check that the FFmpeg backend is usable - confirm.
    ta.utils.ffmpeg_utils.get_audio_decoders()
    path = sys.argv[1]
    sample_rate = 48000
    t = make_transform(sample_rate)
    loop_through_whole_dataset(path, t)