-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdataloader.py
42 lines (35 loc) · 1.31 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import os
import torch
import torchaudio
from torch.utils.data import Dataset
from torchvision.transforms import Resize
import torchaudio.transforms as T
def get_wav_files(directory):
    """Return the paths of the ``.wav`` files located directly in *directory*.

    Entries are selected by a case-sensitive ``.wav`` filename suffix; the
    directory is not searched recursively. Filenames are sorted before being
    joined because ``os.listdir`` returns entries in arbitrary order, and
    callers that index into the result need a stable order across runs.

    Args:
        directory: Path of the directory to scan.

    Returns:
        A list of ``os.path.join(directory, filename)`` strings, one per
        matching entry, in ascending filename order. Empty if nothing matches.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``FileNotFoundError``
            when *directory* does not exist).
    """
    return [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.endswith(".wav")
    ]
class AudioDataset(Dataset):
    """Map-style dataset of ``.wav`` files labelled 0 (noise) or 1 (clap).

    Files are collected from two directories with ``get_wav_files``. The
    noise files come first in ``file_list`` and carry label 0; the clap files
    follow and carry label 1. Each item is loaded from disk on access and
    returned as a resized, normalized mel spectrogram together with its label.

    Attributes:
        noise_dir: Directory the label-0 files were read from.
        clap_dir: Directory the label-1 files were read from.
        file_list: Paths of all samples, noise files followed by clap files.
        labels: Integer label per entry of ``file_list`` (same length).
    """

    # (height, width) every spectrogram is resized to before normalization.
    target_size = (256, 256)

    def __init__(self, noise_dir, clap_dir):
        """Collect the ``.wav`` files of both directories and build labels.

        Args:
            noise_dir: Directory containing the noise recordings (label 0).
            clap_dir: Directory containing the clap recordings (label 1).
        """
        noise_files = get_wav_files(noise_dir)
        clap_files = get_wav_files(clap_dir)
        self.noise_dir = noise_dir
        self.clap_dir = clap_dir
        self.file_list = noise_files + clap_files
        # Size the labels from the filtered file lists, not from the raw
        # directory listings: any non-.wav entry in a directory would
        # otherwise make `labels` longer than `file_list` and shift the
        # noise/clap boundary, mislabelling samples.
        self.labels = [0] * len(noise_files) + [1] * len(clap_files)

    def __len__(self):
        """Return the number of samples (noise plus clap files)."""
        return len(self.file_list)

    @staticmethod
    def _normalize(spec, eps=1e-8):
        """Shift *spec* to zero mean and scale it by its standard deviation.

        The standard deviation is floored at *eps* so that a constant
        spectrogram (std of 0) yields zeros instead of NaN from a 0/0
        division. Inputs whose std is at least *eps* are unaffected.
        """
        return (spec - spec.mean()) / spec.std().clamp_min(eps)

    def __getitem__(self, idx, n_mels=64, n_fft=400, hop_length=200):
        """Load sample *idx* and return ``(spectrogram, label)``.

        Args:
            idx: Index into ``file_list`` / ``labels``.
            n_mels: Number of mel bins passed to ``MelSpectrogram``.
            n_fft: FFT size; also used as the window length.
            hop_length: Hop between successive frames.

        Returns:
            A tuple ``(spec, label)`` where ``spec`` is the mel spectrogram
            resized to ``target_size`` and normalized over all of its
            elements, and ``label`` is ``torch.tensor`` of the integer label.
            NOTE(review): the leading dimension of ``spec`` presumably equals
            the number of audio channels in the file - confirm against the
            data, since mixed mono/stereo files would not batch together.
        """
        waveform, sample_rate = torchaudio.load(self.file_list[idx])
        # The transform is built per item because it depends on the sample
        # rate reported for this particular file.
        to_mel = T.MelSpectrogram(
            sample_rate=sample_rate,
            n_fft=n_fft,
            win_length=n_fft,
            hop_length=hop_length,
            n_mels=n_mels,
        )
        spec = to_mel(waveform)
        # Clips of different duration produce a different number of frames;
        # resizing gives every item the same spatial size.
        spec = Resize(self.target_size)(spec)
        spec = self._normalize(spec)
        label = self.labels[idx]
        return spec, torch.tensor(label)