Commit 64be3db6 authored by Ambuj Mehrish
Browse files

Replace augmentation.py

parent 7ea4b949
......@@ -33,7 +33,7 @@ import random
import soundfile
import torch
import torchaudio
from scipy import signal
has_pyroom = True
try:
import pyroomacoustics
......@@ -492,16 +492,15 @@ def data_augmentation(speech,
if "add_reverb" in augmentations:
rir_nfo = rir_df.iloc[random.randrange(rir_df.shape[0])].file_id
rir_fn = transform_dict["add_reverb"]["data_path"] + "/" + rir_nfo + ".wav"
rir, rir_fs = torchaudio.load(rir_fn)
#rir = rir[rir_nfo[1], :] #keep selected channel
speech = torch.tensor(signal.convolve(speech, rir, mode='full')[:, :speech.shape[1]])
rir_fn = transform_dict["add_reverb"]["data_path"] + rir_nfo
rir, rir_fs = torchaudio.load(rir_fn)
speech = torch.tensor(signal.convolve(speech,rir,mode='full')[:, :speech.shape[1]])
if "add_noise" in augmentations:
# Pick a noise type
noise = torch.zeros_like(speech)
noise_idx = random.randrange(4)
noise_idx = random.randrange(3)
# speech
if noise_idx == 0:
......@@ -527,14 +526,17 @@ def data_augmentation(speech,
# babble noise with different volume
elif noise_idx == 3:
snr_db = random.randint(13,20)
ns = random.randint(5,10) # Randomly select 5 to 10 speakers
noise_fn = transform_dict["add_noise"]["data_path"] + "/" + noise_df[noise_df["type"] == "speech"].sample(ns,replace=False)["file_id"].values + ".wav"
pick_count = random.randint(5,10) # Randomly select 5 to 10 speakers
index_list = random.choices(range(noise_df.loc['speech'].shape[0]), k=pick_count)
#noise_rows = transform_dict["add_noise"]["data_path"] + "/" + noise_df[noise_df["type"] == "speech"].sample(ns,replace=False)["file_id"].values + ".wav"
noise = torch.zeros(1,speech.shape[1])
for idx in range(ns):
noise_,noise_fs = torchaudio.load(noise_fn[idx],frame_offset=0,num_frames=speech.shape[1])
for idx in index_list:
#noise_,noise_fs = torchaudio.load(noise_fn[idx],frame_offset=0,num_frames=speech.shape[1])
noise_row = noise_df.loc['speech'].iloc[idx]
noise_ = load_noise_seg(noise_row, speech.shape, sample_rate, transform_dict["add_noise"]["data_path"])
transform = torchaudio.transforms.Vol(gain=random.randint(5,15),gain_type='db') # Randomly select volume level (5-15d)
noise += transform(noise_)
noise /= ns
noise /= pick_count
speech_power = speech.norm(p=2)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment