Commit a0567e62 authored by Anthony Larcher
Browse files

optional pyroom

parent b86abfce
......@@ -189,5 +189,5 @@ __maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'
__version__="1.3.7"
__version__="1.3.8.5.2"
......@@ -29,7 +29,6 @@ Copyright 2014-2021 Anthony Larcher and Sylvain Meignier
from .augmentation import AddNoise
from .augmentation import AddReverb
from .feed_forward import FForwardNetwork
from .feed_forward import kaldi_to_hdf5
from .xsets import XvectorMultiDataset, XvectorDataset, StatDataset
......@@ -38,6 +37,16 @@ from .res_net import ResBlock, ResNet18
from .rawnet import prepare_voxceleb1, Vox1Set, PreEmphasis
from .sincnet import SincNet
has_pyroom = True
try:
import pyroomacoustics
except ImportError:
has_pyroom = False
if has_pyroom:
from .augmentation import AddReverb
__author__ = "Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2021 Anthony Larcher and Sylvain Meignier"
__license__ = "LGPL"
......
......@@ -30,8 +30,13 @@ import numpy
import pandas
import random
import soundfile
import threading
import pyroomacoustics
has_pyroom = True
try:
import pyroomacoustics
except ImportError:
has_pyroom = False
__author__ = "Anthony Larcher and Sylvain Meignier"
......@@ -210,155 +215,156 @@ class AddNoiseFromSilence(object):
return data.squeeze(), sample[1], sample[2], sample[3], sample[4], sample[5]
class AddReverb(object):
    """Simulate indoor reverberation using randomly generated shoebox rooms.

    A pool of ``n_rooms_`` random rooms is built lazily on the first call that
    requests reverberation and is occasionally refreshed (a new room is
    appended with probability ``new_rooms_prob_``, evicting the oldest one).
    Every room holds two sources (speech and noise) and a microphone array
    built from a single random position.

    Parameters
    ----------
    depth : (float, float), optional
        Minimum and maximum values for room depth (in meters).
        Defaults to (2.0, 10.0).
    width : (float, float), optional
        Minimum and maximum values for room width (in meters).
        Defaults to (1.0, 10.0).
    height : (float, float), optional
        Minimum and maximum values for room height (in meters).
        Defaults to (2.0, 5.0).
    absorption : (float, float), optional
        Minimum and maximum values of walls absorption coefficient.
        Defaults to (0.2, 0.9).
    noise : callable, optional
        Noise generator called as ``noise(n_samples, sample_rate)``; its
        squeezed result is played at the second source. When None (the
        default) the second source stays silent.
    snr : (float, float), optional
        Minimum and maximum values of signal-to-noise ratio.
        Defaults to (5.0, 15.0)
    sample_rate : int, optional
        Sampling rate (in Hz) used to build the rooms and passed to ``noise``.
        Defaults to 16000.
        NOTE(review): the 16 kHz default is an assumption -- confirm it matches
        the audio fed to this transform.
    """

    def __init__(
        self,
        depth=(2.0, 10.0),
        width=(1.0, 10.0),
        height=(2.0, 5.0),
        absorption=(0.2, 0.9),
        noise=None,
        snr=(5.0, 15.0),
        sample_rate=16000
    ):
        super().__init__()
        self.depth = depth
        self.width = width
        self.height = height
        self.absorption = absorption
        self.max_order_ = 17

        self.noise = noise
        self.snr = snr
        self.noise_ = noise

        # __call__ reads self.sample_rate, so it has to exist on the instance
        self.sample_rate = sample_rate

        self.n_rooms_ = 128
        self.new_rooms_prob_ = 0.001
        self.main_lock_ = threading.Lock()
        # bounded pool: appending to a full deque drops the oldest room
        self.rooms_ = collections.deque(maxlen=self.n_rooms_)
        self.room_lock_ = [threading.Lock() for _ in range(self.n_rooms_)]

    @staticmethod
    def random(m, M):
        """Draw one value uniformly from the interval [m, M).

        :param m: lower bound
        :param M: upper bound
        :return: a float between m and M
        """
        return (M - m) * numpy.random.random_sample() + m

    def new_room(self, sample_rate: int):
        """Build a random shoebox room and pre-compute its impulse responses.

        :param sample_rate: sampling rate given to the room and microphone array
        :return: a ``pyroomacoustics.ShoeBox`` with two sources, one
            microphone array and computed room impulse responses
        """
        # generate a room at random
        depth = self.random(*self.depth)
        width = self.random(*self.width)
        height = self.random(*self.height)
        absorption = self.random(*self.absorption)
        room = pyroomacoustics.ShoeBox(
            [depth, width, height],
            fs=sample_rate,
            absorption=absorption,
            max_order=self.max_order_,
        )

        # play the original audio chunk at a random location
        original = [
            self.random(0, depth),
            self.random(0, width),
            self.random(0, height),
        ]
        room.add_source(original)

        # play the noise audio chunk at a random location
        noise = [self.random(0, depth), self.random(0, width), self.random(0, height)]
        room.add_source(noise)

        # place the microphone at a random location
        microphone = [
            self.random(0, depth),
            self.random(0, width),
            self.random(0, height),
        ]
        # the same position is stacked twice; only channel 0 is read back
        room.add_microphone_array(
            pyroomacoustics.MicrophoneArray(numpy.c_[microphone, microphone], sample_rate)
        )

        room.compute_rir()

        return room

    def __call__(self, sample):
        """Apply reverberation to ``sample[0]`` when ``sample[5]`` is truthy.

        :param sample: indexable with at least six items; item 0 is the
            signal and item 5 the flag enabling reverberation
        :return: tuple ``(data, sample[1], ..., sample[5])`` where ``data`` is
            the simulated microphone signal, or ``sample[0]`` untouched when
            the flag is falsy
        """
        data = sample[0]
        if sample[5]:

            with self.main_lock_:

                # initialize rooms (with 2 sources and 1 microphone)
                while len(self.rooms_) < self.n_rooms_:
                    room = self.new_room(self.sample_rate)
                    self.rooms_.append(room)

                # create new room with probability new_rooms_prob_
                if numpy.random.rand() > 1.0 - self.new_rooms_prob_:
                    room = self.new_room(self.sample_rate)
                    self.rooms_.append(room)

            # choose one room at random
            index = numpy.random.choice(self.n_rooms_)

            # lock chosen room to ensure room.sources are not updated concurrently
            with self.room_lock_[index]:
                room = self.rooms_[index]

                # play normalized original audio chunk at source #1
                n_samples = len(data)
                # `normalize` is expected to come from the enclosing module
                data = normalize(data).squeeze()
                room.sources[0].add_signal(data)

                if self.noise_ is None:
                    # no noise generator configured: keep source #2 silent so
                    # the room still has a signal for each of its sources
                    noise = numpy.zeros(n_samples)
                else:
                    # generate noise with random SNR (alpha == 10 ** (-snr / 20))
                    noise = self.noise_(n_samples, self.sample_rate).squeeze()
                    snr = self.random(*self.snr)
                    alpha = numpy.exp(-numpy.log(10) * snr / 20)
                    # not in-place: leave the generator's own buffer untouched
                    noise = noise * alpha

                # play noise at source #2
                room.sources[1].add_signal(noise)

                # simulate room and return microphone signal
                room.simulate()
                data = room.mic_array.signals[0, :n_samples, numpy.newaxis]

        return data, sample[1], sample[2], sample[3], sample[4], sample[5]
if has_pyroom:
    # AddReverb needs pyroomacoustics, so it is only defined when the import succeeded

    class AddReverb(object):
        """Simulate indoor reverberation using randomly generated shoebox rooms.

        A pool of ``n_rooms_`` random rooms is built lazily on the first call
        that requests reverberation and is occasionally refreshed (a new room
        is appended with probability ``new_rooms_prob_``, evicting the oldest
        one). Every room holds two sources (speech and noise) and a microphone
        array built from a single random position.

        Parameters
        ----------
        depth : (float, float), optional
            Minimum and maximum values for room depth (in meters).
            Defaults to (2.0, 10.0).
        width : (float, float), optional
            Minimum and maximum values for room width (in meters).
            Defaults to (1.0, 10.0).
        height : (float, float), optional
            Minimum and maximum values for room height (in meters).
            Defaults to (2.0, 5.0).
        absorption : (float, float), optional
            Minimum and maximum values of walls absorption coefficient.
            Defaults to (0.2, 0.9).
        noise : callable, optional
            Noise generator called as ``noise(n_samples, sample_rate)``; its
            squeezed result is played at the second source. When None (the
            default) the second source stays silent.
        snr : (float, float), optional
            Minimum and maximum values of signal-to-noise ratio.
            Defaults to (5.0, 15.0)
        sample_rate : int, optional
            Sampling rate (in Hz) used to build the rooms and passed to
            ``noise``. Defaults to 16000.
            NOTE(review): the 16 kHz default is an assumption -- confirm it
            matches the audio fed to this transform.
        """

        def __init__(
            self,
            depth=(2.0, 10.0),
            width=(1.0, 10.0),
            height=(2.0, 5.0),
            absorption=(0.2, 0.9),
            noise=None,
            snr=(5.0, 15.0),
            sample_rate=16000
        ):
            super().__init__()
            self.depth = depth
            self.width = width
            self.height = height
            self.absorption = absorption
            self.max_order_ = 17

            self.noise = noise
            self.snr = snr
            self.noise_ = noise

            # __call__ reads self.sample_rate, so it has to exist on the instance
            self.sample_rate = sample_rate

            self.n_rooms_ = 128
            self.new_rooms_prob_ = 0.001
            self.main_lock_ = threading.Lock()
            # bounded pool: appending to a full deque drops the oldest room
            self.rooms_ = collections.deque(maxlen=self.n_rooms_)
            self.room_lock_ = [threading.Lock() for _ in range(self.n_rooms_)]

        @staticmethod
        def random(m, M):
            """Draw one value uniformly from the interval [m, M).

            :param m: lower bound
            :param M: upper bound
            :return: a float between m and M
            """
            return (M - m) * numpy.random.random_sample() + m

        def new_room(self, sample_rate: int):
            """Build a random shoebox room and pre-compute its impulse responses.

            :param sample_rate: sampling rate given to the room and microphone array
            :return: a ``pyroomacoustics.ShoeBox`` with two sources, one
                microphone array and computed room impulse responses
            """
            # generate a room at random
            depth = self.random(*self.depth)
            width = self.random(*self.width)
            height = self.random(*self.height)
            absorption = self.random(*self.absorption)
            room = pyroomacoustics.ShoeBox(
                [depth, width, height],
                fs=sample_rate,
                absorption=absorption,
                max_order=self.max_order_,
            )

            # play the original audio chunk at a random location
            original = [
                self.random(0, depth),
                self.random(0, width),
                self.random(0, height),
            ]
            room.add_source(original)

            # play the noise audio chunk at a random location
            noise = [self.random(0, depth), self.random(0, width), self.random(0, height)]
            room.add_source(noise)

            # place the microphone at a random location
            microphone = [
                self.random(0, depth),
                self.random(0, width),
                self.random(0, height),
            ]
            # the same position is stacked twice; only channel 0 is read back
            room.add_microphone_array(
                pyroomacoustics.MicrophoneArray(numpy.c_[microphone, microphone], sample_rate)
            )

            room.compute_rir()

            return room

        def __call__(self, sample):
            """Apply reverberation to ``sample[0]`` when ``sample[5]`` is truthy.

            :param sample: indexable with at least six items; item 0 is the
                signal and item 5 the flag enabling reverberation
            :return: tuple ``(data, sample[1], ..., sample[5])`` where ``data``
                is the simulated microphone signal, or ``sample[0]`` untouched
                when the flag is falsy
            """
            data = sample[0]
            if sample[5]:

                with self.main_lock_:

                    # initialize rooms (with 2 sources and 1 microphone)
                    while len(self.rooms_) < self.n_rooms_:
                        room = self.new_room(self.sample_rate)
                        self.rooms_.append(room)

                    # create new room with probability new_rooms_prob_
                    if numpy.random.rand() > 1.0 - self.new_rooms_prob_:
                        room = self.new_room(self.sample_rate)
                        self.rooms_.append(room)

                # choose one room at random
                index = numpy.random.choice(self.n_rooms_)

                # lock chosen room to ensure room.sources are not updated concurrently
                with self.room_lock_[index]:
                    room = self.rooms_[index]

                    # play normalized original audio chunk at source #1
                    n_samples = len(data)
                    # `normalize` is expected to come from the enclosing module
                    data = normalize(data).squeeze()
                    room.sources[0].add_signal(data)

                    if self.noise_ is None:
                        # no noise generator configured: keep source #2 silent
                        # so the room still has a signal for each of its sources
                        noise = numpy.zeros(n_samples)
                    else:
                        # generate noise with random SNR (alpha == 10 ** (-snr / 20))
                        noise = self.noise_(n_samples, self.sample_rate).squeeze()
                        snr = self.random(*self.snr)
                        alpha = numpy.exp(-numpy.log(10) * snr / 20)
                        # not in-place: leave the generator's own buffer untouched
                        noise = noise * alpha

                    # play noise at source #2
                    room.sources[1].add_signal(noise)

                    # simulate room and return microphone signal
                    room.simulate()
                    data = room.mic_array.signals[0, :n_samples, numpy.newaxis]

            return data, sample[1], sample[2], sample[3], sample[4], sample[5]
......@@ -39,7 +39,7 @@ import soundfile
import yaml
from .augmentation import AddNoise
from .augmentation import AddReverb
#from .augmentation import AddReverb
from ..bosaris.idmap import IdMap
from ..frontend.vad import pre_emphasis
from ..frontend.features import trfbank
......@@ -469,15 +469,21 @@ class SideSet(Dataset):
noise_root_path=self.transformation["noise_root_db"]))
if 'add_reverb' in t:
self.add_reverb[:int(self.len * self.transformation["reverb_file_ratio"])] = 1
numpy.random.shuffle(self.add_reverb)
_transform.append(AddReverb(depth=self.transformation["reverb_depth"],
width=self.transformation["reverb_width"],
height=self.transformation["reverb_height"],
absorption=self.transformation["reverb_absorption"],
noise=None,
snr=self.transformation["reverb_snr"]))
has_pyroom = True
try:
import pyroomacoustics
except ImportError:
has_pyroom = False
if has_pyroom:
self.add_reverb[:int(self.len * self.transformation["reverb_file_ratio"])] = 1
numpy.random.shuffle(self.add_reverb)
_transform.append(AddReverb(depth=self.transformation["reverb_depth"],
width=self.transformation["reverb_width"],
height=self.transformation["reverb_height"],
absorption=self.transformation["reverb_absorption"],
noise=None,
snr=self.transformation["reverb_snr"]))
if 'MFCC' in t:
_transform.append(MFCC())
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment