normfeat.py 8.14 KB
Newer Older
Anthony Larcher's avatar
Anthony Larcher committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# -*- coding: utf-8 -*-
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#    
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License, 
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT.  If not, see <http://www.gnu.org/licenses/>.

"""
Copyright 2014-2017 Anthony Larcher and Sylvain Meignier

:mod:`frontend` provides methods to process an audio signal in order to extract
useful parameters for speaker verification.
"""
Sylvain Meignier's avatar
Sylvain Meignier committed
30
import numpy
Anthony Larcher's avatar
Anthony Larcher committed
31
import pandas
32
33
34
35
import scipy.stats as stats
from scipy.signal import lfilter


Anthony Larcher's avatar
Anthony Larcher committed
36
# Module-level metadata: authorship, licensing, maintainer contact and the
# docstring markup format declared for this module.
__author__ = "Anthony Larcher and Sylvain Meignier"
__copyright__ = "Copyright 2014-2017 Anthony Larcher and Sylvain Meignier"
__license__ = "LGPL"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'


def rasta_filt(x):
    """Apply RASTA filtering to the input signal.

    The filter has the FIR numerator ``[0.2, 0.1, 0, -0.1, -0.2]`` and a
    single pole at 0.94. Every column of ``x`` is filtered independently
    along the frame axis.

    :param x: 2-D array to filter; rows of x = frames,
        cols of x = critical bands
    :return: the filtered signal, same shape as ``x``. The first four
        frames of every band are zero: they are only used to initialise
        the filter state.
    """
    x = x.T

    # Spell the coefficients out instead of using a non-integer-step
    # numpy.arange, whose values (and length) depend on rounding.
    numerator = numpy.array([0.2, 0.1, 0.0, -0.1, -0.2])
    denominator = numpy.array([1, -0.94])
    n_init = numerator.shape[0] - 1

    y = numpy.zeros(x.shape)
    if x.shape[1] <= n_init:
        # Not enough frames to get past the initialisation segment.
        return y.T

    # Initialize the state.  This avoids a big spike at the beginning
    # resulting from the dc offset level in each band.
    # Only the FIR part is run here (denominator 1), so the IIR part starts
    # from rest; the outputs of this pass are discarded and the
    # corresponding frames stay at zero.
    _, state = lfilter(numerator, 1, x[:, :n_init], axis=-1,
                       zi=numpy.zeros((x.shape[0], n_init)))

    # Apply the full filter to the rest of the signal, all bands at once.
    y[:, n_init:], _ = lfilter(numerator, denominator, x[:, n_init:],
                               axis=-1, zi=state)

    return y.T


77
def cms(features, label=None, global_mean=None):
    """Perform cepstral mean subtraction, in place.

    :param features: a 2-D feature stream, modified in place. The code
        indexes it with one frame per row and one coefficient per column.
    :param label: a logical vector with one entry per frame selecting the
        frames used to estimate the mean; all frames are used if None
    :param global_mean: pre-computed mean to use for feature normalization
        if given; it takes precedence over the mean estimated from the data

    :return: None, ``features`` is updated in place
    """
    # If no label is given as input: all frames are considered as speech
    if label is None:
        label = numpy.ones(features.shape[0]).astype(bool)

    if global_mean is not None:
        mu = global_mean
    elif label.sum() == 0:
        # No frame selected: there is nothing to estimate the mean from.
        # Leave the features untouched rather than subtracting the NaN mean
        # of an empty selection.
        return
    else:
        mu = numpy.mean(features[label, :], axis=0)
    features -= mu
Anthony Larcher's avatar
Anthony Larcher committed
98
99


100
def cmvn(features, label=None, global_mean=None, global_std=None):
    """Perform mean and variance normalization, in place.

    :param features: a 2-D feature stream, modified in place. The code
        indexes it with one frame per row and one coefficient per column.
    :param label: a logical vector with one entry per frame selecting the
        frames used to estimate the statistics; all frames are used if None
    :param global_mean: pre-computed mean to use for feature normalization
    :param global_std: pre-computed standard deviation to use for feature
        normalization. The pre-computed statistics are only used when both
        ``global_mean`` and ``global_std`` are given; otherwise they are
        estimated from the selected frames.

    :return: None, ``features`` is updated in place. Nothing is done when
        no statistics are supplied and no frame is selected.
    """
    # If no label is given as input: all frames are considered as speech
    if label is None:
        label = numpy.ones(features.shape[0]).astype(bool)

    if global_mean is not None and global_std is not None:
        mu = global_mean
        stdev = numpy.asarray(global_std)
    elif label.sum() != 0:
        selected = features[label, :]
        mu = numpy.mean(selected, axis=0)
        stdev = numpy.std(selected, axis=0)
    else:
        return

    # A zero standard deviation (constant coefficient) would turn the whole
    # coefficient into NaN/inf; divide by 1 there so it is only centred.
    stdev = numpy.where(stdev == 0, 1.0, stdev)

    features -= mu
    features /= stdev
Anthony Larcher's avatar
Anthony Larcher committed
127

Anthony Larcher's avatar
Anthony Larcher committed
128
129

def stg(features, label=None, win=301):
    """Perform feature warping (short term Gaussianization) on a sliding
    window, in place.

    Every selected frame is replaced, coefficient by coefficient, by the
    standard normal quantile of its rank inside a window of ``win`` frames.
    Frames closer than ``(win - 1) / 2`` to either end are ranked inside the
    first, respectively the last, full window.

    :param features: a 2-D feature stream, modified in place. The code
        indexes it with one frame per row and one coefficient per column.
    :param label: label of selected frames to compute the Short Term
        Gaussianization, by default, all frames are used
    :param win: size of the frame window to consider, must be an odd number
        to get a symmetric context on left and right. When fewer than
        ``win`` frames are selected, the window is shrunk to the number of
        selected frames (the last frame is duplicated internally if that
        number is even).

    :return: None, ``features`` is updated in place
    :raises ValueError: if ``win`` is not odd
    """
    if win % 2 != 1:
        raise ValueError('Sliding window should have an odd length')

    # If no label is given as input: all frames are considered as speech
    if label is None:
        label = numpy.ones(features.shape[0]).astype(bool)
    speech_features = features[label, :]

    # one feature per line
    nframes = speech_features.shape[0]
    if nframes == 0:
        # No frame selected: nothing to warp.
        return

    # If the number of frames is not enough for one window
    padded = False
    if nframes < win:
        # if the number of frames is not odd, duplicate the last frame
        if nframes % 2 == 0:
            speech_features = numpy.concatenate((speech_features, speech_features[-1:]))
            nframes += 1
            padded = True
        win = nframes

    # Integer half-width: float division here breaks slicing on Python 3.
    half = (win - 1) // 2

    # create the output feature stream
    stg_features = numpy.zeros(speech_features.shape)

    # Process first window: a double argsort gives the 0-based rank of every
    # frame inside the window.
    if half:
        rank = numpy.argsort(numpy.argsort(speech_features[:win], axis=0), axis=0)
        stg_features[:half] = stats.norm.ppf((rank[:half] + 0.5) / win, 0, 1)

    # Process every frame that has a full symmetric context
    for m in range(half, nframes - half):
        window = speech_features[m - half: m + half + 1]
        # 1-based rank of the centre frame inside its window
        rank = numpy.sum(window < window[half], axis=0) + 1
        stg_features[m] = stats.norm.ppf((rank - 0.5) / win, 0, 1)

    # Process the last window
    if half:
        rank = numpy.argsort(numpy.argsort(speech_features[nframes - win:], axis=0), axis=0)
        stg_features[nframes - half:] = stats.norm.ppf((rank[half + 1:] + 0.5) / win, 0, 1)

    # Drop the frame that was duplicated to get an odd number of frames
    if padded:
        stg_features = stg_features[:-1]
    features[label, :] = stg_features
Anthony Larcher's avatar
Anthony Larcher committed
191

Sylvain Meignier's avatar
Sylvain Meignier committed
192

Sylvain Meignier's avatar
Sylvain Meignier committed
193
def cep_sliding_norm(features, win=301, label=None, center=True, reduce=False):
    """
    Performs a cepstral mean subtraction and standard deviation normalization
    in a sliding window. MFCC is modified in place.

    When the number of selected frames does not exceed ``win``, the whole
    selection is normalized at once with :func:`cmvn` (if ``reduce``) or
    :func:`cms` (otherwise); ``center`` is not consulted in that case.
    Otherwise rolling statistics centred on each frame are used, the frames
    too close to either end re-using the statistics of the nearest complete
    window.

    :param features: the MFCC, a numpy array with one frame per row
    :param win: the size of the sliding window
    :param label: vad label if available; all frames are used if None
    :param center: performs mean subtraction
    :param reduce: performs standard deviation division; in the sliding
        branch it is only applied when ``center`` is also True

    :return: None, ``features`` is updated in place
    """
    if label is None:
        label = numpy.ones(features.shape[0]).astype(bool)

    if numpy.sum(label) <= win:
        if reduce:
            cmvn(features, label)
        else:
            cms(features, label)
        return

    # In the sliding branch nothing is applied unless centring is requested.
    if not center:
        return

    d_win = win // 2
    rolling = pandas.DataFrame(features[label, :]).rolling(window=win, center=True)

    mean = _fill_rolling_edges(rolling.mean(), d_win)
    # The standard deviation is only needed (and computed) when reducing.
    std = _fill_rolling_edges(rolling.std(), d_win) if reduce else None

    features[label, :] -= mean
    if reduce:
        features[label, :] /= std


def _fill_rolling_edges(stat, d_win):
    """Return rolling statistics as a writable array with the undefined edge
    rows replaced by the nearest row computed on a complete window."""
    # Copy: the array exposed by pandas may be read-only.
    values = numpy.array(stat.values)
    # With d_win == 0 there is no edge, and ``values[-0:]`` would address
    # the whole array instead of an empty tail.
    if d_win > 0:
        values[:d_win, :] = values[d_win, :]
        values[-d_win:, :] = values[-d_win - 1, :]
    return values