# -*- coding: utf-8 -*-
#
# This file is part of SIDEKIT.
#
# SIDEKIT is a python package for speaker verification.
# Home page: http://www-lium.univ-lemans.fr/sidekit/
#
#
# SIDEKIT is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 3 of the License,
# or (at your option) any later version.
#
# SIDEKIT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with SIDEKIT.  If not, see <http://www.gnu.org/licenses/>.

"""
Anthony Larcher's avatar
v1.3.7    
Anthony Larcher committed
25
Copyright 2014-2021 Anthony Larcher
Anthony Larcher's avatar
Anthony Larcher committed
26
27
28
29
30

"""

import math
import numpy
import torch
from collections import OrderedDict
from torch.nn import Parameter


#from .classification import Classification

__license__ = "LGPL"
__author__ = "Anthony Larcher"
__copyright__ = "Copyright 2015-2020 Anthony Larcher"
__maintainer__ = "Anthony Larcher"
__email__ = "anthony.larcher@univ-lemans.fr"
__status__ = "Production"
__docformat__ = 'reStructuredText'


class ArcMarginModel(torch.nn.Module):
    # `args` is expected to provide num_classes, emb_size, easy_margin,
    # margin_m and margin_s
    def __init__(self, args):
        super(ArcMarginModel, self).__init__()

        self.weight = Parameter(torch.FloatTensor(args.num_classes, args.emb_size))
        torch.nn.init.xavier_uniform_(self.weight)

        self.easy_margin = args.easy_margin
        self.m = args.margin_m
        self.s = args.margin_s

        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def forward(self, input, label):
        x = torch.nn.functional.normalize(input)
        W = torch.nn.functional.normalize(self.weight)
        cosine = torch.nn.functional.linear(x, W)
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = torch.zeros(cosine.size(), device=input.device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output
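
# A minimal, hypothetical usage sketch for ArcMarginModel: the `args`
# namespace below only carries the fields the class reads; a real
# training script may provide more.
def _demo_arc_margin_model():
    from argparse import Namespace
    args = Namespace(num_classes=10, emb_size=128,
                     easy_margin=False, margin_m=0.5, margin_s=64.0)
    head = ArcMarginModel(args)
    emb = torch.randn(4, args.emb_size)
    label = torch.randint(0, args.num_classes, (4,))
    logits = head(emb, label)  # (4, num_classes), scaled by margin_s
    return torch.nn.functional.cross_entropy(logits, label)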


def l2_norm(input, axis=1):
    """L2-normalize a tensor along the given axis."""
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


class ArcFace(torch.nn.Module):
    # implementation of additive angular margin loss (ArcFace) in https://arxiv.org/abs/1801.07698
    def __init__(self, embedding_size, classnum, s=64., m=0.5):
        super(ArcFace, self).__init__()
        self.classnum = classnum
        self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
        # initial kernel
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.m = m  # the margin value, default is 0.5
        self.s = s  # the scale value, default is 64, see normface https://arxiv.org/abs/1704.06369
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.mm = self.sin_m * m  # issue 1
        self.threshold = math.cos(math.pi - m)

    def forward(self, embeddings, target):
        # weights norm
        nB = len(embeddings)
        kernel_norm = l2_norm(self.kernel, axis=0)
        # cos(theta+m)
        cos_theta = torch.mm(embeddings, kernel_norm)
        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        cos_theta_2 = torch.pow(cos_theta, 2)
        sin_theta_2 = 1 - cos_theta_2
        sin_theta = torch.sqrt(sin_theta_2)
        cos_theta_m = (cos_theta * self.cos_m - sin_theta * self.sin_m)
        # this condition keeps theta + m in the range [0, pi]:
        #      0 <= theta + m <= pi
        #     -m <= theta <= pi - m
        cond_v = cos_theta - self.threshold
        cond_mask = cond_v <= 0
        # when theta not in [0,pi], use cosface instead
        keep_val = (cos_theta - self.mm)
        cos_theta_m[cond_mask] = keep_val[cond_mask]
        # a little bit hacky way to prevent in_place operation on cos_theta
        output = cos_theta * 1.0
        idx_ = torch.arange(0, nB, dtype=torch.long)
        output[idx_, target] = cos_theta_m[idx_, target]
        output *= self.s  # scale up in order to make softmax work, first introduced in normface
        return output
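
# Minimal sketch of driving ArcFace as a training head (sizes are
# illustrative only); embeddings are assumed to be L2-normalized by the
# caller, since only the kernel is normalized inside forward().
def _demo_arcface():
    head = ArcFace(embedding_size=128, classnum=42)
    embeddings = torch.nn.functional.normalize(torch.randn(8, 128), dim=1)
    target = torch.randint(0, 42, (8,))
    logits = head(embeddings, target)  # (8, 42), margin applied on target logits
    return torch.nn.functional.cross_entropy(logits, target)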


##################################  Cosface head #############################################################

class Am_softmax(torch.nn.Module):
    # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599
    def __init__(self, embedding_size=512, classnum=51332):
        super(Am_softmax, self).__init__()
        self.classnum = classnum
        self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
        # initial kernel
        self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
        self.m = 0.35  # additive margin recommended by the paper
        self.s = 30.  # see normface https://arxiv.org/abs/1704.06369

    def forward(self, embeddings, label):
        kernel_norm = l2_norm(self.kernel, axis=0)
        cos_theta = torch.mm(embeddings, kernel_norm)
        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        phi = cos_theta - self.m
        label = label.view(-1, 1)  # size=(B,1)
        index = cos_theta.data * 0.0  # size=(B,Classnum)
        index.scatter_(1, label.data.view(-1, 1), 1)
        index = index.bool()
        output = cos_theta * 1.0
        output[index] = phi[index]  # only change the correct predicted output
        output *= self.s  # scale up in order to make softmax work, first introduced in normface
        return output
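
# Same calling convention as the ArcFace sketch above, but with the
# additive cosine margin of Am_softmax; sizes are illustrative only.
def _demo_am_softmax():
    head = Am_softmax(embedding_size=128, classnum=42)
    embeddings = torch.nn.functional.normalize(torch.randn(8, 128), dim=1)
    label = torch.randint(0, 42, (8,))
    logits = head(embeddings, label)  # (8, 42)
    return torch.nn.functional.cross_entropy(logits, label)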


class ArcLinear(torch.nn.Module):
    """Additive Angular Margin linear module (ArcFace)

    Parameters
    ----------
    nfeat : int
        Embedding dimension
    nclass : int
        Number of classes
    margin : float
        Angular margin to penalize distances between embeddings and centers
    s : float
        Scaling factor for the logits
    """

    def __init__(self, nfeat, nclass, margin, s):
        super(ArcLinear, self).__init__()
        eps = 1e-4
        self.min_cos = eps - 1
        self.max_cos = 1 - eps
        self.nclass = nclass
        self.margin = margin
        self.s = s
        self.W = torch.nn.Parameter(torch.Tensor(nclass, nfeat))
        torch.nn.init.xavier_uniform_(self.W)

    def forward(self, x, target=None):
        """Apply the angular margin transformation

        Parameters
        ----------
        x : `torch.Tensor`
            an embedding batch
        target : `torch.Tensor`
            a non one-hot label batch

        Returns
        -------
        fX : `torch.Tensor`
            logits after the angular margin transformation
        """
        # the feature vectors have been normalized before calling this layer
        #xnorm = torch.nn.functional.normalize(x)
        xnorm = x
        # normalize W
        Wnorm = torch.nn.functional.normalize(self.W)
        target = target.long().view(-1, 1)
        # calculate cosθj (the logits)
        cos_theta_j = torch.matmul(xnorm, torch.transpose(Wnorm, 0, 1))
        # get the cosθ corresponding to the classes
        cos_theta_yi = cos_theta_j.gather(1, target)
        # for numerical stability
        cos_theta_yi = cos_theta_yi.clamp(min=self.min_cos, max=self.max_cos)
        # get the angle separating xi and Wyi
        theta_yi = torch.acos(cos_theta_yi)
        # apply the margin to the angle
        cos_theta_yi_margin = torch.cos(theta_yi + self.margin)
        # one-hot encode y
        one_hot = torch.zeros_like(cos_theta_j)
        one_hot.scatter_(1, target, 1.0)
        # project margin differences into cosθj
        return self.s * (cos_theta_j + one_hot * (cos_theta_yi_margin - cos_theta_yi))
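
# Minimal sketch for ArcLinear; x is pre-normalized by the caller, as the
# commented-out normalization in forward() indicates. Sizes are illustrative.
def _demo_arc_linear():
    head = ArcLinear(nfeat=128, nclass=42, margin=0.2, s=30.0)
    x = torch.nn.functional.normalize(torch.randn(8, 128), dim=1)
    target = torch.randint(0, 42, (8,))
    logits = head(x, target)  # (8, 42)
    return torch.nn.functional.cross_entropy(logits, target)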


class ArcMarginProduct(torch.nn.Module):
    r"""Implement of large margin arc distance: :
        Args:
            in_features: size of each input sample
            out_features: size of each output sample
            s: norm of input feature
            m: margin
            cos(theta + m)
        """

    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        torch.nn.init.xavier_uniform_(self.weight)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def change_params(self, s=None, m=None):
        """

        :param s:
        :param m:
        """
        if s is None:
            s = self.s
        if m is None:
            m = self.m
        self.s = s
        self.m = m
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def forward(self, input, target=None):
        """

        :param input:
        :param target:
        :return:
        """
        # cos(theta)
        cosine = torch.nn.functional.linear(torch.nn.functional.normalize(input),
                                            torch.nn.functional.normalize(self.weight))
        if target is None:
            return cosine * self.s
        # cos(theta + m)
        sine = torch.sqrt((1.0 - torch.mul(cosine, cosine)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m

        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)

        #one_hot = torch.zeros(cosine.size(), device='cuda' if torch.cuda.is_available() else 'cpu')
        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, target.view(-1, 1), 1)
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.s

        return output, cosine * self.s
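
# Minimal sketch for ArcMarginProduct; unlike the heads above it normalizes
# its input internally and, when a target is given, returns both the margin
# logits and the plain scaled cosines. Sizes are illustrative only.
def _demo_arc_margin_product():
    head = ArcMarginProduct(in_features=128, out_features=42, s=30.0, m=0.2)
    x = torch.randn(8, 128)
    target = torch.randint(0, 42, (8,))
    margin_logits, cos_logits = head(x, target)  # training: two (8, 42) tensors
    scores = head(x)                             # evaluation: scaled cosines only
    return torch.nn.functional.cross_entropy(margin_logits, target), scores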


class SoftmaxAngularProto(torch.nn.Module):
    # from https://github.com/clovaai/voxceleb_trainer/blob/3bfd557fab5a3e6cd59d717f5029b3a20d22a281/loss/angleproto.py
    def __init__(self, spk_count, emb_dim=256, init_w=10.0, init_b=-5.0, **kwargs):
        super(SoftmaxAngularProto, self).__init__()

        self.test_normalize = True

        self.w = torch.nn.Parameter(torch.tensor(init_w))
        self.b = torch.nn.Parameter(torch.tensor(init_b))
        self.criterion = torch.nn.CrossEntropyLoss()

        self.cce_backend = torch.nn.Sequential(OrderedDict([
                    ("linear8", torch.nn.Linear(emb_dim, spk_count))
                ]))

    def forward(self, x, target=None):
        assert x.size()[1] >= 2

        cce_prediction = self.cce_backend(x)

        if target is None:
            return cce_prediction

        x = x.reshape(-1, 2, x.size()[-1])

        out_anchor = torch.mean(x[:, 1:, :], 1)
        out_positive = x[:, 0, :]

        cos_sim_matrix = torch.nn.functional.cosine_similarity(out_positive.unsqueeze(-1),
                                                               out_anchor.unsqueeze(-1).transpose(0, 2))
        # clamp the learned scale in place so it stays positive
        with torch.no_grad():
            self.w.clamp_(min=1e-6)
        cos_sim_matrix = cos_sim_matrix * self.w + self.b
        loss = self.criterion(cos_sim_matrix, torch.arange(0, cos_sim_matrix.shape[0], device=x.device)) \
            + self.criterion(cce_prediction, target)
        return loss, cce_prediction
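
# Minimal sketch for SoftmaxAngularProto; the assumed input layout is two
# consecutive embeddings per speaker (positive first, then anchor), flattened
# to (2 * n_speakers, emb_dim), with one class label per row.
def _demo_softmax_angular_proto():
    head = SoftmaxAngularProto(spk_count=42, emb_dim=128)
    x = torch.randn(16, 128)            # 8 speakers x 2 segments
    target = torch.randint(0, 42, (16,))
    loss, cce_prediction = head(x, target)  # combined angular-proto + CCE loss
    return loss, cce_prediction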


class AngularProximityMagnet(torch.nn.Module):
    # from https://github.com/clovaai/voxceleb_trainer/blob/3bfd557fab5a3e6cd59d717f5029b3a20d22a281/loss/angleproto.py
    def __init__(self, spk_count, emb_dim=256, batch_size=512, init_w=10.0, init_b=-5.0, **kwargs):
        super(AngularProximityMagnet, self).__init__()

        self.test_normalize = True

        self.w = torch.nn.Parameter(torch.tensor(init_w))
        self.b1 = torch.nn.Parameter(torch.tensor(init_b))
        self.b2 = torch.nn.Parameter(torch.tensor(+5.54))

        #last_linear = torch.nn.Linear(512, 1)
        #last_linear.bias.data += 1

        #self.magnitude = torch.nn.Sequential(OrderedDict([
        #            ("linear9", torch.nn.Linear(emb_dim, 512)),
        #            ("relu9", torch.nn.ReLU()),
        #            ("linear10", torch.nn.Linear(512, 512)),
        #            ("relu10", torch.nn.ReLU()),
        #            ("linear11", last_linear),
        #            ("relu11", torch.nn.ReLU())
        #        ]))

        self.cce_backend = torch.nn.Sequential(OrderedDict([
                    ("linear8", torch.nn.Linear(emb_dim, spk_count))
                ]))

        self.criterion = torch.nn.CrossEntropyLoss()
        self.magnet_criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')

    def forward(self, x, target=None):
        assert x.size()[1] >= 2

        cce_prediction = self.cce_backend(x)
        #x = self.magnitude(x) * torch.nn.functional.normalize(x)

        if target is None:
            return x, cce_prediction

        x = x.reshape(-1, 2, x.size()[-1])
        out_anchor = torch.mean(x[:, 1:, :], 1)
        out_positive = x[:, 0, :]

        ap_sim_matrix = torch.nn.functional.cosine_similarity(out_positive.unsqueeze(-1),
                                                              out_anchor.unsqueeze(-1).transpose(0, 2))
        # clamp the learned scale in place so it stays positive
        with torch.no_grad():
            self.w.clamp_(min=1e-6)
        ap_sim_matrix = ap_sim_matrix * self.w + self.b1

        # keep all tensors on the input's device rather than a hard-coded "cuda:0"
        labels = torch.arange(0, int(out_positive.shape[0]), device=x.device).unsqueeze(1)
        cos_sim_matrix = torch.mm(out_positive, out_anchor.T)
        cos_sim_matrix = cos_sim_matrix + self.b2
        cos_sim_matrix = cos_sim_matrix + numpy.log(1 / out_positive.shape[0] / (1 - 1 / out_positive.shape[0]))
        mask = (torch.tile(labels, (1, labels.shape[0])) == labels.T).float()
        batch_loss = self.criterion(ap_sim_matrix, torch.arange(0, int(out_positive.shape[0]), device=x.device)) \
            + self.magnet_criterion(cos_sim_matrix.flatten().unsqueeze(1), mask.flatten().unsqueeze(1))
        return batch_loss, cce_prediction
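
# Minimal sketch for AngularProximityMagnet, using the same assumed pairwise
# layout as the SoftmaxAngularProto sketch above; it runs on CPU or GPU since
# all tensors follow x's device.
def _demo_angular_proximity_magnet():
    head = AngularProximityMagnet(spk_count=42, emb_dim=128)
    x = torch.randn(16, 128)            # 8 speakers x 2 segments
    target = torch.randint(0, 42, (16,))
    batch_loss, cce_prediction = head(x, target)
    return batch_loss, cce_prediction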