forked from facebookresearch/fairseq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_label_smoothing.py
123 lines (108 loc) · 4.52 KB
/
test_label_smoothing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import copy
import unittest
import tests.utils as test_utils
import torch
from fairseq.criterions.cross_entropy import CrossEntropyCriterion
from fairseq.criterions.label_smoothed_cross_entropy import (
LabelSmoothedCrossEntropyCriterion,
)
class TestLabelSmoothing(unittest.TestCase):
def setUp(self):
# build dictionary
self.d = test_utils.dummy_dictionary(3)
vocab = len(self.d)
self.assertEqual(vocab, 4 + 3) # 4 special + 3 tokens
self.assertEqual(self.d.pad(), 1)
self.assertEqual(self.d.eos(), 2)
self.assertEqual(self.d.unk(), 3)
pad, eos, unk, w1, w2, w3 = 1, 2, 3, 4, 5, 6 # noqa: F841
# build dataset
self.data = [
# the first batch item has padding
{
"source": torch.LongTensor([w1, eos]),
"target": torch.LongTensor([w1, eos]),
},
{
"source": torch.LongTensor([w1, eos]),
"target": torch.LongTensor([w1, w1, eos]),
},
]
self.sample = next(test_utils.dummy_dataloader(self.data))
# build model
self.args = argparse.Namespace()
self.args.sentence_avg = False
self.args.report_accuracy = False
self.args.probs = (
torch.FloatTensor(
[
# pad eos unk w1 w2 w3
[0.05, 0.05, 0.1, 0.05, 0.3, 0.4, 0.05],
[0.05, 0.10, 0.2, 0.05, 0.2, 0.3, 0.10],
[0.05, 0.15, 0.3, 0.05, 0.1, 0.2, 0.15],
]
)
.unsqueeze(0)
.expand(2, 3, 7)
) # add batch dimension
self.task = test_utils.TestTranslationTask.setup_task(self.args, self.d, self.d)
self.model = self.task.build_model(self.args)
def test_nll_loss(self):
self.args.label_smoothing = 0.1
nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task)
smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion(
self.args, self.task
)
nll_loss, nll_sample_size, nll_logging_output = nll_crit(
self.model, self.sample
)
smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(
self.model, self.sample
)
self.assertLess(abs(nll_loss - nll_logging_output["loss"]), 1e-6)
self.assertLess(abs(nll_loss - smooth_logging_output["nll_loss"]), 1e-6)
def test_padding(self):
self.args.label_smoothing = 0.1
crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task)
loss, _, logging_output = crit(self.model, self.sample)
def get_one_no_padding(idx):
# create a new sample with just a single batch item so that there's
# no padding
sample1 = next(test_utils.dummy_dataloader([self.data[idx]]))
args1 = copy.copy(self.args)
args1.probs = args1.probs[idx, :, :].unsqueeze(0)
model1 = self.task.build_model(args1)
loss1, _, _ = crit(model1, sample1)
return loss1
loss1 = get_one_no_padding(0)
loss2 = get_one_no_padding(1)
self.assertAlmostEqual(loss, loss1 + loss2)
def test_reduction(self):
self.args.label_smoothing = 0.1
crit = LabelSmoothedCrossEntropyCriterion.build_criterion(self.args, self.task)
loss, _, logging_output = crit(self.model, self.sample, reduce=True)
unreduced_loss, _, _ = crit(self.model, self.sample, reduce=False)
self.assertAlmostEqual(loss, unreduced_loss.sum())
def test_zero_eps(self):
self.args.label_smoothing = 0.0
nll_crit = CrossEntropyCriterion.build_criterion(self.args, self.task)
smooth_crit = LabelSmoothedCrossEntropyCriterion.build_criterion(
self.args, self.task
)
nll_loss, nll_sample_size, nll_logging_output = nll_crit(
self.model, self.sample
)
smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(
self.model, self.sample
)
self.assertAlmostEqual(nll_loss, smooth_loss)
def assertAlmostEqual(self, t1, t2):
self.assertEqual(t1.size(), t2.size(), "size mismatch")
self.assertLess((t1 - t2).abs().max(), 1e-6)
if __name__ == "__main__":
unittest.main()