Skip to content

Commit

Permalink
🔧 Add scale fix arg to configs
Browse files · Browse the repository at this point in the history
  • Loading branch information
JohnGiorgi committed May 20, 2021
1 parent cebf234 commit 7eabbc5
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 0 deletions.
6 changes: 6 additions & 0 deletions training_config/contrastive_only.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ local min_length = 32;
"type": "nt_xent",
"temperature": 0.05,
},
// There was a small bug in the original implementation that caused gradients derived from
// the contrastive loss to be scaled by 1/N, where N is the number of GPUs used during
// training. This has been fixed. To reproduce results from the paper, set this to false.
// Note that this will have no effect if you are not using distributed training with more
// than 1 GPU.
"scale_fix": false
},
"data_loader": {
"batch_size": 4,
Expand Down
6 changes: 6 additions & 0 deletions training_config/declutr.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ local min_length = 32;
"type": "nt_xent",
"temperature": 0.05,
},
// There was a small bug in the original implementation that caused gradients derived from
// the contrastive loss to be scaled by 1/N, where N is the number of GPUs used during
// training. This has been fixed. To reproduce results from the paper, set this to false.
// Note that this will have no effect if you are not using distributed training with more
// than 1 GPU.
"scale_fix": false
},
"data_loader": {
"batch_size": 4,
Expand Down
6 changes: 6 additions & 0 deletions training_config/declutr_base.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ local min_length = 32;
"type": "nt_xent",
"temperature": 0.05,
},
// There was a small bug in the original implementation that caused gradients derived from
// the contrastive loss to be scaled by 1/N, where N is the number of GPUs used during
// training. This has been fixed. To reproduce results from the paper, set this to false.
// Note that this will have no effect if you are not using distributed training with more
// than 1 GPU.
"scale_fix": false
},
"data_loader": {
"batch_size": 4,
Expand Down
6 changes: 6 additions & 0 deletions training_config/declutr_small.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ local min_length = 32;
"type": "nt_xent",
"temperature": 0.05,
},
// There was a small bug in the original implementation that caused gradients derived from
// the contrastive loss to be scaled by 1/N, where N is the number of GPUs used during
// training. This has been fixed. To reproduce results from the paper, set this to false.
// Note that this will have no effect if you are not using distributed training with more
// than 1 GPU.
"scale_fix": false
},
"data_loader": {
"batch_size": 4,
Expand Down
6 changes: 6 additions & 0 deletions training_config/mlm_only.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ local min_length = 32;
},
},
},
// There was a small bug in the original implementation that caused gradients derived from
// the contrastive loss to be scaled by 1/N, where N is the number of GPUs used during
// training. This has been fixed. To reproduce results from the paper, set this to false.
// Note that this will have no effect if you are not using distributed training with more
// than 1 GPU.
"scale_fix": false
},
"data_loader": {
"batch_size": 4,
Expand Down

0 comments on commit 7eabbc5

Please sign in to comment.