
Commit

Enable Hydra configs in fairseq (facebookresearch#1343) (facebookresearch#1510)

Summary:
Pull Request resolved: facebookresearch/pytext#1510

This is the main PR that switches on Hydra functionality in fairseq.

We migrate the "args" object to an OmegaConf "DictConfig" at all legacy entry points.

In addition, this migrates various components out of secondary registries (such as BPE encoders and tokenizers) to make the migration smoother.

I am going through code that references migrated fairseq components and changing it to inherit from the corresponding "Legacy*" components instead; hopefully tests will catch most of the rest.
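
For context, a minimal sketch of what an args-to-DictConfig conversion looks like with OmegaConf (the helper name and fields below are hypothetical, not the code from this PR):

```python
# Hypothetical sketch, not the code from this PR: converting a legacy argparse
# Namespace into an OmegaConf DictConfig so downstream code gets a structured,
# interpolation-capable config instead of a flat args object.
from argparse import Namespace

from omegaconf import DictConfig, OmegaConf


def namespace_to_config(args: Namespace) -> DictConfig:
    # vars() exposes the Namespace attributes as a plain dict;
    # OmegaConf.create wraps it without changing any values.
    return OmegaConf.create(vars(args))


args = Namespace(seed=1, fp16=False, lr=[0.25])
cfg = namespace_to_config(args)
assert cfg.seed == 1 and cfg.fp16 is False and cfg.lr[0] == 0.25
```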

Pull Request resolved: fairinternal/fairseq-py#1343

Reviewed By: myleott

Differential Revision: D23973928

Pulled By: alexeib

fbshipit-source-id: dd9554981fff51ea75c1ff343874d1d6e61793c9
alexeib authored and facebook-github-bot committed Oct 20, 2020
1 parent c76cb6d commit 3b27ed7
Showing 85 changed files with 2,037 additions and 1,684 deletions.
116 changes: 110 additions & 6 deletions config/config.yaml
@@ -1,7 +1,111 @@
+# @package _group_
+common:
+  no_progress_bar: false
+  log_interval: 100
+  log_format: null
+  tensorboard_logdir: null
+  seed: 1
+  cpu: false
+  tpu: false
+  bf16: false
+  fp16: false
+  memory_efficient_fp16: false
+  memory_efficient_bf16: false
+  fp16_no_flatten_grads: false
+  fp16_init_scale: 128
+  fp16_scale_window: null
+  fp16_scale_tolerance: 0.0
+  min_loss_scale: 1.0e-4
+  threshold_loss_scale: null
+  user_dir: null
+  empty_cache_freq: 0
+  all_gather_list_size: 16384
+  model_parallel_size: 1
+  quantization_config_path: null
+  profile: false
+distributed_training:
+  distributed_rank: 0
+  distributed_backend: "nccl"
+  distributed_init_method: null
+  distributed_port: -1
+  device_id: 0
+  local_rank: 0
+  distributed_no_spawn: false
+  ddp_backend: "c10d"
+  bucket_cap_mb: 25
+  fix_batches_to_gpus: false
+  find_unused_parameters: false
+  fast_stat_sync: false
+  broadcast_buffers: false
+  distributed_wrapper: "DDP"
+  slowmo_momentum: null
+  slowmo_algorithm: "LocalSGD"
+  localsgd_frequency: 3
+dataset:
+  num_workers: 1
+  skip_invalid_size_inputs_valid_test: false
+  max_tokens: null
+  batch_size: null
+  required_batch_size_multiple: 8
+  dataset_impl: null
+  data_buffer_size: 10
+  train_subset: "train"
+  valid_subset: "valid"
+  validate_interval: 1
+  fixed_validation_seed: null
+  disable_validation: false
+  curriculum: 0
+  gen_subset: "test"
+  num_shards: 1
+  shard_id: 0
+  max_tokens_valid: ${dataset.max_tokens}
+  batch_size_valid: ${dataset.batch_size}
+optimization:
+  max_epoch: 0
+  max_update: 0
+  clip_norm: 25.0
+  sentence_avg: false
+  update_freq: [ 1 ]
+  lr: [ 0.25 ]
+  min_lr: -1.0
+  use_bmuf: false
+checkpoint:
+  save_dir: "checkpoints"
+  restore_file: "checkpoint_last.pt"
+  reset_dataloader: false
+  reset_lr_scheduler: false
+  reset_meters: false
+  reset_optimizer: false
+  optimizer_overrides: "{}"
+  save_interval: 1
+  save_interval_updates: 0
+  keep_interval_updates: -1
+  keep_last_epochs: -1
+  keep_best_checkpoints: -1
+  no_save: false
+  no_epoch_checkpoints: false
+  no_last_checkpoints: false
+  no_save_optimizer_state: false
+  best_checkpoint_metric: "loss"
+  maximize_best_checkpoint_metric: false
+  patience: -1
+  checkpoint_suffix: ""
+bmuf:
+  block_lr: 1
+  block_momentum: 0.875
+  global_sync_iter: 50
+  warmup_iterations: 500
+  use_nbm: false
+  average_sync: false
defaults:
-  - params: training_params
-  - task: language_modeling
-  - model: transformer_lm
-  - criterion: cross_entropy
-  - optimizer: adam
-  - lr_scheduler: inverse_sqrt
+  - task: language_modeling
+  - model: null
+  - criterion: null
+  - optimizer: null
+  - lr_scheduler: null
+  - bpe: null
+  - tokenizer: null
+  - scoring: null
+  - generation: null
+  - common_eval: null
+  - eval_lm: null
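
The `${dataset.max_tokens}` and `${dataset.batch_size}` entries above are OmegaConf interpolations: the validation values default to their training counterparts unless overridden. A minimal sketch of how such an interpolation behaves (values invented for illustration):

```python
from omegaconf import OmegaConf

# Invented values; only the interpolation mechanism is the point.
cfg = OmegaConf.create({
    "dataset": {
        "max_tokens": 4096,
        "max_tokens_valid": "${dataset.max_tokens}",  # resolved against the config root
    }
})

print(cfg.dataset.max_tokens_valid)  # 4096

cfg.dataset.max_tokens = 2048
print(cfg.dataset.max_tokens_valid)  # 2048 -- interpolations resolve lazily, at access time
```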
7 changes: 0 additions & 7 deletions config/config_eval_lm.yaml

This file was deleted.

4 changes: 2 additions & 2 deletions config/criterion/adaptive_loss.yaml
@@ -1,3 +1,3 @@
# @package _group_
-sentence_avg: ${params.optimization.sentence_avg}
-ddp_backend: ${params.distributed_training.ddp_backend}
+sentence_avg: ${optimization.sentence_avg}
+ddp_backend: ${distributed_training.ddp_backend}
3 changes: 1 addition & 2 deletions config/criterion/cross_entropy.yaml
@@ -1,3 +1,2 @@
# @package _group_
-sentence_avg: ${params.optimization.sentence_avg}
-ddp_backend: ${params.distributed_training.ddp_backend}
+sentence_avg: ${optimization.sentence_avg}
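
With the `params` wrapper gone, criterion configs now interpolate directly against top-level groups such as `optimization`, so the reference resolves once the criterion node is composed into the full config tree. A small sketch of that behavior (group layout taken from the configs above, values invented):

```python
from omegaconf import OmegaConf

# Invented values; mirrors how ${optimization.sentence_avg} inside a criterion
# config resolves once the criterion node sits under a root config that also
# contains the top-level "optimization" group.
root = OmegaConf.create({
    "optimization": {"sentence_avg": True},
    "criterion": {"sentence_avg": "${optimization.sentence_avg}"},
})
assert root.criterion.sentence_avg is True

# A standalone criterion node would fail to resolve the reference,
# since there is no "optimization" group at its root to interpolate into.
```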
105 changes: 0 additions & 105 deletions config/params/eval_lm_params.yaml

This file was deleted.

95 changes: 0 additions & 95 deletions config/params/training_params.yaml

This file was deleted.

26 changes: 12 additions & 14 deletions docs/hydra_integration.md
@@ -13,15 +13,14 @@ For example, if we'd like to train a language model with transformer, we could p

```
defaults:
-  - params: training_params
  - task: language_modeling
  - model: transformer_lm
  - criterion: cross_entropy
  - optimizer: adam
  - lr_scheduler: inverse_sqrt
```

-- Provide generic parameters common across different training jobs: `config/params/training_params.yaml`
+- Provide generic parameters common across different jobs: `config.yaml`
- Provide task parameters: `config/task/language_modeling.yaml`
- Provide model parameters: `config/model/transformer_lm.yaml`
- Provide criterion parameters: `config/criterion/cross_entropy.yaml`
@@ -41,7 +40,6 @@ Alternatively, if we need to override certain params from the command line, we c

```
python fairseq_cli/train_hydra.py
-params=training_params \
task=language_modeling \
task.data=/private/home/abaevski/data/wiki103 \
task.tokens_per_sample=512 \
@@ -56,17 +54,17 @@ lr_scheduler=inverse_sqrt \
lr_scheduler.warmup_updates=4000 \
lr_scheduler.warmup_init_lr=1e-07 \
criterion=cross_entropy \
-params.common.fp16=true \
-params.common.log_format=json \
-params.common.log_interval=1 \
-params.dataset.max_tokens=1024 \
-params.dataset.num_workers=4 \
-params.optimization.update_freq=[16] \
-params.optimization.max_update=50000 \
-params.optimization.clip_norm=0.0 \
-params.optimization.lr=[0.0005] \
-params.checkpoint.save_dir=/checkpoint/mtian/transformer_wikitext-103-hydra-args-cli \
-params.checkpoint.save_interval_updates=10
+common.fp16=true \
+common.log_format=json \
+common.log_interval=1 \
+dataset.max_tokens=1024 \
+dataset.num_workers=4 \
+optimization.update_freq=[16] \
+optimization.max_update=50000 \
+optimization.clip_norm=0.0 \
+optimization.lr=[0.0005] \
+checkpoint.save_dir=/checkpoint/mtian/transformer_wikitext-103-hydra-args-cli \
+checkpoint.save_interval_updates=10
```
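
The dotted overrides above use Hydra/OmegaConf dot-list syntax. A rough sketch of how such overrides land in a merged config, using OmegaConf directly rather than the Hydra launcher, with made-up base values:

```python
from omegaconf import OmegaConf

# Made-up base config standing in for config/config.yaml.
base = OmegaConf.create({
    "common": {"fp16": False, "log_format": None},
    "optimization": {"lr": [0.25], "update_freq": [1]},
})

# Dot-list overrides in the same shape as the command-line flags above.
overrides = OmegaConf.from_dotlist([
    "common.fp16=true",
    "common.log_format=json",
    "optimization.lr=[0.0005]",
    "optimization.update_freq=[16]",
])

cfg = OmegaConf.merge(base, overrides)
assert cfg.common.fp16 is True
assert cfg.common.log_format == "json"
assert cfg.optimization.update_freq == [16]
```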

## Migrate existing/Creating new modules to hydra interface
