diff --git a/configs/trainer/deepspeed.yaml b/configs/trainer/deepspeed.yaml
new file mode 100644
index 0000000..97c06e9
--- /dev/null
+++ b/configs/trainer/deepspeed.yaml
@@ -0,0 +1,28 @@
+# Trainer settings for a single-GPU run that uses Lightning's DeepSpeedStrategy.
+defaults:
+  - default
+
+accelerator: gpu
+devices: 1
+
+# Active precision mode; 'bf16-true' was noted as the immediate alternative.
+# Other values listed for reference: 'transformer-engine',
+# 'transformer-engine-float16', '16-true', '16-mixed', '32-true'.
+precision: 'bf16-mixed'
+
+# DeepSpeedStrategy
+strategy:
+  _target_: lightning.pytorch.strategies.DeepSpeedStrategy
+  # ZeRO optimization
+  zero_optimization: true
+  stage: 2
+  offload_optimizer: false
+  offload_optimizer_device: 'cpu'  # only relevant if offload_optimizer is true
+  contiguous_gradients: true
+
+  # Activation checkpointing
+  partition_activations: false
+  cpu_checkpointing: false
+
+# Gradient accumulation
+accumulate_grad_batches: 1