Commit

zRzRzRzRzRzRzR committed Feb 29, 2024
2 parents a103d80 + c92d71c commit 3b630fe
Showing 3 changed files with 8 additions and 4 deletions.
finetune_demo/configs/lora.yaml: 2 changes (2 additions, 0 deletions)

@@ -34,6 +34,8 @@ training_args:
 max_new_tokens: 256
 # set your absolute deepspeed path here
 #deepspeed: ds_zero_2.json
+# set to true if train with cpu.
+use_cpu: false
 peft_config:
 peft_type: LORA
 task_type: CAUSAL_LM
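As a minimal, hypothetical sketch of how the new key could be read from a training_args block (the repository's actual loader lives in finetune_demo/finetune_hf.py and is not shown in this diff; the snippet, variable names, and the PyYAML-based loading here are illustrative only):

import yaml  # requires PyYAML

snippet = """
training_args:
  use_cpu: false
peft_config:
  peft_type: LORA
  task_type: CAUSAL_LM
"""

config = yaml.safe_load(snippet)
# Fall back to False so configs written before this commit keep their previous behavior.
use_cpu = config["training_args"].get("use_cpu", False)
print(use_cpu)  # -> False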
finetune_demo/configs/ptuning_v2.yaml: 3 changes (2 additions, 1 deletion)

@@ -34,7 +34,8 @@ training_args:
 max_new_tokens: 512
 # set your absolute deepspeed path here
 #deepspeed: ds_zero_3.json
+use_cpu: false
 peft_config:
 peft_type: PREFIX_TUNING
 task_type: CAUSAL_LM
-num_virtual_tokens: 128
+num_virtual_tokens: 128
finetune_demo/finetune_hf.py: 7 changes (4 additions, 3 deletions)

@@ -362,9 +362,10 @@ def process_batch_eval(


 # TODO: Not sure if this is necessary, can set it to half
-def _prepare_model_for_training(model: nn.Module):
+def _prepare_model_for_training(model: nn.Module, use_cpu: bool):
     for param in model.parameters():
-        if param.requires_grad:
+        if param.requires_grad or use_cpu:
+            # if train with cpu, cast all params to fp32 instead of trainable ones.
             param.data = param.data.to(torch.float32)


@@ -487,7 +488,7 @@ def main(
     # )

     # turn model to fp32
-    _prepare_model_for_training(model)
+    _prepare_model_for_training(model, ft_config.training_args.use_cpu)

     ft_config.training_args.generation_config.pad_token_id = (
         tokenizer.pad_token_id
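A self-contained sketch of how the updated helper behaves, run against a toy model rather than ChatGLM: with use_cpu=True every parameter is cast to fp32, not only the trainable ones, since half-precision training support on CPU is limited. The toy model and all names outside the diff are illustrative assumptions.

import torch
from torch import nn


def _prepare_model_for_training(model: nn.Module, use_cpu: bool):
    for param in model.parameters():
        if param.requires_grad or use_cpu:
            # if train with cpu, cast all params to fp32 instead of trainable ones.
            param.data = param.data.to(torch.float32)


# Toy stand-in for a frozen fp16 base model with a small trainable head.
toy = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2)).half()
toy[0].weight.requires_grad_(False)
toy[0].bias.requires_grad_(False)

_prepare_model_for_training(toy, use_cpu=True)
print({name: p.dtype for name, p in toy.named_parameters()})
# every entry is torch.float32; with use_cpu=False the frozen first layer would stay fp16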
