forked from modelscope/ms-swift
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dda4c64
commit 45ada3e
Showing
20 changed files
with
1,058 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
examples/pytorch/llm/scripts/qwen1half_7b_chat/galore/infer.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Experimental environment: A100
# Runs `swift infer` on GPU 0 against a checkpoint written by the companion
# galore/sft.sh run.
# NOTE(review): "vx-xxx" and "checkpoint-xxx" look like placeholders -- replace
# them with the real run/checkpoint directory before executing.
# The final argument deliberately has no trailing backslash, so nothing that
# is later appended to this file gets swallowed into the command line.
CUDA_VISIBLE_DEVICES=0 \
swift infer \
    --ckpt_dir "output/qwen1half-7b-chat/vx-xxx/checkpoint-xxx" \
    --load_dataset_config true \
    --max_length 4096 \
    --use_flash_attn true
18 changes: 18 additions & 0 deletions
18
examples/pytorch/llm/scripts/qwen1half_7b_chat/galore/sft.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Experimental environment: A100
# 40GB GPU memory
# Full-parameter fine-tuning (`--sft_type full`) of qwen1half-7b-chat on GPU 0
# with GaLore enabled (`--use_galore true`), training for one epoch on the
# codefuse-evol-instruction-zh dataset.
# The final argument deliberately has no trailing backslash, so nothing that
# is later appended to this file gets swallowed into the command line.
CUDA_VISIBLE_DEVICES=0 \
swift sft \
    --model_type qwen1half-7b-chat \
    --sft_type full \
    --use_galore true \
    --galore_update_proj_gap 400 \
    --train_dataset_sample -1 \
    --eval_steps 1000 \
    --output_dir output \
    --num_train_epochs 1 \
    --max_length 4096 \
    --learning_rate 1e-5 \
    --use_flash_attn true \
    --save_only_model true \
    --dataset codefuse-evol-instruction-zh \
    --preprocess_num_proc 4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Copyright (c) Alibaba, Inc. and its affiliates.
#
# Package initializer for the GaLore helpers. At runtime the module object is
# replaced by a `_LazyModule` built from `_import_structure`; the direct
# submodule imports exist only for static type checkers.
# NOTE(review): presumably `_LazyModule` defers importing each submodule until
# one of its names is first accessed -- confirm in swift.utils.import_utils.

from typing import TYPE_CHECKING

from swift.utils.import_utils import _LazyModule

if TYPE_CHECKING:
    # Seen by type checkers / IDEs only; this branch does not run at runtime.
    from .adafactor import GaLoreAdafactor
    from .adamw import GaLoreAdamW
    from .adamw8bit import GaLoreAdamW8bit
    from .utils import GaLoreConfig, create_optimizer_and_scheduler
else:
    import sys

    # Maps each submodule name to the public names it provides.
    _import_structure = {
        'utils': ['GaLoreConfig', 'create_optimizer_and_scheduler'],
        'adafactor': ['GaLoreAdafactor'],
        'adamw8bit': ['GaLoreAdamW8bit'],
        'adamw': ['GaLoreAdamW'],
    }

    # Swap this module's entry in sys.modules for the lazy wrapper.
    sys.modules[__name__] = _LazyModule(
        __name__,
        __file__,
        _import_structure,
        module_spec=__spec__,
        extra_objects={},
    )
Oops, something went wrong.