# config/hf_args_zero_lora.yaml

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Absolute path on a specific machine; adjust it for your environment.
model_name_or_path: '/home/ubuntu/projects/tunelite/cache/llama-7b'

# ---------------------------------------------------------------------------
# Data
# ---------------------------------------------------------------------------
dataset_name: 'wsc'
data_tag: 'base'
data_max_length: 1024
train_on_inputs: false
refresh: false

# ---------------------------------------------------------------------------
# PEFT / LoRA
# ---------------------------------------------------------------------------
peft_type: 'lora'
lora_r: 2
lora_only: false
# The hf_* keys below mirror the un-prefixed optimisation keys further down
# (learning_rate, weight_decay, lr_scheduler_type, warmup); note that the two
# learning rates differ (0.0005 here vs 0.005 there).
# NOTE(review): which parameter group each set applies to is not visible in
# this file; confirm against the code that consumes these arguments.
hf_learning_rate: 0.0005
hf_weight_decay: 0
hf_lr_scheduler_type: 'linear'
hf_warmup: 0.05

# ---------------------------------------------------------------------------
# Run identity and output
# ---------------------------------------------------------------------------
# The tag text mentions "r2" and "sgd"; keep it in sync with lora_r and optim.
tag: 'lora-qv-r2-inplace-sgd'
output_dir: 'outputs'
overwrite_output_dir: true
seed: 42

# ---------------------------------------------------------------------------
# Runtime / engine
# ---------------------------------------------------------------------------
deepspeed: 'config/ds_config_lora.json'
gradient_checkpointing: true
dataloader_pin_memory: false
remove_unused_columns: false
group_by_length: false
# Disabled option, kept for reference:
# bf16: true

# ---------------------------------------------------------------------------
# Optimisation
# ---------------------------------------------------------------------------
do_train: true
optim: 'sgd'
learning_rate: 0.005
weight_decay: 0
lr_scheduler_type: 'linear'
warmup: 0.05
num_train_epochs: 10
per_device_train_batch_size: 2
clip_grad_norm: 1.0
# Disabled alternatives, kept for reference:
# clip_grad_value: 1.0
# clip_loss_value: 5.0

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
do_eval: true
evaluation_strategy: 'epoch'
per_device_eval_batch_size: 2
predict_with_generate: true
metric_for_best_model: 'acc'
load_best_model_at_end: false

# ---------------------------------------------------------------------------
# Logging and checkpointing
# ---------------------------------------------------------------------------
log_level: 'info'
logging_steps: 1
# Disabled option, kept for reference:
# report_to: 'wandb'
# 'no' is quoted on purpose so it stays a string instead of parsing as a
# boolean under YAML 1.1 loaders.
save_strategy: 'no'
save_total_limit: 0