Merge pull request uclaml#27 from xujinlai/main
support TULU-2 70B SPIN
yihedeng9 authored Apr 15, 2024
2 parents e84b7be + 6cc7c25 commit a612edc
Showing 12 changed files with 304 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -164,3 +164,4 @@ generated/
wandb/
reformatted/
data/
outputs/
40 changes: 40 additions & 0 deletions configs/tulu-70B/config.yaml
@@ -0,0 +1,40 @@
# Model arguments
model_name_or_path: /maas-vepfs/models/tulu-70b
use_flash_attention_2: true
torch_dtype: bfloat16

# Data training arguments
dataset_mixer:
  # HuggingFaceH4/ultrafeedback_binarized: 1.0
  /maas-vepfs/outputs/tulu-70B/new_data/iter0: 1.0
  # maas-vepfs/outputs/tulu-70B/new_data/iter0: 1.0
dataset_splits:
- train
- test
preprocessing_num_workers: 12

# Trainer arguments
bf16: true
beta: 0.1
do_eval: false
evaluation_strategy: "no"
eval_steps: 100
gradient_accumulation_steps: 1
gradient_checkpointing: true
hub_model_id: tulu-2-dpo-70b
learning_rate: 5.0e-7
log_level: info
logging_steps: 10
lr_scheduler_type: linear
max_length: 4096
max_prompt_length: 2048
num_train_epochs: 3
optim: rmsprop
output_dir: outputs
per_device_train_batch_size: 8
per_device_eval_batch_size: 4
push_to_hub: false
save_strategy: "epoch"
save_total_limit: null
seed: 42
warmup_ratio: 0.1
24 changes: 24 additions & 0 deletions configs/tulu-70B/deepspeed_zero3.yaml
@@ -0,0 +1,24 @@
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
  # deepspeed_multinode_launcher: standard
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  offload_optimizer_device: cpu
  offload_param_device: cpu
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
main_training_function: main
mixed_precision: bf16
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: true
num_machines: 1
num_processes: 8
machine_rank: 0
60 changes: 60 additions & 0 deletions scripts/tulu-70B/README.md
@@ -0,0 +1,60 @@
# Tulu-70B Scripts Guide

This document provides an overview and usage guide for the scripts in the `tulu-70B` folder of the SPIN project. These scripts facilitate fine-tuning the Tulu-70B model with SPIN.

## Scripts Overview

- `finetune.sh`: This script launches a distributed SPIN fine-tuning job for the Tulu-70B model. It uses the `accelerate` CLI tool to manage distributed training across multiple GPUs.

- `generate_training_data.sh`: This script generates SPIN training data from the Tulu-70B model in a batched manner.


## Usage

### Generate SPIN training data
To generate SPIN training data, run the `generate_training_data.sh` script as shown below; the parameters are explained after the command:
```bash
# from the root directory of SPIN

bash scripts/tulu-70B/generate_training_data.sh <DATA_DIR> <OUTPUT_DIR> <SPIN_ITER> <MODEL_PATH>
```

The `generate_training_data.sh` script takes the following positional parameters (a worked example follows the list):
- `<DATA_DIR>`: The Hugging Face dataset name or local directory of the raw data. This should be the SFT dataset you want to train with SPIN.
- `<OUTPUT_DIR>`: The directory where the generated training data will be saved.
- `<SPIN_ITER>`: The iteration index of the SPIN algorithm.
- `<MODEL_PATH>`: The name or path of the model used for generation.
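
For reference, here is a hypothetical invocation for the first SPIN iteration. The dataset and model names are the script defaults; the output directory is purely illustrative:

```bash
# from the root directory of SPIN
# positional args: <DATA_DIR> <OUTPUT_DIR> <SPIN_ITER> <MODEL_PATH>

bash scripts/tulu-70B/generate_training_data.sh \
    allenai/tulu-v2-sft-mixture \
    outputs/tulu-70B \
    0 \
    allenai/tulu-2-70b
```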

### Fine-tuning

```bash
# from the root directory of SPIN

bash scripts/tulu-70B/finetune.sh <TRAIN_EPOCH> <WORKER_NUM> <WORKER_RANK> <WORKER_NUM_GPU> <WORKER_0_PORT> <WORKER_0_HOST> <TRAIN_BATCH_SIZE> <EVAL_BATCH_SIZE>
```

To fine-tune the Tulu-70B model, run the `finetune.sh` script with the following positional parameters; a worked example follows the list:
- `TRAIN_EPOCH`: Number of training epochs. Default is 5.
- `WORKER_NUM`: Number of worker machines. Default is 1.
- `WORKER_RANK`: Rank of the current worker. Default is 0.
- `WORKER_NUM_GPU`: Number of GPUs per worker. Default is 8.
- `WORKER_0_PORT`: Port of the main worker (rank 0). Default is 2950.
- `WORKER_0_HOST`: Host IP of the main worker (rank 0). Default is "127.0.0.1".
- `TRAIN_BATCH_SIZE`: Training batch size per device. Default is 2.
- `EVAL_BATCH_SIZE`: Evaluation batch size per device. Default is 1.
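
As a concrete illustration, a hypothetical single-node run on 8 GPUs might look like the command below. The values map one-to-one onto the positional parameters above (3 epochs mirrors `num_train_epochs` in `config.yaml`; the batch sizes are the script defaults):

```bash
# from the root directory of SPIN
# positional args: <TRAIN_EPOCH> <WORKER_NUM> <WORKER_RANK> <WORKER_NUM_GPU> <WORKER_0_PORT> <WORKER_0_HOST> <TRAIN_BATCH_SIZE> <EVAL_BATCH_SIZE>

bash scripts/tulu-70B/finetune.sh 3 1 0 8 2950 "127.0.0.1" 2 1
```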
















39 changes: 39 additions & 0 deletions scripts/tulu-70B/finetune.sh
@@ -0,0 +1,39 @@
set -x
# Set the home directory for Hugging Face transformers library cache.
#export HF_HOME="${your_hf_home}"

# Set the logging level for the `accelerate` library to output informational messages.
export ACCELERATE_LOG_LEVEL=info

TRAIN_EPOCH=${1:-5}

WORKER_NUM=${2:-1}
WORKER_RANK=${3:-0}
WORKER_NUM_GPU=${4:-8}
WORKER_0_PORT=${5:-2950}
WORKER_0_HOST=${6:-"127.0.0.1"}

TRAIN_BATCH_SIZE=${7:-2}
EVAL_BATCH_SIZE=${8:-1}

# Launches a distributed training job with the `accelerate` CLI tool. Key parameters include:
# --config_file: Path to the DeepSpeed configuration file. This file defines distributed training options and optimizations.
# --num_processes: Sets the number of processes to launch, typically equal to the number of GPUs available for parallel training.
# Additional override options (specified at command line) that can alter settings defined in config.yaml:
# --num_train_epochs=6: Specifies the total number of training epochs.
# --learning_rate=1e-7: Sets the learning rate for the training process.
# --beta=0.1: Custom beta parameter value.
# --warmup_ratio=0.1: Defines the warmup ratio for learning rate scheduling.
# --output_dir="${path_to_save_checkpoint}": Directory where training checkpoints will be saved.
# Execution command: Runs 'spin/run_spin.py' with 'configs/tulu-70B/config.yaml' as its configuration.

accelerate launch \
--config_file configs/tulu-70B/deepspeed_zero3.yaml \
--num_machines $WORKER_NUM \
--machine_rank $WORKER_RANK \
--num_processes $WORKER_NUM_GPU \
--main_process_port $WORKER_0_PORT \
--main_process_ip $WORKER_0_HOST \
spin/run_spin.py configs/tulu-70B/config.yaml \
--num_train_epochs=$TRAIN_EPOCH \
--per_device_train_batch_size=$TRAIN_BATCH_SIZE
29 changes: 29 additions & 0 deletions scripts/tulu-70B/generate_training_data.sh
@@ -0,0 +1,29 @@

set -x

input_dataset_path_or_name=${1:-"allenai/tulu-v2-sft-mixture"}
output_dir=${2:-"data"}
iter=${3:-0}
model_name_or_path=${4:-"allenai/tulu-2-70b"}

reformated_dataset_output=${output_dir}/SPIN_iter${iter}
generated_dataset_output=${output_dir}/generated/iter${iter}
training_dataset_output=${output_dir}/new_data/iter${iter}

# 0. reformat huggingface data
python spin/reformat.py \
--data $input_dataset_path_or_name \
--output_dir $reformated_dataset_output

# 1. generate training data
bash scripts/tulu-70B/generate_vllm_batched.sh \
$reformated_dataset_output \
$generated_dataset_output \
$model_name_or_path \
8

# 2. gather the generated fractions into the SPIN training dataset
python spin/convert_data.py \
--output_dir $training_dataset_output \
--input_dir $generated_dataset_output \
--num_fracs 11
24 changes: 24 additions & 0 deletions scripts/tulu-70B/generate_vllm_batched.sh
@@ -0,0 +1,24 @@

INPUT_DIR=${1:-"UCLA-AGI/SPIN_iter0"}
OUTPUT_DIR=${2:-"generated/iter0"}
MODEL_NAME_OR_PATH=${3:-"allenai/tulu-2-70b"}
TP_PER_WORKER=${4:-8}


python3 spin/batched_generate_vllm.py \
--model $MODEL_NAME_OR_PATH \
--input_dir $INPUT_DIR \
--frac_len 5000 \
--num_data_frac 11 \
--tp_per_worker $TP_PER_WORKER \
--output_dir $OUTPUT_DIR

# Generate for the test split as well
python3 spin/batched_generate_vllm.py \
--model $MODEL_NAME_OR_PATH \
--input_dir $INPUT_DIR \
--frac_len 5000 \
--num_data_frac 1 \
--tp_per_worker $TP_PER_WORKER \
--split test \
--output_dir $OUTPUT_DIR
2 changes: 1 addition & 1 deletion setup.py
@@ -116,7 +116,7 @@ def deps_list(*pkgs):
packages=find_packages("spin"),
zip_safe=False,
extras_require=extras,
python_requires=">=3.10.2",
python_requires=">=3.9",
install_requires=install_requires,
classifiers=[
"Development Status :: 3 - Alpha",
4 changes: 2 additions & 2 deletions spin/alignment/model_utils.py
@@ -71,8 +71,8 @@ def get_tokenizer(model_args: ModelArguments, data_args: DataArguments) -> PreTr
tokenizer.truncation_side = data_args.truncation_side

# Set reasonable default for models without max length
if tokenizer.model_max_length > 100_000:
tokenizer.model_max_length = 2048
# if tokenizer.model_max_length > 100_000:
# tokenizer.model_max_length = 8192

if data_args.chat_template is not None:
tokenizer.chat_template = data_args.chat_template
49 changes: 32 additions & 17 deletions spin/alignment/trainer.py
@@ -9,10 +9,12 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from accelerate.utils import is_deepspeed_available
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import (
AutoConfig,
AutoModelForCausalLM,
DataCollator,
PreTrainedModel,
@@ -153,15 +155,9 @@ def __init__(
"You passed a model_id to the SPINTrainer. This will automatically create an "
"`AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you."
)
# with deepspeed.zero.Init():
model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs)

if isinstance(ref_model, str):
warnings.warn(
"You passed a ref model_id to the SPINTrainer. This will automatically create an "
"`AutoModelForCausalLM`"
)
ref_model = AutoModelForCausalLM.from_pretrained(ref_model, **ref_model_init_kwargs)

if not is_peft_available() and peft_config is not None:
raise ValueError(
"PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT models"
@@ -227,13 +223,6 @@ def make_inputs_require_grad(module, input, output):

self.is_peft_model = is_peft_available() and isinstance(model, PeftModel)

if ref_model:
self.ref_model = ref_model
elif self.is_peft_model:
# The `model` with adapters turned off will be used as the reference model
self.ref_model = None
else:
self.ref_model = create_reference_model(model)

if data_collator is None:
if tokenizer is None:
@@ -289,8 +278,6 @@ def make_inputs_require_grad(module, input, output):

if disable_dropout:
disable_dropout_in_model(model)
if self.ref_model is not None:
disable_dropout_in_model(self.ref_model)

self.max_length = max_length
self.generate_during_eval = generate_during_eval
@@ -321,6 +308,22 @@ def make_inputs_require_grad(module, input, output):
"Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`."
)

if isinstance(ref_model, str):
warnings.warn(
"You passed a ref model_id to the SPINTrainer. This will automatically create an "
"`AutoModelForCausalLM`"
)
# with deepspeed.zero.Init():
ref_model = AutoModelForCausalLM.from_pretrained(ref_model, **ref_model_init_kwargs)

if ref_model:
self.ref_model = ref_model
elif self.is_peft_model:
# The `model` with adapters turned off will be used as the reference model
self.ref_model = None
else:
self.ref_model = create_reference_model(model)

if self.ref_model is None:
if not hasattr(self.accelerator.unwrap_model(self.model), "disable_adapter"):
raise ValueError(
@@ -332,6 +335,10 @@ def make_inputs_require_grad(module, input, output):
else:
self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)

if disable_dropout and self.ref_model is not None:
disable_dropout_in_model(self.ref_model)


def _prepare_deepspeed(self, model: PreTrainedModelWrapper):
# Adapted from accelerate: https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
deepspeed_plugin = self.accelerator.state.deepspeed_plugin
@@ -443,6 +450,15 @@ def spin_loss(
real_rewards = self.beta * (policy_real_logps - opponent_real_logps).detach()
generated_rewards = self.beta * (policy_generated_logps - opponent_generated_logps).detach()

print(f"losses: {losses}")
print(f"policy_real_logps: {policy_real_logps}")
print(f"policy_generated_logps: {policy_generated_logps}")
print(f"opponent_real_logps: {opponent_real_logps}")
print(f"opponent_generated_logps: {opponent_generated_logps}")
print(f"logits: {logits}")
print(f"real_rewards: {real_rewards}")
print(f"generated_rewards: {generated_rewards}")

return losses, real_rewards, generated_rewards

def _get_batch_logps(
@@ -539,7 +555,6 @@ def get_batch_metrics(
_,
_,
) = self.concatenated_forward(self.ref_model, batch)

losses, real_rewards, generated_rewards = self.spin_loss(
policy_real_logps,
policy_generated_logps,