Refactor of models and trainers with base class for common methods #306

Open · wants to merge 42 commits into base: main
Changes from 1 commit (of 42 commits)
2b8e301
Refactor models and trainers with base_class for common methods
PierpaoloSorbellini Mar 27, 2023
5e0ded8
Revert "Release ChatLLaMA 0.0.4"
PierpaoloSorbellini Mar 27, 2023
3fa5c53
Merge branch 'main' of https://github.com/nebuly-ai/nebullvm into main
PierpaoloSorbellini Mar 27, 2023
ab1f09e
Refactor of models and trainers with base class for common methods
PierpaoloSorbellini Mar 27, 2023
3d54d50
Fix comments and values in the config.yaml
PierpaoloSorbellini Mar 27, 2023
9f5eab4
Add load 8 bit from HF
PierpaoloSorbellini Mar 27, 2023
dc46ee4
Add check on load int 8
PierpaoloSorbellini Mar 27, 2023
c1d03d3
Add Reward and Critic support for LoRA PEFT
PierpaoloSorbellini Mar 28, 2023
36c350d
Add SelfInstruct Dataset from HF
PierpaoloSorbellini Mar 28, 2023
bb92ee7
Fix imports
Mar 28, 2023
6fc94d3
Add logging with proper class
Mar 29, 2023
dc2489f
Fix logs for deepspeed
Mar 30, 2023
0b0795d
Fix early logs with multi-GPUs
Mar 30, 2023
01be6dc
Fix MultiGPU for accelerate
Mar 30, 2023
13b1abd
Fix batch-size for accelerate
Mar 30, 2023
db8b3c2
Add multi-GPU training to readme.md
Mar 30, 2023
d771fb2
Fix fp16 training
Mar 31, 2023
e5f959c
Merge branch 'main' into refactor
PierpaoloSorbellini Mar 31, 2023
d5084e5
Fix Distributed training for RLHF
PierpaoloSorbellini Apr 3, 2023
2ec5eaa
Add new models
PierpaoloSorbellini Apr 3, 2023
33e97e2
Add decapoda models
PierpaoloSorbellini Apr 3, 2023
8332a26
Add unsupported model message
PierpaoloSorbellini Apr 3, 2023
32ddfa2
Change sign of KL div according to issue #298
PierpaoloSorbellini Apr 3, 2023
aa9881c
Fix imports order
PierpaoloSorbellini Apr 3, 2023
b10f1dc
Add cases for lora-peft model loading
PierpaoloSorbellini Apr 4, 2023
86a699b
Merge branch 'refactor' of https://github.com/nebuly-ai/nebullvm into…
PierpaoloSorbellini Apr 4, 2023
1f29ba4
Fix Actor 8bit training
PierpaoloSorbellini Apr 4, 2023
1836788
Adjust code comments to match the new changes
PierpaoloSorbellini Apr 4, 2023
966a19d
Fix device error when using vanilla PyTorch training
PierpaoloSorbellini Apr 4, 2023
feacb88
Fix RLHF with fp16
PierpaoloSorbellini Apr 5, 2023
f894494
Move grad scaler into base class
PierpaoloSorbellini Apr 5, 2023
b56185f
Add check on 8bit load and distributed training
PierpaoloSorbellini Apr 5, 2023
5699aaa
Add template to self-instruct dataset
PierpaoloSorbellini Apr 12, 2023
5c83927
Fix checkpoints name in actor training
PierpaoloSorbellini Apr 12, 2023
a205ee6
Fix slow loss computation
PierpaoloSorbellini Apr 12, 2023
bb386c4
Fix checkpoints also in reward models
PierpaoloSorbellini Apr 12, 2023
22a64af
Fix checkpoint for rl
PierpaoloSorbellini Apr 12, 2023
10211c6
Add n_checkpoints for all trainings, with removal of old checkpoints
PierpaoloSorbellini Apr 12, 2023
442b396
Improve dataset quality with reward model negative examples
PierpaoloSorbellini Apr 13, 2023
71a6c02
Merge branch 'main' of https://github.com/nebuly-ai/nebullvm into main
PierpaoloSorbellini Apr 14, 2023
1189787
Merge branch 'main' into refactor
PierpaoloSorbellini Apr 14, 2023
98b96c2
Fix merge issues
PierpaoloSorbellini Apr 14, 2023
Fix RLHF with fp16
PierpaoloSorbellini committed Apr 5, 2023
commit feacb88b9ee1f43a29f44d8eb7fa96e4323d560f
14 changes: 7 additions & 7 deletions apps/accelerate/chatllama/artifacts/config/config.yaml
@@ -14,12 +14,12 @@ trainer_config:
  # number of episodes and generation performed for each episode
  # in the train() method
  num_episodes: 100
- max_timesteps: 32
+ max_timesteps: 4
  # number of timesteps after which the learn() method is called
  # (to update the weights)
- update_timesteps: 32
+ update_timesteps: 4
  # number of example sampled at each timestep
- num_examples: 1
+ num_examples: 4
  # batch and epochs for the training
  batch_size: 1
  epochs: 1
@@ -33,7 +33,7 @@ trainer_config:
  accelerate_enable: False

  actor_config:
- model: "facebook/opt-125m"
+ model: "facebook/opt-1.3b"
  load_8bit: False
  model_folder: "./models"
  tokenizer_path: "path-to-tokenizer"
@@ -92,13 +92,13 @@ reward_config:
  epochs: 1
  iteration_per_print: 1
  # steps after which the checkpoint are saved
- checkpoint_steps: 200
+ checkpoint_steps: 10000
  # here specify the name of the reward checkpoint from which resume
  # during reward training. If null load the last one.
  checkpoint_name: null
  lr: 0.000009
  # deepspeed settings
- deepspeed_enable: True
+ deepspeed_enable: False
  deepspeed_config_path: "./artifacts/config/ds_config.json"
  # accelerate settings
  accelerate_enable: False
@@ -117,5 +117,5 @@ critic_config:
  # here specify the name of the critic checkpoint from which resume
  # during critic training. If null load the last one.
  checkpoint_name: null
- peft_enable: True
+ peft_enable: False
  peft_config_path: "./artifacts/config/peft_config.yaml"
12 changes: 6 additions & 6 deletions apps/accelerate/chatllama/artifacts/download_dataset.py
@@ -2,9 +2,9 @@
  import os

  from chatllama.rlhf.dataset import (
- AnthropicRLHF,
- SelfInstruct,
- StanfordNLPSHP,
+ AnthropicRLHFDataset,
+ SelfInstructDataset,
+ StanfordNLPSHPDataset,
  )

@@ -44,17 +44,17 @@
  raise ValueError("Number of samples should be an integer")

  if args.dataset_name == "SHP":
- dataset = StanfordNLPSHP()
+ dataset = StanfordNLPSHPDataset()
  dataset.save_dataset(args.path, n_samples)

  elif args.dataset_name == "ARLHF":
- dataset = AnthropicRLHF()
+ dataset = AnthropicRLHFDataset()
  dataset.save_dataset(
  args.path,
  n_samples,
  )
  elif args.dataset_name == "SI":
- dataset = SelfInstruct()
+ dataset = SelfInstructDataset()
  dataset.save_dataset(
  args.path,
  n_samples,
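
For reference, the renamed dataset classes can also be called directly, mirroring the SHP branch of the script above; the output path and sample count below are illustrative:

```python
# Sketch of the programmatic equivalent of the SHP branch in download_dataset.py;
# save_dataset(path, n_samples) matches the call shown in the diff above.
from chatllama.rlhf.dataset import StanfordNLPSHPDataset

dataset = StanfordNLPSHPDataset()
dataset.save_dataset("./datasets", 1000)  # illustrative path and sample count
```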
3 changes: 2 additions & 1 deletion apps/accelerate/chatllama/chatllama/rlhf/actor.py
@@ -313,6 +313,7 @@ def train(
  return_tensors="pt",
  truncation=True,
  padding=True,
+ max_length=self.config.max_sequence_length,
  )
  else:
  input_tokenized = self.model.tokenizer(
@@ -344,7 +345,7 @@ def train(
  attention_mask = input_tokenized_mask[:, :-1]

  # move to device
- if self.config.load_8bit is False:
+ if not self.config.load_8bit:
  training_output = training_output.to(self.device)
  training_input = training_input.to(self.device)
  attention_mask = attention_mask.to(self.device)
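
The first hunk bounds tokenization: with truncation=True but no max_length, Hugging Face tokenizers fall back to the model's maximum length, so passing max_sequence_length makes the cap explicit. The second hunk is purely stylistic (not self.config.load_8bit). A standalone sketch of the truncation effect; the model name and length here are examples, not the trainer's values:

```python
# Standalone illustration (not chatllama code) of bounding tokenized length.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
batch = tokenizer(
    ["a very long prompt " * 500],
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=1024,  # plays the role of config.max_sequence_length in the diff
)
print(batch["input_ids"].shape)  # the sequence dimension is capped at 1024
```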
5 changes: 4 additions & 1 deletion apps/accelerate/chatllama/chatllama/rlhf/base_model.py
@@ -499,6 +499,7 @@ def __init__(self, config: ConfigType) -> None:

  # clean the dataset
  if self.accelerate_enable or self.deepspeed_enable:
+ # TODO fix error for process group when using accelerate
  if dist.get_rank() == 0:
  BaseDataset.clean_dataset(config)
  else:
@@ -654,7 +655,9 @@ def setup_accelerate(
  )

  # assign device
- self.device = torch.device(f"cuda:{dist.get_rank()}")
+ # Fix error with process group not initialized when using
+ self.device = torch.device("cuda:0")
+ # self.device = torch.device(f"cuda:{dist.get_rank()}")

  my_logger.info("Training with Accelerate")

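
The device assignment is pinned to cuda:0 here because dist.get_rank() raises when the default process group has not been initialized, as the TODO notes. One possible guard, sketched below as an illustration rather than the PR's implementation, keeps the per-rank device whenever distributed training is actually initialized:

```python
# Illustrative guard only; the PR itself pins the device to cuda:0.
import torch
import torch.distributed as dist


def pick_device() -> torch.device:
    # Use the per-rank GPU only when the default process group exists.
    if dist.is_available() and dist.is_initialized():
        return torch.device(f"cuda:{dist.get_rank()}")
    if torch.cuda.is_available():
        return torch.device("cuda:0")
    return torch.device("cpu")
```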
3 changes: 3 additions & 0 deletions apps/accelerate/chatllama/chatllama/rlhf/config.py
@@ -57,6 +57,7 @@ class ConfigReward:
  is_reward (bool): True if the model is a reward model. Default to True.
  accelerate_enable (bool): Enable accelerate for the reward model
  debug (bool): enable prints for Debugging
+ device_type (str): Device type to be used for the reward model
  """

  device: torch.device
@@ -136,6 +137,7 @@ class ConfigActor:
  peft_enable (bool): Enable peft for the actor
  peft_config_path (str): Path to the peft config file.
  debug (bool): Enable prints for debugging
+ device_type (str): Device type to be used for the actor

  """

@@ -211,6 +213,7 @@ class ConfigTrainer:
  accelerate_enable (bool): Enable accelerate for rl training
  checkpoint_name (Optional[str]): Name of the checkpoint. Default to
  None.
+ device_type (str): Device type to be used for the rl training
  """

  actor_lr: int
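
These three hunks only add docstring entries for a device_type field on the config dataclasses. Assuming the field holds a string such as "cuda" or "cpu", a typical mapping to a torch.device would look like the sketch below; the diff itself does not show how the field is consumed:

```python
# Assumed usage of a device_type string; the diff only adds docstring entries,
# so this mapping is illustrative.
import torch


def resolve_device(device_type: str) -> torch.device:
    if device_type.startswith("cuda") and not torch.cuda.is_available():
        return torch.device("cpu")  # fall back when no GPU is present
    return torch.device(device_type)
```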
6 changes: 3 additions & 3 deletions apps/accelerate/chatllama/chatllama/rlhf/dataset.py
@@ -249,7 +249,7 @@ def clean_dataset(config: ConfigType):
  )


- class StanfordNLPSHP(BaseDataset):
+ class StanfordNLPSHPDataset(BaseDataset):
  """Class for Stanford NLP SHP dataset from HuggingFace"""

  def __init__(
@@ -344,7 +344,7 @@ def save_dataset(
  my_logger.success("Generation Completed")


- class AnthropicRLHF(BaseDataset):
+ class AnthropicRLHFDataset(BaseDataset):
  """Class for Anthropic RLHF dataset from HuggingFace"""

  def __init__(
@@ -438,7 +438,7 @@ def save_dataset(
  my_logger.success("Generation Completed")


- class SelfInstruct(BaseDataset):
+ class SelfInstructDataset(BaseDataset):
  """Class for SelfInstruct dataset from HuggingFace"""

  def __init__(