
Refactor of models and trainers with base class for common methods #306

Open · wants to merge 42 commits into base: main

Changes from 1 commit

42 commits
2b8e301
Refactor models and trainers with base_class for common methods
PierpaoloSorbellini Mar 27, 2023
5e0ded8
Revert "Release ChatLLaMA 0.0.4"
PierpaoloSorbellini Mar 27, 2023
3fa5c53
Merge branch 'main' of https://github.com/nebuly-ai/nebullvm into main
PierpaoloSorbellini Mar 27, 2023
ab1f09e
Refactor of models and trainers with base class for common methods
PierpaoloSorbellini Mar 27, 2023
3d54d50
Fix comments and values in the config.yaml
PierpaoloSorbellini Mar 27, 2023
9f5eab4
Add load 8 bit from HF
PierpaoloSorbellini Mar 27, 2023
dc46ee4
Add check on load int 8
PierpaoloSorbellini Mar 27, 2023
c1d03d3
Add Reward and Critic support for LoRA PEFT
PierpaoloSorbellini Mar 28, 2023
36c350d
Add SelfInstruct Dataset from HF
PierpaoloSorbellini Mar 28, 2023
bb92ee7
Fix imports
Mar 28, 2023
6fc94d3
Add logging with proper class
Mar 29, 2023
dc2489f
Fix logs for deepspeed
Mar 30, 2023
0b0795d
Fix early logs with multi-GPUs
Mar 30, 2023
01be6dc
Fix MultiGPU for accelerate
Mar 30, 2023
13b1abd
Fix batch-size for accelerate
Mar 30, 2023
db8b3c2
Add multi gpu training to readme.md
Mar 30, 2023
d771fb2
Fix fp16 training
Mar 31, 2023
e5f959c
Merge branch 'main' into refactor
PierpaoloSorbellini Mar 31, 2023
d5084e5
Fix Distributed training for RLHF
PierpaoloSorbellini Apr 3, 2023
2ec5eaa
Add new models
PierpaoloSorbellini Apr 3, 2023
33e97e2
Add decapoda models
PierpaoloSorbellini Apr 3, 2023
8332a26
Add unsupported model message
PierpaoloSorbellini Apr 3, 2023
32ddfa2
Change sign of KL div according to issue #298
PierpaoloSorbellini Apr 3, 2023
aa9881c
Fix imports order
PierpaoloSorbellini Apr 3, 2023
b10f1dc
Add cases for lora-peft model loading
PierpaoloSorbellini Apr 4, 2023
86a699b
Merge branch 'refactor' of https://github.com/nebuly-ai/nebullvm into…
PierpaoloSorbellini Apr 4, 2023
1f29ba4
Fix Actor 8bit training
PierpaoloSorbellini Apr 4, 2023
1836788
Adjust code comments to match new adjustments
PierpaoloSorbellini Apr 4, 2023
966a19d
Fix device error when using vanilla pytorch training
PierpaoloSorbellini Apr 4, 2023
feacb88
Fix RLHF with fp16
PierpaoloSorbellini Apr 5, 2023
f894494
Move grad scaler into base class
PierpaoloSorbellini Apr 5, 2023
b56185f
Add check on 8bit load and distributed training
PierpaoloSorbellini Apr 5, 2023
5699aaa
Add template to self-instruct dataset
PierpaoloSorbellini Apr 12, 2023
5c83927
Fix checkpoints name in actor training
PierpaoloSorbellini Apr 12, 2023
a205ee6
Fix slow loss computation
PierpaoloSorbellini Apr 12, 2023
bb386c4
Fix checkpoints also in reward models
PierpaoloSorbellini Apr 12, 2023
22a64af
Fix checkpoint for rl
PierpaoloSorbellini Apr 12, 2023
10211c6
Add n_checkpoints for all the training with old checkpoints removal
PierpaoloSorbellini Apr 12, 2023
442b396
Improve datasets quality with reward model negative examples
PierpaoloSorbellini Apr 13, 2023
71a6c02
Merge branch 'main' of https://github.com/nebuly-ai/nebullvm into main
PierpaoloSorbellini Apr 14, 2023
1189787
Merge branch 'main' into refactor
PierpaoloSorbellini Apr 14, 2023
98b96c2
Fix merge issues
PierpaoloSorbellini Apr 14, 2023
Fix fp16 training
Ubuntu committed Mar 31, 2023
commit d771fb2c767f5d35da8ae3a954d794f5674db366
53 changes: 45 additions & 8 deletions apps/accelerate/chatllama/chatllama/rlhf/actor.py
Expand Up @@ -5,6 +5,7 @@
from beartype.typing import Tuple
from einops import rearrange
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import GradScaler

from chatllama.rlhf.base_model import BaseModel, BaseTrainer
from chatllama.rlhf.config import ConfigActor
Expand Down Expand Up @@ -213,6 +214,12 @@ def __init__(self, config: ConfigActor) -> None:
# HF accelerate
self.setup_accelerate()

# define the scaler needed for vanilla pytorch with mixed precision
if (not self.accelerate_enable) and (not self.deepspeed_enable):
self.scaler = GradScaler()
else:
self.scaler = None

def add_eos_token(
self, tokens: torch.Tensor, mask: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
Expand Down Expand Up @@ -260,13 +267,15 @@ def train(

# get config parameters
if self.deepspeed_enable:
batch_size = self.train_dataloader.batch_size
# get batch size from deepspeed
batch_size = self.model_engine.train_batch_size()
elif self.accelerate_enable:
batch_size = (
self.config.batch_size * self.accelerator.num_processes
)
else:
batch_size = self.config.batch_size

epochs = self.config.epochs
device = self.config.device
checkpoint_steps = self.config.checkpoint_steps
Expand Down Expand Up @@ -334,17 +343,44 @@ def train(
attention_mask = attention_mask.to(device)

# forward pass
if self.config.deepspeed_enable:
if self.deepspeed_enable:
est_output = self.model_engine(
training_input, attention_mask
)
else:
elif self.accelerate_enable:
est_output = self.model(training_input, attention_mask)
else:
with torch.autocast(
device_type=self.config.device_type,
dtype=torch.float16,
Collaborator
Do we need to auto-cast all the tensors to fp16? Shouldn't this be a config param?

Collaborator Author
Just following the documentation: https://pytorch.org/docs/stable/notes/amp_examples.html
Compared with casting the tensors manually, this causes fewer type problems in the embeddings. It is not a config param because if you do not use fp16 you would fall back to fp32, which is probably worse, so I did not see the point of adding an fp32 option.

Collaborator
But what if I want to train the model in fp32 precision? (DeepSpeed, for instance, allows the user to select the precision.)
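To make the trade-off concrete, an editorial sketch (not part of this diff): both torch.autocast and GradScaler accept an enabled flag, so precision could be driven by a single config switch without duplicating the training path. A minimal sketch, assuming a hypothetical use_fp16 config field that this PR does not add:

```python
import torch
from torch.cuda.amp import GradScaler

use_fp16 = True                        # hypothetical config switch, not a field in ConfigActor
scaler = GradScaler(enabled=use_fp16)  # acts as a pass-through when disabled

def train_step(model, optimizer, loss_function, batch, device_type="cuda"):
    training_input, training_output, attention_mask = batch
    optimizer.zero_grad()
    # with enabled=False this block simply runs in full fp32
    with torch.autocast(device_type=device_type, dtype=torch.float16, enabled=use_fp16):
        est_output = model(training_input, attention_mask)
        loss = loss_function(est_output, training_output)
    scaler.scale(loss).backward()      # plain loss.backward() when scaling is disabled
    scaler.step(optimizer)             # plain optimizer.step() when scaling is disabled
    scaler.update()
    return loss.item()
```

DeepSpeed and Accelerate would keep selecting precision through their own configuration, as the surrounding branches already do.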

):
est_output = self.model(training_input, attention_mask)

# compute loss
est_output = rearrange(est_output, "b s v -> (b s) v")
training_output = rearrange(training_output, "b s -> (b s)")
loss = self.loss_function(est_output, training_output)
if (not self.accelerate_enable) and (
not self.deepspeed_enable
):

# vanilla pytorch use autocast
with torch.autocast(
device_type=self.config.device_type,
dtype=torch.float16,
):
est_output = rearrange(est_output, "b s v -> (b s) v")
training_output = rearrange(
training_output, "b s -> (b s)"
)
loss = self.loss_function(est_output, training_output)
else:

# deepspeed and accelerate use the default
est_output = rearrange(est_output, "b s v -> (b s) v")
training_output = rearrange(
training_output, "b s -> (b s)"
)
loss = self.loss_function(est_output, training_output)

# save training stats
self.append_training_stats(training_loss=loss.item())

# backward pass
self.scheduler.step()
else:
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.scaler.scale(loss).backward()
self.scaler.step(self.optimizer)
self.scaler.update()
self.scheduler.step()

# print progress
44 changes: 30 additions & 14 deletions apps/accelerate/chatllama/chatllama/rlhf/base_model.py
Expand Up @@ -156,6 +156,7 @@ def __init__(self, config: ConfigType) -> None:

# load the model from model_folder
self.load()
my_logger.success("Model loaded")

else:
# ActorCritic initialization
Expand Down Expand Up @@ -481,20 +482,35 @@ def setup_deepspeed(
# initialize deepspeed
self.model_engine = None
if self.deepspeed_enable is True:
(
self.model_engine,
self.optimizer,
self.train_dataloader,
self.scheduler,
) = deepspeed.initialize(
args=None,
model=self.model,
model_parameters=self.model.parameters(),
optimizer=self.optimizer,
lr_scheduler=self.scheduler,
training_data=self.train_dataset,
config=self.deepspeed_config_path,
)
if isinstance(self.config, Config):
(
self.model_engine,
self.optimizer,
self.train_dataloader,
self.scheduler,
) = deepspeed.initialize(
args=None,
model=self.model,
model_parameters=self.model.parameters(),
optimizer=self.optimizer,
lr_scheduler=self.scheduler,
training_data=self.train_dataset,
config=self.deepspeed_config_path,
)
else:
(
self.model_engine,
self.optimizer,
self.train_dataloader,
self.scheduler,
) = deepspeed.initialize(
args=None,
model=self.model,
model_parameters=self.model.parameters(),
lr_scheduler=self.scheduler,
training_data=self.train_dataset,
config=self.deepspeed_config_path,
)
my_logger.info("Training with DeepSpeed")

@beartype
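A note on the two deepspeed.initialize branches above: the optimizer argument is optional, and when it is omitted DeepSpeed builds the optimizer from the "optimizer" section of its JSON config, which appears to be the intent of the second branch. A rough sketch of the two call shapes; the model, optimizer, and config below are placeholders, not the PR's actual objects:

```python
import torch
import deepspeed

model = torch.nn.Linear(16, 16)                     # placeholder model
optimizer = torch.optim.AdamW(model.parameters())   # client-side optimizer
ds_config = {"train_batch_size": 8}                 # minimal placeholder DeepSpeed config

# First branch: wrap the optimizer the trainer already constructed.
engine, optimizer, _, _ = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    optimizer=optimizer,
    config=ds_config,
)

# Second branch: omit `optimizer`; DeepSpeed instantiates it from its own config,
# e.g. {"optimizer": {"type": "AdamW", "params": {"lr": 1e-5}}}.
engine, optimizer, _, _ = deepspeed.initialize(
    model=model,
    model_parameters=model.parameters(),
    config={**ds_config, "optimizer": {"type": "AdamW", "params": {"lr": 1e-5}}},
)
```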
14 changes: 14 additions & 0 deletions apps/accelerate/chatllama/chatllama/rlhf/config.py
Expand Up @@ -87,6 +87,7 @@ class ConfigReward:
accelerate_enable: bool = False

debug: bool = False
device_type: str = "cuda"


# just for naming consistency
Expand Down Expand Up @@ -168,6 +169,7 @@ class ConfigActor:
peft_config_path: str
checkpoint_name: Optional[str] = None
debug: bool = False
device_type: str = "cuda"


@dataclass
Expand Down Expand Up @@ -231,6 +233,7 @@ class ConfigTrainer:
accelerate_enable: bool
checkpoint_name: Optional[str] = None
debug: bool = False
device_type: str = "cuda"


class Config:
Expand Down Expand Up @@ -270,9 +273,16 @@ def __init__(
) -> None:

# if not specified use the device available
if device is not None:
if ":" in str(device):
device_type = str(device).split(":")[0]
else:
device_type = str(device)

if device is None:
if torch.cuda.is_available():
device = torch.device("cuda")
device_type = "cuda"
else:
raise ValueError("No GPU available...")
# print(f"Current device used :{str(device)}")
# Trainer Config
trainer_dict["device"] = device
trainer_dict["debug"] = debug
trainer_dict["device_type"] = device_type
self.trainer = ConfigTrainer(**trainer_dict)
# Actor Config
actor_dict["device"] = device
actor_dict["debug"] = debug
actor_dict["device_type"] = device_type
self.actor = ConfigActor(**actor_dict)
# Critic Config
critic_dict["device"] = device
critic_dict["debug"] = debug
critic_dict["device_type"] = device_type
self.critic = ConfigCritic(**critic_dict)
self.critic.is_reward = False
# Reward Config
reward_dict["device"] = device
reward_dict["debug"] = debug
reward_dict["device_type"] = device_type
self.reward = ConfigReward(**reward_dict)
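For context on the new device_type field, an editorial note (not part of the diff): torch.autocast expects a bare device type such as "cuda" or "cpu", not an indexed device like "cuda:0", so the index has to be stripped before the value can be used there. The same derivation in isolation:

```python
import torch

def derive_device_type(device) -> str:
    """Reduce a torch.device or device string to the type string torch.autocast expects."""
    text = str(device)                  # torch.device("cuda:0") -> "cuda:0"
    return text.split(":")[0] if ":" in text else text

assert derive_device_type(torch.device("cuda:0")) == "cuda"
assert derive_device_type("cpu") == "cpu"
```

(text.split(":")[0] alone would cover both branches, since split returns the whole string when no separator is present.)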
66 changes: 42 additions & 24 deletions apps/accelerate/chatllama/chatllama/rlhf/reward.py
Expand Up @@ -4,6 +4,7 @@
import torch
from beartype import beartype
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import GradScaler

from chatllama.rlhf.base_model import BaseModel, BaseTrainer
from chatllama.rlhf.config import ConfigReward
Expand Down Expand Up @@ -150,21 +151,14 @@ def __init__(self, config: ConfigReward) -> None:

# load the model
self.model = RewardModel(config)

self.accelerate_enable = self.model.accelerate_enable
self.deepspeed_enable = self.model.deepspeed_enable

# optimizer
if self.deepspeed_enable:
import deepspeed

deepspeed.ops.op_builder.CPUAdamBuilder().load()
self.optimizer = deepspeed.ops.adam.DeepSpeedCPUAdam(
self.model.parameters(), lr=config.lr
)
else:
self.optimizer = torch.optim.AdamW(
self.model.parameters(), lr=config.lr
)
self.optimizer = torch.optim.AdamW(
self.model.parameters(), lr=config.lr
)

# loss function
self.loss_function = torch.nn.MSELoss()
Expand Down Expand Up @@ -197,12 +191,21 @@ def __init__(self, config: ConfigReward) -> None:
last_epoch=-1,
)

# for scaling the gradients
self.scaler = None

# deepspeed
self.setup_deepspeed()

# HF accelerate
self.setup_accelerate()

# define the scaler needed for vanilla pytorch with mixed precision
if (not self.accelerate_enable) and (not self.deepspeed_enable):
self.scaler = GradScaler()
else:
self.scaler = None

def train(
self,
) -> None:
Expand All @@ -212,7 +215,8 @@ def train(

# get config parameters
if self.deepspeed_enable:
batch_size = self.train_dataloader.batch_size
# get batch size from deepspeed
batch_size = self.model_engine.train_batch_size()
elif self.accelerate_enable:
batch_size = (
self.config.batch_size * self.accelerator.num_processes
Expand Down Expand Up @@ -263,30 +267,43 @@ def train(
truncation=True,
padding=True,
)
output = torch.as_tensor(
score, dtype=torch.float32, device=device
)
output = torch.as_tensor(score, device=device)

# forward pass
if self.config.deepspeed_enable:
est_output = self.model_engine(
input_tokens["input_ids"].to(device),
input_tokens["attention_mask"].to(device),
)[:, -1]
elif self.accelerate_enable:
est_output = self.model.module.get_reward(
input_tokens["input_ids"].to(device),
input_tokens["attention_mask"].to(device),
)
else:
if self.accelerate_enable:
est_output = self.model.module.get_reward(
input_tokens["input_ids"].to(device),
input_tokens["attention_mask"].to(device),
)
else:
with torch.autocast(
device_type=self.config.device_type,
dtype=torch.float16,
):
est_output = self.model.get_reward(
input_tokens["input_ids"].to(device),
input_tokens["attention_mask"].to(device),
)

# compute the loss
loss = self.loss_function(est_output, output)
if (not self.accelerate_enable) and (
not self.deepspeed_enable
):
# if vanilla pytorch use autocast
with torch.autocast(
device_type=self.config.device_type,
dtype=torch.float16,
):
loss = self.loss_function(est_output, output)
else:
# compute the loss normally
loss = self.loss_function(est_output, output)

self.append_training_stats(training_loss=loss.item())

# backward pass
self.scheduler.step()
else:
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.scaler.scale(loss).backward()
self.scaler.step(self.optimizer)
self.scaler.update()
self.scheduler.step()

# print progress