Commit c7f8ef0
fixed a forward bug with autocast for bottleneck adapters
HZQ950419 committed Apr 4, 2023
1 parent 6dc6f2a commit c7f8ef0
Showing 4 changed files with 4 additions and 6 deletions.
1 change: 1 addition & 0 deletions evaluate.py
@@ -184,6 +184,7 @@ def load_model(args) -> tuple:
     base_model_mapping = {
         'LLaMA-7B': 'decapoda-research/llama-7b-hf',
         "GPT-j-6B": "EleutherAI/gpt-j-6B",
+        "BLOOM-7B": "bigscience/bloom-7b1",
     }
     base_model = base_model_mapping.get(args.model)
     if not base_model:
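The new mapping entry routes the "BLOOM-7B" model name to the bigscience/bloom-7b1 checkpoint. A minimal sketch of how the lookup behaves, assuming an argparse-style `args.model` as implied by the `load_model(args)` signature (the CLI wiring below is hypothetical, not evaluate.py's actual code):

    import argparse

    # Hypothetical CLI wiring; evaluate.py's actual argument parsing is not shown in this diff.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="BLOOM-7B")
    args = parser.parse_args([])

    base_model_mapping = {
        'LLaMA-7B': 'decapoda-research/llama-7b-hf',
        "GPT-j-6B": "EleutherAI/gpt-j-6B",
        "BLOOM-7B": "bigscience/bloom-7b1",
    }
    base_model = base_model_mapping.get(args.model)
    if not base_model:
        # dict.get returns None for unknown names, so the falsy check catches them.
        raise ValueError(f"Unrecognized model name: {args.model}")
    print(base_model)  # bigscience/bloom-7b1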
5 changes: 1 addition & 4 deletions finetune.py
@@ -45,10 +45,7 @@ def train(
     lora_r: int = 8,
     lora_alpha: int = 16,
     lora_dropout: float = 0.05,
-    lora_target_modules: List[str] = [
-        "q_proj",
-        "v_proj",
-    ],
+    lora_target_modules: List[str] = None,
     # bottleneck adapter hyperparams
     bottleneck_size: int = 256,
     non_linearity: str = "tanh",
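Replacing the list-literal default with `None` avoids Python's shared-mutable-default pitfall (a list in the signature is created once and reused across all calls) and lets the function choose target modules per base model at call time. A minimal sketch of the usual resolution pattern, assuming the removed `["q_proj", "v_proj"]` values as the fallback (the diff does not show finetune.py's actual resolution logic):

    from typing import List, Optional

    def train(lora_target_modules: Optional[List[str]] = None):
        # Resolve inside the body: every call gets a fresh list, and
        # model-specific module names can be substituted before this point.
        if lora_target_modules is None:
            lora_target_modules = ["q_proj", "v_proj"]
        return lora_target_modules

    print(train())                     # ['q_proj', 'v_proj']
    print(train(["query_key_value"]))  # e.g. BLOOM-style attention modules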
Binary file modified peft/src/peft/tuners/__pycache__/bottleneck.cpython-39.pyc
Binary file not shown.
4 changes: 2 additions & 2 deletions peft/src/peft/tuners/bottleneck.py
@@ -462,7 +462,7 @@ def forward(self, x: torch.Tensor):

             residual = x
             output = self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(x)))).to(expected_dtype) * self.adapter_scaling
-            output = output + residual
+            output = (output + residual).to(expected_dtype)
 
             result = super().forward(output)
         else:
@@ -480,7 +480,7 @@ def forward(self, x: torch.Tensor):

             residual = result_pre_forward
             output = self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(result_pre_forward)))).to(expected_dtype) * self.adapter_scaling
-            result = output + residual
+            result = (output + residual).to(expected_dtype)
         else:
             residual = result_pre_forward
             output = self.adapter_up(self.act_fn(self.adapter_down(self.adapter_dropout(result_pre_forward)))) * self.adapter_scaling
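Both hunks re-cast the residual sum to `expected_dtype`. Under autocast the adapter branch produces tensors in the autocast dtype, while the residual can still be float32; PyTorch's type promotion then silently widens the sum, so downstream layers receive a dtype they did not expect. A runnable sketch of that promotion on a toy module (a standalone illustration, not the repo's adapter class):

    import torch

    lin = torch.nn.Linear(8, 8)
    x = torch.randn(2, 8)  # float32 residual, as before entering the adapter

    with torch.autocast("cpu", dtype=torch.bfloat16):
        out = lin(x)                          # autocast runs the matmul in bfloat16
        expected_dtype = out.dtype            # torch.bfloat16
        buggy = out + x                       # promotes back to torch.float32
        fixed = (out + x).to(expected_dtype)  # the commit's fix: re-cast the sum

    print(buggy.dtype, fixed.dtype)  # torch.float32 torch.bfloat16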
