Commit 357ec81: decapoda

tloen committed Mar 14, 2023
1 parent 6312124 commit 357ec81
Showing 4 changed files with 148 additions and 57 deletions.
31 changes: 11 additions & 20 deletions README.md
@@ -1,34 +1,25 @@
# alpaca-lora
# alpaca-lora (WIP)

This repository contains code for reproducing the Stanford Alpaca results. Users will need to have LLaMA weights on hand and be ready to fork `transformers`.
This repository contains code for reproducing the [Stanford Alpaca results](https://github.com/tatsu-lab/stanford_alpaca#data-release). Users will need to be ready to fork `transformers`.

# Setup

1. Install dependencies
1. Install dependencies (**install zphang's transformers fork**)

```
pip install -q bitsandbytes datasets accelerate loralib
pip install -q datasets accelerate loralib sentencepiece
pip install -q git+https://github.com/zphang/transformers@llama_push
pip install -q git+https://github.com/huggingface/peft.git\
pip install -q git+https://github.com/huggingface/peft.git
```

2. Convert weights
2. [Install bitsandbytes from source](https://github.com/TimDettmers/bitsandbytes/blob/main/compile_from_source.md)

```
python conversion.py --input_dir [LLAMA_DIR]/LLaMA --model_size 7B --output_dir ./7B
```

3. Modify hyperparams in `finetune.py`
# Inference

```
MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
EPOCHS = 3
LEARNING_RATE = 2e-5
```
See `generate.py`.

4. Run experiments
# Training

```
python finetune.py
```
Under construction.
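The setup steps above assume zphang's `llama_push` branch of `transformers` (for the `LLaMA*` classes) plus a source build of bitsandbytes. A minimal sanity check along these lines, not part of the commit, confirms the environment before running `finetune.py` or `generate.py`:

```python
# Environment sanity check (illustrative sketch, not part of this commit).
from importlib.metadata import version

import peft  # noqa: F401  -- fails here if peft is missing
import bitsandbytes  # noqa: F401  -- fails here if the source build did not install

# These class names are only provided by zphang's llama_push fork of transformers;
# the upstream release at the time of this commit did not ship LLaMA support.
from transformers import LLaMAForCausalLM, LLaMATokenizer

for pkg in ("transformers", "peft", "bitsandbytes", "datasets", "sentencepiece"):
    print(f"{pkg}: {version(pkg)}")
print("LLaMA classes found:", LLaMAForCausalLM.__name__, LLaMATokenizer.__name__)
```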
40 changes: 11 additions & 29 deletions finetune.py
@@ -10,32 +10,14 @@
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

model = LLaMAForCausalLM.from_pretrained(
"./7B/llama-7b",
"decapoda-research/llama-7b-hf",
load_in_8bit=True,
max_sequence_length=128, # data length
device_map="auto",
)


tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")


def print_trainable_parameters(model):
"""
Prints the number of trainable parameters in the model.
"""
trainable_params = 0
all_param = 0
for _, param in model.named_parameters():
all_param += param.numel()
if param.requires_grad:
trainable_params += param.numel()
print(
f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
)


print_trainable_parameters(model)
model = prepare_model_for_int8_training(model)

config = LoraConfig(
@@ -48,8 +30,6 @@ def print_trainable_parameters(model):
)
model = get_peft_model(model, config)

print_trainable_parameters(model)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

@@ -77,21 +57,23 @@ def generate_prompt(data_point):
### Response:"""


# optimized for RTX 4090.
MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 1
LEARNING_RATE = 2e-5
CUTOFF_LEN = 128

data = data.map(
lambda data_point: tokenizer(
generate_prompt(data_point),
truncation=True,
max_length=128,
max_length=CUTOFF_LEN,
padding="max_length",
)
)

MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 3
LEARNING_RATE = 2e-5


trainer = transformers.Trainer(
model=model,
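Taken together, the `finetune.py` changes above point the script at the `decapoda-research/llama-7b-hf` checkpoint, remove the local `print_trainable_parameters` helper, and hoist the hyperparameters above the tokenization step. The following is a consolidated sketch of the resulting training flow; the `LoraConfig` values, the dataset file, the prompt builder, and the `TrainingArguments` are assumptions, since those lines are collapsed in this diff:

```python
# Consolidated training sketch following the diff above. The LoraConfig values,
# dataset file, prompt builder, and TrainingArguments are assumptions; those parts
# are collapsed in the diff shown here.
import transformers
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
from transformers import LLaMAForCausalLM, LLaMATokenizer  # zphang's fork

MICRO_BATCH_SIZE = 12                                         # fits one RTX 4090
BATCH_SIZE = 36                                               # effective batch size
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE  # 36 // 12 = 3
EPOCHS = 1
LEARNING_RATE = 2e-5
CUTOFF_LEN = 128                                              # max prompt length in tokens

model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,   # bitsandbytes int8 weights
    device_map="auto",
)
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id  # pad with EOS, as in the diff

model = prepare_model_for_int8_training(model)
lora_config = LoraConfig(  # representative values; the commit's exact ones are collapsed
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)


def generate_prompt(point):
    # Stand-in for the repository's generate_prompt(), based on the template
    # shown in generate.py below, with the expected output appended for training.
    return (
        "Below is an instruction that describes a task, paired with an input that "
        "provides further context. Write a response that appropriately completes the request.\n"
        f"### Instruction:\n{point['instruction']}\n"
        f"### Input:\n{point['input']}\n"
        f"### Response:\n{point['output']}"
    )


# Assumed data source: the Stanford Alpaca instruction-tuning JSON.
data = load_dataset("json", data_files="alpaca_data.json")
data = data.map(
    lambda point: tokenizer(
        generate_prompt(point),
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",
    )
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        output_dir="outputs",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
model.save_pretrained("outputs")
```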
15 changes: 7 additions & 8 deletions generate.py
@@ -1,23 +1,22 @@
import torch
from peft import get_peft_model, PeftConfig, LoraConfig, PeftModel
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM

tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")

model = LLaMAForCausalLM.from_pretrained(
"./7B/llama-7b",
"decapoda-research/llama-7b-hf",
load_in_8bit=True,
device_map="auto",
)
model = PeftModel.from_pretrained(model, "./outputs")
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")

PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Sort the following numbers.
Write a poem about the following topic.
### Input:
5, 2, 3
Cars
### Response:"""

@@ -26,7 +25,7 @@
return_tensors="pt",
)
generation_output = model.generate(
**inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=50
**inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=128
)
for s in generation_output.sequences:
print(tokenizer.decode(s))
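The updated `generate.py` stacks the published `tloen/alpaca-lora-7b` adapter on top of the Hub checkpoint instead of reading local `./7B` and `./outputs` paths. A small wrapper like the following reuses the same pieces for arbitrary instructions; the `instruct()` helper and its defaults are illustrative, not part of the commit:

```python
# Sketch of reusing the inference pieces from generate.py for arbitrary
# instructions. The instruct() helper and its defaults are illustrative.
import torch
from peft import PeftModel
from transformers import LLaMAForCausalLM, LLaMATokenizer  # zphang's fork

tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
model.eval()

TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{input}
### Response:"""


def instruct(instruction: str, input_text: str = "", max_new_tokens: int = 128) -> str:
    prompt = TEMPLATE.format(instruction=instruction, input=input_text)
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    return tokenizer.decode(output.sequences[0])


print(instruct("Write a poem about the following topic.", "Cars"))
```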
