Commit 357ec81: decapoda

tloen committed Mar 14, 2023
1 parent 6312124 commit 357ec81
Showing 4 changed files with 148 additions and 57 deletions.
31 changes: 11 additions & 20 deletions README.md
@@ -1,34 +1,25 @@
# alpaca-lora
# alpaca-lora (WIP)

This repository contains code for reproducing the Stanford Alpaca results. Users will need to have LLaMA weights on hand and be ready to fork `transformers`.
This repository contains code for reproducing the [Stanford Alpaca results](https://github.com/tatsu-lab/stanford_alpaca#data-release). Users will need to be ready to fork `transformers`.

# Setup

1. Install dependencies
1. Install dependencies (**install zphang's transformers fork**)

```
pip install -q bitsandbytes datasets accelerate loralib
pip install -q datasets accelerate loralib sentencepiece
pip install -q git+https://github.com/zphang/transformers@llama_push
pip install -q git+https://github.com/huggingface/peft.git\
pip install -q git+https://github.com/huggingface/peft.git
```

2. Convert weights
2. [Install bitsandbytes from source](https://github.com/TimDettmers/bitsandbytes/blob/main/compile_from_source.md)

```
python conversion.py --input_dir [LLAMA_DIR]/LLaMA --model_size 7B --output_dir ./7B
```

3. Modify hyperparams in `finetune.py`
# Inference

```
MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
EPOCHS = 3
LEARNING_RATE = 2e-5
```
See `generate.py`.

4. Run experiments
# Training

```
python finetune.py
```
Under construction.
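The setup steps above assume zphang's `llama_push` branch of `transformers` (for the `LLaMA*` classes) plus a source build of bitsandbytes. A minimal sanity check along these lines, not part of the commit, confirms the environment before running `finetune.py` or `generate.py`:

```python
# Environment sanity check (illustrative sketch, not part of this commit).
from importlib.metadata import version

import peft  # noqa: F401  -- fails here if peft is missing
import bitsandbytes  # noqa: F401  -- fails here if the source build did not install

# These class names are only provided by zphang's llama_push fork of transformers;
# the upstream release at the time of this commit did not ship LLaMA support.
from transformers import LLaMAForCausalLM, LLaMATokenizer

for pkg in ("transformers", "peft", "bitsandbytes", "datasets", "sentencepiece"):
    print(f"{pkg}: {version(pkg)}")
print("LLaMA classes found:", LLaMAForCausalLM.__name__, LLaMATokenizer.__name__)
```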
40 changes: 11 additions & 29 deletions finetune.py
@@ -10,32 +10,14 @@
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

model = LLaMAForCausalLM.from_pretrained(
"./7B/llama-7b",
"decapoda-research/llama-7b-hf",
load_in_8bit=True,
max_sequence_length=128, # data length
device_map="auto",
)


tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")


def print_trainable_parameters(model):
"""
Prints the number of trainable parameters in the model.
"""
trainable_params = 0
all_param = 0
for _, param in model.named_parameters():
all_param += param.numel()
if param.requires_grad:
trainable_params += param.numel()
print(
f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
)


print_trainable_parameters(model)
model = prepare_model_for_int8_training(model)

config = LoraConfig(
@@ -48,8 +30,6 @@ def print_trainable_parameters(model):
)
model = get_peft_model(model, config)

print_trainable_parameters(model)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

@@ -77,21 +57,23 @@ def generate_prompt(data_point):
### Response:"""


# optimized for RTX 4090.
MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 1
LEARNING_RATE = 2e-5
CUTOFF_LEN = 128

data = data.map(
lambda data_point: tokenizer(
generate_prompt(data_point),
truncation=True,
max_length=128,
max_length=CUTOFF_LEN,
padding="max_length",
)
)

MICRO_BATCH_SIZE = 12
BATCH_SIZE = 36
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 3
LEARNING_RATE = 2e-5


trainer = transformers.Trainer(
model=model,
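Taken together, the `finetune.py` changes above point the script at the `decapoda-research/llama-7b-hf` checkpoint, remove the local `print_trainable_parameters` helper, and hoist the hyperparameters above the tokenization step. The following is a consolidated sketch of the resulting training flow; the `LoraConfig` values, the dataset file, the prompt builder, and the `TrainingArguments` are assumptions, since those lines are collapsed in this diff:

```python
# Consolidated training sketch following the diff above. The LoraConfig values,
# dataset file, prompt builder, and TrainingArguments are assumptions; those parts
# are collapsed in the diff shown here.
import transformers
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training
from transformers import LLaMAForCausalLM, LLaMATokenizer  # zphang's fork

MICRO_BATCH_SIZE = 12                                         # fits one RTX 4090
BATCH_SIZE = 36                                               # effective batch size
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE  # 36 // 12 = 3
EPOCHS = 1
LEARNING_RATE = 2e-5
CUTOFF_LEN = 128                                              # max prompt length in tokens

model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,   # bitsandbytes int8 weights
    device_map="auto",
)
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id  # pad with EOS, as in the diff

model = prepare_model_for_int8_training(model)
lora_config = LoraConfig(  # representative values; the commit's exact ones are collapsed
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)


def generate_prompt(point):
    # Stand-in for the repository's generate_prompt(), based on the template
    # shown in generate.py below, with the expected output appended for training.
    return (
        "Below is an instruction that describes a task, paired with an input that "
        "provides further context. Write a response that appropriately completes the request.\n"
        f"### Instruction:\n{point['instruction']}\n"
        f"### Input:\n{point['input']}\n"
        f"### Response:\n{point['output']}"
    )


# Assumed data source: the Stanford Alpaca instruction-tuning JSON.
data = load_dataset("json", data_files="alpaca_data.json")
data = data.map(
    lambda point: tokenizer(
        generate_prompt(point),
        truncation=True,
        max_length=CUTOFF_LEN,
        padding="max_length",
    )
)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=MICRO_BATCH_SIZE,
        gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
        num_train_epochs=EPOCHS,
        learning_rate=LEARNING_RATE,
        fp16=True,
        logging_steps=20,
        output_dir="outputs",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
model.save_pretrained("outputs")
```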
15 changes: 7 additions & 8 deletions generate.py
@@ -1,23 +1,22 @@
import torch
from peft import get_peft_model, PeftConfig, LoraConfig, PeftModel
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM

tokenizer = LLaMATokenizer.from_pretrained("./7B/tokenizer")
tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")

model = LLaMAForCausalLM.from_pretrained(
"./7B/llama-7b",
"decapoda-research/llama-7b-hf",
load_in_8bit=True,
device_map="auto",
)
model = PeftModel.from_pretrained(model, "./outputs")
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")

PROMPT = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Sort the following numbers.
Write a poem about the following topic.
### Input:
5, 2, 3
Cars
### Response:"""

@@ -26,7 +25,7 @@
return_tensors="pt",
)
generation_output = model.generate(
**inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=50
**inputs, return_dict_in_generate=True, output_scores=True, max_new_tokens=128
)
for s in generation_output.sequences:
print(tokenizer.decode(s))
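The updated `generate.py` stacks the published `tloen/alpaca-lora-7b` adapter on top of the Hub checkpoint instead of reading local `./7B` and `./outputs` paths. A small wrapper like the following reuses the same pieces for arbitrary instructions; the `instruct()` helper and its defaults are illustrative, not part of the commit:

```python
# Sketch of reusing the inference pieces from generate.py for arbitrary
# instructions. The instruct() helper and its defaults are illustrative.
import torch
from peft import PeftModel
from transformers import LLaMAForCausalLM, LLaMATokenizer  # zphang's fork

tokenizer = LLaMATokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LLaMAForCausalLM.from_pretrained(
    "decapoda-research/llama-7b-hf",
    load_in_8bit=True,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, "tloen/alpaca-lora-7b")
model.eval()

TEMPLATE = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{input}
### Response:"""


def instruct(instruction: str, input_text: str = "", max_new_tokens: int = 128) -> str:
    prompt = TEMPLATE.format(instruction=instruction, input=input_text)
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].to(model.device)
    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    return tokenizer.decode(output.sequences[0])


print(instruct("Write a poem about the following topic.", "Cars"))
```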
