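"""convert_hf-7b_to_llama-pth.py

Convert a LLaMA-7B checkpoint from the HuggingFace transformers format
back into the original single-shard `consolidated.00.pth` format, and
copy the SentencePiece tokenizer file alongside it.
"""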
import argparse
import os
import shutil

import torch
from tqdm import tqdm
from transformers import LlamaConfig, LlamaForCausalLM


def convert_model(input_model_path, output_model_path, model_size):
    if model_size != '7B':
        raise ValueError(f"Unsupported model size: {model_size}")

    # Load the HuggingFace checkpoint in fp16.
    hf_config = LlamaConfig.from_pretrained(input_model_path)
    hf_model = LlamaForCausalLM.from_pretrained(
        input_model_path, config=hf_config, torch_dtype=torch.float16
    )

    n_layers = hf_config.num_hidden_layers
    n_heads = hf_config.num_attention_heads
    dim = hf_config.hidden_size

    # Undo the sliced-rotary permutation that the HF conversion applies
    # to the q/k projection weights.
    def permute(w):
        return w.view(n_heads, 2, dim // n_heads // 2, dim).transpose(1, 2).reshape(dim, dim)
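    # Sketch of why this is the inverse (assuming the HF checkpoint was
    # produced by transformers' convert_llama_weights_to_hf.py): that
    # script maps an original weight w to
    #   w.view(n_heads, dim // n_heads // 2, 2, dim).transpose(1, 2)
    # so viewing the HF weight as (n_heads, 2, dim // n_heads // 2, dim)
    # and swapping axes 1 and 2 again exactly recovers the layout the
    # original llama attention code expects.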

    # Map non-layer weights to the original llama parameter names.
    hf_state_dict = hf_model.state_dict()
    pth_state_dict = {}

    # Possibly dangerous: drop the last row of the embedding and output
    # matrices, shrinking the vocabulary by one token.
    pth_state_dict["tok_embeddings.weight"] = hf_state_dict["model.embed_tokens.weight"][:-1, :]
    pth_state_dict["norm.weight"] = hf_state_dict["model.norm.weight"]
    pth_state_dict["output.weight"] = hf_state_dict["lm_head.weight"][:-1, :]

    # Per-layer mapping: attention projections (q/k un-permuted),
    # feed-forward weights, and the two RMSNorm weights.
    for layer_i in tqdm(range(n_layers)):
        pth_state_dict.update({
            f"layers.{layer_i}.attention.wq.weight": permute(
                hf_state_dict[f"model.layers.{layer_i}.self_attn.q_proj.weight"]
            ),
            f"layers.{layer_i}.attention.wk.weight": permute(
                hf_state_dict[f"model.layers.{layer_i}.self_attn.k_proj.weight"]
            ),
            f"layers.{layer_i}.attention.wv.weight": hf_state_dict[f"model.layers.{layer_i}.self_attn.v_proj.weight"],
            f"layers.{layer_i}.attention.wo.weight": hf_state_dict[f"model.layers.{layer_i}.self_attn.o_proj.weight"],
            f"layers.{layer_i}.feed_forward.w1.weight": hf_state_dict[f"model.layers.{layer_i}.mlp.gate_proj.weight"],
            f"layers.{layer_i}.feed_forward.w2.weight": hf_state_dict[f"model.layers.{layer_i}.mlp.down_proj.weight"],
            f"layers.{layer_i}.feed_forward.w3.weight": hf_state_dict[f"model.layers.{layer_i}.mlp.up_proj.weight"],
            f"layers.{layer_i}.attention_norm.weight": hf_state_dict[f"model.layers.{layer_i}.input_layernorm.weight"],
            f"layers.{layer_i}.ffn_norm.weight": hf_state_dict[f"model.layers.{layer_i}.post_attention_layernorm.weight"],
        })

    # The original format also stores the rotary inverse frequencies.
    for layer_i in range(n_layers):
        pth_state_dict[f"layers.{layer_i}.attention.inner_attention.rope.freqs"] = \
            hf_state_dict[f"model.layers.{layer_i}.self_attn.rotary_emb.inv_freq"]

    # Save everything as a single shard, as expected for the 7B model.
    torch.save(pth_state_dict, output_model_path)
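    # Optional sanity check (a minimal sketch, not part of the original
    # script): reload the saved file and confirm a couple of shapes.
    #   sd = torch.load(output_model_path, map_location="cpu")
    #   assert sd["norm.weight"].shape == (dim,)
    #   assert sd["tok_embeddings.weight"].shape[1] == dim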


def convert_tokenizer(input_tokenizer_path, output_tokenizer_path):
    # The original llama code only needs the raw SentencePiece model file.
    shutil.copyfile(
        os.path.join(input_tokenizer_path, "tokenizer.model"),
        os.path.join(output_tokenizer_path, "tokenizer.model"),
    )


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--hf_model_path",
        type=str,
        help="Path to the HuggingFace model",
    )
    parser.add_argument(
        "--hf_tokenizer_path",
        type=str,
        help="Path to the HuggingFace tokenizer",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="tmp",
        help="Path to the output directory",
    )
    args = parser.parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    convert_model(args.hf_model_path, os.path.join(args.output_dir, "consolidated.00.pth"), "7B")
    convert_tokenizer(args.hf_tokenizer_path, args.output_dir)
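
# Example invocation (hypothetical paths, assuming an HF-format LLaMA-7B
# checkpoint whose tokenizer lives in the same directory):
#
#   python convert_hf-7b_to_llama-pth.py \
#       --hf_model_path ./llama-7b-hf \
#       --hf_tokenizer_path ./llama-7b-hf \
#       --output_dir ./llama-7b-pth
#
# Afterwards ./llama-7b-pth contains consolidated.00.pth and
# tokenizer.model, the two files the original llama loading code expects
# for a single-shard 7B model.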