padding 50257 -> 50304 vocab_size, the nearest multiple of 64. The biggest-deal smallest optimization I've made in the recent past: about 25% faster. This is because the last layer is a major latency bottleneck, consuming about 40% of latency due to its very high channel count.
karpathy committed Feb 4, 2023
1 parent b3c17c6 commit 77e7e04
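
The padding itself is just ceiling-rounding to the next multiple of 64. A minimal sketch of that arithmetic (the helper name pad_to_multiple is illustrative, not part of this commit):

    def pad_to_multiple(n: int, multiple: int = 64) -> int:
        # round n up to the nearest multiple via ceiling division
        return ((n + multiple - 1) // multiple) * multiple

    assert pad_to_multiple(50257) == 50304  # GPT-2 vocab, padded for efficiency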
Showing 3 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions bench.py
@@ -43,8 +43,8 @@ def get_batch(split):
         return x, y
 else:
     # alternatively, if fixed data is desired to not care about data loading
-    x = torch.randint(50257, (batch_size, block_size), device=device)
-    y = torch.randint(50257, (batch_size, block_size), device=device)
+    x = torch.randint(50304, (batch_size, block_size), device=device)
+    y = torch.randint(50304, (batch_size, block_size), device=device)
     get_batch = lambda split: (x, y)
 
 # model init
2 changes: 1 addition & 1 deletion model.py
@@ -115,7 +115,7 @@ def forward(self, x):
 @dataclass
 class GPTConfig:
     block_size: int = 1024
-    vocab_size: int = 50257
+    vocab_size: int = 50304 # GPT-2 vocab_size of 50257, padded up to nearest multiple of 64 for efficiency
     n_layer: int = 12
     n_head: int = 12
     n_embd: int = 768
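
Why a multiple of 64 helps: the lm_head projection maps n_embd=768 activations to vocab_size logits for every token, and GPU GEMM kernels (tensor cores in particular) run best when matrix dimensions align to such multiples. A rough way to check the effect on your own hardware (a standalone sketch, not part of the commit; assumes a CUDA device is available):

    import time
    import torch

    def time_projection(vocab_size: int, iters: int = 100) -> float:
        # time an lm_head-style matmul: (tokens, n_embd) @ (n_embd, vocab_size)
        x = torch.randn(8192, 768, device='cuda', dtype=torch.float16)
        w = torch.randn(768, vocab_size, device='cuda', dtype=torch.float16)
        for _ in range(10):  # warmup
            _ = x @ w
        torch.cuda.synchronize()
        t0 = time.time()
        for _ in range(iters):
            _ = x @ w
        torch.cuda.synchronize()
        return (time.time() - t0) / iters

    print(time_projection(50257))  # unpadded: misaligned for tensor cores
    print(time_projection(50304))  # padded to a multiple of 64: typically faster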
4 changes: 2 additions & 2 deletions train.py
@@ -128,8 +128,8 @@ def get_batch(split):
     vocab_size = meta['vocab_size']
     print(f"vocab_size = {vocab_size} (from {meta_path})")
 else:
-    print(f"vocab_size not found in {meta_path}, using GPT-2 default of 50257")
-    vocab_size = 50257
+    print(f"vocab_size not found in {meta_path}, using GPT-2 default of 50257 (rounded up to 50304 for efficiency)")
+    vocab_size = 50304
 
 # model init
 model_args = dict(n_layer=n_layer, n_head=n_head, n_embd=n_embd, block_size=block_size,
