tune the hyperparams a bit, in configs
karpathy committed Feb 5, 2023
1 parent ab0718a commit fce706c
Showing 2 changed files with 15 additions and 12 deletions.
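
A note on how these configs take effect: train.py defines its defaults and then execs configurator.py, which in turn execs any config file named on the command line over train.py's globals, so every assignment below overrides a default. A simplified sketch of that mechanism (the real configurator.py also parses --key=val overrides with ast.literal_eval):

import sys

for arg in sys.argv[1:]:
    if '=' not in arg:
        # a bare argument is assumed to be a config file; running it
        # over the caller's globals overrides the defaults defined there
        exec(open(arg).read())

With that in place, the finetuning run from the README is just: python train.py config/finetune_shakespeare.py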
config/finetune_shakespeare.py (14 additions, 11 deletions)

@@ -1,22 +1,25 @@
 import time
 
 out_dir = 'out-shakespeare'
-eval_interval = 200
+eval_interval = 5
 eval_iters = 40
 wandb_log = False # feel free to turn on
 wandb_project = 'shakespeare'
 wandb_run_name = 'ft-' + str(time.time())
-compile = False # takes too little time to finetune, not worth it
-
-# save a nice and overfit checkpoint that
-# will only speak Shakespeare and forgets
-# everything else about the world #dark
-always_save_checkpoint = True
 
 dataset = 'shakespeare'
-init_from = 'gpt2-xl'
+init_from = 'gpt2-xl' # this is the largest GPT-2 model
+
+# only save checkpoints if the validation loss improves
+always_save_checkpoint = False
+
+# the number of examples per iter:
+# 1 batch_size * 32 grad_accum * 1024 tokens = 32,768 tokens/iter
+# shakespeare has 301,966 tokens, so 1 epoch ~= 9.2 iters
 batch_size = 1
-block_size = 512
-learning_rate = 1e-5
-max_iters = 1000
+gradient_accumulation_steps = 32
+max_iters = 20
+
+# finetune at constant LR
+learning_rate = 3e-5
 decay_lr = False
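
A quick sanity check of the tokens-per-iteration arithmetic in the new comments (a standalone sketch; block_size = 1024 is the train.py default, which this config now inherits after dropping its own block_size = 512):

batch_size = 1
gradient_accumulation_steps = 32
block_size = 1024  # train.py default, no longer overridden by this config

tokens_per_iter = batch_size * gradient_accumulation_steps * block_size
print(tokens_per_iter)            # 32768
print(301_966 / tokens_per_iter)  # ~9.21, i.e. 1 epoch ~= 9.2 iters

At roughly 9.2 iterations per epoch, the new max_iters = 20 amounts to about two passes over the tiny Shakespeare dataset.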
config/train_shakespeare_char.py (1 addition, 1 deletion)

@@ -15,7 +15,7 @@

 dataset = 'shakespeare_char'
 batch_size = 64
-block_size = 256 # context of up to 128 previous characters
+block_size = 256 # context of up to 256 previous characters
 
 # baby GPT model :)
 n_layer = 6
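
For context on the corrected comment: block_size is the context window, i.e. how many previous tokens each prediction can attend to, so 256 here means up to 256 previous characters in the char-level setup. A minimal sketch of batch sampling under that limit, loosely modeled on nanoGPT's get_batch (assuming data is a 1-D tensor of token ids):

import torch

def get_batch(data, block_size=256, batch_size=64):
    # choose random offsets, then cut out block_size-long windows
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i + block_size] for i in ix])          # inputs
    y = torch.stack([data[i + 1:i + 1 + block_size] for i in ix])  # targets, shifted by one
    return x, y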
