Skip to content

Commit

Permalink
minor args re-arranging and removing some spurious ones like wandb entity ty @tcapelle
Browse files Browse the repository at this point in the history
  • Loading branch information
karpathy committed Jan 5, 2023
1 parent 529c967 commit 9629093
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
2 changes: 1 addition & 1 deletion sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

# -----------------------------------------------------------------------------
out_dir = 'out'
device = 'cuda:2'
device = 'cuda'
compile = False
start = "\n" # or "<|endoftext|>" or whatever you like
num_samples = 10 # number of samples to draw
Expand Down
15 changes: 7 additions & 8 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"""

import os
import sys
import time
import math

Expand All @@ -31,34 +30,34 @@
eval_iters = 200
eval_only = False # if True, script exits right after the first eval
always_save_checkpoint = True # if True, always save a checkpoint after each eval
init_from = 'scratch' # 'scratch' or 'resume' or 'gpt2*'
# wandb logging
wandb_log = False # disabled by default
wandb_entity = 'karpathy'
wandb_project = 'owt'
wandb_run_name = 'gpt2' # 'run' + str(time.time())
# data
dataset = 'openwebtext'
batch_size = 12
block_size = 1024
# model
device = 'cuda:0'
init_from = 'scratch' # 'scratch' or 'resume' or 'gpt2*'
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
n_layer = 12
n_head = 12
n_embd = 768
dropout = 0.0 # for pretraining 0 is good, for finetuning try 0.1+
# adamw optimizer
learning_rate = 6e-4 # max learning rate
max_iters = 400000 # total number of training iterations
max_iters = 600000 # total number of training iterations
weight_decay = 1e-2
betas = (0.9, 0.95)
# learning rate decay settings
decay_lr = True # whether to decay the learning rate
warmup_iters = 2000 # how many steps to warm up for
lr_decay_iters = 400000 # should be ~= max_iters per Chinchilla
lr_decay_iters = 600000 # should be ~= max_iters per Chinchilla
min_lr = 6e-5 # minimum learning rate, should be ~= learning_rate/10 per Chinchilla
# DDP settings
backend = 'nccl' # 'nccl', 'gloo', etc.
# system
device = 'cuda'
compile = True # use PyTorch 2.0 to compile the model to be faster
# -----------------------------------------------------------------------------
exec(open('configurator.py').read()) # overrides from command line or config file
Expand Down Expand Up @@ -181,7 +180,7 @@ def get_lr(iter):

# logging
if wandb_log and gpu_id == 0:
wandb.init(project=wandb_project, entity=wandb_entity, name=wandb_run_name)
wandb.init(project=wandb_project, name=wandb_run_name)
wandb.config = {
"batch_size": batch_size,
"block_size": block_size,
Expand Down

0 comments on commit 9629093

Please sign in to comment.