small usability tweaks to bench

vertinski · Feb 2, 2023 · d01863e
1 parent d995c22
commit d01863e
Showing 1 changed file with 8 additions and 8 deletions.
diff --git a/bench.py b/bench.py
@@ -9,13 +9,15 @@
 from model import GPTConfig, GPT
 
 # -----------------------------------------------------------------------------
-batch_size = 8
+batch_size = 12
 block_size = 1024
-bias = True
+bias = False
+real_data = True
 seed = 1337
 device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
 dtype = 'bfloat16' # 'float32' or 'bfloat16' or 'float16'
 compile = True # use PyTorch 2.0 to compile the model to be faster
+profile = False # use pytorch profiler, or just simple benchmarking?
 exec(open('configurator.py').read()) # overrides from command line or config file
 # -----------------------------------------------------------------------------
 
@@ -28,7 +30,6 @@
 ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)
 
 # data loading init
-real_data = True
 if real_data:
     dataset = 'openwebtext'
     data_dir = os.path.join('data', dataset)
@@ -62,7 +63,6 @@ def get_batch(split):
     print("Compiling model...")
     model = torch.compile(model) # pytorch 2.0
 
-profile = False # use pytorch profiler, or just simple benchmarking?
 if profile:
     # useful docs on pytorch profiler:
     # - tutorial https://pytorch.org/tutorials/intermediate/tensorboard_profiler_tutorial.html
@@ -73,10 +73,10 @@ def get_batch(split):
         activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA],
         schedule=torch.profiler.schedule(wait=wait, warmup=warmup, active=active, repeat=1),
         on_trace_ready=torch.profiler.tensorboard_trace_handler('./bench_log'),
-        record_shapes=True,
-        profile_memory=True,
-        with_stack=True, # incurs an additional overhead, disable if not needed
-        with_flops=True,
+        record_shapes=False,
+        profile_memory=False,
+        with_stack=False, # incurs an additional overhead, disable if not needed
+        with_flops=False,
         with_modules=False, # only for torchscript models atm
     ) as prof: