[Speech models/PyT] Update perf timers and cuda syncs
alancucki authored and nv-kkudrynski committed Feb 15, 2023
1 parent afea561 commit 5146a68
Showing 10 changed files with 54 additions and 29 deletions.
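The change running through all ten files addresses the same pitfall: CUDA kernel launches are asynchronous, so a host-side timestamp taken without a device barrier measures how quickly work was queued, not how long it took to run. A minimal sketch of the problem and the fix (illustrative only, not code from this commit):

import time
import torch

x = torch.randn(4096, 4096, device='cuda')

t0 = time.time()
y = x @ x                     # returns as soon as the kernel is queued
naive = time.time() - t0      # mostly measures launch overhead

t0 = time.time()
y = x @ x
torch.cuda.synchronize()      # block until all queued GPU work completes
synced = time.time() - t0     # now covers the actual matmul

The timers also switch from time.perf_counter() to time.time(), apparently for consistency with the training scripts, which already use time.time(); once the barriers are in place, either clock has ample resolution for multi-millisecond GPU workloads.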
6 changes: 3 additions & 3 deletions PyTorch/SpeechRecognition/Jasper/inference.py
@@ -324,7 +324,7 @@ def main():
            feats, feat_lens = feat_proc(audio, audio_lens)

            sync()
-           t1 = time.perf_counter()
+           t1 = time.time()

            if args.amp:
                feats = feats.half()
@@ -340,7 +340,7 @@ def main():
            preds = greedy_decoder(log_probs)

            sync()
-           t2 = time.perf_counter()
+           t2 = time.time()

            # burn-in period; wait for a new loader due to num_workers
            if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
@@ -358,7 +358,7 @@ def main():
                break

            sync()
-           t0 = time.perf_counter()
+           t0 = time.time()

    # communicate the results
    if args.transcribe_wav:
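The sync() helper that brackets each timed region above is defined outside the changed lines, so only its call sites appear in the diff. A plausible shape for such a helper, assuming it is meant to be a no-op when running on CPU (a hypothetical sketch, not the repository's definition):

import torch

def make_sync(device: torch.device):
    # Full device barrier on CUDA; do nothing on CPU, where the host
    # clock already reflects all completed work.
    if device.type == 'cuda':
        return torch.cuda.synchronize
    return lambda: None

sync = make_sync(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))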
9 changes: 5 additions & 4 deletions PyTorch/SpeechRecognition/Jasper/train.py
@@ -142,6 +142,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
            continue

    model.eval()
+   torch.cuda.synchronize()
    start_time = time.time()
    agg = {'losses': [], 'preds': [], 'txts': []}

@@ -166,6 +167,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
            agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)

    wer, loss = process_evaluation_epoch(agg)
+   torch.cuda.synchronize()
    log(() if epoch is None else (epoch,),
        step, subset, {'loss': loss, 'wer': 100.0 * wer,
                       'took': time.time() - start_time})
@@ -379,11 +381,11 @@ def main():
        if multi_gpu and not use_dali:
            train_loader.sampler.set_epoch(epoch)

+       torch.cuda.synchronize()
+       epoch_start_time = time.time()
        epoch_utts = 0
        epoch_loss = 0
        accumulated_batches = 0
-       epoch_start_time = time.time()
-       epoch_eval_time = 0

        for batch in train_loader:

@@ -461,7 +463,6 @@
                step_start_time = time.time()

                if step % args.eval_frequency == 0:
-                   tik = time.time()
                    wer = evaluate(epoch, step, val_loader, val_feat_proc,
                                   symbols, model, ema_model, ctc_loss,
                                   greedy_decoder, args.amp, use_dali)
@@ -470,7 +471,6 @@ def main():
                        checkpointer.save(model, ema_model, optimizer, scaler,
                                          epoch, step, best_wer, is_best=True)
                        best_wer = wer
-                   epoch_eval_time += time.time() - tik

                step += 1
                accumulated_batches = 0
@@ -481,6 +481,7 @@ def main():
            if not use_dali and step > steps_per_epoch * epoch:
                break

+       torch.cuda.synchronize()
        epoch_time = time.time() - epoch_start_time
        epoch_loss /= steps_per_epoch
        log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,
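The training loop now brackets every measured region the same way: barrier, timestamp, work, barrier, timestamp. That inline pattern can be distilled into a small reusable guard (a sketch under the same assumptions, not an API from this repository):

import time
from contextlib import contextmanager

import torch

@contextmanager
def wall_timer(results: dict, key: str, cuda: bool = True):
    # Drain queued kernels before and after the block so the host-side
    # wall-clock delta covers finished GPU work.
    if cuda:
        torch.cuda.synchronize()
    start = time.time()
    yield
    if cuda:
        torch.cuda.synchronize()
    results[key] = time.time() - start

Usage would mirror the epoch timing above, e.g. times = {}; with wall_timer(times, 'epoch'): run_one_epoch(...), with times['epoch'] then feeding the throughput log (run_one_epoch is a stand-in name).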
6 changes: 3 additions & 3 deletions PyTorch/SpeechRecognition/QuartzNet/inference.py
@@ -334,7 +334,7 @@ def main():
            feats, feat_lens = feat_proc(audio, audio_lens)

            sync()
-           t1 = time.perf_counter()
+           t1 = time.time()

            if args.amp:
                feats = feats.half()
@@ -347,7 +347,7 @@ def main():
            preds = greedy_decoder(log_probs)

            sync()
-           t2 = time.perf_counter()
+           t2 = time.time()

            # burn-in period; wait for a new loader due to num_workers
            if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
@@ -365,7 +365,7 @@ def main():
                break

            sync()
-           t0 = time.perf_counter()
+           t0 = time.time()

    # communicate the results
    if args.transcribe_wav:
9 changes: 5 additions & 4 deletions PyTorch/SpeechRecognition/QuartzNet/train.py
@@ -163,6 +163,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
            continue

    model.eval()
+   torch.cuda.synchronize()
    start_time = time.time()
    agg = {'losses': [], 'preds': [], 'txts': []}

@@ -187,6 +188,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
            agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)

    wer, loss = process_evaluation_epoch(agg)
+   torch.cuda.synchronize()
    log(() if epoch is None else (epoch,),
        step, subset, {'loss': loss, 'wer': 100.0 * wer,
                       'took': time.time() - start_time})
@@ -410,11 +412,11 @@ def main():
        if multi_gpu and not use_dali:
            train_loader.sampler.set_epoch(epoch)

+       torch.cuda.synchronize()
+       epoch_start_time = time.time()
        epoch_utts = 0
        epoch_loss = 0
        accumulated_batches = 0
-       epoch_start_time = time.time()
-       epoch_eval_time = 0

        for batch in train_loader:

@@ -493,7 +495,6 @@
                step_start_time = time.time()

                if step % args.eval_frequency == 0:
-                   tik = time.time()
                    wer = evaluate(epoch, step, val_loader, val_feat_proc,
                                   symbols, model, ema_model, ctc_loss,
                                   greedy_decoder, args.amp, use_dali)
@@ -502,7 +503,6 @@ def main():
                        checkpointer.save(model, ema_model, optimizer, scaler,
                                          epoch, step, best_wer, is_best=True)
                        best_wer = wer
-                   epoch_eval_time += time.time() - tik

                step += 1
                accumulated_batches = 0
@@ -513,6 +513,7 @@ def main():
            if not use_dali and step > steps_per_epoch * epoch:
                break

+       torch.cuda.synchronize()
        epoch_time = time.time() - epoch_start_time
        epoch_loss /= steps_per_epoch
        log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,
14 changes: 12 additions & 2 deletions PyTorch/SpeechRecognition/wav2vec2/common/metrics.py
@@ -75,7 +75,8 @@ def __init__(self, scopes=('train', 'train_avg'),
                 benchmark_epochs=0,
                 reduce_mean=(),
                 reduce_last=(),
-                group_tb_entries=False):
+                group_tb_entries=False,
+                cuda=True):
        """
        Args:
            scopes: possible scopes of metrics accumulation
@@ -100,9 +101,10 @@ def __init__(self, scopes=('train', 'train_avg'),
        self.benchmark_keys = benchmark_keys
        self.scopes = scopes
        self.group_tb_entries = group_tb_entries
+       self.cuda = cuda

    def log_scalar(self, key, val, accum_reduction=None):
-       """ Main primitive for logging partial metrics from single batch.
+       """Main primitive for logging partial metrics from single batch.

        NOTE: Assumption: `log_scalar` cannot be called with different
        `accum_reduction` for the same `key`. This results in undefined behavior
@@ -197,9 +199,13 @@ def start_iter(self, iter):
        self._start_accumulating(iter, True, 'train')

    def start_epoch(self, epoch):
+       if self.cuda:
+           torch.cuda.synchronize()
        self._start_accumulating(epoch, True, 'train_avg')

    def start_val(self):
+       if self.cuda:
+           torch.cuda.synchronize()
        self._start_accumulating(None, True, 'val')

    def finish_iter(self):
@@ -209,6 +215,8 @@ def finish_logging_interval(self):
        self._finish_accumulating('train')

    def finish_epoch(self):
+       if self.cuda:
+           torch.cuda.synchronize()
        self._accumulate_time('train_avg')
        self._finish_accumulating('train_avg')

@@ -220,6 +228,8 @@ def finish_epoch(self):
                metr[k].pop(0)

    def finish_val(self, scope='val'):
+       if self.cuda:
+           torch.cuda.synchronize()
        self._accumulate_time(scope)
        self._finish_accumulating(scope)
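Gating each barrier on the new cuda flag is what keeps the aggregator usable off-GPU: torch.cuda.synchronize() raises on hosts without a working CUDA setup, and even where it succeeds it would be pointless overhead. The guard reduced to its essentials (illustrative sketch):

import time
import torch

def barrier_then_time(cuda: bool) -> float:
    # On GPU, wait for queued kernels so the timestamp reflects finished
    # work; on CPU there is nothing to wait for.
    if cuda:
        torch.cuda.synchronize()
    return time.time()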
6 changes: 3 additions & 3 deletions PyTorch/SpeechRecognition/wav2vec2/inference.py
@@ -249,7 +249,7 @@ def main():
            batch = utils.move_to_cuda(batch)

            sync()
-           t1 = time.perf_counter()
+           t1 = time.time()

            if args.fp16:
                batch = fp_convert_batch(batch, 'fp16')
@@ -266,7 +266,7 @@ def main():
            preds = logp.argmax(dim=-1, keepdim=False).int()

            sync()
-           t2 = time.perf_counter()
+           t2 = time.time()

            # burn-in period; wait for a new loader due to num_workers
            if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
@@ -292,7 +292,7 @@ def main():
                break

            sync()
-           t0 = time.perf_counter()
+           t0 = time.time()

    tdict = target_dictionary
    agg['preds'] = [pred.replace(tdict[tdict.nspecial], ' ')
7 changes: 4 additions & 3 deletions PyTorch/SpeechRecognition/wav2vec2/train.py
@@ -150,9 +150,10 @@ def main():
        Metrics = W2v2Metrics
    criterion = Wav2vecCriterion(args)

-   metrics = Metrics(args.benchmark_epochs_num)
-   val_metrics = Metrics(args.benchmark_epochs_num, scopes=['val'])
-   val_ema_metrics = Metrics(args.benchmark_epochs_num, scopes=['val_ema'])
+   kw = {'benchmark_epochs': args.benchmark_epochs_num, 'cuda': not args.cpu}
+   metrics = Metrics(**kw)
+   val_metrics = Metrics(scopes=['val'], **kw)
+   val_ema_metrics = Metrics(scopes=['val_ema'], **kw)

    init_logger(args.output_dir, args.log_file, args.ema)
    logger.log_parameters(vars(args), tb_subset='train')
10 changes: 6 additions & 4 deletions PyTorch/SpeechRecognition/wav2vec2/wav2vec2/logging.py
@@ -111,7 +111,7 @@ def init_infer_metadata():

class W2v2Metrics(MetricsAggregator):

-   def __init__(self, benchmark_epochs, scopes=('train', 'train_avg')):
+   def __init__(self, benchmark_epochs, scopes=('train', 'train_avg'), cuda=True):
        super().__init__(
            benchmark_epochs=benchmark_epochs,
            benchmark_keys=('took', 'accuracy', 'loss', 'ntokens/s'),
@@ -120,7 +120,8 @@ def __init__(self, benchmark_epochs, scopes=('train', 'train_avg')):
                'code_perplexity',
                'took', 'loss_scale', 'lr', 'ntokens/s'),
            reduce_mean=('temp', 'prob_perplexity', 'code_perplexity'),
-           reduce_last=('lr', 'loss_scale'))
+           reduce_last=('lr', 'loss_scale'),
+           cuda=cuda)

    def accumulate(self, scopes=None):
        if 'ignore' not in self.partials or self.partials['ignore'] == 0.0:
@@ -155,11 +156,12 @@ def __init__(
            'prob_perplexity', 'took', 'ntokens/s', 'uer',
            'wer', 'raw_wer'),
        reduce_mean=('temp', 'prob_perplexity', 'code_perplexity'),
-       reduce_last=('lr',)):
+       reduce_last=('lr',),
+       cuda=True):
        super().__init__(
            benchmark_epochs=benchmark_epochs, benchmark_keys=benchmark_keys,
            scopes=scopes, dllogger_keys=dllogger_keys,
-           reduce_mean=reduce_mean, reduce_last=reduce_last)
+           reduce_mean=reduce_mean, reduce_last=reduce_last, cuda=cuda)

    def accumulate(self, scopes=None):
        if 'ignore' not in self.partials or self.partials['ignore'] == 0.0:
11 changes: 10 additions & 1 deletion PyTorch/SpeechSynthesis/HiFiGAN/hifigan/logging.py
@@ -123,7 +123,7 @@ class Metrics(dict):
    def __init__(self, scopes=['train', 'train_avg'],
                 dll_keys=['loss_gen', 'loss_discrim', 'loss_mel',
                           'frames/s', 'took', 'lrate_gen', 'lrate_discrim'],
-                benchmark_epochs=0):
+                benchmark_epochs=0, cuda=True):
        super().__init__()

        self.dll_keys = dll_keys
@@ -133,6 +133,7 @@ def __init__(self, scopes=['train', 'train_avg'],
        self.benchmark_epochs = benchmark_epochs
        if benchmark_epochs > 0:
            self.metrics['train_benchmark'] = defaultdict(list)
+       self.cuda = cuda

    def __setitem__(self, key, val):
        if type(val) is dict:
@@ -182,15 +183,21 @@ def start_iter(self, iter, start_timer=True):
        self.start_accumulating(iter, start_timer, 'train')

    def start_epoch(self, epoch, start_timer=True):
+       if self.cuda:
+           torch.cuda.synchronize()
        self.start_accumulating(epoch, start_timer, 'train_avg')

    def start_val(self, start_timer=True):
+       if self.cuda:
+           torch.cuda.synchronize()
        self.start_accumulating(None, start_timer, 'val')

    def finish_iter(self, stop_timer=True):
        self.finish_accumulating(stop_timer, 'train')

    def finish_epoch(self, stop_timer=True):
+       if self.cuda:
+           torch.cuda.synchronize()
        self.finish_accumulating(stop_timer, 'train_avg')

        metr = self.metrics['train_benchmark']
@@ -201,6 +208,8 @@ def finish_epoch(self, stop_timer=True):
                metr[k].pop(0)

    def finish_val(self, stop_timer=True):
+       if self.cuda:
+           torch.cuda.synchronize()
        self.finish_accumulating(stop_timer, 'val')

    def get_metrics(self, scope='train', target='dll'):
5 changes: 3 additions & 2 deletions PyTorch/SpeechSynthesis/HiFiGAN/train.py
@@ -237,8 +237,9 @@ def main():
        init_distributed(args, args.world_size, args.local_rank)

    metrics = Metrics(scopes=['train', 'train_avg'],
-                     benchmark_epochs=args.benchmark_epochs_num)
-   val_metrics = Metrics(scopes=['val'])
+                     benchmark_epochs=args.benchmark_epochs_num,
+                     cuda=args.cuda)
+   val_metrics = Metrics(scopes=['val'], cuda=args.cuda)
    init_logger(args.output, args.log_file, args.ema_decay)
    logger.parameters(vars(args), tb_subset='train')
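As an aside, and not something this commit adopts: when only the GPU span matters, CUDA events can time it without stalling the host at the start of the region; a single synchronize is still needed before reading the result. A standard sketch:

import torch

x = torch.randn(2048, 2048, device='cuda')
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()
y = x @ x                     # the GPU work being timed
end.record()

torch.cuda.synchronize()      # ensure both events have completed
elapsed_ms = start.elapsed_time(end)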
