Skip to content

Commit 5146a68

Browse files
alancucki authored and nv-kkudrynski committed
[Speech models/PyT] Update perf timers and cuda syncs
1 parent afea561 commit 5146a68

File tree

10 files changed

+54
-29
lines changed

10 files changed

+54
-29
lines changed

PyTorch/SpeechRecognition/Jasper/inference.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -324,7 +324,7 @@ def main():
324324
feats, feat_lens = feat_proc(audio, audio_lens)
325325

326326
sync()
327-
t1 = time.perf_counter()
327+
t1 = time.time()
328328

329329
if args.amp:
330330
feats = feats.half()
@@ -340,7 +340,7 @@ def main():
340340
preds = greedy_decoder(log_probs)
341341

342342
sync()
343-
t2 = time.perf_counter()
343+
t2 = time.time()
344344

345345
# burn-in period; wait for a new loader due to num_workers
346346
if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
@@ -358,7 +358,7 @@ def main():
358358
break
359359

360360
sync()
361-
t0 = time.perf_counter()
361+
t0 = time.time()
362362

363363
# communicate the results
364364
if args.transcribe_wav:

PyTorch/SpeechRecognition/Jasper/train.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
142142
continue
143143

144144
model.eval()
145+
torch.cuda.synchronize()
145146
start_time = time.time()
146147
agg = {'losses': [], 'preds': [], 'txts': []}
147148

@@ -166,6 +167,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
166167
agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)
167168

168169
wer, loss = process_evaluation_epoch(agg)
170+
torch.cuda.synchronize()
169171
log(() if epoch is None else (epoch,),
170172
step, subset, {'loss': loss, 'wer': 100.0 * wer,
171173
'took': time.time() - start_time})
@@ -379,11 +381,11 @@ def main():
379381
if multi_gpu and not use_dali:
380382
train_loader.sampler.set_epoch(epoch)
381383

384+
torch.cuda.synchronize()
385+
epoch_start_time = time.time()
382386
epoch_utts = 0
383387
epoch_loss = 0
384388
accumulated_batches = 0
385-
epoch_start_time = time.time()
386-
epoch_eval_time = 0
387389

388390
for batch in train_loader:
389391

@@ -461,7 +463,6 @@ def main():
461463
step_start_time = time.time()
462464

463465
if step % args.eval_frequency == 0:
464-
tik = time.time()
465466
wer = evaluate(epoch, step, val_loader, val_feat_proc,
466467
symbols, model, ema_model, ctc_loss,
467468
greedy_decoder, args.amp, use_dali)
@@ -470,7 +471,6 @@ def main():
470471
checkpointer.save(model, ema_model, optimizer, scaler,
471472
epoch, step, best_wer, is_best=True)
472473
best_wer = wer
473-
epoch_eval_time += time.time() - tik
474474

475475
step += 1
476476
accumulated_batches = 0
@@ -481,6 +481,7 @@ def main():
481481
if not use_dali and step > steps_per_epoch * epoch:
482482
break
483483

484+
torch.cuda.synchronize()
484485
epoch_time = time.time() - epoch_start_time
485486
epoch_loss /= steps_per_epoch
486487
log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,

PyTorch/SpeechRecognition/QuartzNet/inference.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -334,7 +334,7 @@ def main():
334334
feats, feat_lens = feat_proc(audio, audio_lens)
335335

336336
sync()
337-
t1 = time.perf_counter()
337+
t1 = time.time()
338338

339339
if args.amp:
340340
feats = feats.half()
@@ -347,7 +347,7 @@ def main():
347347
preds = greedy_decoder(log_probs)
348348

349349
sync()
350-
t2 = time.perf_counter()
350+
t2 = time.time()
351351

352352
# burn-in period; wait for a new loader due to num_workers
353353
if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
@@ -365,7 +365,7 @@ def main():
365365
break
366366

367367
sync()
368-
t0 = time.perf_counter()
368+
t0 = time.time()
369369

370370
# communicate the results
371371
if args.transcribe_wav:

PyTorch/SpeechRecognition/QuartzNet/train.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
163163
continue
164164

165165
model.eval()
166+
torch.cuda.synchronize()
166167
start_time = time.time()
167168
agg = {'losses': [], 'preds': [], 'txts': []}
168169

@@ -187,6 +188,7 @@ def evaluate(epoch, step, val_loader, val_feat_proc, labels, model,
187188
agg['txts'] += helpers.gather_transcripts([txt], [txt_lens], labels)
188189

189190
wer, loss = process_evaluation_epoch(agg)
191+
torch.cuda.synchronize()
190192
log(() if epoch is None else (epoch,),
191193
step, subset, {'loss': loss, 'wer': 100.0 * wer,
192194
'took': time.time() - start_time})
@@ -410,11 +412,11 @@ def main():
410412
if multi_gpu and not use_dali:
411413
train_loader.sampler.set_epoch(epoch)
412414

415+
torch.cuda.synchronize()
416+
epoch_start_time = time.time()
413417
epoch_utts = 0
414418
epoch_loss = 0
415419
accumulated_batches = 0
416-
epoch_start_time = time.time()
417-
epoch_eval_time = 0
418420

419421
for batch in train_loader:
420422

@@ -493,7 +495,6 @@ def main():
493495
step_start_time = time.time()
494496

495497
if step % args.eval_frequency == 0:
496-
tik = time.time()
497498
wer = evaluate(epoch, step, val_loader, val_feat_proc,
498499
symbols, model, ema_model, ctc_loss,
499500
greedy_decoder, args.amp, use_dali)
@@ -502,7 +503,6 @@ def main():
502503
checkpointer.save(model, ema_model, optimizer, scaler,
503504
epoch, step, best_wer, is_best=True)
504505
best_wer = wer
505-
epoch_eval_time += time.time() - tik
506506

507507
step += 1
508508
accumulated_batches = 0
@@ -513,6 +513,7 @@ def main():
513513
if not use_dali and step > steps_per_epoch * epoch:
514514
break
515515

516+
torch.cuda.synchronize()
516517
epoch_time = time.time() - epoch_start_time
517518
epoch_loss /= steps_per_epoch
518519
log((epoch,), None, 'train_avg', {'throughput': epoch_utts / epoch_time,

PyTorch/SpeechRecognition/wav2vec2/common/metrics.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,8 @@ def __init__(self, scopes=('train', 'train_avg'),
7575
benchmark_epochs=0,
7676
reduce_mean=(),
7777
reduce_last=(),
78-
group_tb_entries=False):
78+
group_tb_entries=False,
79+
cuda=True):
7980
"""
8081
Args:
8182
scopes: possible scopes of metrics accumulation
@@ -100,9 +101,10 @@ def __init__(self, scopes=('train', 'train_avg'),
100101
self.benchmark_keys = benchmark_keys
101102
self.scopes = scopes
102103
self.group_tb_entries = group_tb_entries
104+
self.cuda = cuda
103105

104106
def log_scalar(self, key, val, accum_reduction=None):
105-
""" Main primitive for logging partial metrics from single batch.
107+
"""Main primitive for logging partial metrics from single batch.
106108
107109
NOTE: Assumption: `log_scalar` cannot be called with different
108110
`accum_reduction` for the same `key`. This results in undefined behavior
@@ -197,9 +199,13 @@ def start_iter(self, iter):
197199
self._start_accumulating(iter, True, 'train')
198200

199201
def start_epoch(self, epoch):
202+
if self.cuda:
203+
torch.cuda.synchronize()
200204
self._start_accumulating(epoch, True, 'train_avg')
201205

202206
def start_val(self):
207+
if self.cuda:
208+
torch.cuda.synchronize()
203209
self._start_accumulating(None, True, 'val')
204210

205211
def finish_iter(self):
@@ -209,6 +215,8 @@ def finish_logging_interval(self):
209215
self._finish_accumulating('train')
210216

211217
def finish_epoch(self):
218+
if self.cuda:
219+
torch.cuda.synchronize()
212220
self._accumulate_time('train_avg')
213221
self._finish_accumulating('train_avg')
214222

@@ -220,6 +228,8 @@ def finish_epoch(self):
220228
metr[k].pop(0)
221229

222230
def finish_val(self, scope='val'):
231+
if self.cuda:
232+
torch.cuda.synchronize()
223233
self._accumulate_time(scope)
224234
self._finish_accumulating(scope)
225235

PyTorch/SpeechRecognition/wav2vec2/inference.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -249,7 +249,7 @@ def main():
249249
batch = utils.move_to_cuda(batch)
250250

251251
sync()
252-
t1 = time.perf_counter()
252+
t1 = time.time()
253253

254254
if args.fp16:
255255
batch = fp_convert_batch(batch, 'fp16')
@@ -266,7 +266,7 @@ def main():
266266
preds = logp.argmax(dim=-1, keepdim=False).int()
267267

268268
sync()
269-
t2 = time.perf_counter()
269+
t2 = time.time()
270270

271271
# burn-in period; wait for a new loader due to num_workers
272272
if it >= 1 and (args.steps == 0 or it >= args.warmup_steps):
@@ -292,7 +292,7 @@ def main():
292292
break
293293

294294
sync()
295-
t0 = time.perf_counter()
295+
t0 = time.time()
296296

297297
tdict = target_dictionary
298298
agg['preds'] = [pred.replace(tdict[tdict.nspecial], ' ')

PyTorch/SpeechRecognition/wav2vec2/train.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -150,9 +150,10 @@ def main():
150150
Metrics = W2v2Metrics
151151
criterion = Wav2vecCriterion(args)
152152

153-
metrics = Metrics(args.benchmark_epochs_num)
154-
val_metrics = Metrics(args.benchmark_epochs_num, scopes=['val'])
155-
val_ema_metrics = Metrics(args.benchmark_epochs_num, scopes=['val_ema'])
153+
kw = {'benchmark_epochs': args.benchmark_epochs_num, 'cuda': not args.cpu}
154+
metrics = Metrics(**kw)
155+
val_metrics = Metrics(scopes=['val'], **kw)
156+
val_ema_metrics = Metrics(scopes=['val_ema'], **kw)
156157

157158
init_logger(args.output_dir, args.log_file, args.ema)
158159
logger.log_parameters(vars(args), tb_subset='train')

PyTorch/SpeechRecognition/wav2vec2/wav2vec2/logging.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -111,7 +111,7 @@ def init_infer_metadata():
111111

112112
class W2v2Metrics(MetricsAggregator):
113113

114-
def __init__(self, benchmark_epochs, scopes=('train', 'train_avg')):
114+
def __init__(self, benchmark_epochs, scopes=('train', 'train_avg'), cuda=True):
115115
super().__init__(
116116
benchmark_epochs=benchmark_epochs,
117117
benchmark_keys=('took', 'accuracy', 'loss', 'ntokens/s'),
@@ -120,7 +120,8 @@ def __init__(self, benchmark_epochs, scopes=('train', 'train_avg')):
120120
'code_perplexity',
121121
'took', 'loss_scale', 'lr', 'ntokens/s'),
122122
reduce_mean=('temp', 'prob_perplexity', 'code_perplexity'),
123-
reduce_last=('lr', 'loss_scale'))
123+
reduce_last=('lr', 'loss_scale'),
124+
cuda=cuda)
124125

125126
def accumulate(self, scopes=None):
126127
if 'ignore' not in self.partials or self.partials['ignore'] == 0.0:
@@ -155,11 +156,12 @@ def __init__(
155156
'prob_perplexity', 'took', 'ntokens/s', 'uer',
156157
'wer', 'raw_wer'),
157158
reduce_mean=('temp', 'prob_perplexity', 'code_perplexity'),
158-
reduce_last=('lr',)):
159+
reduce_last=('lr',),
160+
cuda=True):
159161
super().__init__(
160162
benchmark_epochs=benchmark_epochs, benchmark_keys=benchmark_keys,
161163
scopes=scopes, dllogger_keys=dllogger_keys,
162-
reduce_mean=reduce_mean, reduce_last=reduce_last)
164+
reduce_mean=reduce_mean, reduce_last=reduce_last, cuda=cuda)
163165

164166
def accumulate(self, scopes=None):
165167
if 'ignore' not in self.partials or self.partials['ignore'] == 0.0:

PyTorch/SpeechSynthesis/HiFiGAN/hifigan/logging.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,7 @@ class Metrics(dict):
123123
def __init__(self, scopes=['train', 'train_avg'],
124124
dll_keys=['loss_gen', 'loss_discrim', 'loss_mel',
125125
'frames/s', 'took', 'lrate_gen', 'lrate_discrim'],
126-
benchmark_epochs=0):
126+
benchmark_epochs=0, cuda=True):
127127
super().__init__()
128128

129129
self.dll_keys = dll_keys
@@ -133,6 +133,7 @@ def __init__(self, scopes=['train', 'train_avg'],
133133
self.benchmark_epochs = benchmark_epochs
134134
if benchmark_epochs > 0:
135135
self.metrics['train_benchmark'] = defaultdict(list)
136+
self.cuda = cuda
136137

137138
def __setitem__(self, key, val):
138139
if type(val) is dict:
@@ -182,15 +183,21 @@ def start_iter(self, iter, start_timer=True):
182183
self.start_accumulating(iter, start_timer, 'train')
183184

184185
def start_epoch(self, epoch, start_timer=True):
186+
if self.cuda:
187+
torch.cuda.synchronize()
185188
self.start_accumulating(epoch, start_timer, 'train_avg')
186189

187190
def start_val(self, start_timer=True):
191+
if self.cuda:
192+
torch.cuda.synchronize()
188193
self.start_accumulating(None, start_timer, 'val')
189194

190195
def finish_iter(self, stop_timer=True):
191196
self.finish_accumulating(stop_timer, 'train')
192197

193198
def finish_epoch(self, stop_timer=True):
199+
if self.cuda:
200+
torch.cuda.synchronize()
194201
self.finish_accumulating(stop_timer, 'train_avg')
195202

196203
metr = self.metrics['train_benchmark']
@@ -201,6 +208,8 @@ def finish_epoch(self, stop_timer=True):
201208
metr[k].pop(0)
202209

203210
def finish_val(self, stop_timer=True):
211+
if self.cuda:
212+
torch.cuda.synchronize()
204213
self.finish_accumulating(stop_timer, 'val')
205214

206215
def get_metrics(self, scope='train', target='dll'):

PyTorch/SpeechSynthesis/HiFiGAN/train.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -237,8 +237,9 @@ def main():
237237
init_distributed(args, args.world_size, args.local_rank)
238238

239239
metrics = Metrics(scopes=['train', 'train_avg'],
240-
benchmark_epochs=args.benchmark_epochs_num)
241-
val_metrics = Metrics(scopes=['val'])
240+
benchmark_epochs=args.benchmark_epochs_num,
241+
cuda=args.cuda)
242+
val_metrics = Metrics(scopes=['val'], cuda=args.cuda)
242243
init_logger(args.output, args.log_file, args.ema_decay)
243244
logger.parameters(vars(args), tb_subset='train')
244245

0 commit comments

Comments
 (0)