forked from coqui-ai/STT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplay.py
executable file
·129 lines (119 loc) · 4.52 KB
/
play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python
"""
Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and DeepSpeech CSV files
Use "python3 play.py -h" for help
"""
import os
import sys
import random
import argparse
from deepspeech_training.util.audio import LOADABLE_AUDIO_EXTENSIONS, AUDIO_TYPE_PCM, AUDIO_TYPE_WAV
from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source
from deepspeech_training.util.augmentations import parse_augmentations, apply_sample_augmentations, SampleAugmentation
def get_samples_in_play_order():
    """Yield samples from ``CLI_ARGS.source`` in the order they should be played.

    The source is treated as a single audio file when its extension is listed
    in ``LOADABLE_AUDIO_EXTENSIONS``; otherwise it is opened through
    ``samples_from_source``.

    Reads the module-level ``CLI_ARGS``:
      * ``number`` - stop after this many samples; a negative value never stops.
      * ``random`` - pick a random sample on every step (``start`` is ignored).
      * ``start``  - index of the first sample; negative values count from the
        end of the collection. Playback then proceeds sequentially and wraps
        around to the beginning.

    Exits the process with status 1 (message on stderr) when the collection is
    empty or when ``start`` lies outside the collection in either direction.
    """
    ext = os.path.splitext(CLI_ARGS.source)[1].lower()
    if ext in LOADABLE_AUDIO_EXTENSIONS:
        samples = SampleList([(CLI_ARGS.source, 0)], labeled=False)
    else:
        samples = samples_from_source(CLI_ARGS.source, buffering=0)
    played = 0
    index = CLI_ARGS.start
    while True:
        if 0 <= CLI_ARGS.number <= played:
            return
        num_samples = len(samples)
        if num_samples == 0:
            # Guard against random.randint(0, -1) and the modulo-by-zero below.
            print('No samples found in "{}"'.format(CLI_ARGS.source), file=sys.stderr)
            sys.exit(1)
        if CLI_ARGS.random:
            yield samples[random.randint(0, num_samples - 1)]
        elif -num_samples <= index < num_samples:
            # The modulo maps a negative (end-relative) index to its absolute
            # position and leaves an in-range non-negative index unchanged.
            yield samples[index % num_samples]
        else:
            # Only reachable on the first step: afterwards index is wrapped
            # into range, so reporting the requested start value is accurate.
            print("No sample with index {}".format(CLI_ARGS.start), file=sys.stderr)
            sys.exit(1)
        played += 1
        index = (index + 1) % num_samples
def play_collection():
    """Play (or pipe) the selected samples after applying the requested augmentations.

    Reads the module-level ``CLI_ARGS``:
      * ``augment`` - augmentation specifications handed to ``parse_augmentations``.
      * ``clock``   - clock value forwarded to ``apply_sample_augmentations``.
      * ``quiet``   - suppress the per-sample info lines.
      * ``pipe``    - write the first sample as WAV data to stdout and return
        instead of playing through ``simpleaudio``.

    All diagnostics are written to stderr so that stdout stays clean for the
    WAV bytes emitted in pipe mode.
    """
    augmentations = parse_augmentations(CLI_ARGS.augment)
    if any(not isinstance(a, SampleAugmentation) for a in augmentations):
        # Must not go to stdout: in --pipe mode stdout carries the WAV stream.
        print("Warning: Some of the augmentations cannot be simulated by this command.", file=sys.stderr)
    samples = get_samples_in_play_order()
    samples = apply_sample_augmentations(samples,
                                         audio_type=AUDIO_TYPE_PCM,
                                         augmentations=augmentations,
                                         process_ahead=0,
                                         clock=CLI_ARGS.clock)
    for sample in samples:
        if not CLI_ARGS.quiet:
            print('Sample "{}"'.format(sample.sample_id), file=sys.stderr)
            if isinstance(sample, LabeledSample):
                print('  "{}"'.format(sample.transcript), file=sys.stderr)
        if CLI_ARGS.pipe:
            # Convert to WAV and emit the raw bytes; only the first sample is piped.
            sample.change_audio_type(AUDIO_TYPE_WAV)
            sys.stdout.buffer.write(sample.audio.getvalue())
            return
        # simpleaudio is imported by the script entry point when --pipe is not set.
        wave_obj = simpleaudio.WaveObject(sample.audio,
                                          sample.audio_format.channels,
                                          sample.audio_format.width,
                                          sample.audio_format.rate)
        play_obj = wave_obj.play()
        # Block until playback finishes before moving on to the next sample.
        play_obj.wait_done()
def handle_args():
    """Build the command line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``source``, ``start``,
        ``number``, ``random``, ``augment``, ``clock``, ``pipe`` and ``quiet``.
    """
    parser = argparse.ArgumentParser(
        description="Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) "
        "and DeepSpeech CSV files"
    )
    parser.add_argument("source", help="Sample DB, CSV or WAV file to play samples from")
    parser.add_argument(
        "--start",
        type=int,
        default=0,
        help="Sample index to start at (negative numbers are relative to the end of the collection)",
    )
    parser.add_argument(
        "--number",
        type=int,
        default=-1,
        help="Number of samples to play (-1 for endless)",
    )
    parser.add_argument(
        "--random",
        action="store_true",
        help="If samples should be played in random order",
    )
    parser.add_argument(
        "--augment",
        action="append",
        help="Add an augmentation operation",
    )
    parser.add_argument(
        "--clock",
        type=float,
        default=0.5,
        # Adjacent literals are concatenated verbatim, so each fragment needs
        # its own trailing space to keep the rendered help readable.
        help="Simulates clock value used for augmentations during training. "
        "Ranges from 0.0 (representing parameter start values) to "
        "1.0 (representing parameter end values)",
    )
    parser.add_argument(
        "--pipe",
        action="store_true",
        help="Pipe first sample as wav file to stdout. Forces --number to 1.",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="No info logging to console",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # Parsed arguments are published as a module global; the functions above
    # read their options from CLI_ARGS.
    CLI_ARGS = handle_args()
    if not CLI_ARGS.pipe:
        # Audio playback is only needed when not piping, so the optional
        # dependency is imported lazily and only in that case.
        try:
            import simpleaudio
        except ModuleNotFoundError:
            print('Unless using the --pipe flag, play.py requires Python package "simpleaudio" for playing samples')
            raise SystemExit(1)
    try:
        play_collection()
    except KeyboardInterrupt:
        # Ctrl-C is the regular way to end playback; report it and exit cleanly.
        print(" Stopped")
        raise SystemExit(0)