forked from smarsland/AviaNZ
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wavio.py
400 lines (362 loc) · 15.6 KB
/
wavio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
"""
The wavio module defines the functions:
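readFmt(file)
Read only the format of a WAV file and return the tuple
(rate, nseconds, nchannels, sampwidth in bits).
read(file, nseconds=None, offset=0)
Read a WAV file (or part of it) and return a `wavio.Wav` object, with
attributes `data`, `rate`, `sampwidth` and `nframes`.
write(file, data, rate, scale=None, sampwidth=None)
Write a numpy array to a WAV file.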
-----
Author: Warren Weckesser (modifications for paging by Stephen Marsland, 2017)
License: BSD 2-Clause:
Copyright (c) 2015, Warren Weckesser
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
from __future__ import division as _division
import wave as _wave
import numpy as _np
__version__ = "0.0.4.dev1"
def _wav2array(nchannels, sampwidth, data):
    """
    Convert the raw frame bytes of a WAV file into a 2-d numpy array.

    Parameters
    ----------
    nchannels : int
        Number of interleaved channels in `data`.
    sampwidth : int
        Sample width in bytes (1, 2, 3 or 4).
    data : bytes
        The bytes read from the wav file.

    Returns
    -------
    result : numpy array, shape (num_samples, nchannels)
        uint8 for 8 bit data, little-endian signed integers for 16 and
        32 bit data, and sign-extended little-endian int32 for 24 bit data.
        The array owns its memory and is writable.

    Raises
    ------
    ValueError
        If `len(data)` is not a multiple of `sampwidth * nchannels`, or if
        `sampwidth` is greater than 4.
    """
    num_samples, remainder = divmod(len(data), sampwidth * nchannels)
    if remainder > 0:
        raise ValueError('The length of data is not a multiple of '
                         'sampwidth * num_channels.')
    if sampwidth > 4:
        raise ValueError("sampwidth must not be greater than 4.")

    if sampwidth == 3:
        # Widen every 3-byte sample to 4 bytes: copy the three data bytes,
        # then fill the fourth with 0x00 or 0xFF according to the sign bit
        # of the most significant data byte (sign extension).
        a = _np.empty((num_samples, nchannels, 4), dtype=_np.uint8)
        raw_bytes = _np.frombuffer(data, dtype=_np.uint8)
        a[:, :, :sampwidth] = raw_bytes.reshape(-1, nchannels, sampwidth)
        a[:, :, sampwidth:] = (a[:, :, sampwidth - 1:sampwidth] >> 7) * 255
        result = a.view('<i4').reshape(a.shape[:-1])
    else:
        # 8 bit samples are stored as unsigned ints; others as signed ints.
        dt_char = 'u' if sampwidth == 1 else 'i'
        # frombuffer returns a read-only view of `data`; copy so that the
        # caller gets a writable array.
        a = _np.frombuffer(data, dtype='<%s%d' % (dt_char, sampwidth)).copy()
        result = a.reshape(-1, nchannels)
    return result
def _array2wav(a, sampwidth):
    """
    Convert the input array `a` to a bytes object of WAV frame data.

    a.dtype must be one of uint8, int16 or int32. Allowed sampwidth
    values are:

        dtype     sampwidth
        uint8     1
        int16     2
        int32     3 or 4

    When sampwidth is 3, the *low* bytes of `a` are assumed to contain
    the values to include in the output.

    Parameters
    ----------
    a : numpy array
        The samples to serialise.
    sampwidth : int
        Sample width in bytes.

    Returns
    -------
    wavdata : bytes
        Little-endian sample bytes, in the array's C (row-major) order.
    """
    if sampwidth == 3:
        # `a` must have dtype int32
        if a.ndim == 1:
            # Convert to a 2D array with a single column.
            a = a.reshape(-1, 1)
        # By shifting first 0 bits, then 8, then 16, the resulting output
        # is 24 bit little-endian.
        a8 = (a.reshape(a.shape + (1,)) >> _np.array([0, 8, 16])) & 255
        wavdata = a8.astype(_np.uint8).tobytes()
    else:
        # Make sure the array is little-endian, and then convert using
        # tobytes()
        a = a.astype('<' + a.dtype.str[1:], copy=False)
        wavdata = a.tobytes()
    return wavdata
class Wav(object):
    """
    Simple container for what `wavio.read` returns.

    Attributes
    ----------
    data : numpy array
        The samples read from the WAV file.
    rate : int or float
        The sample rate of the WAV file.
    sampwidth : int
        Bytes per sample in the WAV file; `sampwidth == 3` means the file
        holds 24 bit samples.
    nframes : int
        The frame count handed to the constructor (`wavio.read` passes the
        number of frames in the whole file).
    """

    def __init__(self, data, rate, sampwidth, nframes):
        self.data, self.rate = data, rate
        self.sampwidth, self.nframes = sampwidth, nframes

    def __repr__(self):
        # Only the shape and dtype of the data are shown, not the samples.
        template = "Wav(data.shape=%s, data.dtype=%s, rate=%r, sampwidth=%r, nframes=%r)"
        fields = (
            self.data.shape,
            self.data.dtype,
            self.rate,
            self.sampwidth,
            self.nframes,
        )
        return template % fields
def readFmt(file):
    """
    Reads WAV file format.

    Convenience function for when samplerate or duration are needed,
    to avoid wasting RAM on loading the audiodata.

    Parameters
    ----------
    file : string or file object
        Either the name of a file or an open file pointer.

    Returns
    -------
    A tuple of :
        rate : int
            The sampling frequency
        nseconds : float
            Number of seconds in file
        nchannels : int
            Number of channels in file
        sampwidth : int
            Sample width, in bits (16, 24 etc.)
    """
    wav = _wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        # The wave module reports bytes per sample; callers get bits.
        sampwidth = wav.getsampwidth() * 8
        nseconds = float(wav.getnframes()) / rate
    finally:
        # Release the reader even if the header is unusable (e.g. a
        # frame rate of 0 makes the division above fail).
        wav.close()
    return (rate, nseconds, nchannels, sampwidth)
def read(file, nseconds=None, offset=0):
    """
    Read a WAV file, or a section of it.

    Parameters
    ----------
    file : string or file object
        Either the name of a file or an open file pointer.
    nseconds : float, optional
        Length of the part of the file to read, in seconds.  When omitted,
        or when it exceeds the duration of the file, the number of frames
        requested is the total frame count of the file.
    offset : float, optional
        Where to start reading from, in seconds.  An offset that lies
        beyond the end of the file is reset to 0.

    Returns
    -------
    wav : wavio.Wav() instance
        The return value is an instance of the class `wavio.Wav`,
        with the following attributes:

            data : numpy array
                The array containing the data.  The shape of the array
                is (num_samples, num_channels).  num_channels is the
                number of audio channels (1 for mono, 2 for stereo).
            rate : int
                The sampling frequency (i.e. frame rate)
            sampwidth : int
                The sample width, in bytes.  E.g. for a 24 bit WAV file,
                sampwidth is 3.
            nframes : int
                The number of frames in the whole file (not only in the
                section that was read).

    Notes
    -----
    This function uses the `wave` module of the Python standard library
    to read the WAV file, so it has the same limitations as that library.
    In particular, the function does not read compressed WAV files, and
    it does not read files with floating point data.

    The array returned by `wavio.read` is always two-dimensional.  If the
    WAV data is mono, the array will have shape (num_samples, 1).

    `wavio.read()` does not scale or normalize the data.  The data in the
    array `wav.data` is the data that was in the file.  When the file
    contains 24 bit samples, the resulting numpy array is 32 bit integers,
    with values that have been sign-extended.
    """
    wav = _wave.open(file)
    try:
        rate = wav.getframerate()
        nchannels = wav.getnchannels()
        sampwidth = wav.getsampwidth()
        nframes = wav.getnframes()

        if nseconds is None or float(nframes) / rate < nseconds:
            # Ask for every frame.  Using the integer frame count avoids
            # the float round trip nframes / rate * rate, which can
            # truncate to nframes - 1 and silently drop the last frame.
            frames_wanted = nframes
        else:
            frames_wanted = int(nseconds * rate)

        # An offset past the end of the file falls back to the start.
        if nframes - offset * rate < 0:
            offset = 0
        wav.setpos(int(offset * rate))
        data = wav.readframes(frames_wanted)
    finally:
        # Release the reader even when positioning or reading fails.
        wav.close()

    array = _wav2array(nchannels, sampwidth, data)
    return Wav(data=array, rate=rate, sampwidth=sampwidth, nframes=nframes)
# numpy dtype that holds the samples for each sample width (in bytes).
# 24 bit samples (sampwidth 3) are kept in the low bytes of an int32.
_sampwidth_dtypes = {1: _np.uint8,
                     2: _np.int16,
                     3: _np.int32,
                     4: _np.int32}

# Output value range for each sample width, as (min, max) with `max`
# exclusive: the largest value actually written is max - 1.
_sampwidth_ranges = {1: (0, 256),
                     2: (-2**15, 2**15),
                     3: (-2**23, 2**23),
                     4: (-2**31, 2**31)}


def _scale_to_sampwidth(data, sampwidth, vmin, vmax):
    """
    Scale and translate `data` to fit the range of the data type
    associated with the given sampwidth.

    Parameters
    ----------
    data : numpy array
        The values to convert.
    sampwidth : int
        Sample width in bytes; must be a key of `_sampwidth_dtypes`.
    vmin, vmax : number
        Input values mapped to the bottom and the top of the output range.
        Input outside [vmin, vmax] is clipped first.

    Returns
    -------
    numpy array with the dtype given by `_sampwidth_dtypes[sampwidth]`.
    If `vmin == vmax` the result is all zeros.
    """
    data = data.clip(vmin, vmax)
    dt = _sampwidth_dtypes[sampwidth]
    if vmax == vmin:
        return _np.zeros(data.shape, dtype=dt)

    outmin, outmax = _sampwidth_ranges[sampwidth]
    if outmin != vmin or outmax != vmax:
        vmin = float(vmin)
        vmax = float(vmax)
        data = (float(outmax - outmin) * (data - vmin) /
                (vmax - vmin)).astype(_np.int64) + outmin
    else:
        # Input range already equals the output range: no scaling needed.
        data = data.astype(_np.int64)
    # `outmax` is exclusive.  Clamp on both paths so that an input equal to
    # `vmax` saturates at the largest representable value instead of
    # wrapping around in the final cast.
    data = _np.minimum(data, outmax - 1)
    return data.astype(dt)
def write(file, data, rate, scale=None, sampwidth=None):
    """
    Write the numpy array `data` to a WAV file.

    The Python standard library "wave" is used to write the data
    to the file, so this function has the same limitations as that
    module.  In particular, the Python library does not support
    floating point data.  When given a floating point array, this
    function converts the values to integers.  See below for the
    conversion rules.

    Parameters
    ----------
    file : string, or file object open for writing in binary mode
        Either the name of a file or an open file pointer.
    data : numpy array (or array-like), 1- or 2-dimensional, integer or
        floating point
        If it is 2-d, the rows are the frames (i.e. samples) and the
        columns are the channels.
    rate : float
        The sampling frequency (i.e. frame rate) of the data.
    sampwidth : int, optional
        The sample width, in bytes, of the output file.
        If `sampwidth` is not given, it is inferred (if possible) from
        the data type of `data`, as follows::

            data.dtype     sampwidth
            ----------     ---------
            uint8, int8        1
            uint16, int16      2
            uint32, int32      4

        For any other data types, or to write a 24 bit file, `sampwidth`
        must be given.
    scale : tuple or str, optional
        By default, the data written to the file is scaled up or down to
        occupy the full range of the output data type.  So, for example,
        the unsigned 8 bit data [0, 1, 2, 15] would be written to the file
        as [0, 17, 34, 255].  More generally, the default behavior is
        (roughly)::

            vmin = data.min()
            vmax = data.max()
            outmin = <minimum integer of the output dtype>
            outmax = <maximum integer of the output dtype>
            outdata = (outmax - outmin)*(data - vmin)/(vmax - vmin) + outmin

        The `scale` argument allows the scaling of the output data to be
        changed.  `scale` can be a tuple of the form `(vmin, vmax)`, in which
        case the given values override the use of `data.min()` and
        `data.max()` for `vmin` and `vmax` shown above.  (If either value
        is `None`, the value shown above is used.)  Data outside the
        range (vmin, vmax) is clipped.  If `vmin == vmax`, the output is
        all zeros.

        If `scale` is the string "none", then `vmin` and `vmax` are set to
        `outmin` and `outmax`, respectively.  This means the data is written
        to the file with no scaling.  (Note: `scale="none"` is not the same
        as `scale=None`.  The latter means "use the default behavior",
        which is to scale by the data minimum and maximum.)

        If `scale` is the string "dtype-limits", then `vmin` and `vmax`
        are set to the minimum and maximum integers of `data.dtype`.
        The string "dtype-limits" is not allowed when the `data` is a
        floating point array.

        If using `scale` results in values that exceed the limits of the
        output sample width, the data is clipped.  For example, the
        following code::

            >>> x = np.array([-100, 0, 100, 200, 300, 325])
            >>> wavio.write('foo.wav', x, 8000, scale='none', sampwidth=1)

        will write the values [0, 0, 100, 200, 255, 255] to the file.

    Raises
    ------
    ValueError
        If `data` is not 1- or 2-dimensional, if `sampwidth` is invalid or
        cannot be inferred from `data.dtype`, or if `scale` is
        "dtype-limits" and `data` is not an integer array.

    Example
    -------
    Create a 3 second 440 Hz sine wave, and save it in a 24-bit WAV file.

    >>> import numpy as np
    >>> import wavio
    >>> rate = 22050  # samples per second
    >>> T = 3         # sample duration (seconds)
    >>> f = 440.0     # sound frequency (Hz)
    >>> t = np.linspace(0, T, T*rate, endpoint=False)
    >>> x = np.sin(2*np.pi * f * t)
    >>> wavio.write("sine24.wav", x, rate, sampwidth=3)

    Create a file that contains the 16 bit integer values -10000 and 10000
    repeated 100 times.  Don't automatically scale the values.  Use a sample
    rate 8000.

    >>> x = np.empty(200, dtype=np.int16)
    >>> x[::2] = -10000
    >>> x[1::2] = 10000
    >>> wavio.write("foo.wav", x, 8000, scale='none')

    Check that the file contains what we expect.

    >>> w = wavio.read("foo.wav")
    >>> np.all(w.data[:, 0] == x)
    True

    In the following, the values -10000 and 10000 (from within the 16 bit
    range [-2**15, 2**15-1]) are mapped to the corresponding values 88 and
    167 (in the range [0, 2**8-1]).

    >>> wavio.write("foo.wav", x, 8000, sampwidth=1, scale='dtype-limits')
    >>> w = wavio.read("foo.wav")
    >>> w.data[:4, 0]
    array([ 88, 167,  88, 167], dtype=uint8)
    """
    # Accept lists and other array-likes; ndarrays (and subclasses) pass
    # through unchanged.
    data = _np.asanyarray(data)
    if data.ndim not in (1, 2):
        raise ValueError('data must be a 1- or 2-dimensional array.')

    if sampwidth is None:
        if not _np.issubdtype(data.dtype, _np.integer) or data.itemsize > 4:
            raise ValueError('when data.dtype is not an 8-, 16-, or 32-bit '
                             'integer type, sampwidth must be specified.')
        sampwidth = data.itemsize
    else:
        if sampwidth not in [1, 2, 3, 4]:
            raise ValueError('sampwidth must be 1, 2, 3 or 4.')

    outdtype = _sampwidth_dtypes[sampwidth]
    outmin, outmax = _sampwidth_ranges[sampwidth]

    if scale == "none":
        data = data.clip(outmin, outmax-1).astype(outdtype)
    elif scale == "dtype-limits":
        if not _np.issubdtype(data.dtype, _np.integer):
            raise ValueError("scale cannot be 'dtype-limits' with "
                             "non-integer data.")
        # Easy transforms that just changed the signedness of the data.
        if sampwidth == 1 and data.dtype == _np.int8:
            data = (data.astype(_np.int16) + 128).astype(_np.uint8)
        elif sampwidth == 2 and data.dtype == _np.uint16:
            data = (data.astype(_np.int32) - 32768).astype(_np.int16)
        elif sampwidth == 4 and data.dtype == _np.uint32:
            data = (data.astype(_np.int64) - 2**31).astype(_np.int32)
        elif data.itemsize != sampwidth:
            # Integer input, but rescaling is needed to adjust the
            # input range to the output sample width.
            ii = _np.iinfo(data.dtype)
            vmin = ii.min
            vmax = ii.max
            data = _scale_to_sampwidth(data, sampwidth, vmin, vmax)
    else:
        if scale is None:
            vmin = data.min()
            vmax = data.max()
        else:
            # scale must be a tuple of the form (vmin, vmax)
            vmin, vmax = scale
            if vmin is None:
                vmin = data.min()
            if vmax is None:
                vmax = data.max()

        data = _scale_to_sampwidth(data, sampwidth, vmin, vmax)

    # At this point, `data` has been converted to have one of the following:
    #    sampwidth   dtype
    #    ---------   -----
    #        1       uint8
    #        2       int16
    #        3       int32
    #        4       int32
    # The values in `data` are in the form in which they will be saved;
    # no more scaling will take place.

    if data.ndim == 1:
        data = data.reshape(-1, 1)

    wavdata = _array2wav(data, sampwidth)

    w = _wave.open(file, 'wb')
    try:
        w.setnchannels(data.shape[1])
        w.setsampwidth(sampwidth)
        w.setframerate(rate)
        w.writeframes(wavdata)
    except Exception:
        # Release the file handle but keep the original error visible:
        # closing a writer whose header is still incomplete may raise a
        # wave.Error of its own.
        try:
            w.close()
        except _wave.Error:
            pass
        raise
    w.close()