Skip to content

Commit

Permalink
Merge pull request CPJKU#456 from jpauwels/fix-mp4-no-faststart
Browse files Browse the repository at this point in the history
Fix mp4 no faststart
  • Loading branch information
Sebastian Böck authored Dec 19, 2019
2 parents 3224931 + 5a4e2ee commit a9ce9fa
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 33 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@

# Sphinx documentation
/docs/_build/
.eggs
127 changes: 95 additions & 32 deletions madmom/io/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
import subprocess
import sys
import tempfile
import io

import numpy as np

from ..utils import string_types
from ..utils import string_types, file_types
from ..audio.signal import Signal


Expand Down Expand Up @@ -126,25 +125,23 @@ def _ffmpeg_call(infile, output, fmt='f32le', sample_rate=None, num_channels=1,
raise RuntimeError('avconv has a bug, which results in wrong audio '
'slices! Decode the audio files to .wav first or '
'use ffmpeg.')
# input type handling
if isinstance(infile, Signal):
in_fmt = _ffmpeg_fmt(infile.dtype)
in_ac = str(int(infile.num_channels))
in_ar = str(int(infile.sample_rate))
infile = str("pipe:0")
elif isinstance(infile, io.IOBase):
infile = "-"
else:
infile = str(infile)
# general options
call = [cmd, "-v", "quiet", "-y"]
# input options
if skip:
# use "%f" to avoid scientific float notation
call.extend(["-ss", "%f" % float(skip)])
# if we decode from STDIN, the format must be specified
if infile == "pipe:0":
if isinstance(infile, Signal):
in_fmt = _ffmpeg_fmt(infile.dtype)
in_ac = str(int(infile.num_channels))
in_ar = str(int(infile.sample_rate))
infile = "pipe:0"
call.extend(["-f", in_fmt, "-ac", in_ac, "-ar", in_ar])
elif isinstance(infile, file_types):
infile = "pipe:0"
else:
infile = str(infile)
call.extend(["-i", infile])
if replaygain_mode:
audio_filter = ("volume=replaygain=%s:replaygain_preamp=%.1f"
Expand Down Expand Up @@ -303,7 +300,7 @@ def decode_to_pipe(infile, fmt='f32le', sample_rate=None, num_channels=1,
"""
# check input file type
if not isinstance(infile, (string_types, io.IOBase, Signal)):
if not isinstance(infile, (string_types, file_types, Signal)):
raise ValueError("only file names, file objects or Signal instances "
"are supported as `infile`, not %s." % infile)
# Note: closing the file-like object only stops decoding because ffmpeg
Expand All @@ -315,16 +312,56 @@ def decode_to_pipe(infile, fmt='f32le', sample_rate=None, num_channels=1,
replaygain_mode=replaygain_mode,
replaygain_preamp=replaygain_preamp)
# redirect stdout to a pipe and buffer as requested
if isinstance(infile, (Signal, io.IOBase)):
if isinstance(infile, (Signal, file_types)):
proc = subprocess.Popen(call, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, bufsize=buf_size)
else:
proc = subprocess.Popen(call, stdout=subprocess.PIPE, bufsize=buf_size)
return proc.stdout, proc


def nonstreamable_mp4_file_object(infile, cmd="ffprobe"):
"""
Test if the given audio file object is a non-streamable MPEG4 container.
Decode the given audio and return a file-like object for reading the
samples, as well as a process object.
Parameters
----------
infile : file_types
File object to decode.
cmd : {'ffprobe', 'avprobe'}, optional
Probing command (defaults to ffprobe, alternatively supports avprobe).
Returns
-------
boolean
True when audio is non-streamable container, False otherwise.
"""
call = [cmd, "-v", "debug", "-hide_banner",
"-show_entries", "format=format_name",
"-print_format", "default=nokey=1:noprint_wrappers=1",
"pipe:0"]
proc = subprocess.Popen(call, stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = proc.communicate(infile.read())
retcode = proc.poll()
infile.seek(0)
if retcode:
raise subprocess.CalledProcessError(retcode, call, output=err)
container_format = out.decode().strip()
partial_file = "partial file" in err.decode()
if container_format == "mov,mp4,m4a,3gp,3g2,mj2" and partial_file:
return True
else:
return False


def decode_to_memory(infile, fmt='f32le', sample_rate=None, num_channels=1,
channel=None, skip=None, max_len=None, cmd='ffmpeg',
channel=None, skip=None, max_len=None,
cmd_decode='ffmpeg', cmd_probe='ffprobe',
replaygain_mode=None, replaygain_preamp=0.0):
"""
Decode the given audio and return it as a binary string representation.
Expand All @@ -350,8 +387,10 @@ def decode_to_memory(infile, fmt='f32le', sample_rate=None, num_channels=1,
Number of seconds to skip at beginning of file.
max_len : float, optional
Maximum length in seconds to decode.
cmd : {'ffmpeg', 'avconv'}, optional
cmd_decode : {'ffmpeg', 'avconv'}, optional
Decoding command (defaults to ffmpeg, alternatively supports avconv).
cmd_probe : {'ffprobe', 'avprobe'}, optional
Probing command (defaults to ffprobe, alternatively supports avprobe).
replaygain_mode : {None, 'track','album'}, optional
Specify the ReplayGain volume-levelling mode (None to disable).
replaygain_preamp : float, optional
Expand All @@ -364,13 +403,13 @@ def decode_to_memory(infile, fmt='f32le', sample_rate=None, num_channels=1,
"""
# check input file type
if not isinstance(infile, (string_types, io.IOBase, Signal)):
if not isinstance(infile, (string_types, file_types, Signal)):
raise ValueError("only file names, file objects or Signal instances "
"are supported as `infile`, not %s." % infile)
# prepare decoding to pipe
_, proc = decode_to_pipe(infile, fmt=fmt, sample_rate=sample_rate,
num_channels=num_channels, channel=channel,
skip=skip, max_len=max_len, cmd=cmd,
skip=skip, max_len=max_len, cmd=cmd_decode,
replaygain_mode=replaygain_mode,
replaygain_preamp=replaygain_preamp)
# decode the input to memory
Expand All @@ -382,13 +421,38 @@ def decode_to_memory(infile, fmt='f32le', sample_rate=None, num_channels=1,
except AttributeError:
mv = memoryview(infile)
signal, _ = proc.communicate(mv.cast('b'))
elif isinstance(infile, io.IOBase):
elif isinstance(infile, file_types):
signal, _ = proc.communicate(infile.read())
infile.seek(0)
# handle non-streamable MP4 container, which silently returns an empty
# signal
if not signal and nonstreamable_mp4_file_object(infile, cmd_probe):
try:
delete_file = False
try:
# pass its path if the file exists on disk
path = infile.name
except AttributeError:
# otherwise store it as a temporary file
with tempfile.NamedTemporaryFile(mode="wb", delete=False,
suffix=".mp4") as f:
f.write(infile.read())
infile.seek(0)
path = f.name
delete_file = True
# retry by passing a path to ffmpeg instead of piping the
# audio to stdin (allows multiple reading passes)
signal = decode_to_memory(path, fmt, sample_rate, num_channels,
channel, skip, max_len, cmd_decode,
cmd_probe, replaygain_mode,
replaygain_preamp)
finally:
if delete_file:
os.remove(path)
else:
signal, _ = proc.communicate()
if proc.returncode != 0:
raise subprocess.CalledProcessError(proc.returncode, cmd)
raise subprocess.CalledProcessError(proc.returncode, cmd_decode)
return signal


Expand Down Expand Up @@ -416,7 +480,7 @@ def get_file_info(infile, cmd='ffprobe'):
info['sample_rate'] = infile.sample_rate
else:
# call ffprobe
if isinstance(infile, io.IOBase):
if isinstance(infile, file_types):
call = [cmd, "-v", "quiet", "-show_streams", "pipe:0"]
proc = subprocess.Popen(call, stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
Expand Down Expand Up @@ -504,16 +568,15 @@ def load_ffmpeg_file(filename, sample_rate=None, num_channels=None,
if stop is not None:
max_len = stop - start
# convert the audio signal using ffmpeg
signal = np.frombuffer(decode_to_memory(filename, fmt=fmt,
sample_rate=sample_rate,
num_channels=num_channels,
channel=channel,
skip=start, max_len=max_len,
cmd=cmd_decode,
replaygain_mode=replaygain_mode,
replaygain_preamp=replaygain_preamp
),
dtype=dtype)
signal = np.frombuffer(
decode_to_memory(filename, fmt=fmt, sample_rate=sample_rate,
num_channels=num_channels, channel=channel,
skip=start, max_len=max_len,
cmd_decode=cmd_decode, cmd_probe=cmd_probe,
replaygain_mode=replaygain_mode,
replaygain_preamp=replaygain_preamp
),
dtype=dtype)
# get the needed information from the file
if sample_rate is None or num_channels is None:
info = get_file_info(filename, cmd=cmd_probe)
Expand Down
8 changes: 8 additions & 0 deletions madmom/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from __future__ import absolute_import, division, print_function

import io
import argparse
import contextlib

Expand All @@ -25,6 +26,13 @@
integer_types = (int, np.integer)


# Python 2/3 file compatibility
try:
file_types = (io.IOBase, file)
except NameError:
file_types = io.IOBase


# decorator to suppress warnings
def suppress_warnings(function):
"""
Expand Down
Binary file added tests/data/audio/stereo_sample.m4a
Binary file not shown.
41 changes: 41 additions & 0 deletions tests/test_bin.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,47 @@ def run_help(program):

# TODO: can we speed up these tests?

class TestDifferentFileFormats(unittest.TestCase):

def setUp(self):
# use SuperFlux since it is fast
self.bin = pj(program_path, "SuperFlux")
self.result = [0.14, 1.57, 2.52, 3.365, 4.14]
self.result_file = pj(AUDIO_PATH, 'stereo_sample.onsets.txt')

def test_single_wav(self):
run_single(self.bin, stereo_sample_file, tmp_result)
result = np.loadtxt(tmp_result)
self.assertTrue(np.allclose(result, self.result))

def test_single_flac(self):
run_single(self.bin, stereo_sample_file[:-3] + 'flac', tmp_result)
result = np.loadtxt(tmp_result)
self.assertTrue(np.allclose(result, self.result))

def test_single_m4a(self):
run_single(self.bin, stereo_sample_file[:-3] + 'm4a', tmp_result)
result = np.loadtxt(tmp_result)
self.assertTrue(np.allclose(result, self.result))

def test_batch_wav(self):
run_batch(self.bin, [stereo_sample_file])
result = np.loadtxt(self.result_file)
os.unlink(self.result_file)
self.assertTrue(np.allclose(result, self.result))

def test_batch_flac(self):
run_batch(self.bin, [stereo_sample_file[:-3] + 'flac'])
result = np.loadtxt(self.result_file)
os.unlink(self.result_file)
self.assertTrue(np.allclose(result, self.result))

def test_batch_m4a(self):
run_batch(self.bin, [stereo_sample_file[:-3] + 'm4a'])
result = np.loadtxt(self.result_file)
os.unlink(self.result_file)
self.assertTrue(np.allclose(result, self.result))


class TestBarTrackerProgram(unittest.TestCase):

Expand Down
3 changes: 2 additions & 1 deletion tests/test_io_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
sample_file_22k = pj(AUDIO_PATH, 'sample_22050.wav')
stereo_sample_file = pj(AUDIO_PATH, 'stereo_sample.wav')
flac_file = pj(AUDIO_PATH, 'stereo_sample.flac')
m4a_file = pj(AUDIO_PATH, 'stereo_sample.m4a')
rg_flac_file = pj(AUDIO_PATH, 'stereo_sample_rg.flac')
loud_rg_flac_file = pj(AUDIO_PATH, 'stereo_chirp_rg.flac')

Expand Down Expand Up @@ -340,7 +341,7 @@ def test_write_with_replaygain(self):
class TestLoadAudioFromFileObject(unittest.TestCase):

def test_file_object(self):
for file_path in [sample_file, flac_file]:
for file_path in [sample_file, flac_file, m4a_file]:
disk_signal = Signal(file_path)
with open(file_path, 'rb') as file_handle:
memory_signal = Signal(io.BytesIO(file_handle.read()))
Expand Down
1 change: 1 addition & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
pj(AUDIO_PATH, 'stereo_chirp.wav'),
pj(AUDIO_PATH, 'stereo_chirp_rg.flac'),
pj(AUDIO_PATH, 'stereo_sample.flac'),
pj(AUDIO_PATH, 'stereo_sample.m4a'),
pj(AUDIO_PATH, 'stereo_sample_rg.flac'),
pj(AUDIO_PATH, 'stereo_sample.wav')]

Expand Down

0 comments on commit a9ce9fa

Please sign in to comment.