Fix audio encoder input timestamp off-by-one error

Simplify the audio encoder input timestamp calculation. The new calculation avoids drift by tracking the total number of bytes queued to the encoder, rather than tracking the timestamp and the rounding remainder separately, and it also makes the encoder input timestamps match the decoder output buffer timestamps. Also switch one of the export tests, which was passing through AMR samples, over to using WAVE audio. The problem with using AMR is that the compressed samples do not necessarily contain a whole number of audio frames, and the shadow decoder passes them unchanged from input to output, so the audio encoder was receiving buffers that did not contain a whole number of audio frames. Tested by logging the timestamps at the decoder output and at the encoder input while forcing audio transcoding, and verifying that after this change the audio timestamps are no longer off by one. PiperOrigin-RevId: 523409869
chivehao · Apr 12, 2023 · 0a9134f · 0a9134f
1 parent 78669f8
commit 0a9134f
Show file tree

Hide file tree

Showing 14 changed files with 433 additions and 1,695 deletions.
diff --git a/...nsformer/src/main/java/com/google/android/exoplayer2/transformer/AudioSamplePipeline.java b/...nsformer/src/main/java/com/google/android/exoplayer2/transformer/AudioSamplePipeline.java
@@ -53,8 +53,7 @@
   private final DecoderInputBuffer encoderInputBuffer;
   private final DecoderInputBuffer encoderOutputBuffer;
 
-  private long nextEncoderInputBufferTimeUs;
-  private long encoderBufferDurationRemainder;
+  private long encoderTotalInputBytes;
 
   private volatile boolean queueEndOfStreamAfterSilence;
 
@@ -334,9 +333,8 @@ private void feedEncoder(ByteBuffer inputBuffer) throws ExportException {
     int bufferLimit = inputBuffer.limit();
     inputBuffer.limit(min(bufferLimit, inputBuffer.position() + encoderInputBufferData.capacity()));
     encoderInputBufferData.put(inputBuffer);
-    encoderInputBuffer.timeUs = nextEncoderInputBufferTimeUs;
-    computeNextEncoderInputBufferTimeUs(
-        /* bytesWritten= */ encoderInputBufferData.position(), encoderInputAudioFormat);
+    encoderInputBuffer.timeUs = getOutputAudioDurationUs();
+    encoderTotalInputBytes += encoderInputBufferData.position();
     encoderInputBuffer.setFlags(0);
     encoderInputBuffer.flip();
     inputBuffer.limit(bufferLimit);
@@ -345,7 +343,7 @@ private void feedEncoder(ByteBuffer inputBuffer) throws ExportException {
 
   private void queueEndOfStreamToEncoder() throws ExportException {
     checkState(checkNotNull(encoderInputBuffer.data).position() == 0);
-    encoderInputBuffer.timeUs = nextEncoderInputBufferTimeUs;
+    encoderInputBuffer.timeUs = getOutputAudioDurationUs();
     encoderInputBuffer.addFlag(C.BUFFER_FLAG_END_OF_STREAM);
     encoderInputBuffer.flip();
     // Queuing EOS should only occur with an empty buffer.
@@ -363,21 +361,9 @@ private static TransformationRequest createFallbackTransformationRequest(
     return transformationRequest.buildUpon().setAudioMimeType(actualFormat.sampleMimeType).build();
   }
 
-  private void computeNextEncoderInputBufferTimeUs(long bytesWritten, AudioFormat audioFormat) {
-    // The calculation below accounts for remainders and rounding. Without that it corresponds to
-    // the following:
-    // bufferDurationUs = numberOfFramesInBuffer * sampleDurationUs
-    //     where numberOfFramesInBuffer = bytesWritten / bytesPerFrame
-    //     and   sampleDurationUs       = C.MICROS_PER_SECOND / sampleRate
-    long numerator = bytesWritten * C.MICROS_PER_SECOND + encoderBufferDurationRemainder;
-    long denominator = (long) audioFormat.bytesPerFrame * audioFormat.sampleRate;
-    long bufferDurationUs = numerator / denominator;
-    encoderBufferDurationRemainder = numerator - bufferDurationUs * denominator;
-    if (encoderBufferDurationRemainder > 0) { // Ceil division result.
-      bufferDurationUs += 1;
-      encoderBufferDurationRemainder -= denominator;
-    }
-    nextEncoderInputBufferTimeUs += bufferDurationUs;
+  private long getOutputAudioDurationUs() {
+    long totalFramesWritten = encoderTotalInputBytes / encoderInputAudioFormat.bytesPerFrame;
+    return (totalFramesWritten * C.MICROS_PER_SECOND) / encoderInputAudioFormat.sampleRate;
   }
 
   private boolean shouldGenerateSilence() {

diff --git a/...nsformer/src/test/java/com/google/android/exoplayer2/transformer/MediaItemExportTest.java b/...nsformer/src/test/java/com/google/android/exoplayer2/transformer/MediaItemExportTest.java
@@ -20,6 +20,7 @@
 import static com.google.android.exoplayer2.transformer.AssetLoader.SUPPORTED_OUTPUT_TYPE_DECODED;
 import static com.google.android.exoplayer2.transformer.AssetLoader.SUPPORTED_OUTPUT_TYPE_ENCODED;
 import static com.google.android.exoplayer2.transformer.TestUtil.ASSET_URI_PREFIX;
+import static com.google.android.exoplayer2.transformer.TestUtil.FILE_AUDIO_RAW;
 import static com.google.android.exoplayer2.transformer.TestUtil.FILE_AUDIO_UNSUPPORTED_BY_DECODER;
 import static com.google.android.exoplayer2.transformer.TestUtil.FILE_AUDIO_UNSUPPORTED_BY_ENCODER;
 import static com.google.android.exoplayer2.transformer.TestUtil.FILE_AUDIO_UNSUPPORTED_BY_MUXER;
@@ -161,15 +162,13 @@ public void start_audioOnlyTranscoding_completesSuccessfully() throws Exception
                     .setAudioMimeType(MimeTypes.AUDIO_AAC) // supported by encoder and muxer
                     .build())
             .build();
-    MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_UNSUPPORTED_BY_ENCODER);
+    MediaItem mediaItem = MediaItem.fromUri(ASSET_URI_PREFIX + FILE_AUDIO_RAW);
 
     transformer.start(mediaItem, outputPath);
     TransformerTestRunner.runLooper(transformer);
 
     DumpFileAsserts.assertOutput(
-        context,
-        checkNotNull(testMuxerHolder.testMuxer),
-        getDumpFileName(FILE_AUDIO_UNSUPPORTED_BY_ENCODER + ".aac"));
+        context, checkNotNull(testMuxerHolder.testMuxer), getDumpFileName(FILE_AUDIO_RAW + ".aac"));
   }
 
   @Test

diff --git a/library/transformer/src/test/java/com/google/android/exoplayer2/transformer/TestUtil.java b/library/transformer/src/test/java/com/google/android/exoplayer2/transformer/TestUtil.java
@@ -148,6 +148,7 @@ public void release() {}
   public static final String FILE_AUDIO_VIDEO = "mp4/sample.mp4";
   public static final String FILE_AUDIO_VIDEO_INCREASING_TIMESTAMPS_15S =
       "mp4/sample_with_increasing_timestamps_320w_240h.mp4";
+  public static final String FILE_AUDIO_RAW = "wav/sample.wav";
   public static final String FILE_WITH_SUBTITLES = "mkv/sample_with_srt.mkv";
   public static final String FILE_WITH_SEF_SLOW_MOTION = "mp4/sample_sef_slow_motion.mp4";
   public static final String FILE_AUDIO_UNSUPPORTED_BY_DECODER = "amr/sample_wb.amr";
@@ -177,7 +178,7 @@ public static void createEncodersAndDecoders() {
         /* colorFormats= */ ImmutableList.of(),
         /* isDecoder= */ true);
     addCodec(
-        MimeTypes.AUDIO_AMR_NB,
+        MimeTypes.AUDIO_RAW,
         codecConfig,
         /* colorFormats= */ ImmutableList.of(),
         /* isDecoder= */ true);