Skip to content

Commit

Permalink
Support h.264 video decoding on web (rerun-io#7511)
Browse files Browse the repository at this point in the history
### What

* Closes rerun-io#7375

- The seeking logic now searches for the requested segment by the
presentation/composition timestamp passed in by the user. This way, if a
user requests the frame at 12s, they will get the frame which will be
_presented_ at 12s, not the one _decoded_ at 12s. See [I/P/B frame
terminology](https://en.wikipedia.org/wiki/Video_compression_picture_types)
in video encoding for more information as to why this distinction
matters.
- `EncodedVideoChunk`s are now given the _composition timestamp_ of the
sample instead of its _decode timestamp_, so the output `VideoFrame`
objects are in _composition timestamp_ order.

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested the web demo (if applicable):
* Using examples from latest `main` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7511?manifest_url=https://app.rerun.io/version/main/examples_manifest.json)
* Using full set of examples from `nightly` build:
[rerun.io/viewer](https://rerun.io/viewer/pr/7511?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG
* [x] If applicable, add a new check to the [release
checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)!
* [x] I have noted any breaking changes to the log API in
`CHANGELOG.md` and the migration guide

- [PR Build Summary](https://build.rerun.io/pr/7511)
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)

To run all checks from `main`, comment on the PR with `@rerun-bot
full-check`.
  • Loading branch information
jprochazk authored Sep 26, 2024
1 parent b13ace0 commit bf7f533
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 47 deletions.
16 changes: 6 additions & 10 deletions crates/store/re_video/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ impl VideoData {
}

/// A segment of a video.
#[derive(Clone)]
#[derive(Debug, Clone)]
pub struct Segment {
/// Decode timestamp of the first sample in this segment, in time units.
pub start: Time,
Expand Down Expand Up @@ -275,18 +275,14 @@ impl std::fmt::Debug for VideoData {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Video")
.field("config", &self.config)
.field("timescale", &self.timescale)
.field("duration", &self.duration)
.field("segments", &self.segments)
.field(
"samples",
&self.samples.iter().enumerate().collect::<Vec<_>>(),
)
.field("data", &self.data.len())
.finish()
}
}

impl std::fmt::Debug for Segment {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Segment")
.field("timestamp", &self.start)
.field("samples", &self.sample_range.len())
.finish()
}
}
2 changes: 1 addition & 1 deletion crates/viewer/re_renderer/src/video/decoder/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl VideoDecoder {
pub fn frame_at(
&mut self,
_render_ctx: &RenderContext,
_timestamp_s: f64,
_presentation_timestamp_s: f64,
) -> FrameDecodingResult {
FrameDecodingResult::Error(DecodingError::NoNativeSupport)
}
Expand Down
114 changes: 80 additions & 34 deletions crates/viewer/re_renderer/src/video/decoder/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ impl VideoDecoder {
pub fn frame_at(
&mut self,
render_ctx: &RenderContext,
timestamp_s: f64,
presentation_timestamp_s: f64,
) -> FrameDecodingResult {
if let Some(error) = self.decode_error.lock().clone() {
// TODO(emilk): if there is a decoding error in one segment or sample,
Expand All @@ -177,7 +177,7 @@ impl VideoDecoder {
return FrameDecodingResult::Error(error);
}

let result = self.frame_at_internal(timestamp_s);
let result = self.frame_at_internal(presentation_timestamp_s);
match &result {
FrameDecodingResult::Ready(_) => {
self.error_on_last_frame_at = false;
Expand All @@ -200,47 +200,80 @@ impl VideoDecoder {
result
}

fn frame_at_internal(&mut self, timestamp_s: f64) -> FrameDecodingResult {
if timestamp_s < 0.0 {
fn frame_at_internal(&mut self, presentation_timestamp_s: f64) -> FrameDecodingResult {
if presentation_timestamp_s < 0.0 {
return FrameDecodingResult::Error(DecodingError::NegativeTimestamp);
}
let timescale = self.data.timescale;
let timestamp = Time::from_secs(timestamp_s, timescale);
let presentation_timestamp = Time::from_secs(presentation_timestamp_s, self.data.timescale);

if let Err(err) = self.enqueue_requested_segments(presentation_timestamp) {
return FrameDecodingResult::Error(err);
}

let Some(requested_segment_idx) =
latest_at_idx(&self.data.segments, |segment| segment.start, &timestamp)
self.try_present_frame(presentation_timestamp)
}

fn enqueue_requested_segments(
&mut self,
presentation_timestamp: Time,
) -> Result<(), DecodingError> {
// Some terminology:
// - presentation timestamp = composition timestamp
// = the time at which the frame should be shown
// - decode timestamp
// = determines the decoding order of samples
//
// Note: `composition >= decode` for any given sample.
// For some codecs, the two timestamps are the same.
// We must enqueue samples in decode order, but show them in composition order.

// 1. Find the latest sample where `decode_timestamp <= presentation_timestamp`.
// Because `composition >= decode`, we never have to look further ahead in the
// video than this.
let Some(decode_sample_idx) = latest_at_idx(
&self.data.samples,
|sample| sample.decode_timestamp,
&presentation_timestamp,
) else {
return Err(DecodingError::EmptyVideo);
};

// 2. Search _backwards_, starting at `decode_sample_idx`, looking for
// the first sample where `sample.composition_timestamp <= presentation_timestamp`.
// This is the sample which when decoded will be presented at the timestamp the user requested.
let Some(requested_sample_idx) = self.data.samples[..=decode_sample_idx]
.iter()
.rposition(|sample| sample.composition_timestamp <= presentation_timestamp)
else {
return FrameDecodingResult::Error(DecodingError::EmptyVideo);
return Err(DecodingError::EmptyVideo);
};
let requested_segment = &self.data.segments[requested_segment_idx];

let Some(requested_sample_idx) = latest_at_idx(
&self.data.samples[requested_segment.range()],
|sample| sample.decode_timestamp,
&timestamp,
// 3. Do a binary search through segments by the decode timestamp of the found sample
// to find the segment that contains the sample.
let Some(requested_segment_idx) = latest_at_idx(
&self.data.segments,
|segment| segment.start,
&self.data.samples[requested_sample_idx].decode_timestamp,
) else {
// This should never happen, because segments are never empty.
return FrameDecodingResult::Error(DecodingError::EmptySegment);
return Err(DecodingError::EmptyVideo);
};

// Enqueue segments as needed. We maintain a buffer of 2 segments, so we can
// always smoothly transition to the next segment.
// We can always start decoding from any segment, because segments always begin
// with a keyframe.
// 4. Enqueue segments as needed.
//
// We maintain a buffer of 2 segments, so we can always smoothly transition to the next segment.
// We can always start decoding from any segment, because segments always begin with a keyframe.
//
// Backward seeks or seeks across many segments trigger a reset of the decoder,
// because decoding all the samples between the previous sample and the requested
// one would mean decoding and immediately discarding more frames than we otherwise
// need to.
// one would mean decoding and immediately discarding more frames than we need.
if requested_segment_idx != self.current_segment_idx {
let segment_distance = requested_segment_idx.checked_sub(self.current_segment_idx);
if segment_distance == Some(1) {
// forward seek to next segment - queue up the one _after_ requested
self.enqueue_segment(requested_segment_idx + 1);
} else {
// Startup, forward seek by N>1, or backward seek across segments -> reset decoder
if let Err(err) = self.reset() {
return FrameDecodingResult::Error(err);
}
self.reset()?;
self.enqueue_segment(requested_segment_idx);
self.enqueue_segment(requested_segment_idx + 1);
}
Expand All @@ -250,22 +283,32 @@ impl VideoDecoder {
// while maintaining a buffer of 2 segments
let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize;
if sample_distance < 0 {
if let Err(err) = self.reset() {
return FrameDecodingResult::Error(err);
}
self.reset()?;
self.enqueue_segment(requested_segment_idx);
self.enqueue_segment(requested_segment_idx + 1);
}
}

// At this point, we have the requested segments enqueued. They will be output
// in _composition timestamp_ order, so presenting the frame is a binary search
// through the frame buffer as usual.

self.current_segment_idx = requested_segment_idx;
self.current_sample_idx = requested_sample_idx;

Ok(())
}

fn try_present_frame(&mut self, presentation_timestamp: Time) -> FrameDecodingResult {
let timescale = self.data.timescale;

let mut frames = self.frames.lock();

let Some(frame_idx) =
latest_at_idx(&frames, |frame| frame.composition_timestamp, &timestamp)
else {
let Some(frame_idx) = latest_at_idx(
&frames,
|frame| frame.composition_timestamp,
&presentation_timestamp,
) else {
// no buffered frames - texture will be blank
// Don't return a zeroed texture, because we may just be behind on decoding
// and showing an old frame is better than showing a blank frame,
Expand All @@ -287,7 +330,7 @@ impl VideoDecoder {
// This handles the case when we have a buffered frame that's older than the requested timestamp.
// We don't want to show this frame to the user, because it's not actually the one they requested,
// so instead return the last decoded frame.
if timestamp.into_millis(timescale) - frame_timestamp_ms > frame_duration_ms {
if presentation_timestamp.into_millis(timescale) - frame_timestamp_ms > frame_duration_ms {
return FrameDecodingResult::Pending(self.texture.clone());
}

Expand Down Expand Up @@ -351,10 +394,11 @@ impl VideoDecoder {
} else {
EncodedVideoChunkType::Delta
};
// TODO(jan): use `composition_timestamp` instead
let chunk = EncodedVideoChunkInit::new(
&data,
sample.decode_timestamp.into_micros(self.data.timescale),
sample
.composition_timestamp
.into_micros(self.data.timescale),
type_,
);
chunk.set_duration(sample.duration.into_micros(self.data.timescale));
Expand All @@ -375,6 +419,8 @@ impl VideoDecoder {

/// Reset the video decoder and discard all frames.
fn reset(&mut self) -> Result<(), DecodingError> {
re_log::debug!("resetting video decoder");

self.decoder
.reset()
.map_err(|err| DecodingError::ResetFailure(js_error_to_string(&err)))?;
Expand Down
6 changes: 4 additions & 2 deletions crates/viewer/re_renderer/src/video/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ impl Video {
&self,
render_context: &RenderContext,
decoder_stream_id: VideoDecodingStreamId,
timestamp_s: f64,
presentation_timestamp_s: f64,
) -> FrameDecodingResult {
re_tracing::profile_function!();

Expand Down Expand Up @@ -152,7 +152,9 @@ impl Video {
};

decoder_entry.frame_index = render_context.active_frame_idx();
decoder_entry.decoder.frame_at(render_context, timestamp_s)
decoder_entry
.decoder
.frame_at(render_context, presentation_timestamp_s)
}

/// Removes all decoders that have been unused in the last frame.
Expand Down

0 comments on commit bf7f533

Please sign in to comment.