Add Markdown description to demo examples (rerun-io#3631)
Add Markdown description walking through most of the archetypes involved
in each demo example.
roym899 authored Oct 3, 2023
1 parent 33ccb8c commit 80a3b1f
Showing 8 changed files with 292 additions and 41 deletions.
5 changes: 2 additions & 3 deletions examples/python/arkit_scenes/README.md
@@ -16,9 +16,8 @@ demo: true
<img src="https://static.rerun.io/arkit_scenes/fb9ec9e8d965369d39d51b17fc7fc5bae6be10cc/full.png" alt="ARKit Scenes screenshot">
</picture>


Visualizes the [ARKitScenes dataset](https://github.com/apple/ARKitScenes/) using the Rerun SDK.
The dataset contains color+depth images, the reconstructed mesh and labeled bounding boxes around furniture.
This example visualizes the [ARKitScenes dataset](https://github.com/apple/ARKitScenes/) using Rerun. The dataset
contains color images, depth images, the reconstructed mesh, and labeled bounding boxes around furniture.

```bash
pip install -r examples/python/arkit_scenes/requirements.txt
72 changes: 63 additions & 9 deletions examples/python/arkit_scenes/main.py
@@ -4,7 +4,7 @@
import argparse
import json
import os
from pathlib import Path, PosixPath
from pathlib import Path
from typing import Any, Tuple

import cv2
@@ -32,6 +32,59 @@
assert set(ORIENTATION.keys()) == set(AVAILABLE_RECORDINGS)


DESCRIPTION = """
# ARKit Scenes
This example visualizes the [ARKitScenes dataset](https://github.com/apple/ARKitScenes/) using Rerun. The dataset
contains color images, depth images, the reconstructed mesh, and labeled bounding boxes around furniture.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/arkit_scenes/main.py).
### Moving RGB-D camera
To log a moving RGB-D camera we need to log four objects: the pinhole camera (intrinsics), the camera pose
(extrinsics), the color image and the depth image.
The [rr.Pinhole archetype](https://www.rerun.io/docs/reference/data_types/archetypes/pinhole) is logged to
[world/camera_lowres](recording://world/camera_lowres) to define the intrinsics of the camera. This
determines how to go from the 3D camera frame to the 2D image plane. The extrinsics are logged as an
[rr.Transform3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/transform3d) to the
[same entity world/camera_lowres](recording://world/camera_lowres). Note that we could also log the extrinsics to
`world/camera` and the intrinsics to `world/camera/image` instead. Here, we log both on the same entity path to keep
the paths shorter.
The RGB image is logged as an
[rr.Image archetype](https://www.rerun.io/docs/reference/data_types/archetypes/image) to the
[world/camera_lowres/rgb entity](recording://world/camera_lowres/rgb) as a child of the intrinsics + extrinsics
entity described in the previous paragraph. Similarly the depth image is logged as an
[rr.DepthImage archetype](https://www.rerun.io/docs/reference/data_types/archetypes/depth_image) to
[world/camera_lowres/depth](recording://world/camera_lowres/depth).
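For illustration, a rough, self-contained sketch of these four log calls (with made-up intrinsics, dummy image buffers
instead of real dataset frames, and `rr.TranslationRotationScale3D` assumed as the pose datatype) could look like this:
```python
import numpy as np
import rerun as rr

w, h = 256, 192
intrinsic = np.array([[200.0, 0.0, w / 2], [0.0, 200.0, h / 2], [0.0, 0.0, 1.0]])  # invented pinhole matrix
rgb = np.zeros((h, w, 3), dtype=np.uint8)       # placeholder color frame
depth = np.full((h, w), 2000, dtype=np.uint16)  # placeholder depth frame, in millimeters

# extrinsics and intrinsics on the same entity, images as children of that entity
rr.log("world/camera_lowres", rr.Transform3D(transform=rr.TranslationRotationScale3D(translation=[0.0, 0.0, 1.5])))
rr.log("world/camera_lowres", rr.Pinhole(image_from_camera=intrinsic, resolution=[w, h]))
rr.log("world/camera_lowres/rgb", rr.Image(rgb))
rr.log("world/camera_lowres/depth", rr.DepthImage(depth, meter=1000.0))
```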
### Ground-truth mesh
The mesh is logged as an [rr.Mesh3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/mesh3d).
In this case the mesh is composed of mesh vertices, indices (i.e., which vertices belong to the same face), and vertex
colors. Given a `trimesh.Trimesh` the following call is used to log it to Rerun
```python
rr.log(
"world/mesh",
rr.Mesh3D(
vertex_positions=mesh.vertices,
vertex_colors=mesh.visual.vertex_colors,
indices=mesh.faces,
),
timeless=True,
)
```
Here, the mesh is logged to the [world/mesh entity](recording://world/mesh) and is marked as timeless, since it does not
change in the context of this visualization.
### 3D bounding boxes
The bounding boxes around the furniture are visualized by logging the
[rr.Boxes3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/boxes3d). In this example, each
bounding box is logged as a separate entity to the common [world/annotations](recording://world/annotations) parent.
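For example, one such box could be logged roughly like this (the size, label, and color here are invented, not taken
from the dataset):
```python
rr.log(
    "world/annotations/box-0",  # one entity per bounding box, under the common parent
    rr.Boxes3D(
        centers=[[1.0, 0.5, 0.3]],
        half_sizes=[[0.5, 0.4, 0.3]],
        labels=["chair"],
        colors=[(255, 99, 71)],
    ),
    timeless=True,
)
```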
""".strip()


def load_json(js_path: Path) -> dict[str, Any]:
with open(js_path) as f:
json_data: dict[str, Any] = json.load(f)
@@ -255,8 +308,7 @@ def log_camera(
log_line_segments(f"{entity_id}/bbox-2d-segments/{label}", bbox_2d.reshape(-1, 2), colors[i], label)

# pathlib makes it easy to get the parent, but log methods requires a string
camera_path = str(PosixPath(entity_id).parent)
rr.log(camera_path, rr.Transform3D(transform=camera_from_world))
rr.log(entity_id, rr.Transform3D(transform=camera_from_world))
rr.log(entity_id, rr.Pinhole(image_from_camera=intrinsic, resolution=[w, h]))


@@ -327,6 +379,8 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
-------
None
"""
rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

video_id = recording_path.stem
lowres_image_dir = recording_path / "lowres_wide"
image_dir = recording_path / "wide"
@@ -359,13 +413,13 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
print(f"Loading {ply_path}…")
assert os.path.isfile(ply_path), f"Failed to find {ply_path}"

mesh_ply = trimesh.load(str(ply_path))
mesh = trimesh.load(str(ply_path))
rr.log(
"world/mesh",
rr.Mesh3D(
vertex_positions=mesh_ply.vertices,
vertex_colors=mesh_ply.visual.vertex_colors,
indices=mesh_ply.faces,
vertex_positions=mesh.vertices,
vertex_colors=mesh.visual.vertex_colors,
indices=mesh.faces,
),
timeless=True,
)
@@ -375,8 +429,8 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
annotation = load_json(bbox_annotations_path)
bboxes_3d, bbox_labels, colors_list = log_annotated_bboxes(annotation)

lowres_posed_entity_id = "world/camera_posed_lowres/image_posed_lowres"
highres_entity_id = "world/camera_highres/image_highres"
lowres_posed_entity_id = "world/camera_lowres"
highres_entity_id = "world/camera_highres"

print("Processing frames…")
for frame_timestamp in tqdm(lowres_frame_ids):
51 changes: 51 additions & 0 deletions examples/python/detect_and_track_objects/main.py
@@ -36,6 +36,55 @@
DetrForSegmentation,
)

DESCRIPTION = """
# Detect and Track Objects
This is a more elaborate example applying simple object detection and segmentation on a video using the Huggingface
`transformers` library. Tracking across frames is performed using [CSRT](https://arxiv.org/abs/1611.08461) from
OpenCV. The results are visualized using Rerun.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/detect_and_track_objects/main.py).
### Input Video
The input video is logged as a sequence of
[rr.Image objects](https://www.rerun.io/docs/reference/data_types/archetypes/image) to the
[image/rgb entity](recording://image/rgb). Since the detection and segmentation model operates on smaller images, the
resized images are logged to the separate [image_scaled/rgb entity](recording://image_scaled/rgb). This allows us to
subsequently visualize the segmentation mask on top of the video.
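A simplified sketch of this part of the pipeline (reading frames with OpenCV and logging both the full-size frame and a
downscaled copy; the scale factor here is arbitrary) might look like:
```python
import cv2
import rerun as rr

cap = cv2.VideoCapture(video_path)  # `video_path` is the input video used by the example
frame_idx = 0
while cap.isOpened():
    ok, bgr = cap.read()
    if not ok:
        break
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    rr.set_time_sequence("frame", frame_idx)
    rr.log("image/rgb", rr.Image(rgb))
    # the detection and segmentation model runs on a smaller copy of the frame
    rr.log("image_scaled/rgb", rr.Image(cv2.resize(rgb, (0, 0), fx=0.5, fy=0.5)))
    frame_idx += 1
```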
### Segmentations
The [segmentation result](recording://image_scaled/segmentation) is logged through a combination of two archetypes.
The segmentation image itself is logged as an
[rr.SegmentationImage archetype](https://www.rerun.io/docs/reference/data_types/archetypes/segmentation_image) and
contains the id for each pixel. It is logged to the [image_scaled/segmentation entity](recording://image_scaled/segmentation).
The color and label for each class is determined by the
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) which is
logged to the root entity using `rr.log("/", ..., timeless=True)`, as it should apply to the whole sequence and all
entities that have a class id.
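A minimal sketch of this combination, with an invented two-class annotation context and a dummy mask:
```python
import numpy as np
import rerun as rr

# class table, logged once and timeless: applies to all entities below the root that use class ids
rr.log(
    "/",
    rr.AnnotationContext(
        [
            rr.AnnotationInfo(id=1, label="person", color=(255, 0, 0)),
            rr.AnnotationInfo(id=2, label="car", color=(0, 0, 255)),
        ]
    ),
    timeless=True,
)

# per frame: an image of class ids, colored and labeled via the annotation context
mask = np.zeros((180, 320), dtype=np.uint8)
mask[60:120, 100:220] = 1  # pretend a person was segmented here
rr.log("image_scaled/segmentation", rr.SegmentationImage(mask))
```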
### Detections
The detections and tracked bounding boxes are visualized by logging the
[rr.Boxes2D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/boxes2d) to Rerun.
The color and label of the bounding boxes are determined by their class id, relying on the same
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) as the
segmentation images. This ensures that a bounding box and a segmentation image with the same class id will also have the
same color.
Note that it is also possible to log multiple annotation contexts should different colors and/or labels be desired.
The annotation context is resolved by seeking up the entity hierarchy.
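As a sketch, a single detection could be logged like this (coordinates and class id are invented; the entity path is
only illustrative):
```python
rr.log(
    "image_scaled/detections",  # illustrative entity path
    rr.Boxes2D(
        mins=[[10.0, 20.0]],   # top-left corners, in pixels
        sizes=[[80.0, 60.0]],  # widths and heights, in pixels
        class_ids=[1],         # resolved to a color and label by the annotation context above
    ),
)
```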
### Text Log
Through the [rr.TextLog archetype](https://www.rerun.io/docs/reference/data_types/archetypes/text_log), text at
different importance levels can be logged. Rerun integrates with the
[Python logging module](https://docs.python.org/3/library/logging.html). After an initial setup that is described on the
[rr.TextLog page](https://www.rerun.io/docs/reference/data_types/archetypes/text_log#textlogintegration), statements
such as `logging.info("...")`, `logging.debug("...")`, etc. will show up in the Rerun viewer. In the viewer you can
adjust the filter level and look at the messages time-synchronized with respect to other logged data.
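The setup boils down to attaching a Rerun handler to the standard logger; a rough sketch (entity path and log level
chosen arbitrarily):
```python
import logging
import rerun as rr

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(rr.LoggingHandler("logs"))  # forward log records to the "logs" entity

logging.info("Starting tracking…")      # appears as a TextLog entry in the viewer
logging.debug("Frame 0: 3 detections")
```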
""".strip()


@dataclass
class Detection:
@@ -403,6 +452,8 @@ def main() -> None:

setup_logging()

rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

video_path: str = args.video_path
if not video_path:
video_path = get_downloaded_path(args.dataset_dir, args.video)
34 changes: 27 additions & 7 deletions examples/python/dicom_mri/main.py
@@ -26,6 +26,30 @@
DATASET_DIR: Final = Path(os.path.dirname(__file__)) / "dataset"
DATASET_URL: Final = "https://storage.googleapis.com/rerun-example-datasets/dicom.zip"

DESCRIPTION = """
# Dicom MRI
This example visualizes an MRI scan using Rerun.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/dicom_mri/main.py).
The visualization of the data consists of just the following line
```python
rr.log("tensor", rr.Tensor(voxels_volume_u16, dim_names=["right", "back", "up"]))
```
`voxels_volume_u16` is a `numpy.array` of shape `(512, 512, 512)` containing volumetric MRI intensities. We can
visualize such information in Rerun by logging the `numpy.array` as an
[rr.Tensor archetype](https://www.rerun.io/docs/reference/data_types/archetypes/tensor). Here the tensor is logged to
the [tensor entity](recording://tensor), however any other name for the entity could have been chosen.
In the Rerun viewer you can inspect the data in detail. The `dim_names` provided in the above call to `rr.log` help to
give semantic meaning to each axis. After selecting the tensor view, you can adjust various settings in the Blueprint
settings on the right-hand side. For example, you can adjust the color map, the brightness, which dimensions to show as
an image and which to select from, and more.
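As a self-contained sketch, the same call can be tried on a small synthetic volume (a bright sphere) instead of the
real scan:
```python
import numpy as np
import rerun as rr

coords = np.mgrid[0:128, 0:128, 0:128] - 64  # voxel coordinates, centered on the volume
voxels_volume_u16 = (np.linalg.norm(coords, axis=0) < 40).astype(np.uint16) * 536

rr.log("tensor", rr.Tensor(voxels_volume_u16, dim_names=["right", "back", "up"]))
```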
"""


def extract_voxel_data(
dicom_files: Iterable[Path],
@@ -47,18 +71,14 @@ def list_dicom_files(dir: Path) -> Iterable[Path]:


def read_and_log_dicom_dataset(dicom_files: Iterable[Path]) -> None:
rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

voxels_volume, _ = extract_voxel_data(dicom_files)

# the data is i16, but in range [0, 536].
voxels_volume_u16: npt.NDArray[np.uint16] = np.require(voxels_volume, np.uint16)

rr.log(
"tensor",
rr.Tensor(
voxels_volume_u16,
dim_names=["right", "back", "up"],
),
)
rr.log("tensor", rr.Tensor(voxels_volume_u16, dim_names=["right", "back", "up"]))


def ensure_dataset_downloaded() -> Iterable[Path]:
35 changes: 35 additions & 0 deletions examples/python/dna/main.py
@@ -14,8 +14,43 @@
from rerun_demo.data import build_color_spiral
from rerun_demo.util import bounce_lerp, interleave

DESCRIPTION = """
# DNA
This is a minimal example that logs synthetic 3D data in the shape of a double helix. The underlying data is generated
using numpy and visualized using Rerun.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/dna/main.py).
### Colored 3D points
The colored 3D points were added to the scene by logging the
[rr.Points3D archetype](https://www.rerun.io/docs/reference/data_types/points3d) to the
[helix/structure/left](recording://helix/structure/left) and [helix/structure/right](recording://helix/structure/right)
entities.
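A self-contained sketch of logging two such colored point strands (generating a simple helix with numpy here instead
of the example's `build_color_spiral` helper):
```python
import numpy as np
import rerun as rr

t = np.linspace(0.0, 4.0 * np.pi, 100)
left = np.column_stack([np.sin(t), np.cos(t), t / (2.0 * np.pi)])     # one strand
right = np.column_stack([-np.sin(t), -np.cos(t), t / (2.0 * np.pi)])  # the opposite strand
colors = np.column_stack([np.linspace(0, 255, 100)] * 3).astype(np.uint8)

rr.log("helix/structure/left", rr.Points3D(left, colors=colors, radii=0.08))
rr.log("helix/structure/right", rr.Points3D(right, colors=colors, radii=0.08))
```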
### 3D line strips
The 3D line strips connecting the 3D point pairs are logged as an
[rr.LineStrips3D archetype](https://www.rerun.io/docs/reference/data_types/line_strips3d) to the
[helix/structure/scaffolding entity](recording://helix/structure/scaffolding).
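Continuing the sketch above, each point pair can be connected with a short two-point strip:
```python
strips = [[l, r] for l, r in zip(left, right)]  # one two-point strip per rung of the ladder
rr.log("helix/structure/scaffolding", rr.LineStrips3D(strips, colors=[128, 128, 128]))
```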
### Rotation
The whole structure is rotated over time by logging a
[rr.Transform3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/transform3d) to the
[helix/structure entity](recording://helix/structure.Transform3D) that changes over time. This transform determines the rotation of
the [structure entity](recording://helix/structure) relative to the [helix](recording://helix) entity. Since all other
entities are children of [helix/structure](recording://helix/structure) they will also rotate based on this transform.
You can visualize this rotation by selecting the two entities on the left-hand side and activating `Show transform` in
the Blueprint settings on the right-hand side. You will see one static frame (i.e., the frame of
[helix](recording://helix)) and the rotating frame (i.e., the frame of [structure](recording://helix/structure)).
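A rough sketch of such a time-varying transform (the angular speed is arbitrary, and the
`rr.TranslationRotationScale3D`/`rr.RotationAxisAngle` datatypes are assumptions about the SDK surface, not taken from
this example's source):
```python
import rerun as rr

for step in range(100):
    time = step / 10.0
    rr.set_time_seconds("stable_time", time)
    rr.log(
        "helix/structure",
        rr.Transform3D(
            rr.TranslationRotationScale3D(
                rotation=rr.RotationAxisAngle(axis=[0.0, 0.0, 1.0], radians=0.5 * time)
            )
        ),
    )
```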
""".strip()


def log_data() -> None:
rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

rr.set_time_seconds("stable_time", 0)

NUM_POINTS = 100
54 changes: 48 additions & 6 deletions examples/python/human_pose_tracking/main.py
@@ -22,15 +22,57 @@
DATASET_URL_BASE: Final = "https://storage.googleapis.com/rerun-example-datasets/pose_movement"


DESCRIPTION = """
# Human Pose Tracking
This example uses Rerun to visualize the output of [MediaPipe](https://developers.google.com/mediapipe)-based tracking
of a human pose in 2D and 3D.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/human_pose_tracking/main.py).
### Input Video
The input video is logged as a sequence of
[rr.Image objects](https://www.rerun.io/docs/reference/data_types/archetypes/image) to the [video entity](recording://video).
### Segmentation
The [segmentation result](recording://video/mask) is logged through a combination of two archetypes. The segmentation
image itself is logged as an
[rr.SegmentationImage archetype](https://www.rerun.io/docs/reference/data_types/archetypes/segmentation_image) and
contains the id for each pixel. The color is determined by the
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) which is
logged with `rr.log(..., timeless=True)`, as it should apply to the whole sequence.
### Skeletons
The [2D](recording://video/pose/points) and [3D skeletons](recording://person/pose/points) are also logged through a
similar combination of two entities.
First, a timeless
[rr.ClassDescription](https://www.rerun.io/docs/reference/data_types/datatypes/class_description) is logged (note that
this is equivalent to logging an
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) as in the
segmentation case). The class description contains the mapping from keypoint ids to labels and the information on how
to connect the keypoints into a skeleton.
Second, the actual keypoint positions are logged in 2D
and 3D as [rr.Points2D](https://www.rerun.io/docs/reference/data_types/archetypes/points2d) and
[rr.Points3D](https://www.rerun.io/docs/reference/data_types/archetypes/points3d) archetypes, respectively.
""".strip()


def track_pose(video_path: str, segment: bool) -> None:
mp_pose = mp.solutions.pose

rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

rr.log(
"/",
rr.ClassDescription(
info=rr.AnnotationInfo(id=0, label="Person"),
keypoint_annotations=[rr.AnnotationInfo(id=lm.value, label=lm.name) for lm in mp_pose.PoseLandmark],
keypoint_connections=mp_pose.POSE_CONNECTIONS,
rr.AnnotationContext(
rr.ClassDescription(
info=rr.AnnotationInfo(id=0, label="Person"),
keypoint_annotations=[rr.AnnotationInfo(id=lm.value, label=lm.name) for lm in mp_pose.PoseLandmark],
keypoint_connections=mp_pose.POSE_CONNECTIONS,
)
),
timeless=True,
)
@@ -60,14 +102,14 @@ def track_pose(video_path: str, segment: bool) -> None:
if landmark_positions_2d is not None:
rr.log(
"video/pose/points",
rr.Points2D(landmark_positions_2d, keypoint_ids=mp_pose.PoseLandmark),
rr.Points2D(landmark_positions_2d, class_ids=0, keypoint_ids=mp_pose.PoseLandmark),
)

landmark_positions_3d = read_landmark_positions_3d(results)
if landmark_positions_3d is not None:
rr.log(
"person/pose/points",
rr.Points3D(landmark_positions_3d, keypoint_ids=mp_pose.PoseLandmark),
rr.Points3D(landmark_positions_3d, class_ids=0, keypoint_ids=mp_pose.PoseLandmark),
)

segmentation_mask = results.segmentation_mask