Add Markdown description to demo examples (rerun-io#3631)
Add Markdown description walking through most of the archetypes involved
in each demo example.
roym899 authored Oct 3, 2023
1 parent 33ccb8c commit 80a3b1f
Showing 8 changed files with 292 additions and 41 deletions.
5 changes: 2 additions & 3 deletions examples/python/arkit_scenes/README.md
@@ -16,9 +16,8 @@ demo: true
<img src="https://static.rerun.io/arkit_scenes/fb9ec9e8d965369d39d51b17fc7fc5bae6be10cc/full.png" alt="ARKit Scenes screenshot">
</picture>


Visualizes the [ARKitScenes dataset](https://github.com/apple/ARKitScenes/) using the Rerun SDK.
The dataset contains color+depth images, the reconstructed mesh and labeled bounding boxes around furniture.
This example visualizes the [ARKitScenes dataset](https://github.com/apple/ARKitScenes/) using Rerun. The dataset
contains color images, depth images, the reconstructed mesh, and labeled bounding boxes around furniture.

```bash
pip install -r examples/python/arkit_scenes/requirements.txt
72 changes: 63 additions & 9 deletions examples/python/arkit_scenes/main.py
@@ -4,7 +4,7 @@
import argparse
import json
import os
from pathlib import Path, PosixPath
from pathlib import Path
from typing import Any, Tuple

import cv2
@@ -32,6 +32,59 @@
assert set(ORIENTATION.keys()) == set(AVAILABLE_RECORDINGS)


DESCRIPTION = """
# ARKit Scenes
This example visualizes the [ARKitScenes dataset](https://github.com/apple/ARKitScenes/) using Rerun. The dataset
contains color images, depth images, the reconstructed mesh, and labeled bounding boxes around furniture.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/arkit_scenes/main.py).
### Moving RGB-D camera
To log a moving RGB-D camera we need to log four objects: the pinhole camera (intrinsics), the camera pose
(extrinsics), the color image and the depth image.
The [rr.Pinhole archetype](https://www.rerun.io/docs/reference/data_types/archetypes/pinhole) is logged to
[world/camera_lowres](recording://world/camera_lowres) to define the intrinsics of the camera. This
determines how to go from the 3D camera frame to the 2D image plane. The extrinsics are logged as an
[rr.Transform3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/transform3d) to the
[same entity world/camera_lowres](recording://world/camera_lowres). Note that we could also log the extrinsics to
`world/camera` and the intrinsics to `world/camera/image` instead. Here, we log both on the same entity path to keep
the paths shorter.
The RGB image is logged as an
[rr.Image archetype](https://www.rerun.io/docs/reference/data_types/archetypes/image) to the
[world/camera_lowres/rgb entity](recording://world/camera_lowres/rgb) as a child of the intrinsics + extrinsics
entity described in the previous paragraph. Similarly the depth image is logged as an
[rr.DepthImage archetype](https://www.rerun.io/docs/reference/data_types/archetypes/depth_image) to
[world/camera_lowres/depth](recording://world/camera_lowres/depth).
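For illustration, a rough, self-contained sketch of these four log calls (with made-up intrinsics, dummy image buffers
instead of real dataset frames, and `rr.TranslationRotationScale3D` assumed as the pose datatype) could look like this:
```python
import numpy as np
import rerun as rr

w, h = 256, 192
intrinsic = np.array([[200.0, 0.0, w / 2], [0.0, 200.0, h / 2], [0.0, 0.0, 1.0]])  # invented pinhole matrix
rgb = np.zeros((h, w, 3), dtype=np.uint8)       # placeholder color frame
depth = np.full((h, w), 2000, dtype=np.uint16)  # placeholder depth frame, in millimeters

# extrinsics and intrinsics on the same entity, images as children of that entity
rr.log("world/camera_lowres", rr.Transform3D(transform=rr.TranslationRotationScale3D(translation=[0.0, 0.0, 1.5])))
rr.log("world/camera_lowres", rr.Pinhole(image_from_camera=intrinsic, resolution=[w, h]))
rr.log("world/camera_lowres/rgb", rr.Image(rgb))
rr.log("world/camera_lowres/depth", rr.DepthImage(depth, meter=1000.0))
```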
### Ground-truth mesh
The mesh is logged as an [rr.Mesh3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/mesh3d).
In this case the mesh is composed of mesh vertices, indices (i.e., which vertices belong to the same face), and vertex
colors. Given a `trimesh.Trimesh` the following call is used to log it to Rerun
```python
rr.log(
"world/mesh",
rr.Mesh3D(
vertex_positions=mesh.vertices,
vertex_colors=mesh.visual.vertex_colors,
indices=mesh.faces,
),
timeless=True,
)
```
Here, the mesh is logged to the [world/mesh entity](recording://world/mesh) and is marked as timeless, since it does not
change in the context of this visualization.
### 3D bounding boxes
The bounding boxes around the furniture are visualized by logging the
[rr.Boxes3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/boxes3d). In this example, each
bounding box is logged as a separate entity to the common [world/annotations](recording://world/annotations) parent.
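For example, one such box could be logged roughly like this (the size, label, and color here are invented, not taken
from the dataset):
```python
rr.log(
    "world/annotations/box-0",  # one entity per bounding box, under the common parent
    rr.Boxes3D(
        centers=[[1.0, 0.5, 0.3]],
        half_sizes=[[0.5, 0.4, 0.3]],
        labels=["chair"],
        colors=[(255, 99, 71)],
    ),
    timeless=True,
)
```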
""".strip()


def load_json(js_path: Path) -> dict[str, Any]:
with open(js_path) as f:
json_data: dict[str, Any] = json.load(f)
@@ -255,8 +308,7 @@ def log_camera(
log_line_segments(f"{entity_id}/bbox-2d-segments/{label}", bbox_2d.reshape(-1, 2), colors[i], label)

# pathlib makes it easy to get the parent, but log methods requires a string
camera_path = str(PosixPath(entity_id).parent)
rr.log(camera_path, rr.Transform3D(transform=camera_from_world))
rr.log(entity_id, rr.Transform3D(transform=camera_from_world))
rr.log(entity_id, rr.Pinhole(image_from_camera=intrinsic, resolution=[w, h]))


@@ -327,6 +379,8 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
-------
None
"""
rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

video_id = recording_path.stem
lowres_image_dir = recording_path / "lowres_wide"
image_dir = recording_path / "wide"
@@ -359,13 +413,13 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
print(f"Loading {ply_path}…")
assert os.path.isfile(ply_path), f"Failed to find {ply_path}"

mesh_ply = trimesh.load(str(ply_path))
mesh = trimesh.load(str(ply_path))
rr.log(
"world/mesh",
rr.Mesh3D(
vertex_positions=mesh_ply.vertices,
vertex_colors=mesh_ply.visual.vertex_colors,
indices=mesh_ply.faces,
vertex_positions=mesh.vertices,
vertex_colors=mesh.visual.vertex_colors,
indices=mesh.faces,
),
timeless=True,
)
@@ -375,8 +429,8 @@ def log_arkit(recording_path: Path, include_highres: bool) -> None:
annotation = load_json(bbox_annotations_path)
bboxes_3d, bbox_labels, colors_list = log_annotated_bboxes(annotation)

lowres_posed_entity_id = "world/camera_posed_lowres/image_posed_lowres"
highres_entity_id = "world/camera_highres/image_highres"
lowres_posed_entity_id = "world/camera_lowres"
highres_entity_id = "world/camera_highres"

print("Processing frames…")
for frame_timestamp in tqdm(lowres_frame_ids):
51 changes: 51 additions & 0 deletions examples/python/detect_and_track_objects/main.py
@@ -36,6 +36,55 @@
DetrForSegmentation,
)

DESCRIPTION = """
# Detect and Track Objects
This is a more elaborate example applying simple object detection and segmentation on a video using the Huggingface
`transformers` library. Tracking across frames is performed using [CSRT](https://arxiv.org/abs/1611.08461) from
OpenCV. The results are visualized using Rerun.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/detect_and_track_objects/main.py).
### Input Video
The input video is logged as a sequence of
[rr.Image objects](https://www.rerun.io/docs/reference/data_types/archetypes/image) to the
[image/rgb entity](recording://image/rgb). Since the detection and segmentation model operates on smaller images, the
resized images are logged to the separate [image_scaled/rgb entity](recording://image_scaled/rgb). This allows us to
subsequently visualize the segmentation mask on top of the video.
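A simplified sketch of this part of the pipeline (reading frames with OpenCV and logging both the full-size frame and a
downscaled copy; the scale factor here is arbitrary) might look like:
```python
import cv2
import rerun as rr

cap = cv2.VideoCapture(video_path)  # `video_path` is the input video used by the example
frame_idx = 0
while cap.isOpened():
    ok, bgr = cap.read()
    if not ok:
        break
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    rr.set_time_sequence("frame", frame_idx)
    rr.log("image/rgb", rr.Image(rgb))
    # the detection and segmentation model runs on a smaller copy of the frame
    rr.log("image_scaled/rgb", rr.Image(cv2.resize(rgb, (0, 0), fx=0.5, fy=0.5)))
    frame_idx += 1
```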
### Segmentations
The [segmentation result](recording://image_scaled/segmentation) is logged through a combination of two archetypes.
The segmentation image itself is logged as an
[rr.SegmentationImage archetype](https://www.rerun.io/docs/reference/data_types/archetypes/segmentation_image) and
contains the id for each pixel. It is logged to the [image_scaled/segmentation entity](recording://image_scaled/segmentation).
The color and label for each class is determined by the
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) which is
logged to the root entity using `rr.log("/", ..., timeless=True)`, as it should apply to the whole sequence and all
entities that have a class id.
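A minimal sketch of this combination, with an invented two-class annotation context and a dummy mask:
```python
import numpy as np
import rerun as rr

# class table, logged once and timeless: applies to all entities below the root that use class ids
rr.log(
    "/",
    rr.AnnotationContext(
        [
            rr.AnnotationInfo(id=1, label="person", color=(255, 0, 0)),
            rr.AnnotationInfo(id=2, label="car", color=(0, 0, 255)),
        ]
    ),
    timeless=True,
)

# per frame: an image of class ids, colored and labeled via the annotation context
mask = np.zeros((180, 320), dtype=np.uint8)
mask[60:120, 100:220] = 1  # pretend a person was segmented here
rr.log("image_scaled/segmentation", rr.SegmentationImage(mask))
```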
### Detections
The detections and tracked bounding boxes are visualized by logging the
[rr.Boxes2D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/boxes2d) to Rerun.
The color and label of the bounding boxes are determined by their class id, relying on the same
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) as the
segmentation images. This ensures that a bounding box and a segmentation image with the same class id will also have the
same color.
Note that it is also possible to log multiple annotation contexts should different colors and/or labels be desired.
The annotation context is resolved by seeking up the entity hierarchy.
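As a sketch, a single detection could be logged like this (coordinates and class id are invented; the entity path is
only illustrative):
```python
rr.log(
    "image_scaled/detections",  # illustrative entity path
    rr.Boxes2D(
        mins=[[10.0, 20.0]],   # top-left corners, in pixels
        sizes=[[80.0, 60.0]],  # widths and heights, in pixels
        class_ids=[1],         # resolved to a color and label by the annotation context above
    ),
)
```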
### Text Log
Through the [rr.TextLog archetype](https://www.rerun.io/docs/reference/data_types/archetypes/text_log), text at
different importance levels can be logged. Rerun integrates with the
[Python logging module](https://docs.python.org/3/library/logging.html). After an initial setup that is described on the
[rr.TextLog page](https://www.rerun.io/docs/reference/data_types/archetypes/text_log#textlogintegration), statements
such as `logging.info("...")`, `logging.debug("...")`, etc. will show up in the Rerun viewer. In the viewer you can
adjust the filter level and look at the messages time-synchronized with respect to other logged data.
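The setup boils down to attaching a Rerun handler to the standard logger; a rough sketch (entity path and log level
chosen arbitrarily):
```python
import logging
import rerun as rr

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(rr.LoggingHandler("logs"))  # forward log records to the "logs" entity

logging.info("Starting tracking…")      # appears as a TextLog entry in the viewer
logging.debug("Frame 0: 3 detections")
```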
""".strip()


@dataclass
class Detection:
@@ -403,6 +452,8 @@ def main() -> None:

setup_logging()

rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

video_path: str = args.video_path
if not video_path:
video_path = get_downloaded_path(args.dataset_dir, args.video)
34 changes: 27 additions & 7 deletions examples/python/dicom_mri/main.py
@@ -26,6 +26,30 @@
DATASET_DIR: Final = Path(os.path.dirname(__file__)) / "dataset"
DATASET_URL: Final = "https://storage.googleapis.com/rerun-example-datasets/dicom.zip"

DESCRIPTION = """
# Dicom MRI
This example visualizes an MRI scan using Rerun.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/dicom_mri/main.py).
The visualization of the data consists of just the following line
```python
rr.log("tensor", rr.Tensor(voxels_volume_u16, dim_names=["right", "back", "up"]))
```
`voxels_volume_u16` is a `numpy.array` of shape `(512, 512, 512)` containing volumetric MRI intensities. We can
visualize such information in Rerun by logging the `numpy.array` as an
[rr.Tensor archetype](https://www.rerun.io/docs/reference/data_types/archetypes/tensor). Here the tensor is logged to
the [tensor entity](recording://tensor), however any other name for the entity could have been chosen.
In the Rerun viewer you can inspect the data in detail. The `dim_names` provided in the above call to `rr.log` help to
give semantic meaning to each axis. After selecting the tensor view, you can adjust various settings in the Blueprint
settings on the right-hand side. For example, you can adjust the color map, the brightness, which dimensions to show as
an image and which to select from, and more.
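As a self-contained sketch, the same call can be tried on a small synthetic volume (a bright sphere) instead of the
real scan:
```python
import numpy as np
import rerun as rr

coords = np.mgrid[0:128, 0:128, 0:128] - 64  # voxel coordinates, centered on the volume
voxels_volume_u16 = (np.linalg.norm(coords, axis=0) < 40).astype(np.uint16) * 536

rr.log("tensor", rr.Tensor(voxels_volume_u16, dim_names=["right", "back", "up"]))
```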
"""


def extract_voxel_data(
dicom_files: Iterable[Path],
@@ -47,18 +71,14 @@ def list_dicom_files(dir: Path) -> Iterable[Path]:


def read_and_log_dicom_dataset(dicom_files: Iterable[Path]) -> None:
rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

voxels_volume, _ = extract_voxel_data(dicom_files)

# the data is i16, but in range [0, 536].
voxels_volume_u16: npt.NDArray[np.uint16] = np.require(voxels_volume, np.uint16)

rr.log(
"tensor",
rr.Tensor(
voxels_volume_u16,
dim_names=["right", "back", "up"],
),
)
rr.log("tensor", rr.Tensor(voxels_volume_u16, dim_names=["right", "back", "up"]))


def ensure_dataset_downloaded() -> Iterable[Path]:
35 changes: 35 additions & 0 deletions examples/python/dna/main.py
@@ -14,8 +14,43 @@
from rerun_demo.data import build_color_spiral
from rerun_demo.util import bounce_lerp, interleave

DESCRIPTION = """
# DNA
This is a minimal example that logs synthetic 3D data in the shape of a double helix. The underlying data is generated
using numpy and visualized using Rerun.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/dna/main.py).
### Colored 3D points
The colored 3D points were added to the scene by logging the
[rr.Points3D archetype](https://www.rerun.io/docs/reference/data_types/points3d) to the
[helix/structure/left](recording://helix/structure/left) and [helix/structure/right](recording://helix/structure/right)
entities.
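A self-contained sketch of logging two such colored point strands (generating a simple helix with numpy here instead
of the example's `build_color_spiral` helper):
```python
import numpy as np
import rerun as rr

t = np.linspace(0.0, 4.0 * np.pi, 100)
left = np.column_stack([np.sin(t), np.cos(t), t / (2.0 * np.pi)])     # one strand
right = np.column_stack([-np.sin(t), -np.cos(t), t / (2.0 * np.pi)])  # the opposite strand
colors = np.column_stack([np.linspace(0, 255, 100)] * 3).astype(np.uint8)

rr.log("helix/structure/left", rr.Points3D(left, colors=colors, radii=0.08))
rr.log("helix/structure/right", rr.Points3D(right, colors=colors, radii=0.08))
```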
### 3D line strips
The 3D line strips connecting the 3D point pairs are logged as an
[rr.LineStrips3D archetype](https://www.rerun.io/docs/reference/data_types/line_strips3d) to the
[helix/structure/scaffolding entity](recording://helix/structure/scaffolding).
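Continuing the sketch above, each point pair can be connected with a short two-point strip:
```python
strips = [[l, r] for l, r in zip(left, right)]  # one two-point strip per rung of the ladder
rr.log("helix/structure/scaffolding", rr.LineStrips3D(strips, colors=[128, 128, 128]))
```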
### Rotation
The whole structure is rotated over time by logging a
[rr.Transform3D archetype](https://www.rerun.io/docs/reference/data_types/archetypes/transform3d) to the
[helix/structure entity](recording://helix/structure.Transform3D) that changes over time. This transform determines the rotation of
the [structure entity](recording://helix/structure) relative to the [helix](recording://helix) entity. Since all other
entities are children of [helix/structure](recording://helix/structure) they will also rotate based on this transform.
You can visualize this rotation by selecting the two entities on the left-hand side and activating `Show transform` in
the Blueprint settings on the right-hand side. You will see one static frame (i.e., the frame of
[helix](recording://helix)) and the rotating frame (i.e., the frame of [structure](recording://helix/structure)).
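A rough sketch of such a time-varying transform (the angular speed is arbitrary, and the
`rr.TranslationRotationScale3D`/`rr.RotationAxisAngle` datatypes are assumptions about the SDK surface, not taken from
this example's source):
```python
import rerun as rr

for step in range(100):
    time = step / 10.0
    rr.set_time_seconds("stable_time", time)
    rr.log(
        "helix/structure",
        rr.Transform3D(
            rr.TranslationRotationScale3D(
                rotation=rr.RotationAxisAngle(axis=[0.0, 0.0, 1.0], radians=0.5 * time)
            )
        ),
    )
```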
""".strip()


def log_data() -> None:
rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

rr.set_time_seconds("stable_time", 0)

NUM_POINTS = 100
54 changes: 48 additions & 6 deletions examples/python/human_pose_tracking/main.py
@@ -22,15 +22,57 @@
DATASET_URL_BASE: Final = "https://storage.googleapis.com/rerun-example-datasets/pose_movement"


DESCRIPTION = """
# Human Pose Tracking
This example uses Rerun to visualize the output of [MediaPipe](https://developers.google.com/mediapipe)-based tracking
of a human pose in 2D and 3D.
## How it was made
The full source code for this example is available
[on GitHub](https://github.com/rerun-io/rerun/blob/latest/examples/python/human_pose_tracking/main.py).
### Input Video
The input video is logged as a sequence of
[rr.Image objects](https://www.rerun.io/docs/reference/data_types/archetypes/image) to the [video entity](recording://video).
### Segmentation
The [segmentation result](recording://video/mask) is logged through a combination of two archetypes. The segmentation
image itself is logged as an
[rr.SegmentationImage archetype](https://www.rerun.io/docs/reference/data_types/archetypes/segmentation_image) and
contains the id for each pixel. The color is determined by the
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) which is
logged with `rr.log(..., timeless=True)`, as it should apply to the whole sequence.
### Skeletons
The [2D](recording://video/pose/points) and [3D skeletons](recording://person/pose/points) are also logged through a
similar combination of two entities.
First, a timeless
[rr.ClassDescription](https://www.rerun.io/docs/reference/data_types/datatypes/class_description) is logged (note that
this is equivalent to logging an
[rr.AnnotationContext archetype](https://www.rerun.io/docs/reference/data_types/archetypes/annotation_context) as in the
segmentation case). The class description contains the mapping from keypoint ids to labels and the information on how
to connect the keypoints into a skeleton.
Second, the actual keypoint positions are logged in 2D
and 3D as [rr.Points2D](https://www.rerun.io/docs/reference/data_types/archetypes/points2d) and
[rr.Points3D](https://www.rerun.io/docs/reference/data_types/archetypes/points3d) archetypes, respectively.
""".strip()


def track_pose(video_path: str, segment: bool) -> None:
mp_pose = mp.solutions.pose

rr.log("description", rr.TextDocument(DESCRIPTION, media_type=rr.MediaType.MARKDOWN), timeless=True)

rr.log(
"/",
rr.ClassDescription(
info=rr.AnnotationInfo(id=0, label="Person"),
keypoint_annotations=[rr.AnnotationInfo(id=lm.value, label=lm.name) for lm in mp_pose.PoseLandmark],
keypoint_connections=mp_pose.POSE_CONNECTIONS,
rr.AnnotationContext(
rr.ClassDescription(
info=rr.AnnotationInfo(id=0, label="Person"),
keypoint_annotations=[rr.AnnotationInfo(id=lm.value, label=lm.name) for lm in mp_pose.PoseLandmark],
keypoint_connections=mp_pose.POSE_CONNECTIONS,
)
),
timeless=True,
)
@@ -60,14 +102,14 @@ def track_pose(video_path: str, segment: bool) -> None:
if landmark_positions_2d is not None:
rr.log(
"video/pose/points",
rr.Points2D(landmark_positions_2d, keypoint_ids=mp_pose.PoseLandmark),
rr.Points2D(landmark_positions_2d, class_ids=0, keypoint_ids=mp_pose.PoseLandmark),
)

landmark_positions_3d = read_landmark_positions_3d(results)
if landmark_positions_3d is not None:
rr.log(
"person/pose/points",
rr.Points3D(landmark_positions_3d, keypoint_ids=mp_pose.PoseLandmark),
rr.Points3D(landmark_positions_3d, class_ids=0, keypoint_ids=mp_pose.PoseLandmark),
)

segmentation_mask = results.segmentation_mask