
Commit 4fdaaad
My own changes.
tesfaldet committed Sep 1, 2024
1 parent 9ed0531 commit 4fdaaad
Showing 11 changed files with 2,128 additions and 48 deletions.
149 changes: 149 additions & 0 deletions .gitignore
@@ -0,0 +1,149 @@
# Image/video Data
*.jpg
*.jpeg
*.png
*.svg
*.pdf
*.mov
*.mp4
*.gif

# Data & Models
*.h5
*.tar
*.tar.gz

# Lightning-Hydra-Template
configs/local/default.yaml
/data/
/logs/
.env
*.log
.hydra/

# VS Code
.vscode/
*.code-workspace
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
**/.vscode

# JetBrains
.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.pyc

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# Environments
.venv
env/
envs/
venv/
ENV/
env.bak/
venv.bak/

# osxfuse
.fuse_hidden*

# DS_Store
*.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Pyre type checker
.pyre/

# Aim logging
.aim

# Network File Storage (NFS) files
.nfs*

# Output folder
outputs/

runs/
experiments/
multirun/
checkpoints/*

# Node modules
node_modules/

*.bin
48 changes: 24 additions & 24 deletions cotracker/datasets/kubric_movif_dataset.py
@@ -8,6 +8,7 @@
import torch
import cv2

+from pathlib import Path
import imageio
import numpy as np

@@ -357,41 +358,40 @@ def __init__(
        self.resize_lim = [0.75, 1.25]  # sample resizes from here
        self.resize_delta = 0.05
        self.max_crop_offset = 15
-        self.seq_names = [
-            fname
-            for fname in os.listdir(data_root)
-            if os.path.isdir(os.path.join(data_root, fname))
-        ]
-        print("found %d unique videos in %s" % (len(self.seq_names), self.data_root))
+        sequence_paths = sorted([seq_path for seq_path in Path(self.data_root).iterdir() if seq_path.is_dir()])
+        self.full_sequences = {
+            seq.name: {
+                "annot_path": str(seq / f"{seq.name}.npy"),
+            }
+            for seq in sequence_paths
+        }
+        self.sequence_names = sorted(list(self.full_sequences.keys()))
+
+        print(f"Found {len(self)} videos in {self.data_root}.")

    def getitem_helper(self, index):
        gotit = True
-        seq_name = self.seq_names[index]
-
-        npy_path = os.path.join(self.data_root, seq_name, seq_name + ".npy")
-        rgb_path = os.path.join(self.data_root, seq_name, "frames")
-
-        img_paths = sorted(os.listdir(rgb_path))
-        rgbs = []
-        for i, img_path in enumerate(img_paths):
-            rgbs.append(imageio.v2.imread(os.path.join(rgb_path, img_path)))
-
-        rgbs = np.stack(rgbs)
-        annot_dict = np.load(npy_path, allow_pickle=True).item()
-        traj_2d = annot_dict["coords"]
-        visibility = annot_dict["visibility"]
+        seq_name = self.sequence_names[index]
+        sequence = self.full_sequences[seq_name]
+        data = np.load(sequence["annot_path"], allow_pickle=True).item()
+        rgbs = data["video"]  # [S, H, W, C] uint8 NDArray containing the video frames.
+        traj_2d = data["points"]  # [N, S, 2] NDArray containing ground-truth trajectories.
+        visibility = ~data["occluded"]  # [N, S] bool NDArray, True where the target point is visible.
+
+        rgbs = [rgb for rgb in rgbs]  # List of [H, W, C] uint8 NDArrays.
+        traj_2d = traj_2d.transpose(1, 0, 2)  # [S, N, 2]
+        visibility = visibility.transpose(1, 0)  # [S, N]

        # random crop
        assert self.seq_len <= len(rgbs)
        if self.seq_len < len(rgbs):
            start_ind = np.random.choice(len(rgbs) - self.seq_len, 1)[0]

            rgbs = rgbs[start_ind : start_ind + self.seq_len]
-            traj_2d = traj_2d[:, start_ind : start_ind + self.seq_len]
-            visibility = visibility[:, start_ind : start_ind + self.seq_len]
+            traj_2d = traj_2d[start_ind : start_ind + self.seq_len]
+            visibility = visibility[start_ind : start_ind + self.seq_len]

-        traj_2d = np.transpose(traj_2d, (1, 0, 2))
-        visibility = np.transpose(np.logical_not(visibility), (1, 0))
        if self.use_augs:
            rgbs, traj_2d, visibility = self.add_photometric_augs(rgbs, traj_2d, visibility)
            rgbs, traj_2d = self.add_spatial_augs(rgbs, traj_2d, visibility)
@@ -438,4 +438,4 @@ def getitem_helper(self, index):
        return sample, gotit

    def __len__(self):
-        return len(self.seq_names)
+        return len(self.full_sequences)
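
For reference, a minimal sketch of the per-sequence annotation layout the new loader expects, with stand-in arrays built in place of a real file (key names are taken from the diff above; the shapes are hypothetical):

import numpy as np

# Stand-in for: data = np.load(sequence["annot_path"], allow_pickle=True).item()
S, H, W, N = 24, 256, 256, 16  # frames, height, width, tracked points (hypothetical)
data = {
    "video": np.zeros((S, H, W, 3), dtype=np.uint8),  # [S, H, W, C] frames
    "points": np.zeros((N, S, 2), dtype=np.float32),  # [N, S, 2] trajectories
    "occluded": np.zeros((N, S), dtype=bool),         # [N, S] occlusion flags
}

rgbs = data["video"]
traj_2d = data["points"].transpose(1, 0, 2)       # [S, N, 2], time-major as the loader wants
visibility = (~data["occluded"]).transpose(1, 0)  # [S, N], True where the point is visible
print(rgbs.shape, traj_2d.shape, visibility.shape)  # (24, 256, 256, 3) (24, 16, 2) (24, 16)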
2 changes: 2 additions & 0 deletions cotracker/datasets/tap_vid_datasets.py
@@ -179,7 +179,9 @@ def decode(frame):
        target_points = self.points_dataset[video_name]["points"]
        if self.resize_to_256:
            frames = resize_video(frames, [256, 256])
+            # frames = resize_video(frames, [384, 512])
            target_points *= np.array([255, 255])  # 1 should be mapped to 256-1
+            # target_points *= np.array([frames.shape[2] - 1, frames.shape[1] - 1])
        else:
            target_points *= np.array([frames.shape[2] - 1, frames.shape[1] - 1])

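As a sanity check on the rescaling above, a small sketch assuming target_points are normalized to [0, 1] (which the "1 should be mapped to 256-1" comment implies); the sample points are hypothetical:

import numpy as np

# Normalized (x, y) query points in [0, 1] (hypothetical values).
target_points = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])

# After resizing frames to 256x256, 1.0 must land on pixel 255 (= 256 - 1).
scaled = target_points * np.array([255, 255])
print(scaled)  # rows: [0, 0], [127.5, 127.5], [255, 255]
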
75 changes: 73 additions & 2 deletions cotracker/evaluation/core/evaluator.py
@@ -39,6 +39,7 @@ def compute_metrics(self, metrics, sample, pred_trajectory, dataset_name):
        pred_visibility = None
        if "tapvid" in dataset_name:
            B, T, N, D = sample.trajectory.shape
+
            traj = sample.trajectory.clone()
            thr = 0.9

@@ -85,6 +86,76 @@ def compute_metrics(self, metrics, sample, pred_trajectory, dataset_name):
            logging.info(f"avg: {metrics['avg']}")
            print("metrics", out_metrics)
            print("avg", metrics["avg"])
+
+            # UNCOMMENT
+            # H, W = sample.video.shape[-2:]
+            # device = sample.video.device
+            # out_metrics = {}
+            # d_vis_sum = d_occ_sum = d_sum_all = 0.0
+            # thrs = [1, 2, 4, 8, 16]
+            # sx_ = (W - 1) / 255.0
+            # sy_ = (H - 1) / 255.0
+            # sc_py = np.array([sx_, sy_]).reshape([1, 1, 2])
+            # sc_pt = torch.from_numpy(sc_py).float().to(device)
+            # __, first_visible_inds = torch.max(sample.visibility, dim=1)
+
+            # frame_ids_tensor = torch.arange(T, device=device)[None, :, None].repeat(B, 1, N)
+            # start_tracking_mask = frame_ids_tensor > (first_visible_inds.unsqueeze(1))
+
+            # for thr in thrs:
+            #     d_ = (
+            #         torch.norm(
+            #             pred_trajectory[..., :2] / sc_pt - sample.trajectory[..., :2] / sc_pt,
+            #             dim=-1,
+            #         )
+            #         < thr
+            #     ).float()  # B,S-1,N
+            #     d_occ = (
+            #         reduce_masked_mean(d_, (1 - sample.visibility) * start_tracking_mask).item()
+            #         * 100.0
+            #     )
+            #     d_occ_sum += d_occ
+            #     out_metrics[f"accuracy_occ_{thr}"] = d_occ
+
+            #     d_vis = (
+            #         reduce_masked_mean(d_, sample.visibility * start_tracking_mask).item() * 100.0
+            #     )
+            #     d_vis_sum += d_vis
+            #     out_metrics[f"accuracy_vis_{thr}"] = d_vis
+
+            #     d_all = reduce_masked_mean(d_, start_tracking_mask).item() * 100.0
+            #     d_sum_all += d_all
+            #     out_metrics[f"accuracy_{thr}"] = d_all
+
+            # d_occ_avg = d_occ_sum / len(thrs)
+            # d_vis_avg = d_vis_sum / len(thrs)
+            # d_all_avg = d_sum_all / len(thrs)
+
+            # sur_thr = 16
+            # dists = torch.norm(
+            #     pred_trajectory[..., :2] / sc_pt - sample.trajectory[..., :2] / sc_pt,
+            #     dim=-1,
+            # )  # B,S,N
+            # dist_ok = 1 - (dists > sur_thr).float() * sample.visibility  # B,S,N
+            # survival = torch.cumprod(dist_ok, dim=1)  # B,S,N
+            # out_metrics["survival"] = torch.mean(survival).item() * 100.0
+
+            # out_metrics["accuracy_occ"] = d_occ_avg
+            # out_metrics["accuracy_vis"] = d_vis_avg
+            # out_metrics["accuracy"] = d_all_avg
+
+            # metrics[sample.seq_name[0]] = out_metrics
+            # for metric_name in out_metrics.keys():
+            #     if "avg" not in metrics:
+            #         metrics["avg"] = {}
+            #     metrics["avg"][metric_name] = float(
+            #         np.mean([v[metric_name] for k, v in metrics.items() if k != "avg"])
+            #     )
+
+            # logging.info(f"Metrics: {out_metrics}")
+            # logging.info(f"avg: {metrics['avg']}")
+            # print("metrics", out_metrics)
+            # print("avg", metrics["avg"])
        elif dataset_name == "dynamic_replica" or dataset_name == "pointodyssey":
            *_, N, _ = sample.trajectory.shape
            B, T, N = sample.visibility.shape
@@ -133,7 +204,7 @@ def compute_metrics(self, metrics, sample, pred_trajectory, dataset_name):
            d_vis_avg = d_vis_sum / len(thrs)
            d_all_avg = d_sum_all / len(thrs)

-            sur_thr = 50
+            sur_thr = 16
            dists = torch.norm(
                pred_trajectory[..., :2] / sc_pt - sample.trajectory[..., :2] / sc_pt,
                dim=-1,
@@ -174,7 +245,7 @@ def evaluate_sequence(

        vis = Visualizer(
            save_dir=self.exp_dir,
-            fps=7,
+            fps=12,
        )

        for ind, sample in enumerate(tqdm(test_dataloader)):
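For context on the sur_thr change above, a minimal sketch of the survival metric computed in compute_metrics: a track "survives" until the first frame whose rescaled error exceeds sur_thr while the point is visible, and the score is the mean over all (batch, frame, point) entries. All tensors here are hypothetical stand-ins:

import torch

B, S, N = 1, 8, 4  # batch, frames, tracked points (hypothetical)
pred = torch.rand(B, S, N, 2) * 255
gt = pred + torch.randn(B, S, N, 2)  # small, mostly sub-threshold errors
visibility = torch.ones(B, S, N)     # 1 where the point is visible

sur_thr = 16
dists = torch.norm(pred - gt, dim=-1)                 # [B, S, N] per-frame error
dist_ok = 1 - (dists > sur_thr).float() * visibility  # 0 once a visible point strays
survival = torch.cumprod(dist_ok, dim=1)              # stays 1 until the first failure
print(torch.mean(survival).item() * 100.0)            # survival score in percent

Lowering sur_thr from 50 to 16 tightens the metric: a track now counts as lost as soon as its error exceeds 16 rescaled pixels instead of 50.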
