
Commit 4fdaaad
My own changes.
tesfaldet committed Sep 1, 2024
1 parent 9ed0531 commit 4fdaaad
Showing 11 changed files with 2,128 additions and 48 deletions.
149 changes: 149 additions & 0 deletions .gitignore
@@ -0,0 +1,149 @@
# Image/video Data
*.jpg
*.jpeg
*.png
*.svg
*.pdf
*.mov
*.mp4
*.gif

# Data & Models
*.h5
*.tar
*.tar.gz

# Lightning-Hydra-Template
configs/local/default.yaml
/data/
/logs/
.env
*.log
.hydra/

# VS Code
.vscode/
*.code-workspace
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
**/.vscode

# JetBrains
.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.pyc

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# Environments
.venv
env/
envs/
venv/
ENV/
env.bak/
venv.bak/

# osxfuse
.fuse_hidden*

# DS_Store
*.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Pyre type checker
.pyre/

# Aim logging
.aim

# Network File Storage (NFS) files
.nfs*

# Output folder
outputs/

runs/
experiments/
multirun/
checkpoints/*

# Node modules
node_modules/

*.bin
48 changes: 24 additions & 24 deletions cotracker/datasets/kubric_movif_dataset.py
@@ -8,6 +8,7 @@
import torch
import cv2

+from pathlib import Path
import imageio
import numpy as np

@@ -357,41 +358,40 @@ def __init__(
        self.resize_lim = [0.75, 1.25]  # sample resizes from here
        self.resize_delta = 0.05
        self.max_crop_offset = 15
-        self.seq_names = [
-            fname
-            for fname in os.listdir(data_root)
-            if os.path.isdir(os.path.join(data_root, fname))
-        ]
-        print("found %d unique videos in %s" % (len(self.seq_names), self.data_root))
+        sequence_paths = sorted([seq_path for seq_path in Path(self.data_root).iterdir() if seq_path.is_dir()])
+        self.full_sequences = {
+            seq.name: {
+                "annot_path": str(seq / f"{seq.name}.npy"),
+            }
+            for seq in sequence_paths
+        }
+        self.sequence_names = sorted(list(self.full_sequences.keys()))
+
+        print(f"Found {len(self)} videos in {self.data_root}.")

    def getitem_helper(self, index):
        gotit = True
-        seq_name = self.seq_names[index]
-
-        npy_path = os.path.join(self.data_root, seq_name, seq_name + ".npy")
-        rgb_path = os.path.join(self.data_root, seq_name, "frames")
-
-        img_paths = sorted(os.listdir(rgb_path))
-        rgbs = []
-        for i, img_path in enumerate(img_paths):
-            rgbs.append(imageio.v2.imread(os.path.join(rgb_path, img_path)))
-
-        rgbs = np.stack(rgbs)
-        annot_dict = np.load(npy_path, allow_pickle=True).item()
-        traj_2d = annot_dict["coords"]
-        visibility = annot_dict["visibility"]
+        seq_name = self.sequence_names[index]
+        sequence = self.full_sequences[seq_name]
+        data = np.load(sequence["annot_path"], allow_pickle=True).item()
+        rgbs = data["video"]  # [S, H, W, C] uint8 NDArray containing the video frames.
+        traj_2d = data["points"]  # [N, S, 2] NDArray containing ground-truth trajectories.
+        visibility = ~data["occluded"]  # [N, S] bool NDArray, True where the target point is visible.
+
+        rgbs = [rgb for rgb in rgbs]  # List of [H, W, C] uint8 NDArrays.
+        traj_2d = traj_2d.transpose(1, 0, 2)  # [S, N, 2]
+        visibility = visibility.transpose(1, 0)  # [S, N]

        # random crop
        assert self.seq_len <= len(rgbs)
        if self.seq_len < len(rgbs):
            start_ind = np.random.choice(len(rgbs) - self.seq_len, 1)[0]

            rgbs = rgbs[start_ind : start_ind + self.seq_len]
-            traj_2d = traj_2d[:, start_ind : start_ind + self.seq_len]
-            visibility = visibility[:, start_ind : start_ind + self.seq_len]
+            traj_2d = traj_2d[start_ind : start_ind + self.seq_len]
+            visibility = visibility[start_ind : start_ind + self.seq_len]

-        traj_2d = np.transpose(traj_2d, (1, 0, 2))
-        visibility = np.transpose(np.logical_not(visibility), (1, 0))
        if self.use_augs:
            rgbs, traj_2d, visibility = self.add_photometric_augs(rgbs, traj_2d, visibility)
            rgbs, traj_2d = self.add_spatial_augs(rgbs, traj_2d, visibility)
@@ -438,4 +438,4 @@ def getitem_helper(self, index):
        return sample, gotit

    def __len__(self):
-        return len(self.seq_names)
+        return len(self.full_sequences)
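
For reference, a minimal sketch of the per-sequence annotation layout the new loader expects, with stand-in arrays built in place of a real file (key names are taken from the diff above; the shapes are hypothetical):

import numpy as np

# Stand-in for: data = np.load(sequence["annot_path"], allow_pickle=True).item()
S, H, W, N = 24, 256, 256, 16  # frames, height, width, tracked points (hypothetical)
data = {
    "video": np.zeros((S, H, W, 3), dtype=np.uint8),  # [S, H, W, C] frames
    "points": np.zeros((N, S, 2), dtype=np.float32),  # [N, S, 2] trajectories
    "occluded": np.zeros((N, S), dtype=bool),         # [N, S] occlusion flags
}

rgbs = data["video"]
traj_2d = data["points"].transpose(1, 0, 2)       # [S, N, 2], time-major as the loader wants
visibility = (~data["occluded"]).transpose(1, 0)  # [S, N], True where the point is visible
print(rgbs.shape, traj_2d.shape, visibility.shape)  # (24, 256, 256, 3) (24, 16, 2) (24, 16)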
2 changes: 2 additions & 0 deletions cotracker/datasets/tap_vid_datasets.py
@@ -179,7 +179,9 @@ def decode(frame):
        target_points = self.points_dataset[video_name]["points"]
        if self.resize_to_256:
            frames = resize_video(frames, [256, 256])
+            # frames = resize_video(frames, [384, 512])
            target_points *= np.array([255, 255])  # 1 should be mapped to 256-1
+            # target_points *= np.array([frames.shape[2] - 1, frames.shape[1] - 1])
        else:
            target_points *= np.array([frames.shape[2] - 1, frames.shape[1] - 1])

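As a sanity check on the rescaling above, a small sketch assuming target_points are normalized to [0, 1] (which the "1 should be mapped to 256-1" comment implies); the sample points are hypothetical:

import numpy as np

# Normalized (x, y) query points in [0, 1] (hypothetical values).
target_points = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])

# After resizing frames to 256x256, 1.0 must land on pixel 255 (= 256 - 1).
scaled = target_points * np.array([255, 255])
print(scaled)  # rows: [0, 0], [127.5, 127.5], [255, 255]
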
75 changes: 73 additions & 2 deletions cotracker/evaluation/core/evaluator.py
@@ -39,6 +39,7 @@ def compute_metrics(self, metrics, sample, pred_trajectory, dataset_name):
        pred_visibility = None
        if "tapvid" in dataset_name:
            B, T, N, D = sample.trajectory.shape
+
            traj = sample.trajectory.clone()
            thr = 0.9

@@ -85,6 +86,76 @@ def compute_metrics(self, metrics, sample, pred_trajectory, dataset_name):
            logging.info(f"avg: {metrics['avg']}")
            print("metrics", out_metrics)
            print("avg", metrics["avg"])
+
+            # UNCOMMENT
+            # H, W = sample.video.shape[-2:]
+            # device = sample.video.device
+            # out_metrics = {}
+            # d_vis_sum = d_occ_sum = d_sum_all = 0.0
+            # thrs = [1, 2, 4, 8, 16]
+            # sx_ = (W - 1) / 255.0
+            # sy_ = (H - 1) / 255.0
+            # sc_py = np.array([sx_, sy_]).reshape([1, 1, 2])
+            # sc_pt = torch.from_numpy(sc_py).float().to(device)
+            # __, first_visible_inds = torch.max(sample.visibility, dim=1)
+
+            # frame_ids_tensor = torch.arange(T, device=device)[None, :, None].repeat(B, 1, N)
+            # start_tracking_mask = frame_ids_tensor > (first_visible_inds.unsqueeze(1))
+
+            # for thr in thrs:
+            #     d_ = (
+            #         torch.norm(
+            #             pred_trajectory[..., :2] / sc_pt - sample.trajectory[..., :2] / sc_pt,
+            #             dim=-1,
+            #         )
+            #         < thr
+            #     ).float()  # B,S-1,N
+            #     d_occ = (
+            #         reduce_masked_mean(d_, (1 - sample.visibility) * start_tracking_mask).item()
+            #         * 100.0
+            #     )
+            #     d_occ_sum += d_occ
+            #     out_metrics[f"accuracy_occ_{thr}"] = d_occ
+
+            #     d_vis = (
+            #         reduce_masked_mean(d_, sample.visibility * start_tracking_mask).item() * 100.0
+            #     )
+            #     d_vis_sum += d_vis
+            #     out_metrics[f"accuracy_vis_{thr}"] = d_vis
+
+            #     d_all = reduce_masked_mean(d_, start_tracking_mask).item() * 100.0
+            #     d_sum_all += d_all
+            #     out_metrics[f"accuracy_{thr}"] = d_all
+
+            # d_occ_avg = d_occ_sum / len(thrs)
+            # d_vis_avg = d_vis_sum / len(thrs)
+            # d_all_avg = d_sum_all / len(thrs)
+
+            # sur_thr = 16
+            # dists = torch.norm(
+            #     pred_trajectory[..., :2] / sc_pt - sample.trajectory[..., :2] / sc_pt,
+            #     dim=-1,
+            # )  # B,S,N
+            # dist_ok = 1 - (dists > sur_thr).float() * sample.visibility  # B,S,N
+            # survival = torch.cumprod(dist_ok, dim=1)  # B,S,N
+            # out_metrics["survival"] = torch.mean(survival).item() * 100.0
+
+            # out_metrics["accuracy_occ"] = d_occ_avg
+            # out_metrics["accuracy_vis"] = d_vis_avg
+            # out_metrics["accuracy"] = d_all_avg
+
+            # metrics[sample.seq_name[0]] = out_metrics
+            # for metric_name in out_metrics.keys():
+            #     if "avg" not in metrics:
+            #         metrics["avg"] = {}
+            #     metrics["avg"][metric_name] = float(
+            #         np.mean([v[metric_name] for k, v in metrics.items() if k != "avg"])
+            #     )
+
+            # logging.info(f"Metrics: {out_metrics}")
+            # logging.info(f"avg: {metrics['avg']}")
+            # print("metrics", out_metrics)
+            # print("avg", metrics["avg"])
        elif dataset_name == "dynamic_replica" or dataset_name == "pointodyssey":
            *_, N, _ = sample.trajectory.shape
            B, T, N = sample.visibility.shape
@@ -133,7 +204,7 @@ def compute_metrics(self, metrics, sample, pred_trajectory, dataset_name):
            d_vis_avg = d_vis_sum / len(thrs)
            d_all_avg = d_sum_all / len(thrs)

-            sur_thr = 50
+            sur_thr = 16
            dists = torch.norm(
                pred_trajectory[..., :2] / sc_pt - sample.trajectory[..., :2] / sc_pt,
                dim=-1,
@@ -174,7 +245,7 @@ def evaluate_sequence(

        vis = Visualizer(
            save_dir=self.exp_dir,
-            fps=7,
+            fps=12,
        )

        for ind, sample in enumerate(tqdm(test_dataloader)):
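For context on the sur_thr change above, a minimal sketch of the survival metric computed in compute_metrics: a track "survives" until the first frame whose rescaled error exceeds sur_thr while the point is visible, and the score is the mean over all (batch, frame, point) entries. All tensors here are hypothetical stand-ins:

import torch

B, S, N = 1, 8, 4  # batch, frames, tracked points (hypothetical)
pred = torch.rand(B, S, N, 2) * 255
gt = pred + torch.randn(B, S, N, 2)  # small, mostly sub-threshold errors
visibility = torch.ones(B, S, N)     # 1 where the point is visible

sur_thr = 16
dists = torch.norm(pred - gt, dim=-1)                 # [B, S, N] per-frame error
dist_ok = 1 - (dists > sur_thr).float() * visibility  # 0 once a visible point strays
survival = torch.cumprod(dist_ok, dim=1)              # stays 1 until the first failure
print(torch.mean(survival).item() * 100.0)            # survival score in percent

Lowering sur_thr from 50 to 16 tightens the metric: a track now counts as lost as soon as its error exceeds 16 rescaled pixels instead of 50.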
