Commit 6de6e45: initial release
Sandra Kara committed Sep 17, 2024 (0 parents)
Showing 38 changed files with 7,465 additions and 0 deletions.
Binary file added DIOD.png
61 changes: 61 additions & 0 deletions DIOD_DINOv2/datasetKITTIEval_dinov2.py
@@ -0,0 +1,61 @@
import os

import cv2
import torch
from torchvision import transforms
from torch.utils.data import Dataset


# Model input resolution (width, height) and the quarter-scale resolution
# used for the evaluation masks.
resolution = (1260, 378)
dresolution = (315, 95)


class KITTIDataset(Dataset):
    def __init__(self, split='train', root=None):
        super(KITTIDataset, self).__init__()
        self.resolution = resolution
        self.root_dir = root

        self.rgb_dir = os.path.join(self.root_dir, 'rgb')
        self.instance_dir = os.path.join(self.root_dir, 'instance')

        self.files = os.listdir(self.rgb_dir)
        self.files.sort()

        # Keep only the first five frames for quick evaluation runs.
        if split == 'eval':
            self.files = self.files[0:5]

        # ImageNet normalization, matching the DINOv2 backbone.
        self.img_transform = transforms.Compose([
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])

    def __getitem__(self, index):
        path = self.files[index]
        image = cv2.imread(os.path.join(self.rgb_dir, path))
        mask = cv2.imread(os.path.join(self.instance_dir, path), -1)

        # Bilinear resize for the image, nearest-neighbour for the label mask.
        image = cv2.resize(image, resolution, interpolation=cv2.INTER_LINEAR)
        mask = cv2.resize(mask, dresolution, interpolation=cv2.INTER_NEAREST)

        mask = torch.Tensor(mask).long()
        image = torch.Tensor(image).float()

        # Scale to [0, 1], move channels first, then normalize.
        image = image / 255.0
        image = image.permute(2, 0, 1)
        image = self.img_transform(image)

        sample = {'image': image, 'mask': mask}
        return sample

    def __len__(self):
        return len(self.files)
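
A minimal usage sketch for this loader (not part of the commit; the root path is a placeholder assumption):

from torch.utils.data import DataLoader

# Hypothetical usage; '/path/to/kitti_eval' is a placeholder root
# containing 'rgb/' and 'instance/' subdirectories.
dataset = KITTIDataset(split='eval', root='/path/to/kitti_eval')
loader = DataLoader(dataset, batch_size=1, shuffle=False)
for batch in loader:
    images = batch['image']  # (B, 3, 378, 1260), normalized
    masks = batch['mask']    # (B, 95, 315), integer instance ids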
89 changes: 89 additions & 0 deletions DIOD_DINOv2/datasetKITTI_dinov2.py
@@ -0,0 +1,89 @@
import os
import random
from typing import Callable

import cv2
import torch
from torchvision import transforms
from torch.utils.data import Dataset


class KITTIDataset(Dataset):
    def __init__(
        self,
        split='train',
        root=None,
        resolution=(1248, 368),
        transform: Callable = None,
        apply_img_transform: bool = True
    ):
        super(KITTIDataset, self).__init__()
        # Stored as (width, height), the order expected by cv2.resize.
        self.resolution = resolution
        self.dresolution = (resolution[0] // 4, resolution[1] // 4)
        self.root_dir = root
        self.files = os.listdir(self.root_dir)
        self.files.sort()
        self.files = self.files[:151]
        # The first five sequences are held out for evaluation.
        if split == 'train':
            self.files = self.files[5:]
        else:
            self.files = self.files[0:5]
        self.real_files = []
        self.mask_files = []
        # Each drive may provide the left (image_02) and right (image_03) camera.
        for f in self.files:
            for i in ['image_02', 'image_03']:
                if os.path.exists(os.path.join(self.root_dir, f + '/{}/'.format(i))):
                    self.real_files.append(f + '/{}/data'.format(i))
                    self.mask_files.append(f + '/{}/raft_seg'.format(i))

        # ImageNet normalization, matching the DINOv2 backbone.
        self.img_transform = transforms.Compose([
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])

        self.apply_image_transform = apply_img_transform
        self.transform = transform

    def __getitem__(self, index):
        path = self.real_files[index]
        mask_path = self.mask_files[index]

        all_images = os.listdir(os.path.join(self.root_dir, path))
        all_images.sort()
        N = len(all_images)

        # Sample a random window of five consecutive frames.
        rand_id = random.randint(0, N - 10)
        real_idx = [rand_id + j for j in range(5)]
        ims = []
        masks = []
        for idd in real_idx:
            image = cv2.imread(os.path.join(self.root_dir, path, all_images[idd]))
            mask = cv2.imread(os.path.join(self.root_dir, mask_path, all_images[idd]), -1)

            image = cv2.resize(image, self.resolution, interpolation=cv2.INTER_LINEAR)
            mask = cv2.resize(mask, self.resolution, interpolation=cv2.INTER_NEAREST)

            mask = torch.Tensor(mask)
            image = torch.Tensor(image)

            ims.append(image)
            masks.append(mask)

        masks = torch.stack(masks).long()
        ims = torch.stack(ims).float()
        ims /= 255.0
        ims = ims.permute(0, 3, 1, 2)  # (T, H, W, C) -> (T, C, H, W)
        sample = {'image': ims, 'mask': masks}

        if self.transform is not None:
            sample = self.transform(sample)
        elif self.apply_image_transform:
            sample["image"] = self.img_transform(sample["image"])

        return sample

    def __len__(self):
        return len(self.real_files)
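
A rough usage sketch (the root path and batch size are assumptions, not part of the commit); each item is a clip of five consecutive frames:

from torch.utils.data import DataLoader

# Hypothetical usage; '/path/to/kitti_raw' is a placeholder root laid out
# as <sequence>/image_02/data and <sequence>/image_02/raft_seg.
train_set = KITTIDataset(split='train', root='/path/to/kitti_raw')
loader = DataLoader(train_set, batch_size=2, shuffle=True)
clip = next(iter(loader))
print(clip['image'].shape)  # (2, 5, 3, 368, 1248) at the default resolution
print(clip['mask'].shape)   # (2, 5, 368, 1248)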
98 changes: 98 additions & 0 deletions DIOD_DINOv2/datasetPDEval_dinov2.py
@@ -0,0 +1,98 @@
import math
import os

import cv2
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import Dataset


class PDDataset(Dataset):
    def __init__(self, split='train', root=None):
        super(PDDataset, self).__init__()

        self.root_dir = root
        self.files = os.listdir(self.root_dir)
        self.files.sort()

        if split == 'eval':
            self.files = self.files[0:1]
        # for 'test', keep every scene

        self.real_files = []
        self.mask_files = []
        # Collect every available camera stream for each scene.
        for f in self.files:
            for i in [1, 5, 6, 7, 8, 9]:
                if os.path.exists(os.path.join(self.root_dir, f + '/rgb/camera_0{}'.format(i))):
                    self.real_files.append(f + '/rgb/camera_0{}'.format(i))
                    self.mask_files.append(f + '/ari_masks/camera_0{}'.format(i))
        # Resize to the DINOv2 input size, then apply ImageNet normalization.
        self.img_transform = transforms.Compose([
            transforms.Resize((490, 980)),
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])

    def __getitem__(self, index):
        path = self.real_files[index]
        mask_path = self.mask_files[index]

        all_images = os.listdir(os.path.join(self.root_dir, path))
        all_images.sort()
        # Evaluate on the first 200 frames of the stream.
        real_idx = list(range(200))
        ims = []
        masks = []
        # Map raw instance ids to contiguous labels; the mapping is shared
        # across frames so ids stay temporally consistent. Id 0 is background.
        mapping = {0: 0}
        mapping_count = 1
        for idd in real_idx:
            image = cv2.imread(os.path.join(self.root_dir, path, all_images[idd]))
            mask = cv2.imread(os.path.join(self.root_dir, mask_path, all_images[idd]), -1)

            # Downsample by 2 and crop away the top 128 rows.
            downsampling_ratio = 0.5
            crop = 128
            width = int(math.ceil(image.shape[1] * downsampling_ratio))
            height = int(math.ceil(image.shape[0] * downsampling_ratio))
            dim = (width, height)
            image = cv2.resize(image, dim, interpolation=cv2.INTER_LINEAR)
            image = image[crop:, :, :]
            mask = cv2.resize(mask, dim, interpolation=cv2.INTER_NEAREST)
            mask = mask[crop:, :]

            # Assign a new label only to instances covering more than 500 px;
            # everything else falls back to background (0).
            values, indices, counts = np.unique(mask, return_inverse=True, return_counts=True)
            for i in range(len(values)):
                if values[i] not in mapping:
                    if counts[i] > 500:
                        mapping[values[i]] = mapping_count
                        mapping_count += 1
            cur_mapping = []
            for i in range(len(values)):
                value = values[i]
                if value not in mapping:
                    cur_mapping.append(0)
                else:
                    cur_mapping.append(mapping[value])
            cur_mapping = np.array(cur_mapping)
            _h, _w = mask.shape
            mask = cur_mapping[indices].reshape((_h, _w))

            mask = torch.Tensor(mask).long()
            image = torch.Tensor(image).float()
            image = image / 255.0
            image = image.permute(2, 0, 1)
            image = self.img_transform(image)
            ims.append(image)
            masks.append(mask)
        ims = torch.stack(ims)
        masks = torch.stack(masks)
        sample = {'image': ims, 'mask': masks}
        return sample

    def __len__(self):
        return len(self.real_files)
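
A hedged usage sketch (the root path is a placeholder); each item packs a 200-frame evaluation clip, so it is usually consumed one sequence at a time:

# Hypothetical usage; '/path/to/pd' is a placeholder root of PD scenes.
dataset = PDDataset(split='eval', root='/path/to/pd')
sample = dataset[0]
print(sample['image'].shape)      # (200, 3, 490, 980) after the resize
print(sample['mask'].shape)       # (200, H, W) at the cropped half resolution
print(int(sample['mask'].max()))  # highest (contiguous) instance id in the clip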
118 changes: 118 additions & 0 deletions DIOD_DINOv2/datasetPD_dinov2.py
@@ -0,0 +1,118 @@
import math
import os
import random
from typing import Callable

import cv2
import numpy as np
import torch
from torchvision import transforms
from torch.utils.data import Dataset

# Scenes excluded from training.
banned_scenes = ['scene_000002', 'scene_000008', 'scene_000012', 'scene_000018', 'scene_000029',
    'scene_000038', 'scene_000040', 'scene_000043', 'scene_000044', 'scene_000049', 'scene_000050', 'scene_000053', 'scene_000063',
    'scene_000079', 'scene_000090', 'scene_000094', 'scene_000100', 'scene_000103', 'scene_000106', 'scene_000111', 'scene_000112',
    'scene_000124', 'scene_000125', 'scene_000127', 'scene_000148', 'scene_000159', 'scene_000166', 'scene_000169',
    'scene_000170', 'scene_000171', 'scene_000187', 'scene_000191', 'scene_000200', 'scene_000202', 'scene_000217',
    'scene_000218', 'scene_000225', 'scene_000229', 'scene_000232', 'scene_000236', 'scene_000237', 'scene_000245',
    'scene_000249'
]


class PDDataset(Dataset):
    def __init__(self, split='train', root=None, supervision='moving', transform: Callable = None,
                 apply_img_transform: bool = True):
        super(PDDataset, self).__init__()
        self.root_dir = root
        self.files = os.listdir(self.root_dir)
        self.files.sort()
        # The first scene is held out for evaluation.
        if split == 'train':
            self.files = self.files[1:]
        elif split == 'eval':
            self.files = self.files[0:1]
        # otherwise keep every scene

        # Choose the annotation source used as supervision.
        if supervision == 'moving':
            self.annotation = 'moving_masks'
        elif supervision == 'all':
            self.annotation = 'ari_masks'
        elif supervision == 'est':
            self.annotation = 'est_masks'
        else:
            raise ValueError("supervision must be 'moving', 'all', or 'est'; revise the code for a custom setting.")
        self.real_files = []
        self.mask_files = []
        for f in self.files:
            if f in banned_scenes:
                continue
            for i in [1, 5, 6, 7, 8, 9]:
                if os.path.exists(os.path.join(self.root_dir, f + '/rgb/camera_0{}'.format(i))):
                    self.real_files.append(f + '/rgb/camera_0{}'.format(i))
                    self.mask_files.append(f + '/{}/camera_0{}'.format(self.annotation, i))
        self.img_transform = transforms.Compose([
            transforms.Resize((490, 980)),  # DINOv2 input size
            transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])
        self.apply_image_transform = apply_img_transform
        self.transform = transform

    def __getitem__(self, index):
        path = self.real_files[index]
        mask_path = self.mask_files[index]
        all_images = os.listdir(os.path.join(self.root_dir, path))
        all_images.sort()
        # Sample a window of five consecutive frames.
        rand_id = random.randint(0, 190)
        real_idx = [rand_id + j for j in range(5)]
        ims = []
        masks = []
        for idd in real_idx:
            # Unlike the eval loader, the id mapping is rebuilt per frame.
            mapping = {0: 0}
            mapping_count = 1
            image = cv2.imread(os.path.join(self.root_dir, path, all_images[idd]))
            mask = cv2.imread(os.path.join(self.root_dir, mask_path, all_images[idd]), -1)
            # Downsample by 2 and crop away the top 128 rows.
            downsampling_ratio = 0.5
            crop = 128
            width = int(math.ceil(image.shape[1] * downsampling_ratio))
            height = int(math.ceil(image.shape[0] * downsampling_ratio))
            dim = (width, height)
            image = cv2.resize(image, dim, interpolation=cv2.INTER_LINEAR)
            image = image[crop:, :, :]
            mask = cv2.resize(mask, dim, interpolation=cv2.INTER_NEAREST)
            mask = mask[crop:, :]

            # Relabel instances contiguously; segments of 50 px or fewer
            # are mapped to background (0).
            values, indices, counts = np.unique(mask, return_inverse=True, return_counts=True)
            for i in range(len(values)):
                if values[i] not in mapping:
                    if counts[i] > 50:
                        mapping[values[i]] = mapping_count
                        mapping_count += 1
            cur_mapping = []
            for value in values:
                if value not in mapping:
                    cur_mapping.append(0)
                else:
                    cur_mapping.append(mapping[value])
            cur_mapping = np.array(cur_mapping)
            _h, _w = mask.shape
            mask = cur_mapping[indices].reshape((_h, _w))

            mask = torch.Tensor(mask).long()
            image = torch.Tensor(image).float()
            image = image / 255.0
            image = image.permute(2, 0, 1)

            ims.append(image)
            masks.append(mask)
        ims = torch.stack(ims)
        masks = torch.stack(masks)
        sample = {'image': ims, 'mask': masks}

        if self.transform is not None:
            sample = self.transform(sample)
        elif self.apply_image_transform:
            sample["image"] = self.img_transform(sample["image"])

        return sample

    def __len__(self):
        return len(self.real_files)
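
A minimal training-side sketch (root path, batch size, and worker count are placeholder assumptions); supervision picks which mask directory provides the training signal:

from torch.utils.data import DataLoader

# Hypothetical usage; '/path/to/pd' is a placeholder root of PD scenes.
train_set = PDDataset(split='train', root='/path/to/pd', supervision='moving')
loader = DataLoader(train_set, batch_size=2, shuffle=True, num_workers=4)
batch = next(iter(loader))
print(batch['image'].shape)  # (2, 5, 3, 490, 980) after the resize transform
# Masks stay at the cropped half resolution, e.g. (2, 5, 412, 960) for 1080p input.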