From 144cc8bea16f39747cf2b2bddbdbcc6e833502e5 Mon Sep 17 00:00:00 2001
From: Julian Gaal
Date: Thu, 4 May 2023 15:46:25 +0200
Subject: [PATCH 1/4] provide docker file (ubuntu 20.04, pytorch 1.10.2, cuda
 11.3.1)

---
 Dockerfile          | 33 +++++++++++++++++++++++++++++++++
 dist_train.sh       |  6 ++----
 docs/GET_STARTED.md | 13 ++++++++++++-
 docs/INSTALL.md     | 23 ++++++++++++++++++++---
 requirements.txt    | 13 +++++++++++++
 5 files changed, 80 insertions(+), 8 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 requirements.txt

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..29c40a5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,33 @@
+FROM cnstark/pytorch:1.10.2-py3.9.12-cuda11.3.1-devel-ubuntu20.04
+
+RUN apt update && apt upgrade -y &&\
+    DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
+    tzdata git libsparsehash-dev unzip wget vim tmux ffmpeg libsm6 libxext6
+
+WORKDIR /home/pcseg
+COPY ./ .
+
+# package installation
+ENV PATH "/usr/.local/bin:${PATH}"
+RUN pip3 install -r requirements.txt
+RUN cd /home/pcseg/package &&\
+    mkdir torchsparse_dir/ &&\
+    unzip -o sparsehash.zip -d sparsehash &&\
+    unzip -o torchsparse.zip &&\
+    unzip -o range_lib.zip
+RUN cd /home/pcseg/package/sparsehash/sparsehash-master &&\
+    ./configure --prefix=/home/pcseg/package/torchsparse_dir/spash &&\
+    make &&\
+    make install
+RUN pip3 install -e package/range_lib &&\
+    pip3 install -e package/torchsparse
+
+
+# filesystem and permissions setup
+ARG UNAME=pcseg
+ARG UID=1000
+ARG GID=1000
+RUN groupadd -g $GID -o $UNAME &&\
+    useradd -m -u $UID -g $GID -o -s /bin/bash $UNAME &&\
+    chown -R $UNAME:$UNAME /home/pcseg
+USER $UNAME
\ No newline at end of file
diff --git a/dist_train.sh b/dist_train.sh
index d45f1b7..143afad 100755
--- a/dist_train.sh
+++ b/dist_train.sh
@@ -2,7 +2,7 @@
 set -x
 
 NGPUS=$1
-PY_ARGS=${@:2}
+PY_ARGS=${*:2}
 
 while true
 do
@@ -16,7 +16,5 @@
 echo $PORT
 
 python -m torch.distributed.launch --nproc_per_node=${NGPUS} \
    --rdzv_endpoint=localhost:${PORT} train.py --launcher pytorch --amp \
-    ${PY_ARGS}
-
-echo 'dist_train finished!'
+    ${PY_ARGS} && echo 'dist_train finished!'
\ No newline at end of file
diff --git a/docs/GET_STARTED.md b/docs/GET_STARTED.md
index 83c1e30..30ee71a 100755
--- a/docs/GET_STARTED.md
+++ b/docs/GET_STARTED.md
@@ -14,7 +14,6 @@ CUDA_VISIBLE_DEVICES=4,5 sh dist_train.sh 2 --cfg_file tools/cfgs/fusion/semanti
 CUDA_VISIBLE_DEVICES=6,7 sh dist_train.sh 2 --cfg_file tools/cfgs/fusion/semantic_kitti/rpvnet_mk18_cr10.yaml
 ```
 
-
 ### Waymo Open Dataset
 
 For example, if you want to train the following models with 2 GPUs:
@@ -24,3 +23,15 @@ CUDA_VISIBLE_DEVICES=0,1 sh dist_train.sh 2 --cfg_file tools/cfgs/voxel/waymo/mi
 CUDA_VISIBLE_DEVICES=2,3 sh dist_train.sh 2 --cfg_file tools/cfgs/voxel/waymo/cylinder_cy480_cr10.yaml
 ```
 
+
+### Docker
+
+#### Run the container
+
+After building according to [installation instructions](./INSTALL.md), run the container while mounting the dataset, e.g. SemanticKITTI:
+
+```bash
+docker run -it --rm --gpus all -v /home/julian/Downloads/semanticKITTI/dataset:/home/pcseg/data_root/SemanticKITTI/ pcseg
+```
+
+and follow the train instructions above
\ No newline at end of file
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
index 6ceec8a..f12fc86 100755
--- a/docs/INSTALL.md
+++ b/docs/INSTALL.md
@@ -2,8 +2,8 @@
 
 ### General Requirements
 
-This codebase is tested with `torch==1.10.0` and `torchvision==0.11.0`, with `CUDA 11.3` and `gcc 7.3.0`. In order to successfully reproduce the results reported in our paper, we recommend you to follow the exact same configuation with us. However, similar versions that came out lately should be good as well.
-
+* This codebase is tested with `torch==1.10.0` and `torchvision==0.11.0`, with `CUDA 11.3` and `gcc 7.3.0`. In order to successfully reproduce the results reported in our paper, we recommend you to follow the exact same configuration with us. However, similar versions that came out lately should be good as well.
+* OS with `ffmpeg libsm6 libxext6 libsparsehash-dev` installed
 
 ### Step 1: Create Enviroment
 ```Shell
@@ -90,7 +90,7 @@ Finished processing dependencies for rangelib==1.0.0
 ```
 #### 4.5 - Other Packages
 ```Shell
-pip install pyyaml easydict numba torchpack strictyaml llvmlite easydict scikit-image tqdm SharedArray prettytable opencv-python
+pip install pyyaml easydict numba torchpack strictyaml llvmlite easydict scikit-image tqdm SharedArray prettytable opencv-python tensorboard
 ```
 ```Shell
 pip uninstall setuptools
@@ -105,3 +105,20 @@ Inside `PCSeg` directory:
 python setup.py develop
 ```
 
+## Docker
+
+### Prerequisites
+
+Make sure you have applied the following steps before building the docker image:
+
+* Install `nvidia-docker`: `sudo apt-get install -y nvidia-container-toolkit`
+* Configure docker runtime, according to [this answer](https://stackoverflow.com/a/61737404) while [this addition](https://stackoverflow.com/a/75629058) may be necessary as of 03/2023
+* Restart docker: `sudo systemctl restart docker`
+
+### Build docker image
+
+```Shell
+docker build --build-arg UID=$(id -u) --build-arg GID=$(id -g) -t pcseg -f Dockerfile .
+```
+
+**Note:** `DOCKER_BUILDKIT=0 docker build ...` may be necessary. See Prerequisites
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..538d426
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+torch-scatter==2.1.1
+tensorboard==2.12.3
+opencv_python==4.7.0.72
+pyyaml
+easydict
+numba
+strictyaml
+llvmlite
+scikit-image
+tqdm
+sharedarray
+prettytable
+setuptools==59.5.0

From 23bb5883003aeb9d108185d0b4b5361530672807 Mon Sep 17 00:00:00 2001
From: Julian Gaal
Date: Thu, 4 May 2023 15:50:03 +0200
Subject: [PATCH 2/4] numpy > 1.20.x compatibility of float type

---
 pcseg/data/dataset/semantickitti/laserscan.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pcseg/data/dataset/semantickitti/laserscan.py b/pcseg/data/dataset/semantickitti/laserscan.py
index 44aa8f0..f50b883 100755
--- a/pcseg/data/dataset/semantickitti/laserscan.py
+++ b/pcseg/data/dataset/semantickitti/laserscan.py
@@ -299,11 +299,11 @@ def reset(self):
 
         # projection color with semantic labels
         self.proj_sem_label = np.zeros((self.proj_H, self.proj_W), dtype=np.int32)  # [H, W]: label
-        self.proj_sem_color = np.zeros((self.proj_H, self.proj_W, 3), dtype=np.float)  # [H, W, 3]: color
+        self.proj_sem_color = np.zeros((self.proj_H, self.proj_W, 3), dtype=np.float32)  # [H, W, 3]: color
 
         # projection color with instance labels
         self.proj_inst_label = np.zeros((self.proj_H, self.proj_W), dtype=np.int32)  # [H, W]: label
-        self.proj_inst_color = np.zeros((self.proj_H, self.proj_W, 3), dtype=np.float)  # [H, W, 3]: color
+        self.proj_inst_color = np.zeros((self.proj_H, self.proj_W, 3), dtype=np.float32)  # [H, W, 3]: color
 
 
     def open_label(self, filename):

From 029b36a583876c0d6a113dc1b336a2f072149f76 Mon Sep 17 00:00:00 2001
From: Julian Gaal
Date: Thu, 4 May 2023 15:53:37 +0200
Subject: [PATCH 3/4] fix range_lib import

---
 pcseg/model/segmentor/fusion/rpvnet/rpvnet.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pcseg/model/segmentor/fusion/rpvnet/rpvnet.py b/pcseg/model/segmentor/fusion/rpvnet/rpvnet.py
index 8f14c11..7e79933 100755
--- a/pcseg/model/segmentor/fusion/rpvnet/rpvnet.py
+++ b/pcseg/model/segmentor/fusion/rpvnet/rpvnet.py
@@ -23,7 +23,7 @@
 
 from pcseg.loss import Losses
 
-import range_utils.nn.functional as rnf
+from .range_lib.range_utils.nn import functional as rnf
 
 
 __all__ = ['RPVNet', 'SalsaNext']

From b94e35f76d84e19f63b747edaa1e0d3f314c81b7 Mon Sep 17 00:00:00 2001
From: Julian Gaal
Date: Thu, 4 May 2023 17:18:32 +0200
Subject: [PATCH 4/4] semantic kitti dataset validity checks

---
 docs/GET_STARTED.md                           |  2 +-
 .../dataset/semantickitti/semantickitti_rv.py | 22 +++++++++++++++-----
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/docs/GET_STARTED.md b/docs/GET_STARTED.md
index 30ee71a..6877ab7 100755
--- a/docs/GET_STARTED.md
+++ b/docs/GET_STARTED.md
@@ -31,7 +31,7 @@ CUDA_VISIBLE_DEVICES=2,3 sh dist_train.sh 2 --cfg_file tools/cfgs/voxel/waymo/cy
 After building according to [installation instructions](./INSTALL.md), run the container while mounting the dataset, e.g. SemanticKITTI:
 
 ```bash
-docker run -it --rm --gpus all -v /home/julian/Downloads/semanticKITTI/dataset:/home/pcseg/data_root/SemanticKITTI/ pcseg
+docker run -it --rm --gpus all -v /path/to/logs:/home/pcseg -v /path/to/kitti/semanticKITTI/dataset:/home/pcseg/data_root/SemanticKITTI/ pcseg
 ```
 
 and follow the train instructions above
\ No newline at end of file
diff --git a/pcseg/data/dataset/semantickitti/semantickitti_rv.py b/pcseg/data/dataset/semantickitti/semantickitti_rv.py
index a4ef8ad..4b5c0a3 100755
--- a/pcseg/data/dataset/semantickitti/semantickitti_rv.py
+++ b/pcseg/data/dataset/semantickitti/semantickitti_rv.py
@@ -3,6 +3,7 @@
 import glob
 import random
 import yaml
+from itertools import chain
 
 import numpy as np
 import torch
@@ -80,15 +81,26 @@
         if self.split == 'train': folders = ['00', '01', '02', '03', '04', '05', '06', '07', '09', '10']
         elif self.split == 'val': folders = ['08']
         elif self.split == 'test': folders = ['11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21']
-
-        self.lidar_list = []
-        for folder in folders:
-            self.lidar_list += glob.glob(self.root + 'sequences/' + folder + '/velodyne/*.bin')
-        print("Loading '{}' samples from SemanticKITTI under '{}' split".format(len(self.lidar_list), self.split))
+
+        if not os.path.exists(self.root):
+            raise ValueError(f"SemanticKITTI root dir {self.root} doesn't exist!")
+
+        if not os.path.exists(self.root + "sequences"):
+            raise ValueError("Please use SemanticKITTI root directory, e.g. /path/to/SemanticKITTI/dataset")
+
+        self.lidar_list = list(
+            chain.from_iterable(glob.glob(self.root + 'sequences/' + f + '/velodyne/*.bin') for f in folders))
+        if len(self.lidar_list) == 0:
+            raise ValueError(f"Couldn't read point clouds. Found {len(self.lidar_list)} clouds")
 
         self.label_list = [i.replace("velodyne", "labels") for i in self.lidar_list]
         self.label_list = [i.replace("bin", "label") for i in self.label_list]
 
+        if not any([os.path.exists(path) for path in self.label_list]):
+            raise ValueError("Couldn't find matching labels")
+
+        print("Loading {} samples from SemanticKITTI under {} split".format(len(self.lidar_list), self.split))
+
         if self.split == 'train_test':
             root_psuedo_labels = '/mnt/lustre/konglingdong/data/sets/sequences/'
             folders_test = ['11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21']