#!/bin/bash
#SBATCH --job-name=sam2 # Job name
#SBATCH --nodes=1 # Number of nodes
#SBATCH --ntasks=8
#SBATCH --ntasks-per-node=8 # Number of tasks (GPUs) per node
#SBATCH --cpus-per-task=8
#SBATCH --time=48:00:00 # Max time (HH:MM:SS)
#SBATCH --output=sbatch_log/amos_freeze_8gpu_%j.log # Output file
#SBATCH --gpus=rtx_3090:8
module load eth_proxy
module load stack/2024-06
module load cuda/12.1.1
module load gcc/12.2.0
module load cmake/3.27.7
source /cluster/work/cvl/qimaqi/miniconda3/etc/profile.d/conda.sh
conda deactivate
conda activate sam2
# conda create -n sam2 python=3.10
# pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu121
export CUDA_HOME=/cluster/software/stacks/2024-06/spack/opt/spack/linux-ubuntu22.04-x86_64_v3/gcc-12.2.0/cuda-12.1.1-5znnrjb5x5xr26nojxp3yhh6v77il7ie/
export PATH=$CUDA_HOME/bin:$PATH
export LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
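# Optional sanity check (a minimal sketch, not part of the original workflow):
# confirm that both the toolchain and PyTorch see the intended CUDA 12.1 build
# before the long training job starts.
nvcc --version
python -c "import torch; print('torch', torch.__version__, '| cuda', torch.version.cuda, '| available:', torch.cuda.is_available())"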
python training/train.py \
    -c configs/sam2.1_training/sam2.1_hiera_t_MOSE_finetune_amos_freeze.yaml \
    --use-cluster 0 \
    --num-gpus 8
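# Debug variant (a sketch, reusing only the flags already used above): run the
# same config on a single GPU in an interactive session before submitting the full job.
# python training/train.py \
#     -c configs/sam2.1_training/sam2.1_hiera_t_MOSE_finetune_amos_freeze.yaml \
#     --use-cluster 0 \
#     --num-gpus 1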
# python training/train.py \
# -c configs/sam2.1_training/sam2.1_hiera_t_MOSE_finetune_amos.yaml \
# --use-cluster 0 \
# --num-gpus 8 \
# python training/train.py \
# -c configs/sam2.1_training/sam2.1_hiera_t_MOSE_finetune_teeth.yaml \
# --use-cluster 0 \
# --num-gpus 2 \
# sbatch --output=sbatch_log/debug_%j.out --ntasks=8 --mem-per-cpu=4g --gpus=titan_rtx:2 --time=4-0 train.sh
# srun --ntasks=8 --mem-per-cpu=4G --gpus=rtx_3090:2 --time=240 --pty bash -i
# 12839180
# check multi-GPU behaviour: the configured batch size is per GPU, not shared across GPUs; each rank runs its own independent dataloader
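# Sketch (assumption: BATCH_SIZE_PER_GPU mirrors the per-GPU batch size set in the
# training yaml): the effective batch size is the per-GPU value times the task count.
# BATCH_SIZE_PER_GPU=1
# echo "effective batch size: $(( BATCH_SIZE_PER_GPU * SLURM_NTASKS ))"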
# srun --ntasks=8 --mem-per-cpu=4G --time=240 --gpus=rtx_3090:2 --pty bash -i
# check whether training with the base model frozen fits on an RTX 3090
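# Memory check sketch (run inside the allocated job): report per-GPU memory usage
# to verify that the frozen-base-model run fits within the RTX 3090's 24 GB.
# nvidia-smi --query-gpu=index,memory.used,memory.total --format=csv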