Skip to content

Commit

Permalink
[TIPC] Fix TIPC Bugs (PaddlePaddle#2543)
Browse files: browse the repository at this point in the history
* Fix TIPC bugs

* Add max_num_workers_dist
  • Loading branch information
Bobholamovic authored Sep 16, 2022
1 parent d2f5667 commit 12da8c5
Show file tree
Hide file tree
Showing 12 changed files with 38 additions and 26 deletions.
2 changes: 1 addition & 1 deletion test_tipc/configs/fastscnn/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:2000
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:15;set_cv_threads:true;skip_iters:2;repeats:1000
log_iters:15;max_num_workers_dist:8;skip_iters:2;repeats:1000
2 changes: 1 addition & 1 deletion test_tipc/configs/fcn_hrnetw18/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:500
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:8;set_cv_threads:true;skip_iters:2;repeats:500
log_iters:8;max_num_workers_dist:8;skip_iters:2;repeats:500
2 changes: 1 addition & 1 deletion test_tipc/configs/mobileseg_mv3/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:400
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:15;set_cv_threads:true;skip_iters:2;repeats:500
log_iters:15;max_num_workers_dist:8;skip_iters:2;repeats:500
2 changes: 1 addition & 1 deletion test_tipc/configs/ocrnet_hrnetw18/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:600
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:15;set_cv_threads:true;skip_iters:3;repeats:500
log_iters:15;max_num_workers_dist:8;skip_iters:3;repeats:500
2 changes: 1 addition & 1 deletion test_tipc/configs/ocrnet_hrnetw48/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:500
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:5;set_cv_threads:true;skip_iters:3;repeats:500
log_iters:5;max_num_workers_dist:8;skip_iters:3;repeats:500
2 changes: 1 addition & 1 deletion test_tipc/configs/pp_liteseg_stdc1/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:400
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:10;set_cv_threads:true;skip_iters:2;repeats:500
log_iters:10;max_num_workers_dist:8;skip_iters:2;repeats:500
2 changes: 1 addition & 1 deletion test_tipc/configs/pp_liteseg_stdc2/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:400
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:10;set_cv_threads:true;skip_iters:2;repeats:500
log_iters:10;max_num_workers_dist:8;skip_iters:2;repeats:500
2 changes: 1 addition & 1 deletion test_tipc/configs/segformer_b0/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:800
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:15;set_cv_threads:true;skip_iters:2;repeats:500
log_iters:15;max_num_workers_dist:8;skip_iters:2;repeats:500
20 changes: 20 additions & 0 deletions test_tipc/configs/segformer_b0/train_ptq_infer_python.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
===========================ptq_params===========================
model_name:segformer_b0_KL
python:python3.7
##
--model_dir:test_tipc/output/segformer_b0_KL/segformer_b0_cityscapes_1024x1024_160k
##
--config:test_tipc/configs/segformer_b0/segformer_b0_cityscapes_1024x1024_160k.yml
--batch_num:1
--batch_size:1
##
trainer:PTQ
PTQ:slim/quant/ptq.py
##
===========================infer_params===========================
inference:deploy/python/infer.py
--device:cpu|gpu
--batch_size:1
--config:quant_model/deploy.yaml
--image_path:test_tipc/cpp/cityscapes_demo.png
--benchmark:True
2 changes: 1 addition & 1 deletion test_tipc/configs/sfnet/train_infer_python.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,4 @@ fp_items:fp32|fp16
epoch:400
--profiler_options:'batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile'
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096;FLAGS_cudnn_deterministic=False
log_iters:15;set_cv_threads:true;skip_iters:2;repeats:500
log_iters:15;max_num_workers_dist:8;skip_iters:2;repeats:500
3 changes: 1 addition & 2 deletions test_tipc/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ if [ ${MODE} = "benchmark_train" ];then
cd ./test_tipc/data/ && unzip PPM-100.zip && cd -
elif [ ${model_name} = 'deeplabv3p_resnet50_cityscapes' ]; then
rm -rf ./test_tipc/data/cityscapes
wget https://paddleseg.bj.bcebos.com/dataset/cityscapes.tar \
-O ./test_tipc/data/cityscapesimgs.tar --no-check-certificate
wget https://paddleseg.bj.bcebos.com/dataset/cityscapes.tar -O ./test_tipc/data/cityscapes.tar --no-check-certificate
tar -xf ./test_tipc/data/cityscapes.tar -C ./test_tipc/data/
else
rm -rf ./test_tipc/data/cityscapes
Expand Down
23 changes: 8 additions & 15 deletions test_tipc/test_train_inference_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ for params in ${extra_args[*]}; do
value=${arr[1]}
if [ "${key}" = 'log_iters' ]; then
log_iters="${value}"
elif [ "${key}" = "set_cv_threads" ]; then
set_cv_threads="${value}"
elif [ "${key}" = "max_num_workers_dist" ]; then
max_num_workers_dist="${value}"
elif [ "${key}" = "repeats" ]; then
repeats="${value}"
fi
Expand Down Expand Up @@ -345,15 +345,12 @@ else
cmd="${cmd} --repeats ${repeats}"
fi

if [ -n "${set_cv_threads}" ] && [ "${set_cv_threads}" = "true" ];then
# Take the first word as the training script; the script path must therefore contain no spaces.
train_script=$(echo "${run_train}" | cut -d ' ' -f1)
# Make a copy
train_script_copy="$(add_suffix ${train_script} '_copy')"
cp ${train_script} ${train_script_copy}
sed -i '1s/^/import cv2; cv2.setNumThreads(1)\n/' ${train_script_copy}
# Use a global replace!
cmd="${cmd/${train_script}/${train_script_copy}}"
if [ ${#gpu} -gt 2 ] && [ -n "${max_num_workers_dist}" ];then
# Only works when using multiple GPUs
let num_gpus=(${#gpu}+1)/2
let num_workers_per_gpu=${max_num_workers_dist}/${num_gpus}
echo "Use ${num_workers_per_gpu} workers on each GPU."
cmd=`echo ${cmd} | sed "s/--num_workers [0-9]\+/--num_workers ${num_workers_per_gpu}/g"`
fi

if [ -n "${amp_level}" ];then
Expand All @@ -368,10 +365,6 @@ else
if [[ "$cmd" == *'paddle.distributed.launch'* ]]; then
cat log/workerlog.0 >> ${log_path}
fi

if [ -n "${set_cv_threads}" ] && [ "${set_cv_threads}" = "true" ];then
rm ${train_script_copy}
fi

# modify model dir if no eval
if [ ! -f "${save_log}/${train_model_name}" ]; then
Expand Down

0 comments on commit 12da8c5

Please sign in to comment.