diff --git a/experiments/scripts/faster_rcnn_end2end.sh b/experiments/scripts/faster_rcnn_end2end.sh
new file mode 100755
index 000000000..fe7a27b42
--- /dev/null
+++ b/experiments/scripts/faster_rcnn_end2end.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# Trains a Faster R-CNN network end-to-end on voc_2007_trainval, then tests the
+# final snapshot on voc_2007_test. All paths are relative, so run this from the
+# directory that contains tools/, models/, data/ and experiments/.
+#
+# Usage:
+# ./experiments/scripts/faster_rcnn_end2end.sh GPU NET [--set ...]
+# Example:
+# ./experiments/scripts/faster_rcnn_end2end.sh 0 ZF \
+#   --set EXP_DIR foobar RNG_SEED 42 TRAIN.SCALES "[400,500,600,700]"
+
+set -x
+set -e
+
+export PYTHONUNBUFFERED="True"
+
+if (( $# < 2 )); then
+  echo "Usage: $0 GPU NET [--set ...]" >&2
+  exit 1
+fi
+
+GPU_ID=$1
+NET=$2
+ITERS=70000
+DATASET_TRAIN=voc_2007_trainval
+DATASET_TEST=voc_2007_test
+
+# Everything after GPU and NET is forwarded to both tools. An array keeps each
+# argument as one word, so values such as "[400,500,600,700]" are neither
+# re-split nor treated as glob patterns.
+EXTRA_ARGS=( "${@:3}" )
+EXTRA_ARGS_SLUG="${EXTRA_ARGS[*]}"
+EXTRA_ARGS_SLUG=${EXTRA_ARGS_SLUG// /_}
+
+LOG="experiments/logs/faster_rcnn_${NET}_${EXTRA_ARGS_SLUG}.txt.$(date +'%Y-%m-%d_%H-%M-%S')"
+# tee cannot create missing directories, so make sure the log directory exists.
+mkdir -p "$(dirname "${LOG}")"
+exec &> >(tee -a "$LOG")
+echo Logging output to "$LOG"
+
+NET_INIT=data/imagenet_models/${NET}.v2.caffemodel
+
+time ./tools/train_net.py --gpu "${GPU_ID}" \
+  --solver "models/${NET}/faster_rcnn_end2end/solver.prototxt" \
+  --weights "${NET_INIT}" \
+  --imdb "${DATASET_TRAIN}" \
+  --iters "${ITERS}" \
+  --cfg experiments/cfgs/faster_rcnn_end2end.yml \
+  "${EXTRA_ARGS[@]}"
+
+# Pull the final snapshot path out of the training log. Tracing is switched off
+# around the lookup, presumably so the traced command is not written into the
+# log that is being searched.
+set +x
+NET_FINAL=$(grep -B 1 "done solving" "${LOG}" | grep "Wrote snapshot" | awk '{print $4}')
+set -x
+
+if [[ -z "${NET_FINAL}" ]]; then
+  echo "No final snapshot found in ${LOG}; skipping test." >&2
+  exit 1
+fi
+
+time ./tools/test_net.py --gpu "${GPU_ID}" \
+  --def "models/${NET}/faster_rcnn_end2end/test.prototxt" \
+  --net "${NET_FINAL}" \
+  --imdb "${DATASET_TEST}" \
+  --cfg experiments/cfgs/faster_rcnn_end2end.yml \
+  "${EXTRA_ARGS[@]}"
diff --git a/models/VGG16/faster_rcnn_end2end/solver.prototxt b/models/VGG16/faster_rcnn_end2end/solver.prototxt new file mode 100644 index 000000000..bc12d2bcb --- /dev/null +++ b/models/VGG16/faster_rcnn_end2end/solver.prototxt @@ -0,0 +1,16 @@ +train_net: "models/VGG16/faster_rcnn_end2end/train.prototxt" +base_lr: 0.001 +lr_policy: "step" +gamma: 0.1 +stepsize: 50000 +display: 20 +average_loss: 100 +# iter_size: 1 +momentum: 0.9 +weight_decay: 0.0005 +# We disable standard caffe 
solver snapshotting and implement our own snapshot +# function +snapshot: 0 +# We still use the snapshot prefix, though +snapshot_prefix: "vgg16_faster_rcnn" +iter_size: 2 diff --git a/models/VGG16/faster_rcnn_end2end/test.prototxt b/models/VGG16/faster_rcnn_end2end/test.prototxt new file mode 100644 index 000000000..4a938208b --- /dev/null +++ b/models/VGG16/faster_rcnn_end2end/test.prototxt @@ -0,0 +1,608 @@ +name: "VGG_ILSVRC_16_layers" + +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 224 + dim: 224 +} + +input: "im_info" +input_shape { + dim: 1 + dim: 3 +} + +layer { + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: "Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: 
"conv2_2" + top: "conv2_2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3" + type: "Convolution" + 
bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} + +#========= RPN ============ + +layer { + name: "rpn_conv/3x3" + type: "Convolution" + bottom: "conv5_3" + top: "rpn/output" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 512 + kernel_size: 3 pad: 1 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_relu/3x3" + type: "ReLU" + bottom: "rpn/output" + top: "rpn/output" +} + +layer { + name: "rpn_cls_score" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_cls_score" + param { lr_mult: 1.0 
decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 18 # 2(bg/fg) * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_bbox_pred" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_bbox_pred" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 36 # 4 * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + bottom: "rpn_cls_score" + top: "rpn_cls_score_reshape" + name: "rpn_cls_score_reshape" + type: "Reshape" + reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } +} + +#========= RoI Proposal ============ + +layer { + name: "rpn_cls_prob" + type: "Softmax" + bottom: "rpn_cls_score_reshape" + top: "rpn_cls_prob" +} +layer { + name: 'rpn_cls_prob_reshape' + type: 'Reshape' + bottom: 'rpn_cls_prob' + top: 'rpn_cls_prob_reshape' + reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } +} +layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rois' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } +} + +#========= RCNN ============ + +layer { + name: "roi_pool5" + type: "ROIPooling" + bottom: "conv5_3" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 7 + pooled_h: 7 + spatial_scale: 0.0625 # 1/16 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + 
} +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "cls_score" + type: "InnerProduct" + bottom: "fc7" + top: "cls_score" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "bbox_pred" + type: "InnerProduct" + bottom: "fc7" + top: "bbox_pred" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "cls_prob" + type: "Softmax" + bottom: "cls_score" + top: "cls_prob" +} diff --git a/models/VGG16/faster_rcnn_end2end/train.prototxt b/models/VGG16/faster_rcnn_end2end/train.prototxt new file mode 100644 index 000000000..ebadb49b7 --- /dev/null +++ b/models/VGG16/faster_rcnn_end2end/train.prototxt @@ -0,0 +1,673 @@ +name: "VGG_ILSVRC_16_layers" +layer { + name: 'input-data' + type: 'Python' + top: 'data' + top: 'im_info' + top: 'gt_boxes' + python_param { + module: 'roi_data_layer.layer' + layer: 'RoIDataLayer' + param_str: "'num_classes': 21" + } +} + +layer { + name: "conv1_1" + type: "Convolution" + bottom: "data" + top: "conv1_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_1" + type: "ReLU" + bottom: "conv1_1" + top: "conv1_1" +} +layer { + name: "conv1_2" + type: 
"Convolution" + bottom: "conv1_1" + top: "conv1_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu1_2" + type: "ReLU" + bottom: "conv1_2" + top: "conv1_2" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1_2" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv2_1" + type: "Convolution" + bottom: "pool1" + top: "conv2_1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_1" + type: "ReLU" + bottom: "conv2_1" + top: "conv2_1" +} +layer { + name: "conv2_2" + type: "Convolution" + bottom: "conv2_1" + top: "conv2_2" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu2_2" + type: "ReLU" + bottom: "conv2_2" + top: "conv2_2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "conv2_2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv3_1" + type: "Convolution" + bottom: "pool2" + top: "conv3_1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_1" + type: "ReLU" + bottom: "conv3_1" + top: "conv3_1" +} +layer { + name: "conv3_2" + type: "Convolution" + bottom: "conv3_1" + top: "conv3_2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_2" + type: "ReLU" + bottom: "conv3_2" + top: "conv3_2" +} +layer { + name: "conv3_3" + type: "Convolution" + bottom: "conv3_2" + top: "conv3_3" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 
+ pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3_3" + type: "ReLU" + bottom: "conv3_3" + top: "conv3_3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "conv3_3" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv4_1" + type: "Convolution" + bottom: "pool3" + top: "conv4_1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_1" + type: "ReLU" + bottom: "conv4_1" + top: "conv4_1" +} +layer { + name: "conv4_2" + type: "Convolution" + bottom: "conv4_1" + top: "conv4_2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_2" + type: "ReLU" + bottom: "conv4_2" + top: "conv4_2" +} +layer { + name: "conv4_3" + type: "Convolution" + bottom: "conv4_2" + top: "conv4_3" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4_3" + type: "ReLU" + bottom: "conv4_3" + top: "conv4_3" +} +layer { + name: "pool4" + type: "Pooling" + bottom: "conv4_3" + top: "pool4" + pooling_param { + pool: MAX + kernel_size: 2 + stride: 2 + } +} +layer { + name: "conv5_1" + type: "Convolution" + bottom: "pool4" + top: "conv5_1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_1" + type: "ReLU" + bottom: "conv5_1" + top: "conv5_1" +} +layer { + name: "conv5_2" + type: "Convolution" + bottom: "conv5_1" + top: "conv5_2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_2" + type: "ReLU" + bottom: "conv5_2" + top: "conv5_2" +} +layer { + name: "conv5_3" + type: "Convolution" + bottom: "conv5_2" + top: "conv5_3" + param { + lr_mult: 1 + } + param { + 
lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5_3" + type: "ReLU" + bottom: "conv5_3" + top: "conv5_3" +} + +#========= RPN ============ + +layer { + name: "rpn_conv/3x3" + type: "Convolution" + bottom: "conv5_3" + top: "rpn/output" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 512 + kernel_size: 3 pad: 1 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_relu/3x3" + type: "ReLU" + bottom: "rpn/output" + top: "rpn/output" +} + +layer { + name: "rpn_cls_score" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_cls_score" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 18 # 2(bg/fg) * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} + +layer { + name: "rpn_bbox_pred" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_bbox_pred" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 36 # 4 * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} + +layer { + bottom: "rpn_cls_score" + top: "rpn_cls_score_reshape" + name: "rpn_cls_score_reshape" + type: "Reshape" + reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } +} + +layer { + name: 'rpn-data' + type: 'Python' + bottom: 'rpn_cls_score' + bottom: 'gt_boxes' + bottom: 'im_info' + bottom: 'data' + top: 'rpn_labels' + top: 'rpn_bbox_targets' + top: 'rpn_bbox_inside_weights' + top: 'rpn_bbox_outside_weights' + python_param { + module: 'rpn.anchor_target_layer' + layer: 'AnchorTargetLayer' + param_str: "'feat_stride': 16" + } +} + +layer { + name: "rpn_loss_cls" + type: "SoftmaxWithLoss" + bottom: "rpn_cls_score_reshape" + bottom: "rpn_labels" + propagate_down: 1 + 
propagate_down: 0 + top: "rpn_cls_loss" + loss_weight: 1 + loss_param { + ignore_label: -1 + normalize: true + } +} + +layer { + name: "rpn_loss_bbox" + type: "SmoothL1Loss" + bottom: "rpn_bbox_pred" + bottom: "rpn_bbox_targets" + bottom: 'rpn_bbox_inside_weights' + bottom: 'rpn_bbox_outside_weights' + top: "rpn_loss_bbox" + loss_weight: 1 + smooth_l1_loss_param { sigma: 3.0 } +} + +#========= RoI Proposal ============ + +layer { + name: "rpn_cls_prob" + type: "Softmax" + bottom: "rpn_cls_score_reshape" + top: "rpn_cls_prob" +} + +layer { + name: 'rpn_cls_prob_reshape' + type: 'Reshape' + bottom: 'rpn_cls_prob' + top: 'rpn_cls_prob_reshape' + reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } +} + +layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rpn_rois' +# top: 'rpn_scores' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } +} + +#layer { +# name: 'debug-data' +# type: 'Python' +# bottom: 'data' +# bottom: 'rpn_rois' +# bottom: 'rpn_scores' +# python_param { +# module: 'rpn.debug_layer' +# layer: 'RPNDebugLayer' +# } +#} + +layer { + name: 'roi-data' + type: 'Python' + bottom: 'rpn_rois' + bottom: 'gt_boxes' + top: 'rois' + top: 'labels' + top: 'bbox_targets' + top: 'bbox_inside_weights' + top: 'bbox_outside_weights' + python_param { + module: 'rpn.proposal_target_layer' + layer: 'ProposalTargetLayer' + param_str: "'num_classes': 21" + } +} + +#========= RCNN ============ + +layer { + name: "roi_pool5" + type: "ROIPooling" + bottom: "conv5_3" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 7 + pooled_h: 7 + spatial_scale: 0.0625 # 1/16 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { 
+ name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "cls_score" + type: "InnerProduct" + bottom: "fc7" + top: "cls_score" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "bbox_pred" + type: "InnerProduct" + bottom: "fc7" + top: "bbox_pred" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "loss_cls" + type: "SoftmaxWithLoss" + bottom: "cls_score" + bottom: "labels" + propagate_down: 1 + propagate_down: 0 + top: "loss_cls" + loss_weight: 1 +} +layer { + name: "loss_bbox" + type: "SmoothL1Loss" + bottom: "bbox_pred" + bottom: "bbox_targets" + bottom: "bbox_inside_weights" + bottom: "bbox_outside_weights" + top: "loss_bbox" + loss_weight: 1 +} diff --git a/models/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt b/models/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt new file mode 100644 index 000000000..9a93f7347 --- /dev/null +++ b/models/VGG_CNN_M_1024/faster_rcnn_end2end/solver.prototxt @@ -0,0 +1,14 @@ +train_net: "models/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt" +base_lr: 0.001 +lr_policy: "step" +gamma: 0.1 +stepsize: 50000 +display: 20 +average_loss: 100 +momentum: 0.9 +weight_decay: 0.0005 +# We disable standard caffe solver snapshotting and implement our own snapshot +# 
function +snapshot: 0 +# We still use the snapshot prefix, though +snapshot_prefix: "vgg_cnn_m_1024_faster_rcnn" diff --git a/models/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt b/models/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt new file mode 100644 index 000000000..c8bc90ab0 --- /dev/null +++ b/models/VGG_CNN_M_1024/faster_rcnn_end2end/test.prototxt @@ -0,0 +1,450 @@ +name: "VGG_CNN_M_1024" +input: "data" +input_shape { + dim: 1 + dim: 3 + dim: 224 + dim: 224 +} +input: "im_info" +input_shape { + dim: 1 + dim: 3 +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 7 + stride: 2 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "norm1" + type: "LRN" + bottom: "conv1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0005 + beta: 0.75 + k: 2 + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "norm1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 5 + stride: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "norm2" + type: "LRN" + bottom: "conv2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0005 + beta: 0.75 + k: 2 + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "norm2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + 
name: "relu3" + type: "ReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv3" + top: "conv4" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4" + type: "ReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv4" + top: "conv5" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5" + type: "ReLU" + bottom: "conv5" + top: "conv5" +} + +#========= RPN ============ + +layer { + name: "rpn_conv/3x3" + type: "Convolution" + bottom: "conv5" + top: "rpn/output" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 256 + kernel_size: 3 pad: 1 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_relu/3x3" + type: "ReLU" + bottom: "rpn/output" + top: "rpn/output" +} + +#layer { +# name: "rpn_conv/3x3" +# type: "Convolution" +# bottom: "conv5" +# top: "rpn_conv/3x3" +# param { lr_mult: 1.0 decay_mult: 1.0 } +# param { lr_mult: 2.0 decay_mult: 0 } +# convolution_param { +# num_output: 192 +# kernel_size: 3 pad: 1 stride: 1 +# weight_filler { type: "gaussian" std: 0.01 } +# bias_filler { type: "constant" value: 0 } +# } +#} +#layer { +# name: "rpn_conv/5x5" +# type: "Convolution" +# bottom: "conv5" +# top: "rpn_conv/5x5" +# param { lr_mult: 1.0 decay_mult: 1.0 } +# param { lr_mult: 2.0 decay_mult: 0 } +# convolution_param { +# num_output: 64 +# kernel_size: 5 pad: 2 stride: 1 +# weight_filler { type: "gaussian" std: 0.0036 } +# bias_filler { type: "constant" value: 0 } +# } +#} +#layer { +# name: "rpn/output" +# type: "Concat" +# bottom: "rpn_conv/3x3" +# bottom: "rpn_conv/5x5" +# top: 
"rpn/output" +#} +#layer { +# name: "rpn_relu/output" +# type: "ReLU" +# bottom: "rpn/output" +# top: "rpn/output" +#} + +layer { + name: "rpn_cls_score" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_cls_score" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 18 # 2(bg/fg) * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_bbox_pred" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_bbox_pred" + param { lr_mult: 1.0 decay_mult: 1.0 } + param { lr_mult: 2.0 decay_mult: 0 } + convolution_param { + num_output: 36 # 4 * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + bottom: "rpn_cls_score" + top: "rpn_cls_score_reshape" + name: "rpn_cls_score_reshape" + type: "Reshape" + reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } +} + +#========= RoI Proposal ============ + +layer { + name: "rpn_cls_prob" + type: "Softmax" + bottom: "rpn_cls_score_reshape" + top: "rpn_cls_prob" +} +layer { + name: 'rpn_cls_prob_reshape' + type: 'Reshape' + bottom: 'rpn_cls_prob' + top: 'rpn_cls_prob_reshape' + reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } +} +layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rois' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } +} + +#========= RCNN ============ + +layer { + name: "roi_pool5" + type: "ROIPooling" + bottom: "conv5" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 6 + pooled_h: 6 + spatial_scale: 0.0625 # 1/16 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + 
decay_mult: 0 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 1024 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "cls_score" + type: "InnerProduct" + bottom: "fc7" + top: "cls_score" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "bbox_pred" + type: "InnerProduct" + bottom: "fc7" + top: "bbox_pred" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "cls_prob" + type: "Softmax" + bottom: "cls_score" + top: "cls_prob" +} diff --git a/models/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt b/models/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt new file mode 100644 index 000000000..81a4d3e98 --- /dev/null +++ b/models/VGG_CNN_M_1024/faster_rcnn_end2end/train.prototxt @@ -0,0 +1,484 @@ +name: "VGG_CNN_M_1024" +layer { + name: 'input-data' + type: 'Python' + top: 'data' + top: 'im_info' + top: 'gt_boxes' + python_param { + module: 'roi_data_layer.layer' + layer: 'RoIDataLayer' + param_str: "'num_classes': 21" + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + 
lr_mult: 0 + decay_mult: 0 + } + param { + lr_mult: 0 + decay_mult: 0 + } + convolution_param { + num_output: 96 + kernel_size: 7 + stride: 2 + } +} +layer { + name: "relu1" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "norm1" + type: "LRN" + bottom: "conv1" + top: "norm1" + lrn_param { + local_size: 5 + alpha: 0.0005 + beta: 0.75 + k: 2 + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "norm1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv2" + type: "Convolution" + bottom: "pool1" + top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 5 + stride: 2 + } +} +layer { + name: "relu2" + type: "ReLU" + bottom: "conv2" + top: "conv2" +} +layer { + name: "norm2" + type: "LRN" + bottom: "conv2" + top: "norm2" + lrn_param { + local_size: 5 + alpha: 0.0005 + beta: 0.75 + k: 2 + } +} +layer { + name: "pool2" + type: "Pooling" + bottom: "norm2" + top: "pool2" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu3" + type: "ReLU" + bottom: "conv3" + top: "conv3" +} +layer { + name: "conv4" + type: "Convolution" + bottom: "conv3" + top: "conv4" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu4" + type: "ReLU" + bottom: "conv4" + top: "conv4" +} +layer { + name: "conv5" + type: "Convolution" + bottom: "conv4" + top: "conv5" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 512 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "relu5" + type: "ReLU" + bottom: "conv5" + top: "conv5" +} + +#========= RPN ============ + +layer { + name: 
"rpn_conv/3x3" + type: "Convolution" + bottom: "conv5" + top: "rpn/output" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 256 + kernel_size: 3 pad: 1 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} +layer { + name: "rpn_relu/3x3" + type: "ReLU" + bottom: "rpn/output" + top: "rpn/output" +} +layer { + name: "rpn_cls_score" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_cls_score" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 18 # 2(bg/fg) * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} + +layer { + name: "rpn_bbox_pred" + type: "Convolution" + bottom: "rpn/output" + top: "rpn_bbox_pred" + param { lr_mult: 1.0 } + param { lr_mult: 2.0 } + convolution_param { + num_output: 36 # 4 * 9(anchors) + kernel_size: 1 pad: 0 stride: 1 + weight_filler { type: "gaussian" std: 0.01 } + bias_filler { type: "constant" value: 0 } + } +} + +layer { + bottom: "rpn_cls_score" + top: "rpn_cls_score_reshape" + name: "rpn_cls_score_reshape" + type: "Reshape" + reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } +} + +layer { + name: 'rpn-data' + type: 'Python' + bottom: 'rpn_cls_score' + bottom: 'gt_boxes' + bottom: 'im_info' + bottom: 'data' + top: 'rpn_labels' + top: 'rpn_bbox_targets' + top: 'rpn_bbox_inside_weights' + top: 'rpn_bbox_outside_weights' + python_param { + module: 'rpn.anchor_target_layer' + layer: 'AnchorTargetLayer' + param_str: "'feat_stride': 16" + } +} + +layer { + name: "rpn_loss_cls" + type: "SoftmaxWithLoss" + bottom: "rpn_cls_score_reshape" + bottom: "rpn_labels" + propagate_down: 1 + propagate_down: 0 + top: "rpn_cls_loss" + loss_weight: 1 + loss_param { + ignore_label: -1 + normalize: true + } +} + +layer { + name: "rpn_loss_bbox" + type: "SmoothL1Loss" + bottom: "rpn_bbox_pred" + bottom: "rpn_bbox_targets" + 
bottom: 'rpn_bbox_inside_weights' + bottom: 'rpn_bbox_outside_weights' + top: "rpn_loss_bbox" + loss_weight: 1 + smooth_l1_loss_param { sigma: 3.0 } +} + +#========= RoI Proposal ============ + +layer { + name: "rpn_cls_prob" + type: "Softmax" + bottom: "rpn_cls_score_reshape" + top: "rpn_cls_prob" +} + +layer { + name: 'rpn_cls_prob_reshape' + type: 'Reshape' + bottom: 'rpn_cls_prob' + top: 'rpn_cls_prob_reshape' + reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } } +} + +layer { + name: 'proposal' + type: 'Python' + bottom: 'rpn_cls_prob_reshape' + bottom: 'rpn_bbox_pred' + bottom: 'im_info' + top: 'rpn_rois' +# top: 'rpn_scores' + python_param { + module: 'rpn.proposal_layer' + layer: 'ProposalLayer' + param_str: "'feat_stride': 16" + } +} + +#layer { +# name: 'debug-data' +# type: 'Python' +# bottom: 'data' +# bottom: 'rpn_rois' +# bottom: 'rpn_scores' +# python_param { +# module: 'rpn.debug_layer' +# layer: 'RPNDebugLayer' +# } +#} + +layer { + name: 'roi-data' + type: 'Python' + bottom: 'rpn_rois' + bottom: 'gt_boxes' + top: 'rois' + top: 'labels' + top: 'bbox_targets' + top: 'bbox_inside_weights' + top: 'bbox_outside_weights' + python_param { + module: 'rpn.proposal_target_layer' + layer: 'ProposalTargetLayer' + param_str: "'num_classes': 21" + } +} + +#========= RCNN ============ + +layer { + name: "roi_pool5" + type: "ROIPooling" + bottom: "conv5" + bottom: "rois" + top: "pool5" + roi_pooling_param { + pooled_w: 6 + pooled_h: 6 + spatial_scale: 0.0625 # 1/16 + } +} +layer { + name: "fc6" + type: "InnerProduct" + bottom: "pool5" + top: "fc6" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 4096 + } +} +layer { + name: "relu6" + type: "ReLU" + bottom: "fc6" + top: "fc6" +} +layer { + name: "drop6" + type: "Dropout" + bottom: "fc6" + top: "fc6" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "fc7" + type: "InnerProduct" + bottom: "fc6" + top: "fc7" + param { + lr_mult: 1 + } + param { + lr_mult: 
2 + } + inner_product_param { + num_output: 1024 + } +} +layer { + name: "relu7" + type: "ReLU" + bottom: "fc7" + top: "fc7" +} +layer { + name: "drop7" + type: "Dropout" + bottom: "fc7" + top: "fc7" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "cls_score" + type: "InnerProduct" + bottom: "fc7" + top: "cls_score" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 21 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "bbox_pred" + type: "InnerProduct" + bottom: "fc7" + top: "bbox_pred" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + inner_product_param { + num_output: 84 + weight_filler { + type: "gaussian" + std: 0.001 + } + bias_filler { + type: "constant" + value: 0 + } + } +} +layer { + name: "loss_cls" + type: "SoftmaxWithLoss" + bottom: "cls_score" + bottom: "labels" + propagate_down: 1 + propagate_down: 0 + top: "loss_cls" + loss_weight: 1 +} +layer { + name: "loss_bbox" + type: "SmoothL1Loss" + bottom: "bbox_pred" + bottom: "bbox_targets" + bottom: "bbox_inside_weights" + bottom: "bbox_outside_weights" + top: "loss_bbox" + loss_weight: 1 +} diff --git a/models/ZF/faster_rcnn_end2end/solver.prototxt b/models/ZF/faster_rcnn_end2end/solver.prototxt new file mode 100644 index 000000000..32c914ed1 --- /dev/null +++ b/models/ZF/faster_rcnn_end2end/solver.prototxt @@ -0,0 +1,25 @@ +train_net: "models/ZF/faster_rcnn_end2end/train.prototxt" + +base_lr: 0.001 +lr_policy: "step" +gamma: 0.1 +stepsize: 50000 +display: 20 +average_loss: 100 +momentum: 0.9 +weight_decay: 0.0005 + +#base_lr: 0.001 +#lr_policy: "exp" +#gamma: 0.999539589 # (0.00001/0.001)^(1/10000) +#display: 1 +#average_loss: 100 +#momentum: 0.9 +#weight_decay: 0.0005 + +# We disable standard caffe solver snapshotting and implement our own snapshot +# function +snapshot: 0 +# We still use the snapshot prefix, though +snapshot_prefix: "zf_faster_rcnn" +iter_size: 2 
diff --git a/models/ZF/faster_rcnn_end2end/test.prototxt b/models/ZF/faster_rcnn_end2end/test.prototxt
new file mode 100644
index 000000000..5c4ac7c25
--- /dev/null
+++ b/models/ZF/faster_rcnn_end2end/test.prototxt
@@ -0,0 +1,371 @@
+name: "ZF"  # Test-time ZF Faster R-CNN net: blobs are declared as inputs; there are no data or loss layers.
+
+input: "data"
+input_shape {
+  dim: 1
+  dim: 3
+  dim: 224  # presumably a placeholder height/width that the test code reshapes per image -- TODO confirm
+  dim: 224
+}
+
+input: "im_info"  # also a bottom of the 'proposal' layer below
+input_shape {
+  dim: 1
+  dim: 3  # presumably (height, width, scale) -- TODO confirm against rpn.proposal_layer
+}
+
+#========= conv1-conv5 ============
+
+layer {
+  name: "conv1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1"
+  convolution_param {
+    num_output: 96
+    kernel_size: 7
+    pad: 3
+    stride: 2
+  }
+}
+layer {
+  name: "relu1"
+  type: "ReLU"
+  bottom: "conv1"
+  top: "conv1"  # in-place: top is the same blob as bottom
+}
+layer {
+  name: "norm1"
+  type: "LRN"
+  bottom: "conv1"
+  top: "norm1"
+  lrn_param {
+    local_size: 3
+    alpha: 0.00005
+    beta: 0.75
+    norm_region: WITHIN_CHANNEL
+  }
+}
+layer {
+  name: "pool1"
+  type: "Pooling"
+  bottom: "norm1"
+  top: "pool1"
+  pooling_param {
+    kernel_size: 3
+    stride: 2
+    pad: 1
+    pool: MAX
+  }
+}
+layer {
+  name: "conv2"
+  type: "Convolution"
+  bottom: "pool1"
+  top: "conv2"
+  convolution_param {
+    num_output: 256
+    kernel_size: 5
+    pad: 2
+    stride: 2
+  }
+}
+layer {
+  name: "relu2"
+  type: "ReLU"
+  bottom: "conv2"
+  top: "conv2"
+}
+layer {
+  name: "norm2"
+  type: "LRN"
+  bottom: "conv2"
+  top: "norm2"
+  lrn_param {
+    local_size: 3
+    alpha: 0.00005
+    beta: 0.75
+    norm_region: WITHIN_CHANNEL
+  }
+}
+layer {
+  name: "pool2"
+  type: "Pooling"
+  bottom: "norm2"
+  top: "pool2"
+  pooling_param {
+    kernel_size: 3
+    stride: 2  # conv1, pool1, conv2 and pool2 each use stride 2; conv3-conv5 use stride 1, so conv5 is at stride 16
+    pad: 1
+    pool: MAX
+  }
+}
+layer {
+  name: "conv3"
+  type: "Convolution"
+  bottom: "pool2"
+  top: "conv3"
+  convolution_param {
+    num_output: 384
+    kernel_size: 3
+    pad: 1
+    stride: 1
+  }
+}
+layer {
+  name: "relu3"
+  type: "ReLU"
+  bottom: "conv3"
+  top: "conv3"
+}
+layer {
+  name: "conv4"
+  type: "Convolution"
+  bottom: "conv3"
+  top: "conv4"
+  convolution_param {
+    num_output: 384
+    kernel_size: 3
+    pad: 1
+    stride: 1
+  }
+}
+layer {
+  name: "relu4"
+  type: "ReLU"
+  bottom: "conv4"
+  top: "conv4"
+}
+layer {
+  name: "conv5"
+  type: "Convolution"
+  bottom: "conv4"
+  top: "conv5"  # shared feature map: bottom of both rpn_conv/3x3 and roi_pool_conv5
+  convolution_param {
+    num_output: 256
+    kernel_size: 3
+    pad: 1
+    stride: 1
+  }
+}
+layer {
+  name: "relu5"
+  type: "ReLU"
+  bottom: "conv5"
+  top: "conv5"
+}
+
+#========= RPN ============
+
+layer {
+  name: "rpn_conv/3x3"
+  type: "Convolution"
+  bottom: "conv5"
+  top: "rpn/output"  # same blob name the disabled 3x3+5x5 Concat variant (commented out below) would produce
+  convolution_param {
+    num_output: 256
+    kernel_size: 3 pad: 1 stride: 1
+    weight_filler { type: "gaussian" std: 0.01 }
+    bias_filler { type: "constant" value: 0 }
+  }
+}
+layer {
+  name: "rpn_relu/3x3"
+  type: "ReLU"
+  bottom: "rpn/output"
+  top: "rpn/output"
+}
+#layer {
+#  name: "rpn_conv/3x3"
+#  type: "Convolution"
+#  bottom: "conv5"
+#  top: "rpn_conv/3x3"
+#  param { lr_mult: 1.0 decay_mult: 1.0 }
+#  param { lr_mult: 2.0 decay_mult: 0 }
+#  convolution_param {
+#    num_output: 192
+#    kernel_size: 3 pad: 1 stride: 1
+#    weight_filler { type: "gaussian" std: 0.01 }
+#    bias_filler { type: "constant" value: 0 }
+#  }
+#}
+#layer {
+#  name: "rpn_conv/5x5"
+#  type: "Convolution"
+#  bottom: "conv5"
+#  top: "rpn_conv/5x5"
+#  param { lr_mult: 1.0 decay_mult: 1.0 }
+#  param { lr_mult: 2.0 decay_mult: 0 }
+#  convolution_param {
+#    num_output: 64
+#    kernel_size: 5 pad: 2 stride: 1
+#    weight_filler { type: "gaussian" std: 0.0036 }
+#    bias_filler { type: "constant" value: 0 }
+#  }
+#}
+#layer {
+#  name: "rpn/output"
+#  type: "Concat"
+#  bottom: "rpn_conv/3x3"
+#  bottom: "rpn_conv/5x5"
+#  top: "rpn/output"
+#}
+#layer {
+#  name: "rpn_relu/output"
+#  type: "ReLU"
+#  bottom: "rpn/output"
+#  top: "rpn/output"
+#}
+layer {
+  name: "rpn_cls_score"
+  type: "Convolution"
+  bottom: "rpn/output"
+  top: "rpn_cls_score"
+  convolution_param {
+    num_output: 18   # 2(bg/fg) * 9(anchors)
+    kernel_size: 1 pad: 0 stride: 1
+    weight_filler { type: "gaussian" std: 0.01 }
+    bias_filler { type: "constant" value: 0 }
+  }
+}
+layer {
+  name: "rpn_bbox_pred"
+  type: "Convolution"
+  bottom: "rpn/output"
+  top: "rpn_bbox_pred"
+  convolution_param {
+    num_output: 36   # 4 * 9(anchors)
+    kernel_size: 1 pad: 0 stride: 1
+    weight_filler { type: "gaussian" std: 0.01 }
+    bias_filler { type: "constant" value: 0 }
+  }
+}
+layer {
+  bottom: "rpn_cls_score"
+  top: "rpn_cls_score_reshape"
+  name: "rpn_cls_score_reshape"
+  type: "Reshape"
+  reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }  # Caffe Reshape: 0 copies the bottom's dim, -1 is inferred; folds the 18 score channels into 2 ahead of the Softmax
+}
+
+#========= RoI Proposal ============
+
+layer {
+  name: "rpn_cls_prob"
+  type: "Softmax"
+  bottom: "rpn_cls_score_reshape"
+  top: "rpn_cls_prob"
+}
+layer {
+  name: 'rpn_cls_prob_reshape'
+  type: 'Reshape'
+  bottom: 'rpn_cls_prob'
+  top: 'rpn_cls_prob_reshape'
+  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }  # undoes the reshape above: back to 18 channels (2 * 9 anchors)
+}
+layer {
+  name: 'proposal'
+  type: 'Python'
+  bottom: 'rpn_cls_prob_reshape'
+  bottom: 'rpn_bbox_pred'
+  bottom: 'im_info'
+  top: 'rois'  # fed straight into roi_pool_conv5 in this test net
+  python_param {
+    module: 'rpn.proposal_layer'
+    layer: 'ProposalLayer'
+    param_str: "'feat_stride': 16"  # equals the cumulative stride of conv1..pool2 (2*2*2*2)
+  }
+}
+
+#========= RCNN ============
+
+layer {
+  name: "roi_pool_conv5"
+  type: "ROIPooling"
+  bottom: "conv5"
+  bottom: "rois"
+  top: "roi_pool_conv5"
+  roi_pooling_param {
+    pooled_w: 6
+    pooled_h: 6
+    spatial_scale: 0.0625 # 1/16
+  }
+}
+layer {
+  name: "fc6"
+  type: "InnerProduct"
+  bottom: "roi_pool_conv5"
+  top: "fc6"
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  name: "relu6"
+  type: "ReLU"
+  bottom: "fc6"
+  top: "fc6"
+}
+layer {
+  name: "drop6"
+  type: "Dropout"
+  bottom: "fc6"
+  top: "fc6"
+  dropout_param {
+    dropout_ratio: 0.5
+    scale_train: false  # NOTE(review): not a field of stock BVLC Caffe's DropoutParameter; presumably provided by the project's Caffe fork -- confirm
+  }
+}
+layer {
+  name: "fc7"
+  type: "InnerProduct"
+  bottom: "fc6"
+  top: "fc7"
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  name: "relu7"
+  type: "ReLU"
+  bottom: "fc7"
+  top: "fc7"
+}
+layer {
+  name: "drop7"
+  type: "Dropout"
+  bottom: "fc7"
+  top: "fc7"
+  dropout_param {
+    dropout_ratio: 0.5
+    scale_train: false
+  }
+}
+layer {
+  name: "cls_score"
+  type: "InnerProduct"
+  bottom: "fc7"
+  top: "cls_score"
+  inner_product_param {
+    num_output: 21  # same value as 'num_classes': 21 passed to the Python layers in train.prototxt
+  }
+}
+layer {
+  name: "bbox_pred"
+  type: "InnerProduct"
+  bottom: "fc7"
+  top: "bbox_pred"
+  inner_product_param {
+    num_output: 84  # 84 = 4 * 21; presumably 4 box values per class -- confirm
+  }
+}
+layer {
+  name: "cls_prob"
+  type: "Softmax"
+  bottom: "cls_score"
+  top: "cls_prob"
+  loss_param {  # NOTE(review): loss_param on a plain Softmax looks like a leftover from a SoftmaxWithLoss layer; presumably ignored here -- confirm before removing
+    ignore_label: -1
+    normalize: true
+  }
+}
diff --git a/models/ZF/faster_rcnn_end2end/train.prototxt b/models/ZF/faster_rcnn_end2end/train.prototxt
new file mode 100644
index 000000000..1f055cf2f
--- /dev/null
+++ b/models/ZF/faster_rcnn_end2end/train.prototxt
@@ -0,0 +1,495 @@
+name: "ZF"  # Training-time ZF Faster R-CNN net; its inputs come from the Python 'input-data' layer below.
+layer {
+  name: 'input-data'
+  type: 'Python'
+  top: 'data'
+  top: 'im_info'
+  top: 'gt_boxes'  # consumed by the 'rpn-data' and 'roi-data' Python layers further down this file
+  python_param {
+    module: 'roi_data_layer.layer'
+    layer: 'RoIDataLayer'
+    param_str: "'num_classes': 21"
+  }
+}
+
+#========= conv1-conv5 ============
+
+layer {
+  name: "conv1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1"
+  param { lr_mult: 1.0 }  # Caffe applies param entries to the layer's blobs in order: first the weights
+  param { lr_mult: 2.0 }  # ...then the bias, here at 2x the solver's base learning rate
+  convolution_param {
+    num_output: 96
+    kernel_size: 7
+    pad: 3
+    stride: 2
+  }
+}
+layer {
+  name: "relu1"
+  type: "ReLU"
+  bottom: "conv1"
+  top: "conv1"
+}
+layer {
+  name: "norm1"
+  type: "LRN"
+  bottom: "conv1"
+  top: "norm1"
+  lrn_param {
+    local_size: 3
+    alpha: 0.00005
+    beta: 0.75
+    norm_region: WITHIN_CHANNEL
+  }
+}
+layer {
+  name: "pool1"
+  type: "Pooling"
+  bottom: "norm1"
+  top: "pool1"
+  pooling_param {
+    kernel_size: 3
+    stride: 2
+    pad: 1
+    pool: MAX
+  }
+}
+layer {
+  name: "conv2"
+  type: "Convolution"
+  bottom: "pool1"
+  top: "conv2"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 256
+    kernel_size: 5
+    pad: 2
+    stride: 2
+  }
+}
+layer {
+  name: "relu2"
+  type: "ReLU"
+  bottom: "conv2"
+  top: "conv2"
+}
+layer {
+  name: "norm2"
+  type: "LRN"
+  bottom: "conv2"
+  top: "norm2"
+  lrn_param {
+    local_size: 3
+    alpha: 0.00005
+    beta: 0.75
+    norm_region: WITHIN_CHANNEL
+  }
+}
+layer {
+  name: "pool2"
+  type: "Pooling"
+  bottom: "norm2"
+  top: "pool2"
+  pooling_param {
+    kernel_size: 3
+    stride: 2  # fourth stride-2 stage (conv1, pool1, conv2, pool2); conv3-conv5 keep stride 1
+    pad: 1
+    pool: MAX
+  }
+}
+layer {
+  name: "conv3"
+  type: "Convolution"
+  bottom: "pool2"
+  top: "conv3"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 384
+    kernel_size: 3
+    pad: 1
+    stride: 1
+  }
+}
+layer {
+  name: "relu3"
+  type: "ReLU"
+  bottom: "conv3"
+  top: "conv3"
+}
+layer {
+  name: "conv4"
+  type: "Convolution"
+  bottom: "conv3"
+  top: "conv4"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 384
+    kernel_size: 3
+    pad: 1
+    stride: 1
+  }
+}
+layer {
+  name: "relu4"
+  type: "ReLU"
+  bottom: "conv4"
+  top: "conv4"
+}
+layer {
+  name: "conv5"
+  type: "Convolution"
+  bottom: "conv4"
+  top: "conv5"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 256
+    kernel_size: 3
+    pad: 1
+    stride: 1
+  }
+}
+layer {
+  name: "relu5"
+  type: "ReLU"
+  bottom: "conv5"
+  top: "conv5"
+}
+
+#========= RPN ============
+
+layer {
+  name: "rpn_conv/3x3"
+  type: "Convolution"
+  bottom: "conv5"
+  top: "rpn/output"  # same blob name the disabled 3x3+5x5 Concat variant (commented out below) would produce
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 256
+    kernel_size: 3 pad: 1 stride: 1
+    weight_filler { type: "gaussian" std: 0.01 }
+    bias_filler { type: "constant" value: 0 }
+  }
+}
+layer {
+  name: "rpn_relu/3x3"
+  type: "ReLU"
+  bottom: "rpn/output"
+  top: "rpn/output"
+}
+
+#layer {
+#  name: "rpn_conv/3x3"
+#  type: "Convolution"
+#  bottom: "conv5"
+#  top: "rpn_conv/3x3"
+#  param { lr_mult: 1.0 }
+#  param { lr_mult: 2.0 }
+#  convolution_param {
+#    num_output: 192
+#    kernel_size: 3 pad: 1 stride: 1
+#    weight_filler { type: "gaussian" std: 0.01 }
+#    bias_filler { type: "constant" value: 0 }
+#  }
+#}
+#layer {
+#  name: "rpn_conv/5x5"
+#  type: "Convolution"
+#  bottom: "conv5"
+#  top: "rpn_conv/5x5"
+#  param { lr_mult: 1.0 }
+#  param { lr_mult: 2.0 }
+#  convolution_param {
+#    num_output: 64
+#    kernel_size: 5 pad: 2 stride: 1
+#    weight_filler { type: "gaussian" std: 0.0036 }
+#    bias_filler { type: "constant" value: 0 }
+#  }
+#}
+#layer {
+#  name: "rpn/output"
+#  type: "Concat"
+#  bottom: "rpn_conv/3x3"
+#  bottom: "rpn_conv/5x5"
+#  top: "rpn/output"
+#}
+#layer {
+#  name: "rpn_relu/output"
+#  type: "ReLU"
+#  bottom: "rpn/output"
+#  top: "rpn/output"
+#}
+
+layer {
+  name: "rpn_cls_score"
+  type: "Convolution"
+  bottom: "rpn/output"
+  top: "rpn_cls_score"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 18   # 2(bg/fg) * 9(anchors)
+    kernel_size: 1 pad: 0 stride: 1
+    weight_filler { type: "gaussian" std: 0.01 }
+    bias_filler { type: "constant" value: 0 }
+  }
+}
+layer {
+  name: "rpn_bbox_pred"
+  type: "Convolution"
+  bottom: "rpn/output"
+  top: "rpn_bbox_pred"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  convolution_param {
+    num_output: 36   # 4 * 9(anchors)
+    kernel_size: 1 pad: 0 stride: 1
+    weight_filler { type: "gaussian" std: 0.01 }
+    bias_filler { type: "constant" value: 0 }
+  }
+}
+layer {
+  bottom: "rpn_cls_score"
+  top: "rpn_cls_score_reshape"
+  name: "rpn_cls_score_reshape"
+  type: "Reshape"
+  reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } }  # Caffe Reshape: 0 copies the bottom's dim, -1 is inferred; folds the 18 score channels into 2
+}
+layer {
+  name: 'rpn-data'
+  type: 'Python'
+  bottom: 'rpn_cls_score'
+  bottom: 'gt_boxes'
+  bottom: 'im_info'
+  bottom: 'data'
+  top: 'rpn_labels'  # second bottom of rpn_loss_cls
+  top: 'rpn_bbox_targets'  # this and the two weight blobs below are bottoms of rpn_loss_bbox
+  top: 'rpn_bbox_inside_weights'
+  top: 'rpn_bbox_outside_weights'
+  python_param {
+    module: 'rpn.anchor_target_layer'
+    layer: 'AnchorTargetLayer'
+    param_str: "'feat_stride': 16"
+  }
+}
+layer {
+  name: "rpn_loss_cls"
+  type: "SoftmaxWithLoss"
+  bottom: "rpn_cls_score_reshape"
+  bottom: "rpn_labels"
+  propagate_down: 1  # backpropagate into the first bottom (the reshaped scores)
+  propagate_down: 0  # no gradient into the second bottom (rpn_labels)
+  top: "rpn_cls_loss"
+  loss_weight: 1
+  loss_param {
+    ignore_label: -1  # Caffe LossParameter: instances carrying this label are ignored by the loss
+    normalize: true
+  }
+}
+layer {
+  name: "rpn_loss_bbox"
+  type: "SmoothL1Loss"
+  bottom: "rpn_bbox_pred"
+  bottom: "rpn_bbox_targets"
+  bottom: 'rpn_bbox_inside_weights'
+  bottom: 'rpn_bbox_outside_weights'
+  top: "rpn_loss_bbox"
+  loss_weight: 1
+  smooth_l1_loss_param { sigma: 3.0 }  # NOTE(review): SmoothL1Loss is not a stock BVLC Caffe layer; presumably from the project's fork -- confirm what sigma controls
+}
+
+#========= RoI Proposal ============
+
+layer {
+  name: "rpn_cls_prob"
+  type: "Softmax"
+  bottom: "rpn_cls_score_reshape"
+  top: "rpn_cls_prob"
+}
+layer {
+  name: 'rpn_cls_prob_reshape'
+  type: 'Reshape'
+  bottom: 'rpn_cls_prob'
+  top: 'rpn_cls_prob_reshape'
+  reshape_param { shape { dim: 0 dim: 18 dim: -1 dim: 0 } }  # undoes rpn_cls_score_reshape: back to 18 channels (2 * 9 anchors)
+}
+layer {
+  name: 'proposal'
+  type: 'Python'
+  bottom: 'rpn_cls_prob_reshape'
+  bottom: 'rpn_bbox_pred'
+  bottom: 'im_info'
+  top: 'rpn_rois'  # passed to 'roi-data' together with gt_boxes; the 'rois' blob is produced there
+#  top: 'rpn_scores'
+  python_param {
+    module: 'rpn.proposal_layer'
+    layer: 'ProposalLayer'
+    param_str: "'feat_stride': 16"
+  }
+}
+#layer {
+#  name: 'debug-data'
+#  type: 'Python'
+#  bottom: 'data'
+#  bottom: 'rpn_rois'
+#  bottom: 'rpn_scores'
+#  python_param {
+#    module: 'rpn.debug_layer'
+#    layer: 'RPNDebugLayer'
+#  }
+#}
+layer {
+  name: 'roi-data'
+  type: 'Python'
+  bottom: 'rpn_rois'
+  bottom: 'gt_boxes'
+  top: 'rois'  # second bottom of roi_pool_conv5
+  top: 'labels'  # second bottom of loss_cls
+  top: 'bbox_targets'  # this and the two weight blobs below are bottoms of loss_bbox
+  top: 'bbox_inside_weights'
+  top: 'bbox_outside_weights'
+  python_param {
+    module: 'rpn.proposal_target_layer'
+    layer: 'ProposalTargetLayer'
+    param_str: "'num_classes': 21"
+  }
+}
+
+#========= RCNN ============
+
+layer {
+  name: "roi_pool_conv5"
+  type: "ROIPooling"
+  bottom: "conv5"
+  bottom: "rois"
+  top: "roi_pool_conv5"
+  roi_pooling_param {
+    pooled_w: 6
+    pooled_h: 6
+    spatial_scale: 0.0625 # 1/16
+  }
+}
+layer {
+  name: "fc6"
+  type: "InnerProduct"
+  bottom: "roi_pool_conv5"
+  top: "fc6"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  name: "relu6"
+  type: "ReLU"
+  bottom: "fc6"
+  top: "fc6"
+}
+layer {
+  name: "drop6"
+  type: "Dropout"
+  bottom: "fc6"
+  top: "fc6"
+  dropout_param {
+    dropout_ratio: 0.5
+    scale_train: false  # NOTE(review): not a field of stock BVLC Caffe's DropoutParameter; presumably provided by the project's Caffe fork -- confirm
+  }
+}
+layer {
+  name: "fc7"
+  type: "InnerProduct"
+  bottom: "fc6"
+  top: "fc7"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  name: "relu7"
+  type: "ReLU"
+  bottom: "fc7"
+  top: "fc7"
+}
+layer {
+  name: "drop7"
+  type: "Dropout"
+  bottom: "fc7"
+  top: "fc7"
+  dropout_param {
+    dropout_ratio: 0.5
+    scale_train: false
+  }
+}
+layer {
+  name: "cls_score"
+  type: "InnerProduct"
+  bottom: "fc7"
+  top: "cls_score"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  inner_product_param {
+    num_output: 21  # same value as 'num_classes': 21 given to the 'roi-data' layer
+    weight_filler {
+      type: "gaussian"
+      std: 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "bbox_pred"
+  type: "InnerProduct"
+  bottom: "fc7"
+  top: "bbox_pred"
+  param { lr_mult: 1.0 }
+  param { lr_mult: 2.0 }
+  inner_product_param {
+    num_output: 84  # 84 = 4 * 21; presumably 4 box values per class -- confirm
+    weight_filler {
+      type: "gaussian"
+      std: 0.001  # 10x smaller init std than cls_score's 0.01
+    }
+    bias_filler {
+      type: "constant"
+      value: 0
+    }
+  }
+}
+layer {
+  name: "loss_cls"
+  type: "SoftmaxWithLoss"
+  bottom: "cls_score"
+  bottom: "labels"
+  propagate_down: 1  # backpropagate into cls_score
+  propagate_down: 0  # no gradient into labels
+  top: "cls_loss"
+  loss_weight: 1
+  loss_param {
+    ignore_label: -1  # Caffe LossParameter: instances carrying this label are ignored by the loss
+    normalize: true
+  }
+}
+layer {
+  name: "loss_bbox"
+  type: "SmoothL1Loss"
+  bottom: "bbox_pred"
+  bottom: "bbox_targets"
+  bottom: 'bbox_inside_weights'
+  bottom: 'bbox_outside_weights'
+  top: "bbox_loss"
+  loss_weight: 1
+}