Skip to content

Commit

Permalink
add num_classes=-1 support in cpu_impl of detection_output (openvinot…
Browse files Browse the repository at this point in the history
  • Loading branch information
wilson-seok authored Aug 2, 2023
1 parent 9e9cf72 commit d51fc7a
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 14 deletions.
23 changes: 14 additions & 9 deletions src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,12 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {
auto out_ptr = lock.begin();

const auto& args = instance.argument;

auto confidence_layout = instance.confidence_memory()->get_layout();
auto priors_layout = instance.prior_box_memory()->get_layout();

const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size;
const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes;
// Per image -> For each label: Pair (score, prior index)
std::vector<std::map<int, std::vector<std::pair<float, int>>>> final_detections;
for (int image = 0; image < num_of_images; ++image) {
Expand All @@ -290,7 +296,7 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {
std::map<int, std::vector<int>> indices;
int num_det = 0;
if (nms_type == NMSType::CAFFE) {
for (int cls = 0; cls < static_cast<int>(args->num_classes); ++cls) {
for (int cls = 0; cls < num_classes; ++cls) {
if (static_cast<int>(cls) == args->background_label_id) {
conf_per_image[cls].clear();
continue; // Skip background class.
Expand Down Expand Up @@ -522,9 +528,7 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {
template <typename dtype>
void extract_confidences_per_image_caffe(stream& stream, const detection_output_inst& instance,
std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences,
const int num_of_priors) {
const int num_classes = instance.argument->num_classes;

const int num_of_priors, const int num_classes) {
const int num_of_images = static_cast<int>(confidences.size());
auto input_confidence = instance.confidence_memory();
const float confidence_threshold = instance.argument->confidence_threshold;
Expand Down Expand Up @@ -616,9 +620,8 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {
template <typename dtype>
void extract_confidences_per_image_mxnet(stream& stream, const detection_output_inst& instance,
std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences,
const int num_of_priors,
const int num_of_priors, const int num_classes,
std::vector<std::vector<std::pair<float, std::pair<int, int>>>>& scoreIndexPairs) {
const int num_classes = instance.argument->num_classes;
const int background_label_id = instance.argument->background_label_id;
const int num_of_images = static_cast<int>(confidences.size());
auto input_confidence = instance.confidence_memory();
Expand Down Expand Up @@ -750,11 +753,13 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {

const auto& args = instance.argument;

auto confidence_layout = instance.confidence_memory()->get_layout();
auto priors_layout = instance.prior_box_memory()->get_layout();

const int num_of_images = static_cast<int>(bboxes.size());
const int num_of_priors = priors_layout.spatial(1) / args->prior_info_size;
const int num_loc_classes = args->share_location ? 1 : args->num_classes;
const int num_classes = (args->num_classes == -1) ? confidence_layout.feature() / num_of_priors : args->num_classes;
const int num_loc_classes = args->share_location ? 1 : num_classes;

// Extract locations per image.
std::vector<std::vector<std::vector<bounding_box>>> locations(
Expand Down Expand Up @@ -812,9 +817,9 @@ struct detection_output_impl : typed_primitive_impl<detection_output> {
}
// Extract confidences per image.
if (nms_type == NMSType::CAFFE) {
extract_confidences_per_image_caffe<dtype>(stream, instance, confidences, num_of_priors);
extract_confidences_per_image_caffe<dtype>(stream, instance, confidences, num_of_priors, num_classes);
} else {
extract_confidences_per_image_mxnet<dtype>(stream, instance, confidences, num_of_priors, scoreIndexPairs);
extract_confidences_per_image_mxnet<dtype>(stream, instance, confidences, num_of_priors, num_classes, scoreIndexPairs);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,24 @@ class DetectionOutputLayerGPUTest : public testing::WithParamInterface<Detection

auto params = ngraph::builder::makeDynamicParams(ngraph::element::f32, inputDynamicShapes);
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::opset3::Parameter>(params));
auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");

if (attrs.num_classes == -1) {
std::shared_ptr<ov::op::v8::DetectionOutput> detOut;

if (paramOuts.size() == 3)
detOut = std::make_shared<ov::op::v8::DetectionOutput>(paramOuts[0], paramOuts[1], paramOuts[2], attrs);
else if (paramOuts.size() == 5)
detOut = std::make_shared<ov::op::v8::DetectionOutput>(paramOuts[0], paramOuts[1], paramOuts[2], paramOuts[3], paramOuts[4], attrs);
else
throw std::runtime_error("DetectionOutput layer supports only 3 or 5 inputs");

ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
} else {
auto detOut = ngraph::builder::makeDetectionOutput(paramOuts, attrs);
ngraph::ResultVector results{std::make_shared<ngraph::opset3::Result>(detOut)};
function = std::make_shared<ngraph::Function>(results, params, "DetectionOutputDynamic");
}
}

private:
Expand Down Expand Up @@ -242,7 +257,7 @@ TEST_P(DetectionOutputLayerGPUTest, CompareWithRefs) {

namespace {

const int numClasses = 11;
const std::vector<int> numClasses = {11, -1};
const int backgroundLabelId = 0;
const std::vector<int> topK = {75};
const std::vector<std::vector<int>> keepTopK = { {50}, {100} };
Expand All @@ -256,7 +271,7 @@ const float objectnessScore = 0.4f;
const std::vector<size_t> numberBatch = {1, 2};

const auto commonAttributes = ::testing::Combine(
::testing::Values(numClasses),
::testing::Values(numClasses[0]),
::testing::Values(backgroundLabelId),
::testing::ValuesIn(topK),
::testing::ValuesIn(keepTopK),
Expand All @@ -268,6 +283,18 @@ const auto commonAttributes = ::testing::Combine(
::testing::ValuesIn(decreaseLabelId)
);

// Attribute combinations for the num_classes == -1 test cases.
// Only codeType is swept in full (ValuesIn); topK, keepTopK, clipAfterNms,
// clipBeforeNms and decreaseLabelId are each pinned to their first entry, so
// this set yields one combination per codeType value.
// NOTE(review): the arguments are positional and presumably must match the
// attribute tuple declared elsewhere in this file -- confirm before reordering.
const auto commonAttributes_v8 = ::testing::Combine(
::testing::Values(numClasses[1]),  // second entry of numClasses, i.e. -1
::testing::Values(backgroundLabelId),
::testing::Values(topK[0]),
::testing::Values(keepTopK[0]),
::testing::ValuesIn(codeType),  // the only attribute iterated over all of its values
::testing::Values(nmsThreshold),
::testing::Values(confidenceThreshold),
::testing::Values(clipAfterNms[0]),
::testing::Values(clipBeforeNms[0]),
::testing::Values(decreaseLabelId[0])
);
/* =============== 3 inputs cases =============== */

const std::vector<ParamsWhichSizeDependsDynamic> specificParams3InDynamic = {
Expand Down Expand Up @@ -362,9 +389,21 @@ const auto params3InputsDynamic = ::testing::Combine(
::testing::Values(ov::test::utils::DEVICE_GPU)
);

// Full parameter set for the 3-input, num_classes == -1 test cases:
// commonAttributes_v8 combined with only the first entry of
// specificParams3InDynamic, every value in numberBatch, the single
// objectnessScore value and the GPU device.
const auto params3InputsDynamic_v8 = ::testing::Combine(
commonAttributes_v8,
::testing::Values(specificParams3InDynamic[0]),
::testing::ValuesIn(numberBatch),
::testing::Values(objectnessScore),
// NOTE(review): the meaning of this boolean is defined by the test parameter
// tuple, which is not visible here -- confirm against the tuple declaration.
::testing::Values(true),
::testing::Values(ov::test::utils::DEVICE_GPU)
);

// Instantiates DetectionOutputLayerGPUTest once per combination in
// params3InputsDynamic; getTestCaseName supplies the name of each instance.
INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputDynamic3In, DetectionOutputLayerGPUTest,
params3InputsDynamic,
DetectionOutputLayerGPUTest::getTestCaseName);

// Instantiates DetectionOutputLayerGPUTest once per combination in
// params3InputsDynamic_v8, i.e. the 3-input cases built from
// commonAttributes_v8 (num_classes == -1); getTestCaseName supplies the name
// of each instance.
INSTANTIATE_TEST_SUITE_P(smoke_GPUDetectionOutputV8Dynamic3In, DetectionOutputLayerGPUTest,
params3InputsDynamic_v8,
DetectionOutputLayerGPUTest::getTestCaseName);
} // namespace
} // namespace GPULayerTestsDefinitions

0 comments on commit d51fc7a

Please sign in to comment.