diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md index c0c40fed88..4ff81374ba 100644 --- a/docs/en/understand_mmcv/ops.md +++ b/docs/en/understand_mmcv/ops.md @@ -35,6 +35,7 @@ We implement common CUDA ops used in detection, segmentation, etc. - SigmoidFocalLoss - SoftmaxFocalLoss - SoftNMS +- Sparse Convolution - Synchronized BatchNorm - Voxelization - ThreeInterpolate diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md index d53375e36f..0d0a9c9103 100644 --- a/docs/zh_cn/understand_mmcv/ops.md +++ b/docs/zh_cn/understand_mmcv/ops.md @@ -34,6 +34,7 @@ MMCV 提供了检测、分割等任务中常用的 CUDA 算子 - SigmoidFocalLoss - SoftmaxFocalLoss - SoftNMS +- Sparse Convolution - Synchronized BatchNorm - Voxelization - ThreeInterpolate diff --git a/mmcv/ops/__init__.py b/mmcv/ops/__init__.py index 313cd47f7c..bdd39fcae7 100644 --- a/mmcv/ops/__init__.py +++ b/mmcv/ops/__init__.py @@ -53,6 +53,12 @@ from .rotated_feature_align import rotated_feature_align from .saconv import SAConv2d from .scatter_points import DynamicScatter, dynamic_scatter +from .sparse_conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d, + SparseConvTranspose3d, SparseInverseConv2d, + SparseInverseConv3d, SubMConv2d, SubMConv3d) +from .sparse_modules import SparseModule, SparseSequential +from .sparse_pool import SparseMaxPool2d, SparseMaxPool3d +from .sparse_structure import SparseConvTensor, scatter_nd from .sync_bn import SyncBatchNorm from .three_interpolate import three_interpolate from .three_nn import three_nn @@ -84,6 +90,10 @@ 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', + 'SparseConv2d', 'SparseConv3d', 'SparseConvTranspose2d', + 'SparseConvTranspose3d', 'SparseInverseConv2d', 'SparseInverseConv3d', + 'SubMConv2d', 'SubMConv3d', 'SparseModule', 'SparseSequential', + 'SparseMaxPool2d', 'SparseMaxPool3d', 'SparseConvTensor', 'scatter_nd', 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all', 'points_in_polygons', 'min_area_polygons', 'active_rotated_filter', 'convex_iou', 'convex_giou' diff --git a/mmcv/ops/csrc/common/cuda/spconv/indice.cuh b/mmcv/ops/csrc/common/cuda/spconv/indice.cuh new file mode 100644 index 0000000000..5ef0009a10 --- /dev/null +++ b/mmcv/ops/csrc/common/cuda/spconv/indice.cuh @@ -0,0 +1,236 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INDICE_CU_H_ +#define INDICE_CU_H_ +#include +#include + +#include + +template +__global__ void prepareIndicePairsKernel( + tv::TensorView indicesIn, tv::TensorView indicesOut, + tv::TensorView gridsOut, tv::TensorView indicePairs, + tv::TensorView indiceNum, tv::TensorView indicePairUnique, + const tv::SimpleVector kernelSize, + const tv::SimpleVector stride, + const tv::SimpleVector padding, + const tv::SimpleVector dilation, + const tv::SimpleVector outSpatialShape) { + auto numActIn = indicesIn.dim(0); + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index kernelVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + kernelVolume *= kernelSize[i]; + } + Index numValidPoints = 0; + Index validPoints[KernelMaxVolume * (NDim + 1)]; + Index *pointPtr = nullptr; + auto indicePairsDim2 = indicePairs.dim(2); + Index index; + for (int ix : tv::KernelLoopX(numActIn)) { + numValidPoints = getValidOutPos( + indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), + stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), + validPoints); + for (Index i = 0; i < numValidPoints; ++i) { + pointPtr = validPoints + i * (NDim + 1); + auto offset = pointPtr[NDim]; + auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); + indicePairs(offset, 0, oldNum) = ix; + index = tv::rowArrayIdx(pointPtr, outSpatialShape.data()) + + spatialVolume * indicesIn(ix, 0); + indicePairs(offset, 1, oldNum) = index; + indicePairUnique[offset * indicePairsDim2 + oldNum] = index; + } + } +} + +template +__global__ void prepareDeConvIndicePairsKernel( + tv::TensorView indicesIn, tv::TensorView indicesOut, + tv::TensorView gridsOut, tv::TensorView indicePairs, + tv::TensorView indiceNum, tv::TensorView indicePairUnique, + const tv::SimpleVector kernelSize, + const tv::SimpleVector stride, + const tv::SimpleVector padding, + const tv::SimpleVector dilation, + const tv::SimpleVector outSpatialShape) { + auto numActIn = indicesIn.dim(0); + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index kernelVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + kernelVolume *= kernelSize[i]; + } + Index numValidPoints = 0; + Index validPoints[KernelMaxVolume * (NDim + 1)]; + Index *pointPtr = nullptr; + auto indicePairsDim2 = indicePairs.dim(2); + Index index; + for (int ix : tv::KernelLoopX(numActIn)) { + numValidPoints = getValidOutPosTranspose( + indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), + stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), + validPoints); + for (Index i = 0; i < numValidPoints; ++i) { + pointPtr = validPoints + i * (NDim + 1); + auto offset = pointPtr[NDim]; + auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); + indicePairs(offset, 0, oldNum) = ix; + index = tv::rowArrayIdx(pointPtr, outSpatialShape.data()) + + spatialVolume * indicesIn(ix, 0); + indicePairs(offset, 1, oldNum) = index; + indicePairUnique[offset * indicePairsDim2 + oldNum] = index; + } + } +} + +template +__global__ void assignGridAndIndiceOutKernel( + tv::TensorView indicesOut, tv::TensorView gridsOut, + int numAct, tv::TensorView indicePairs, + tv::TensorView indicePairUnique, + const tv::SimpleVector outSpatialShape, int batchSize) { + Index index; + auto indicesOutPtr = indicesOut.data(); + for (int ix : tv::KernelLoopX(numAct)) { + index = indicePairUnique[ix]; + gridsOut[index] = ix; + index = tv::rowArrayIdxInv( + index, indicesOutPtr + ix * (NDim + 1) + 1, outSpatialShape.data()); + indicesOut[ix * (NDim + 1)] = index % batchSize; + } +} + +template +__global__ void assignIndicePairsKernel( + tv::TensorView indicesOut, tv::TensorView gridsOut, + int numActIn, tv::TensorView indicePairs, + tv::TensorView indicePairUnique, + const tv::SimpleVector outSpatialShape) { + Index index; + int kernelVolume = indicePairs.dim(0); + for (int ix : tv::KernelLoopX(numActIn)) { + for (int i = 0; i < kernelVolume; ++i) { + index = indicePairs(i, 1, ix); + if (index > -1) { + indicePairs(i, 1, ix) = gridsOut[index]; + } + } + } +} + +template +__global__ void prepareSubMGridKernel( + tv::TensorView indicesIn, tv::TensorView gridsOut, + const tv::SimpleVector outSpatialShape) { + auto numActIn = indicesIn.dim(0); + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index index = 0; + for (int ix : tv::KernelLoopX(numActIn)) { + index = tv::rowArrayIdx(indicesIn.data() + ix * (NDim + 1) + 1, + outSpatialShape.data()) + + spatialVolume * indicesIn(ix, 0); + gridsOut[index] = ix; + } +} + +template +__global__ void getSubMIndicePairsKernel( + tv::TensorView indicesIn, tv::TensorView gridsOut, + tv::TensorView indicePairs, tv::TensorView indiceNum, + const tv::SimpleVector kernelSize, + const tv::SimpleVector stride, + const tv::SimpleVector padding, + const tv::SimpleVector dilation, + const tv::SimpleVector outSpatialShape) { + auto numActIn = indicesIn.dim(0); + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index numValidPoints = 0; + Index validPoints[KernelMaxVolume * (NDim + 1)]; + Index *pointPtr = nullptr; + Index index = 0; + for (int ix : tv::KernelLoopX(numActIn)) { + numValidPoints = getValidOutPos( + indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), + stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), + validPoints); + for (int i = 0; i < numValidPoints; ++i) { + pointPtr = validPoints + i * (NDim + 1); + auto offset = pointPtr[NDim]; + index = tv::rowArrayIdx(pointPtr, outSpatialShape.data()) + + spatialVolume * indicesIn(ix, 0); + if (gridsOut[index] > -1) { + auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); + indicePairs(offset, 1, oldNum) = gridsOut[index]; + indicePairs(offset, 0, oldNum) = ix; + } + } + } +} + +template +__global__ void resetGridKernel(const Index *indicePairUnique, + tv::TensorView gridsOut, + int numAct) { + for (int ix : tv::KernelLoopX(numAct)) { + gridsOut[indicePairUnique[ix]] = -1; + } +} + +template +__global__ void resetGridSubMKernel( + const Index *indices, tv::TensorView gridsOut, + const tv::SimpleVector outSpatialShape, int numAct) { + int outSpatialShapeReg[NDim]; + for (int i = 0; i < NDim; ++i) { + outSpatialShapeReg[i] = outSpatialShape[i]; + } + Index spatialVolume = 1; + auto indsPtr = indices; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index index; + for (int ix : tv::KernelLoopX(numAct)) { + indsPtr = indices + ix * (NDim + 1); + index = tv::rowArrayIdx(indsPtr + 1, outSpatialShapeReg); + gridsOut[index + spatialVolume * indsPtr[0]] = -1; + } +} + +#endif diff --git a/mmcv/ops/csrc/common/cuda/spconv/reordering.cuh b/mmcv/ops/csrc/common/cuda/spconv/reordering.cuh new file mode 100644 index 0000000000..e3ec68b937 --- /dev/null +++ b/mmcv/ops/csrc/common/cuda/spconv/reordering.cuh @@ -0,0 +1,160 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef REORDERING_CU_H_ +#define REORDERING_CU_H_ +#include + +template +__global__ void gatherGenericKernel(scalar_t *buffer, const scalar_t *features, + const Index *indices, int size, + int numPlanes) { + int ILPStrideX[NumILP]; + Index inds[NumILP]; +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) + ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x; + + for (int ix : tv::KernelLoopX(size)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) { + if (ix + ILPStrideX[ilp] < size) + inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes; + } + for (int iy : tv::KernelLoopY(numPlanes)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ++ilp) { + if (ix + ILPStrideX[ilp] < size) + buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy] = + features[inds[ilp] + iy]; + } + } + } +} + +template +__global__ void gatherVecKernel(scalar_t *buffer, const scalar_t *features, + const Index *indices, int size, int numPlanes) { + int ILPStrideX[NumILP]; + Index inds[NumILP]; +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) + ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x; + + for (int ix : tv::KernelLoopX(size)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) { + if (ix + ILPStrideX[ilp] < size) + inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes; + } + for (int iy : tv::KernelLoopY(numPlanes)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ++ilp) { + if (ix + ILPStrideX[ilp] < size) + reinterpret_cast( + buffer)[(ix + ILPStrideX[ilp]) * numPlanes + iy] = + reinterpret_cast(features)[inds[ilp] + iy]; + } + } + } +} + +template +__global__ void gatherVecBlockKernel(scalar_t *buffer, const scalar_t *features, + const Index *indices, int size, + int numPlanes) { + int ILPStrideY[NumILP]; +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) + ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y; + features += blockIdx.x * NumTLP; + buffer += blockIdx.x * NumTLP; + + for (int iy : tv::KernelLoopY(size)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ++ilp) { + reinterpret_cast( + buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x] = + reinterpret_cast( + features)[indices[iy + ILPStrideY[ilp]] * numPlanes + + threadIdx.x]; + } + } +} + +template +__global__ void scatterAddGenericKernel(scalar_t *outFeatures, + const scalar_t *buffer, + const Index *indices, int size, + int numPlanes) { + int ILPStrideX[NumILP]; + Index inds[NumILP]; +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) + ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x; + for (int ix : tv::KernelLoopX(size)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) { + if (ix + ILPStrideX[ilp] < size) + inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes; + } + for (int iy : tv::KernelLoopY(numPlanes)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ++ilp) { + if (ix + ILPStrideX[ilp] < size) { + outFeatures[inds[ilp] + iy] += + buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy]; + } + } + } + } +} + +template +__global__ void scatterAddVecBlockKernel(scalar_t *outFeatures, + const scalar_t *buffer, + const Index *indices, int size, + int numPlanes) { + int ILPStrideY[NumILP]; + constexpr int vecloadFactor = sizeof(VecType) / sizeof(scalar_t); +#pragma unroll + for (int ilp = 0; ilp < NumILP; ilp++) + ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y; + outFeatures += blockIdx.x * NumTLP; + buffer += blockIdx.x * NumTLP; + scalar_t buf[vecloadFactor]; + scalar_t buf2[vecloadFactor]; + Index idx; + for (int iy : tv::KernelLoopY(size)) { +#pragma unroll + for (int ilp = 0; ilp < NumILP; ++ilp) { + idx = indices[iy + ILPStrideY[ilp]] * numPlanes + threadIdx.x; + reinterpret_cast(buf)[0] = + reinterpret_cast(outFeatures)[idx]; + reinterpret_cast(buf2)[0] = reinterpret_cast( + buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x]; +#pragma unroll + for (int i = 0; i < vecloadFactor; i++) { + buf[i] += buf2[i]; + } + reinterpret_cast(outFeatures)[idx] = + reinterpret_cast(buf)[0]; + } + } +} + +#endif diff --git a/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp b/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp index b812e62713..e847da23a0 100644 --- a/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp +++ b/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp @@ -1,6 +1,6 @@ #ifndef PYTORCH_CPP_HELPER #define PYTORCH_CPP_HELPER -#include +#include #include diff --git a/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h b/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h new file mode 100644 index 0000000000..f23ff44823 --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h @@ -0,0 +1,70 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PARAMS_GRID_H_ +#define PARAMS_GRID_H_ +#include +#include + +namespace detail { +template +int getTotalSize(std::vector arg) { + return arg.size(); +} + +template +int getTotalSize(std::vector arg, std::vector... args) { + return arg.size() * getTotalSize(args...); +} + +template +int getSize(std::vector arg) { + return arg.size(); +} + +template +void assigner(TT &src, std::vector counter, std::vector &arg) { + std::get(src) = arg[counter[Idx]]; +} + +template +void assigner(TT &src, std::vector counter, std::vector &arg, + std::vector &... args) { + std::get(src) = arg[counter[Idx]]; + assigner(src, counter, args...); +} +} // namespace detail + +template +std::vector> paramsGrid(std::vector... args) { + int length = detail::getTotalSize(args...); + std::vector sizes = {detail::getSize(args)...}; + int size = sizes.size(); + + std::vector> params(length); + std::vector counter(size); + for (int i = 0; i < length; ++i) { + detail::assigner<0>(params[i], counter, args...); + counter[size - 1] += 1; + for (int c = size - 1; c >= 0; --c) { + if (counter[c] == sizes[c] && c > 0) { + counter[c - 1] += 1; + counter[c] = 0; + } + } + } + return params; +} + +#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/prettyprint.h b/mmcv/ops/csrc/common/utils/spconv/prettyprint.h new file mode 100644 index 0000000000..0a6bdc3361 --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/prettyprint.h @@ -0,0 +1,493 @@ +// Copyright Louis Delacroix 2010 - 2014. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// +// A pretty printing library for C++ +// +// Usage: +// Include this header, and operator<< will "just work". + +#ifndef H_PRETTY_PRINT +#define H_PRETTY_PRINT + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace pretty_print { +namespace detail { +// SFINAE type trait to detect whether T::const_iterator exists. + +struct sfinae_base { + using yes = char; + using no = yes[2]; +}; + +template +struct has_const_iterator : private sfinae_base { + private: + template + static yes &test(typename C::const_iterator *); + template + static no &test(...); + + public: + static const bool value = sizeof(test(nullptr)) == sizeof(yes); + using type = T; +}; + +template +struct has_begin_end : private sfinae_base { + private: + template + static yes & + f(typename std::enable_if< + std::is_same(&C::begin)), + typename C::const_iterator (C::*)() const>::value>::type *); + + template + static no &f(...); + + template + static yes &g(typename std::enable_if< + std::is_same(&C::end)), + typename C::const_iterator (C::*)() const>::value, + void>::type *); + + template + static no &g(...); + + public: + static bool const beg_value = sizeof(f(nullptr)) == sizeof(yes); + static bool const end_value = sizeof(g(nullptr)) == sizeof(yes); +}; + +} // namespace detail + +// Holds the delimiter values for a specific character type + +template +struct delimiters_values { + using char_type = TChar; + const char_type *prefix; + const char_type *delimiter; + const char_type *postfix; +}; + +// Defines the delimiter values for a specific container and character type + +template +struct delimiters { + using type = delimiters_values; + static const type values; +}; + +// Functor to print containers. You can use this directly if you want +// to specify a non-default delimiters type. The printing logic can +// be customized by specializing the nested template. + +template , + typename TDelimiters = delimiters> +struct print_container_helper { + using delimiters_type = TDelimiters; + using ostream_type = std::basic_ostream; + + template + struct printer { + static void print_body(const U &c, ostream_type &stream) { + using std::begin; + using std::end; + + auto it = begin(c); + const auto the_end = end(c); + + if (it != the_end) { + for (;;) { + stream << *it; + + if (++it == the_end) break; + + if (delimiters_type::values.delimiter != NULL) + stream << delimiters_type::values.delimiter; + } + } + } + }; + + print_container_helper(const T &container) : container_(container) {} + + inline void operator()(ostream_type &stream) const { + if (delimiters_type::values.prefix != NULL) + stream << delimiters_type::values.prefix; + + printer::print_body(container_, stream); + + if (delimiters_type::values.postfix != NULL) + stream << delimiters_type::values.postfix; + } + + private: + const T &container_; +}; + +// Specialization for pairs + +template +template +struct print_container_helper::printer> { + using ostream_type = + typename print_container_helper::ostream_type; + + static void print_body(const std::pair &c, ostream_type &stream) { + stream << c.first; + if (print_container_helper::delimiters_type::values + .delimiter != NULL) + stream << print_container_helper::delimiters_type::values + .delimiter; + stream << c.second; + } +}; + +// Specialization for tuples + +template +template +struct print_container_helper::printer> { + using ostream_type = + typename print_container_helper::ostream_type; + using element_type = std::tuple; + + template + struct Int {}; + + static void print_body(const element_type &c, ostream_type &stream) { + tuple_print(c, stream, Int<0>()); + } + + static void tuple_print(const element_type &, ostream_type &, + Int) {} + + static void tuple_print( + const element_type &c, ostream_type &stream, + typename std::conditional, + std::nullptr_t>::type) { + stream << std::get<0>(c); + tuple_print(c, stream, Int<1>()); + } + + template + static void tuple_print(const element_type &c, ostream_type &stream, Int) { + if (print_container_helper::delimiters_type::values + .delimiter != NULL) + stream << print_container_helper::delimiters_type::values + .delimiter; + + stream << std::get(c); + + tuple_print(c, stream, Int()); + } +}; + +// Prints a print_container_helper to the specified stream. + +template +inline std::basic_ostream &operator<<( + std::basic_ostream &stream, + const print_container_helper &helper) { + helper(stream); + return stream; +} + +// Basic is_container template; specialize to derive from std::true_type for all +// desired container types + +template +struct is_container + : public std::integral_constant::value && + detail::has_begin_end::beg_value && + detail::has_begin_end::end_value> {}; + +template +struct is_container : std::true_type {}; + +template +struct is_container : std::false_type {}; + +template +struct is_container> : std::true_type {}; + +template +struct is_container> : std::true_type {}; + +template +struct is_container> : std::true_type {}; + +// Default delimiters + +template +struct delimiters { + static const delimiters_values values; +}; +template +const delimiters_values delimiters::values = {"[", ", ", "]"}; +template +struct delimiters { + static const delimiters_values values; +}; +template +const delimiters_values delimiters::values = {L"[", L", ", + L"]"}; + +// Delimiters for (multi)set and unordered_(multi)set + +template +struct delimiters<::std::set, char> { + static const delimiters_values values; +}; + +template +const delimiters_values + delimiters<::std::set, char>::values = {"{", ", ", + "}"}; + +template +struct delimiters<::std::set, wchar_t> { + static const delimiters_values values; +}; + +template +const delimiters_values + delimiters<::std::set, wchar_t>::values = { + L"{", L", ", L"}"}; + +template +struct delimiters<::std::multiset, char> { + static const delimiters_values values; +}; + +template +const delimiters_values + delimiters<::std::multiset, char>::values = { + "{", ", ", "}"}; + +template +struct delimiters<::std::multiset, wchar_t> { + static const delimiters_values values; +}; + +template +const delimiters_values + delimiters<::std::multiset, wchar_t>::values = { + L"{", L", ", L"}"}; + +template +struct delimiters<::std::unordered_set, char> { + static const delimiters_values values; +}; + +template +const delimiters_values delimiters< + ::std::unordered_set, char>::values = { + "{", ", ", "}"}; + +template +struct delimiters<::std::unordered_set, wchar_t> { + static const delimiters_values values; +}; + +template +const delimiters_values delimiters< + ::std::unordered_set, wchar_t>::values = { + L"{", L", ", L"}"}; + +template +struct delimiters<::std::unordered_multiset, + char> { + static const delimiters_values values; +}; + +template +const delimiters_values delimiters< + ::std::unordered_multiset, char>::values = { + "{", ", ", "}"}; + +template +struct delimiters<::std::unordered_multiset, + wchar_t> { + static const delimiters_values values; +}; + +template +const delimiters_values + delimiters<::std::unordered_multiset, + wchar_t>::values = {L"{", L", ", L"}"}; + +// Delimiters for pair and tuple + +template +struct delimiters, char> { + static const delimiters_values values; +}; +template +const delimiters_values delimiters, char>::values = { + "(", ", ", ")"}; +template +struct delimiters<::std::pair, wchar_t> { + static const delimiters_values values; +}; +template +const delimiters_values + delimiters<::std::pair, wchar_t>::values = {L"(", L", ", L")"}; + +template +struct delimiters, char> { + static const delimiters_values values; +}; +template +const delimiters_values delimiters, char>::values = { + "(", ", ", ")"}; +template +struct delimiters<::std::tuple, wchar_t> { + static const delimiters_values values; +}; +template +const delimiters_values + delimiters<::std::tuple, wchar_t>::values = {L"(", L", ", L")"}; + +// Type-erasing helper class for easy use of custom delimiters. +// Requires TCharTraits = std::char_traits and TChar = char or wchar_t, +// and MyDelims needs to be defined for TChar. Usage: "cout << +// pretty_print::custom_delims(x)". + +struct custom_delims_base { + virtual ~custom_delims_base() {} + virtual std::ostream &stream(::std::ostream &) = 0; + virtual std::wostream &stream(::std::wostream &) = 0; +}; + +template +struct custom_delims_wrapper : custom_delims_base { + custom_delims_wrapper(const T &t_) : t(t_) {} + + std::ostream &stream(std::ostream &s) { + return s << print_container_helper, Delims>( + t); + } + + std::wostream &stream(std::wostream &s) { + return s << print_container_helper, + Delims>(t); + } + + private: + const T &t; +}; + +template +struct custom_delims { + template + custom_delims(const Container &c) + : base(new custom_delims_wrapper(c)) {} + + std::unique_ptr base; +}; + +template +inline std::basic_ostream &operator<<( + std::basic_ostream &s, const custom_delims &p) { + return p.base->stream(s); +} + +// A wrapper for a C-style array given as pointer-plus-size. +// Usage: std::cout << pretty_print_array(arr, n) << std::endl; + +template +struct array_wrapper_n { + typedef const T *const_iterator; + typedef T value_type; + + array_wrapper_n(const T *const a, size_t n) : _array(a), _n(n) {} + inline const_iterator begin() const { return _array; } + inline const_iterator end() const { return _array + _n; } + + private: + const T *const _array; + size_t _n; +}; + +// A wrapper for hash-table based containers that offer local iterators to each +// bucket. Usage: std::cout << bucket_print(m, 4) << std::endl; (Prints bucket +// 5 of container m.) + +template +struct bucket_print_wrapper { + typedef typename T::const_local_iterator const_iterator; + typedef typename T::size_type size_type; + + const_iterator begin() const { return m_map.cbegin(n); } + + const_iterator end() const { return m_map.cend(n); } + + bucket_print_wrapper(const T &m, size_type bucket) : m_map(m), n(bucket) {} + + private: + const T &m_map; + const size_type n; +}; + +} // namespace pretty_print + +// Global accessor functions for the convenience wrappers + +template +inline pretty_print::array_wrapper_n pretty_print_array(const T *const a, + size_t n) { + return pretty_print::array_wrapper_n(a, n); +} + +template +pretty_print::bucket_print_wrapper bucket_print(const T &m, + typename T::size_type n) { + return pretty_print::bucket_print_wrapper(m, n); +} + +// Main magic entry point: An overload snuck into namespace std. +// Can we do better? + +namespace std { +// Prints a container to the stream using default delimiters + +template +inline typename enable_if<::pretty_print::is_container::value, + basic_ostream &>::type +operator<<(basic_ostream &stream, const T &container) { + return stream + << ::pretty_print::print_container_helper( + container); +} +} // namespace std + +#endif // H_PRETTY_PRINT diff --git a/mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h b/mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h new file mode 100644 index 0000000000..026e35b1a6 --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h @@ -0,0 +1,60 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace py = pybind11; + +template +std::vector array2Vector(TPyObject arr) { + py::array arr_np = arr; + size_t size = arr.attr("size").template cast(); + py::array_t arr_cc = arr_np; + std::vector data(arr_cc.data(), arr_cc.data() + size); + return data; +} + +template +std::vector arrayT2Vector(py::array_t arr) { + std::vector data(arr.data(), arr.data() + arr.size()); + return data; +} + +template +tv::TensorView array2TensorView(TPyObject arr) { + py::array arr_np = arr; + py::array_t arr_cc = arr_np; + tv::Shape shape; + for (int i = 0; i < arr_cc.ndim(); ++i) { + shape.push_back(arr_cc.shape(i)); + } + return tv::TensorView(arr_cc.mutable_data(), shape); +} +template +tv::TensorView arrayT2TensorView(py::array_t arr) { + tv::Shape shape; + for (int i = 0; i < arr.ndim(); ++i) { + shape.push_back(arr.shape(i)); + } + return tv::TensorView(arr.mutable_data(), shape); +} diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h b/mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h new file mode 100644 index 0000000000..e5e093fbbe --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h @@ -0,0 +1,297 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SPCONV_GEOMETRY_H_ +#define SPCONV_GEOMETRY_H_ + +#include + +#include +#include + +template +TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos, + const Index *kernelSize, + const Index *stride, const Index *padding, + const Index *dilation, + const Index *outSpatialShape, Index *out) { + Index lowers[NDim]; + Index uppers[NDim]; + Index counter[NDim]; + Index counterSize[NDim]; + Index pointCounter = 0; + Index val; + Index numPoints = 1; + Index m, offset; + bool valid = false; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 + + stride[i] + padding[i]) / + stride[i]; + uppers[i] = (input_pos[i] + padding[i]) / stride[i]; + } + +#pragma unroll + for (unsigned i = 0; i < NDim; ++i) { + counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1); + numPoints *= counterSize[i]; + } + +#pragma unroll + for (int i = 0; i < NDim; ++i) { + counter[i] = 0; + } + for (int i = 0; i < numPoints; ++i) { + valid = true; + m = 1; + offset = 0; +#pragma unroll + for (int j = NDim - 1; j >= 0; --j) { + val = uppers[j] - counter[j] * dilation[j]; + out[pointCounter * (NDim + 1) + j] = val; + if (val < 0 || (val > outSpatialShape[j] - 1)) { + valid = false; + // break; + } + offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j]; + m *= kernelSize[j]; + } + + out[pointCounter * (NDim + 1) + NDim] = offset; + if (valid) ++pointCounter; + counter[NDim - 1] += 1; +#pragma unroll + for (int c = NDim - 1; c >= 0; --c) { + if (counter[c] == counterSize[c] && c > 0) { + counter[c - 1] += 1; + counter[c] = 0; + } + } + } + return pointCounter; +} + +template +TV_HOST_DEVICE Index getValidOutPosTranspose( + const Index *input_pos, const Index *kernelSize, const Index *stride, + const Index *padding, const Index *dilation, const Index *outSpatialShape, + Index *out) { + Index lowers[NDim]; + Index uppers[NDim]; + Index counter[NDim]; + Index counterSize[NDim]; + Index pointCounter = 0; + Index val; + Index numPoints = 1; + Index m, offset; + bool valid = false; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + lowers[i] = input_pos[i] * stride[i] - padding[i]; + uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i]; + } +#pragma unroll + for (unsigned i = 0; i < NDim; ++i) { + counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1); + numPoints *= counterSize[i]; + } +#pragma unroll + for (int i = 0; i < NDim; ++i) { + counter[i] = 0; + } + for (int i = 0; i < numPoints; ++i) { + valid = true; + m = 1; + offset = 0; +#pragma unroll + for (int j = NDim - 1; j >= 0; --j) { + val = uppers[j] - counter[j] * dilation[j]; + out[pointCounter * (NDim + 1) + j] = val; + if (val < 0 || (val > outSpatialShape[j] - 1)) { + valid = false; + } + offset += m * (val - lowers[j]) / dilation[j]; + m *= kernelSize[j]; + } + out[pointCounter * (NDim + 1) + NDim] = offset; + if (valid) ++pointCounter; + counter[NDim - 1] += 1; +#pragma unroll + for (int c = NDim - 1; c >= 0; --c) { + if (counter[c] == counterSize[c] && c > 0) { + counter[c - 1] += 1; + counter[c] = 0; + } + } + } + return pointCounter; +} + +template +Index getIndicePairsConv(tv::TensorView indicesIn, + tv::TensorView indicesOut, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + const Index *kernelSize, const Index *stride, + const Index *padding, const Index *dilation, + const Index *outSpatialShape) { + // indicesOut: num_active * kernelVolume * (NDim + 1) + Index numAct = 0; + auto numActIn = indicesIn.dim(0); + Index batchIdx = 0; + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index kernelVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + kernelVolume *= kernelSize[i]; + } + Index numValidPoints = 0; + std::vector validPoints_(kernelVolume * (NDim + 1)); + Index *validPoints = validPoints_.data(); + Index *pointPtr = nullptr; + for (int j = 0; j < numActIn; ++j) { + batchIdx = indicesIn(j, 0); + numValidPoints = getValidOutPos( + indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, + dilation, outSpatialShape, validPoints); + for (Index i = 0; i < numValidPoints; ++i) { + pointPtr = validPoints + i * (NDim + 1); + auto offset = pointPtr[NDim]; + auto index = tv::rowArrayIdx(pointPtr, outSpatialShape) + + spatialVolume * batchIdx; + if (gridsOut[index] == -1) { + for (unsigned k = 1; k < NDim + 1; ++k) { + indicesOut(numAct, k) = pointPtr[k - 1]; + } + indicesOut(numAct, 0) = batchIdx; + gridsOut[index] = numAct++; + } + // indicePairs: [K, 2, L] + indicePairs(offset, 0, indiceNum[offset]) = j; + indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index]; + } + } + return numAct; +} + +template +Index getIndicePairsDeConv(tv::TensorView indicesIn, + tv::TensorView indicesOut, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + const Index *kernelSize, const Index *stride, + const Index *padding, const Index *dilation, + const Index *outSpatialShape) { + Index numAct = 0; + auto numActIn = indicesIn.dim(0); + Index batchIdx = 0; + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index kernelVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + kernelVolume *= kernelSize[i]; + } + Index numValidPoints = 0; + std::vector validPoints_(kernelVolume * (NDim + 1)); + Index *validPoints = validPoints_.data(); + Index *pointPtr = nullptr; + for (int j = 0; j < numActIn; ++j) { + batchIdx = indicesIn(j, 0); + numValidPoints = getValidOutPosTranspose( + indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, + dilation, outSpatialShape, validPoints); + for (Index i = 0; i < numValidPoints; ++i) { + pointPtr = validPoints + i * (NDim + 1); + auto offset = pointPtr[NDim]; + auto index = tv::rowArrayIdx(pointPtr, outSpatialShape) + + spatialVolume * batchIdx; + if (gridsOut[index] == -1) { + for (unsigned k = 1; k < NDim + 1; ++k) { + indicesOut(numAct, k) = pointPtr[k - 1]; + } + indicesOut(numAct, 0) = batchIdx; + gridsOut[index] = numAct++; + } + // indicePairs: [K, 2, L] + indicePairs(offset, 0, indiceNum[offset]) = j; + indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index]; + } + } + return numAct; +} + +template +Index getIndicePairsSubM(tv::TensorView indicesIn, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + const Index *const kernelSize, + const Index *const stride, const Index *const padding, + const Index *dilation, + const Index *const outSpatialShape) { + Index numAct = 0; + auto numActIn = indicesIn.dim(0); + Index batchIdx = 0; + Index spatialVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + spatialVolume *= outSpatialShape[i]; + } + Index kernelVolume = 1; +#pragma unroll + for (int i = 0; i < NDim; ++i) { + kernelVolume *= kernelSize[i]; + } + Index numValidPoints = 0; + // Index validPoints[kernelVolume * (NDim + 1)]; + std::vector validPoints_(kernelVolume * (NDim + 1)); + Index *validPoints = validPoints_.data(); + Index *pointPtr = nullptr; + Index index = 0; + for (int j = 0; j < numActIn; ++j) { + index = tv::rowArrayIdx(indicesIn.data() + j * (NDim + 1) + 1, + outSpatialShape) + + spatialVolume * indicesIn(j, 0); + gridsOut[index] = j; + } + for (int j = 0; j < numActIn; ++j) { + numValidPoints = getValidOutPos( + indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, + dilation, outSpatialShape, validPoints); + for (Index i = 0; i < numValidPoints; ++i) { + pointPtr = validPoints + i * (NDim + 1); + auto offset = pointPtr[NDim]; + index = tv::rowArrayIdx(pointPtr, outSpatialShape) + + spatialVolume * indicesIn(j, 0); + if (gridsOut[index] > -1) { + indicePairs(offset, 0, indiceNum[offset]) = j; + indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index]; + } + } + } + return numActIn; +} + +#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/indice.h b/mmcv/ops/csrc/common/utils/spconv/spconv/indice.h new file mode 100644 index 0000000000..96ce34e3b4 --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/spconv/indice.h @@ -0,0 +1,78 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_ +#define SPARSE_CONV_INDICE_FUNCTOR_H_ +#include + +namespace functor { +template +struct CreateConvIndicePairFunctorP1 { + Index operator()(const Device& d, tv::TensorView indicesIn, + tv::TensorView indicesOut, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + tv::TensorView indicePairUnique, + const tv::SimpleVector kernelSize, + const tv::SimpleVector stride, + const tv::SimpleVector padding, + const tv::SimpleVector dilation, + const tv::SimpleVector outSpatialShape, + bool transpose); +}; + +template +struct CreateConvIndicePairFunctorP2 { + Index operator()(const Device& d, tv::TensorView indicesIn, + tv::TensorView indicesOut, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + tv::TensorView indicePairUnique, + const tv::SimpleVector outSpatialShape, + bool transpose, bool resetGrid = false); +}; + +template +struct CreateConvIndicePairFunctor { + Index operator()(const Device& d, tv::TensorView indicesIn, + tv::TensorView indicesOut, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + const tv::SimpleVector kernelSize, + const tv::SimpleVector stride, + const tv::SimpleVector padding, + const tv::SimpleVector dilation, + const tv::SimpleVector outSpatialShape, + bool transpose, bool resetGrid = false); +}; + +template +struct CreateSubMIndicePairFunctor { + Index operator()(const Device& d, tv::TensorView indicesIn, + tv::TensorView gridsOut, + tv::TensorView indicePairs, + tv::TensorView indiceNum, + const tv::SimpleVector kernelSize, + const tv::SimpleVector stride, + const tv::SimpleVector padding, + const tv::SimpleVector dilation, + const tv::SimpleVector outSpatialShape, + bool transpose, bool resetGrid = false); +}; +} // namespace functor + +#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h b/mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h new file mode 100644 index 0000000000..78f32edd4d --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h @@ -0,0 +1,37 @@ +// Copyright 2019 Yan Yan +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SPARSE_MAXPOOL_FUNCTOR_H_ +#define SPARSE_MAXPOOL_FUNCTOR_H_ +#include + +namespace functor { +template +struct SparseMaxPoolForwardFunctor { + void operator()(const Device& d, tv::TensorView outFeatures, + tv::TensorView inFeatures, + tv::TensorView indices, int size); +}; + +template +struct SparseMaxPoolBackwardFunctor { + void operator()(const Device& d, tv::TensorView outFeatures, + tv::TensorView inFeatures, + tv::TensorView fout, + tv::TensorView fin, + tv::TensorView indices, int size); +}; +} // namespace functor + +#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h b/mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h new file mode 100644 index 0000000000..8262b30efb --- /dev/null +++ b/mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h @@ -0,0 +1,50 @@ +#ifndef MP_HELPER_H_ +#define MP_HELPER_H_ +#include +#include + +template +struct mp_list {}; + +template +using mp_list_c = mp_list...>; + +namespace detail { + +template +constexpr F mp_for_each_impl(mp_list, F &&f) { + return std::initializer_list{(f(T()), 0)...}, std::forward(f); +} + +template +constexpr F mp_for_each_impl(mp_list<>, F &&f) { + return std::forward(f); +} + +} // namespace detail + +namespace detail { + +template class B> +struct mp_rename_impl { + // An error "no type named 'type'" here means that the first argument to + // mp_rename is not a list +}; + +template