Skip to content
This repository has been archived by the owner on Oct 1, 2020. It is now read-only.

Commit

Permalink
Refactor dwconv microkernels
Browse files Browse the repository at this point in the history
  • Loading branch information
Marat Dukhan committed Dec 23, 2018
1 parent 6a16de0 commit 943ec74
Show file tree
Hide file tree
Showing 16 changed files with 1,317 additions and 1,347 deletions.
33 changes: 12 additions & 21 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ SET(QNNPACK_ARM_NEON_UKERNELS
src/q8avgpool/up8xm-neon.c
src/q8conv/4x8-neon.c
src/q8conv/8x8-neon.c
src/q8dwconv/mp8x25-neon.c
src/q8dwconv/up8x9-neon.c
src/q8gavgpool/mp8x7-neon.c
src/q8gavgpool/up8x7-neon.c
src/q8gavgpool/up8xm-neon.c
Expand All @@ -169,8 +171,6 @@ SET(QNNPACK_ARM_NEON_UKERNELS
src/q8gemm/4x8c2-xzp-neon.c
src/q8gemm/6x4-neon.c
src/q8gemm/8x8-neon.c
src/q8mpdw/25c8-neon.c
src/q8updw/9c8-neon.c
src/q8vadd/neon.c
src/sgemm/5x8-neon.c
src/sgemm/6x8-neon.c
Expand All @@ -186,9 +186,9 @@ SET(QNNPACK_ARM_NEON_UKERNELS
SET(QNNPACK_AARCH32_ASM_UKERNELS
src/hgemm/8x8-aarch32-neonfp16arith.S
src/q8conv/4x8-aarch32-neon.S
src/q8dwconv/up8x9-aarch32-neon.S
src/q8gemm/4x8-aarch32-neon.S
src/q8gemm/4x8c2-xzp-aarch32-neon.S
src/q8updw/9c8-aarch32-neon.S)
src/q8gemm/4x8c2-xzp-aarch32-neon.S)

SET(QNNPACK_AARCH64_ASM_UKERNELS
src/q8conv/8x8-aarch64-neon.S
Expand All @@ -199,13 +199,13 @@ SET(QNNPACK_X86_SSE2_UKERNELS
src/q8avgpool/up8x9-sse2.c
src/q8avgpool/up8xm-sse2.c
src/q8conv/4x4c2-sse2.c
src/q8gavgpool/mp8x7-sse2.c
src/q8dwconv/mp8x25-sse2.c
src/q8dwconv/up8x9-sse2.c
src/q8gavgpool/mp8x7p7q-sse2.c
src/q8gavgpool/up8x7-sse2.c
src/q8gavgpool/up8xm-sse2.c
src/q8gemm/2x4c8-sse2.c
src/q8gemm/4x4c2-sse2.c
src/q8mpdw/25c8-sse2.c
src/q8updw/9c8-sse2.c
src/q8vadd/sse2.c
src/u8clamp/sse2.c
src/u8maxpool/16x9p8q-sse2.c
Expand Down Expand Up @@ -482,23 +482,14 @@ IF(QNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(q8conv-test PRIVATE qnnpack cpuinfo fp16 gtest gtest_main)
ADD_TEST(q8conv-test q8conv-test)

ADD_EXECUTABLE(q8updw-test test/q8updw.cc)
SET_TARGET_PROPERTIES(q8updw-test PROPERTIES
ADD_EXECUTABLE(q8dwconv-test test/q8dwconv.cc)
SET_TARGET_PROPERTIES(q8dwconv-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO)
TARGET_INCLUDE_DIRECTORIES(q8updw-test PRIVATE src test)
TARGET_LINK_LIBRARIES(q8updw-test PRIVATE qnnpack cpuinfo fp16 gtest gtest_main)
ADD_TEST(q8updw-test q8updw-test)

ADD_EXECUTABLE(q8mpdw-test test/q8mpdw.cc)
SET_TARGET_PROPERTIES(q8mpdw-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO)
TARGET_INCLUDE_DIRECTORIES(q8mpdw-test PRIVATE src test)
TARGET_LINK_LIBRARIES(q8mpdw-test PRIVATE qnnpack cpuinfo fp16 gtest gtest_main)
ADD_TEST(q8mpdw-test q8mpdw-test)
TARGET_INCLUDE_DIRECTORIES(q8dwconv-test PRIVATE src test)
TARGET_LINK_LIBRARIES(q8dwconv-test PRIVATE qnnpack cpuinfo fp16 gtest gtest_main)
ADD_TEST(q8dwconv-test q8dwconv-test)

ADD_EXECUTABLE(q8vadd-test test/q8vadd.cc)
SET_TARGET_PROPERTIES(q8vadd-test PROPERTIES
Expand Down
31 changes: 15 additions & 16 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def main(args):
build.cc("q8avgpool/up8xm-neon.c"),
build.cc("q8conv/4x8-neon.c"),
build.cc("q8conv/8x8-neon.c"),
build.cc("q8dwconv/mp8x25-neon.c"),
build.cc("q8dwconv/up8x9-neon.c"),
build.cc("q8gavgpool/mp8x7p7q-neon.c"),
build.cc("q8gavgpool/up8x7-neon.c"),
build.cc("q8gavgpool/up8xm-neon.c"),
Expand All @@ -106,8 +108,6 @@ def main(args):
build.cc("q8gemm/4x8c2-xzp-neon.c"),
build.cc("q8gemm/6x4-neon.c"),
build.cc("q8gemm/8x8-neon.c"),
build.cc("q8mpdw/25c8-neon.c"),
build.cc("q8updw/9c8-neon.c"),
build.cc("q8vadd/neon.c"),
build.cc("sgemm/5x8-neon.c"),
build.cc("sgemm/6x8-neon.c"),
Expand All @@ -122,11 +122,11 @@ def main(args):
]
if build.target.is_arm:
qnnpack_objects += [
build.cc("hgemm/8x8-aarch32-neonfp16arith.S"),
build.cc("q8conv/4x8-aarch32-neon.S"),
build.cc("q8dwconv/up8x9-aarch32-neon.S"),
build.cc("q8gemm/4x8-aarch32-neon.S"),
build.cc("q8gemm/4x8c2-xzp-aarch32-neon.S"),
build.cc("q8conv/4x8-aarch32-neon.S"),
build.cc("q8updw/9c8-aarch32-neon.S"),
build.cc("hgemm/8x8-aarch32-neonfp16arith.S"),
]
if build.target.is_arm64:
qnnpack_objects += [
Expand All @@ -140,13 +140,13 @@ def main(args):
build.cc("q8avgpool/up8x9-sse2.c"),
build.cc("q8avgpool/up8xm-sse2.c"),
build.cc("q8conv/4x4c2-sse2.c"),
build.cc("q8dwconv/mp8x25-sse2.c"),
build.cc("q8dwconv/up8x9-sse2.c"),
build.cc("q8gavgpool/mp8x7p7q-sse2.c"),
build.cc("q8gavgpool/up8x7-sse2.c"),
build.cc("q8gavgpool/up8xm-sse2.c"),
build.cc("q8gemm/2x4c8-sse2.c"),
build.cc("q8gemm/4x4c2-sse2.c"),
build.cc("q8mpdw/25c8-sse2.c"),
build.cc("q8updw/9c8-sse2.c"),
build.cc("q8vadd/sse2.c"),
build.cc("u8clamp/sse2.c"),
build.cc("u8maxpool/16x9p8q-sse2.c"),
Expand All @@ -165,21 +165,20 @@ def main(args):
"log": build.target.is_android},
extra_include_dirs=["src", "test"]):

build.unittest("q8gemm-test", build.cxx("q8gemm.cc"))
build.unittest("q8conv-test", build.cxx("q8conv.cc"))
build.unittest("q8updw-test", build.cxx("q8updw.cc"))
build.unittest("q8mpdw-test", build.cxx("q8mpdw.cc"))
build.unittest("hgemm-test", build.cxx("hgemm.cc"))
build.unittest("q8avgpool-test", build.cxx("q8avgpool.cc"))
build.unittest("q8conv-test", build.cxx("q8conv.cc"))
build.unittest("q8dwconv-test", build.cxx("q8dwconv.cc"))
build.unittest("q8gavgpool-test", build.cxx("q8gavgpool.cc"))
build.unittest("q8gemm-test", build.cxx("q8gemm.cc"))
build.unittest("q8vadd-test", build.cxx("q8vadd.cc"))
build.unittest("u8maxpool-test", build.cxx("u8maxpool.cc"))
build.unittest("sgemm-test", build.cxx("sgemm.cc"))
build.unittest("u8clamp-test", build.cxx("u8clamp.cc"))
build.unittest("u8rmax-test", build.cxx("u8rmax.cc"))
build.unittest("u8lut32norm-test", build.cxx("u8lut32norm.cc"))
build.unittest("hgemm-test", build.cxx("hgemm.cc"))
build.unittest("sgemm-test", build.cxx("sgemm.cc"))
build.unittest("x8zip-test", build.cxx("x8zip.cc"))
build.unittest("u8maxpool-test", build.cxx("u8maxpool.cc"))
build.unittest("u8rmax-test", build.cxx("u8rmax.cc"))
build.unittest("x8lut-test", build.cxx("x8lut.cc"))
build.unittest("x8zip-test", build.cxx("x8zip.cc"))

build.unittest("add-test", build.cxx("add.cc"))
build.unittest("average-pooling-test", build.cxx("average-pooling.cc"))
Expand Down
20 changes: 10 additions & 10 deletions jni/Android.mk
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,20 @@ LOCAL_SRC_FILES += \
src/q8avgpool/up8x9-neon.c \
src/q8avgpool/up8xm-neon.c \
src/q8conv/4x8-aarch32-neon.S \
src/q8gavgpool/mp8x7-neon.c \
src/q8dwconv/mp8x25-neon.c \
src/q8dwconv/up8x9-aarch32-neon.S \
src/q8gavgpool/mp8x7p7q-neon.c \
src/q8gavgpool/up8x7-neon.c \
src/q8gavgpool/up8xm-neon.c \
src/q8gemm/4x-sumrows-neon.c \
src/q8gemm/4x8-aarch32-neon.S \
src/q8gemm/4x8c2-xzp-aarch32-neon.S \
src/q8mpdw/25c8-neon.c \
src/q8updw/9c8-aarch32-neon.S \
src/q8vadd/neon.c \
src/u8clamp/neon.c \
src/u8lut32norm/scalar.c \
src/u8maxpool/16x9p8q-neon.c \
src/u8maxpool/sub16-neon.c \
src/u8rmax/neon.c \
src/u8lut32norm/scalar.c \
src/x8lut/scalar.c \
src/x8zip/x2-neon.c \
src/x8zip/x3-neon.c \
Expand All @@ -47,12 +47,12 @@ LOCAL_SRC_FILES += \
src/q8avgpool/up8x9-neon.c \
src/q8avgpool/up8xm-neon.c \
src/q8conv/8x8-aarch64-neon.S \
src/q8gavgpool/mp8x7-neon.c \
src/q8dwconv/mp8x25-neon.c \
src/q8dwconv/up8x9-neon.c \
src/q8gavgpool/mp8x7p7q-neon.c \
src/q8gavgpool/up8x7-neon.c \
src/q8gavgpool/up8xm-neon.c \
src/q8gemm/8x8-aarch64-neon.S \
src/q8mpdw/25c8-neon.c \
src/q8updw/9c8-neon.c \
src/q8vadd/neon.c \
src/u8clamp/neon.c \
src/u8lut32norm/scalar.c \
Expand All @@ -78,12 +78,12 @@ LOCAL_SRC_FILES += \
src/q8avgpool/up8x9-sse2.c \
src/q8avgpool/up8xm-sse2.c \
src/q8conv/4x4c2-sse2.c \
src/q8gavgpool/mp8x7-sse2.c \
src/q8dwconv/mp8x25-sse2.c \
src/q8dwconv/up8x9-sse2.c \
src/q8gavgpool/mp8x7p7q-sse2.c \
src/q8gavgpool/up8x7-sse2.c \
src/q8gavgpool/up8xm-sse2.c \
src/q8gemm/4x4c2-sse2.c \
src/q8mpdw/25c8-sse2.c \
src/q8updw/9c8-sse2.c \
src/q8vadd/sse2.c \
src/u8clamp/sse2.c \
src/u8lut32norm/scalar.c \
Expand Down
26 changes: 13 additions & 13 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <qnnpack/params.h>
#include <qnnpack/q8avgpool.h>
#include <qnnpack/q8conv.h>
#include <qnnpack/q8dw.h>
#include <qnnpack/q8dwconv.h>
#include <qnnpack/q8gavgpool.h>
#include <qnnpack/q8gemm.h>
#include <qnnpack/q8vadd.h>
Expand Down Expand Up @@ -73,12 +73,12 @@ static void init(void) {
default:
break;
}
qnnp_params.q8dw9 = (struct q8updw_parameters) {
.updw = q8updw_ukernel_9c8__aarch32_neon,
qnnp_params.q8dw9 = (struct q8dwconv_up_parameters) {
.updw = q8dwconv_ukernel_up8x9__aarch32_neon,
.cr = 8,
};
qnnp_params.q8dw25 = (struct q8mpdw_parameters) {
.mpdw = q8mpdw_ukernel_25c8__neon,
qnnp_params.q8dw25 = (struct q8dwconv_mp_parameters) {
.mpdw = q8dwconv_ukernel_mp8x25__neon,
.cr = 8,
};
qnnp_params.q8sum_rows = (struct q8sum_rows_parameters) {
Expand Down Expand Up @@ -129,12 +129,12 @@ static void init(void) {
qnnp_params.q8conv_xzp = (struct q8conv_xzp_parameters) {
.kthreshold = SIZE_MAX,
};
qnnp_params.q8dw9 = (struct q8updw_parameters) {
.updw = q8updw_ukernel_9c8__neon,
qnnp_params.q8dw9 = (struct q8dwconv_up_parameters) {
.updw = q8dwconv_ukernel_up8x9__neon,
.cr = 8,
};
qnnp_params.q8dw25 = (struct q8mpdw_parameters) {
.mpdw = q8mpdw_ukernel_25c8__neon,
qnnp_params.q8dw25 = (struct q8dwconv_mp_parameters) {
.mpdw = q8dwconv_ukernel_mp8x25__neon,
.cr = 8,
};
qnnp_params.q8vadd = q8vadd_ukernel__neon;
Expand Down Expand Up @@ -185,12 +185,12 @@ static void init(void) {
qnnp_params.q8conv_xzp = (struct q8conv_xzp_parameters) {
.kthreshold = SIZE_MAX,
};
qnnp_params.q8dw9 = (struct q8updw_parameters) {
.updw = q8updw_ukernel_9c8__sse2,
qnnp_params.q8dw9 = (struct q8dwconv_up_parameters) {
.updw = q8dwconv_ukernel_up8x9__sse2,
.cr = 8,
};
qnnp_params.q8dw25 = (struct q8mpdw_parameters) {
.mpdw = q8mpdw_ukernel_25c8__sse2,
qnnp_params.q8dw25 = (struct q8dwconv_mp_parameters) {
.mpdw = q8dwconv_ukernel_mp8x25__sse2,
.cr = 8,
};
qnnp_params.q8vadd = q8vadd_ukernel__sse2;
Expand Down
26 changes: 13 additions & 13 deletions src/operator-run.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ static void compute_q8conv(
&context->quantization_params);
}

struct q8dw_context {
struct q8dwconv_context {
size_t groups;
size_t group_stride;
const uint8_t** indirection_buffer;
Expand All @@ -227,13 +227,13 @@ struct q8dw_context {
size_t output_col_increment;
union qnnp_conv_quantization_params quantization_params;
union {
const q8updw_ukernel_function unipass_ukernel;
const q8mpdw_ukernel_function multipass_ukernel;
const q8dwconv_up_ukernel_function unipass_ukernel;
const q8dwconv_mp_ukernel_function multipass_ukernel;
};
};

static void compute_q8updw(
const struct q8dw_context context[restrict static 1],
static void compute_dwconv_unipass(
const struct q8dwconv_context context[restrict static 1],
size_t image,
size_t output_y)
{
Expand All @@ -250,8 +250,8 @@ static void compute_q8updw(
&context->quantization_params);
}

static void compute_q8mpdw(
const struct q8dw_context context[restrict static 1],
static void compute_dwconv_multiipass(
const struct q8dwconv_context context[restrict static 1],
size_t image,
size_t output_y)
{
Expand Down Expand Up @@ -621,7 +621,7 @@ enum qnnp_status qnnp_run_operator(qnnp_operator_t op, pthreadpool_t threadpool)
switch (kernel_size) {
case 9:
{
struct q8dw_context q8dw_context = {
struct q8dwconv_context context = {
.groups = groups,
.indirection_buffer = (const uint8_t**) op->indirection_buffer,
.indirection_buffer_row_stride = kernel_size + (output_width * width_step - 1) * kernel_height,
Expand All @@ -637,14 +637,14 @@ enum qnnp_status qnnp_run_operator(qnnp_operator_t op, pthreadpool_t threadpool)
};
pthreadpool_compute_2d(
threadpool,
(pthreadpool_function_2d_t) compute_q8updw,
&q8dw_context,
(pthreadpool_function_2d_t) compute_dwconv_unipass,
&context,
batch_size, output_height);
break;
}
case 25:
{
struct q8dw_context q8dw_context = {
struct q8dwconv_context context = {
.groups = groups,
.group_stride = op->group_stride,
.indirection_buffer = (const uint8_t**) op->indirection_buffer,
Expand All @@ -661,8 +661,8 @@ enum qnnp_status qnnp_run_operator(qnnp_operator_t op, pthreadpool_t threadpool)
};
pthreadpool_compute_2d(
threadpool,
(pthreadpool_function_2d_t) compute_q8mpdw,
&q8dw_context,
(pthreadpool_function_2d_t) compute_dwconv_multiipass,
&context,
batch_size, output_height);
break;
}
Expand Down
4 changes: 2 additions & 2 deletions src/q8mpdw/25c8-neon.c → src/q8dwconv/mp8x25-neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

#include <arm_neon.h>

#include <qnnpack/q8dw.h>
#include <qnnpack/q8dwconv.h>


void q8mpdw_ukernel_25c8__neon(
void q8dwconv_ukernel_mp8x25__neon(
size_t channels,
size_t output_width,
const uint8_t** input,
Expand Down
4 changes: 2 additions & 2 deletions src/q8mpdw/25c8-sse2.c → src/q8dwconv/mp8x25-sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@

#include <immintrin.h>

#include <qnnpack/q8dw.h>
#include <qnnpack/q8dwconv.h>


void q8mpdw_ukernel_25c8__sse2(
void q8dwconv_ukernel_mp8x25__sse2(
size_t channels,
size_t output_width,
const uint8_t** input,
Expand Down
Loading

0 comments on commit 943ec74

Please sign in to comment.