Skip to content

Commit

Permalink
X32-PACKW Neon microkernels renamed to include ld4lane and ld2lane
Browse files: browse the repository at this point in the history
- x8 and x12 add ld4lane suffix
- x2 add ld2lane suffix

PiperOrigin-RevId: 516538969
  • Loading branch information
fbarchard authored and xnnpack-bot committed Mar 14, 2023
1 parent daad9f6 commit 9ae8348
Show file tree
Hide file tree
Showing 13 changed files with 132 additions and 132 deletions.
78 changes: 39 additions & 39 deletions bench/f32-bgemm.cc

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions bench/x32-packw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,28 +81,28 @@ static void x32_packw(benchmark::State& state,


#if XNN_ARCH_ARM || XNN_ARCH_ARM64
static void x32_packw_x2__neon(benchmark::State& state, const char* net) {
static void x32_packw_x2__neon_ld2lane(benchmark::State& state, const char* net) {
x32_packw(state,
xnn_x32_packw_gemm_goi_ukernel_x2__neon,
xnn_x32_packw_gemm_goi_ukernel_x2__neon_ld2lane,
/*nr=*/2, /*kr=*/1, /*sr=*/1,
benchmark::utils::CheckNEON);
}
static void x32_packw_x8__neon(benchmark::State& state, const char* net) {
static void x32_packw_x8__neon_ld4lane(benchmark::State& state, const char* net) {
x32_packw(state,
xnn_x32_packw_gemm_goi_ukernel_x8__neon,
xnn_x32_packw_gemm_goi_ukernel_x8__neon_ld4lane,
/*nr=*/8, /*kr=*/1, /*sr=*/1,
benchmark::utils::CheckNEON);
}
static void x32_packw_x12__neon(benchmark::State& state, const char* net) {
static void x32_packw_x12__neon_ld4lane(benchmark::State& state, const char* net) {
x32_packw(state,
xnn_x32_packw_gemm_goi_ukernel_x12__neon,
xnn_x32_packw_gemm_goi_ukernel_x12__neon_ld4lane,
/*nr=*/12, /*kr=*/1, /*sr=*/1,
benchmark::utils::CheckNEON);
}

BENCHMARK_BGEMM(x32_packw_x2__neon)
BENCHMARK_BGEMM(x32_packw_x8__neon)
BENCHMARK_BGEMM(x32_packw_x12__neon)
BENCHMARK_BGEMM(x32_packw_x2__neon_ld2lane)
BENCHMARK_BGEMM(x32_packw_x8__neon_ld4lane)
BENCHMARK_BGEMM(x32_packw_x12__neon_ld4lane)
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64

static void x32_packw_x2__scalar_float(benchmark::State& state, const char* net) {
Expand Down
10 changes: 5 additions & 5 deletions cmake/microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3101,9 +3101,9 @@ SET(ALL_NEON_MICROKERNEL_SRCS
src/x16-transposec/gen/x16-transposec-8x8-reuse-multi-zip-neon.c
src/x16-transposec/gen/x16-transposec-8x8-reuse-switch-zip-neon.c
src/x24-transposec/x24-transposec-2x2-neon-tbl64.c
src/x32-packw/gen/x32-packw-x2-neon.c
src/x32-packw/gen/x32-packw-x8-neon.c
src/x32-packw/gen/x32-packw-x12-neon.c
src/x32-packw/gen/x32-packw-x2-neon-ld2lane.c
src/x32-packw/gen/x32-packw-x8-neon-ld4lane.c
src/x32-packw/gen/x32-packw-x12-neon-ld4lane.c
src/x32-packx/x32-packx-x4-neon-st4.c
src/x32-transposec/gen/x32-transposec-2x2-multi-dec-zip-neon.c
src/x32-transposec/gen/x32-transposec-2x2-multi-mov-zip-neon.c
Expand Down Expand Up @@ -5119,6 +5119,8 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS
src/x8-lut/gen/x8-lut-scalar-x4.c
src/x8-lut/gen/x8-lut-scalar-x8.c
src/x8-lut/gen/x8-lut-scalar-x16.c
src/x8-packw/gen/x8-packw-x2-scalar-int.c
src/x8-packw/gen/x8-packw-x4-scalar-int.c
src/x8-transposec/gen/x8-transposec-1x2-scalar-int.c
src/x8-transposec/gen/x8-transposec-1x4-scalar-int.c
src/x8-transposec/gen/x8-transposec-2x1-scalar-int.c
Expand All @@ -5131,8 +5133,6 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS
src/x8-zip/x8-zip-x3-scalar.c
src/x8-zip/x8-zip-x4-scalar.c
src/x8-zip/x8-zip-xm-scalar.c
src/x8-packw/gen/x8-packw-x2-scalar-int.c
src/x8-packw/gen/x8-packw-x4-scalar-int.c
src/x16-packw/gen/x16-packw-x8-scalar-int.c
src/x16-packw/gen/x16-packw-x16-scalar-int.c
src/x16-transposec/gen/x16-transposec-1x2-scalar-int.c
Expand Down
10 changes: 5 additions & 5 deletions microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3108,9 +3108,9 @@ ALL_NEON_MICROKERNEL_SRCS = [
"src/x16-transposec/gen/x16-transposec-8x8-reuse-multi-zip-neon.c",
"src/x16-transposec/gen/x16-transposec-8x8-reuse-switch-zip-neon.c",
"src/x24-transposec/x24-transposec-2x2-neon-tbl64.c",
"src/x32-packw/gen/x32-packw-x2-neon.c",
"src/x32-packw/gen/x32-packw-x8-neon.c",
"src/x32-packw/gen/x32-packw-x12-neon.c",
"src/x32-packw/gen/x32-packw-x2-neon-ld2lane.c",
"src/x32-packw/gen/x32-packw-x8-neon-ld4lane.c",
"src/x32-packw/gen/x32-packw-x12-neon-ld4lane.c",
"src/x32-packx/x32-packx-x4-neon-st4.c",
"src/x32-transposec/gen/x32-transposec-2x2-multi-dec-zip-neon.c",
"src/x32-transposec/gen/x32-transposec-2x2-multi-mov-zip-neon.c",
Expand Down Expand Up @@ -5138,6 +5138,8 @@ ALL_SCALAR_MICROKERNEL_SRCS = [
"src/x8-lut/gen/x8-lut-scalar-x4.c",
"src/x8-lut/gen/x8-lut-scalar-x8.c",
"src/x8-lut/gen/x8-lut-scalar-x16.c",
"src/x8-packw/gen/x8-packw-x2-scalar-int.c",
"src/x8-packw/gen/x8-packw-x4-scalar-int.c",
"src/x8-transposec/gen/x8-transposec-1x2-scalar-int.c",
"src/x8-transposec/gen/x8-transposec-1x4-scalar-int.c",
"src/x8-transposec/gen/x8-transposec-2x1-scalar-int.c",
Expand All @@ -5150,8 +5152,6 @@ ALL_SCALAR_MICROKERNEL_SRCS = [
"src/x8-zip/x8-zip-x3-scalar.c",
"src/x8-zip/x8-zip-x4-scalar.c",
"src/x8-zip/x8-zip-xm-scalar.c",
"src/x8-packw/gen/x8-packw-x2-scalar-int.c",
"src/x8-packw/gen/x8-packw-x4-scalar-int.c",
"src/x16-packw/gen/x16-packw-x8-scalar-int.c",
"src/x16-packw/gen/x16-packw-x16-scalar-int.c",
"src/x16-transposec/gen/x16-transposec-1x2-scalar-int.c",
Expand Down
6 changes: 3 additions & 3 deletions scripts/generate-x32-packw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ tools/xngen src/x32-packw/scalar.c.in -D NR=4 -D KUNROLL=4 -D TYPE=float -o s

################################### ARM NEON ##################################
### NR multiple of 4
tools/xngen src/x32-packw/neon.c.in -D NR=8 -D KUNROLL=4 -o src/x32-packw/gen/x32-packw-x8-neon.c &
tools/xngen src/x32-packw/neon.c.in -D NR=12 -D KUNROLL=4 -o src/x32-packw/gen/x32-packw-x12-neon.c &
tools/xngen src/x32-packw/neon.c.in -D NR=8 -D KUNROLL=4 -o src/x32-packw/gen/x32-packw-x8-neon-ld4lane.c &
tools/xngen src/x32-packw/neon.c.in -D NR=12 -D KUNROLL=4 -o src/x32-packw/gen/x32-packw-x12-neon-ld4lane.c &

### NR2 micro-kernels
tools/xngen src/x32-packw/NR2-neon.c.in -D NR=2 -D KUNROLL=2 -o src/x32-packw/gen/x32-packw-x2-neon.c &
tools/xngen src/x32-packw/NR2-neon.c.in -D NR=2 -D KUNROLL=2 -o src/x32-packw/gen/x32-packw-x2-neon-ld2lane.c &

################################## Unit tests #################################
tools/generate-packw-test.py --spec test/x32-packw.yaml --output test/x32-packw.cc &
Expand Down
2 changes: 1 addition & 1 deletion src/x32-packw/NR2-neon.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ $assert KUNROLL >= 2
#include <xnnpack/packw.h>


void xnn_x32_packw_gemm_goi_ukernel_x${NR}__neon(
void xnn_x32_packw_gemm_goi_ukernel_x${NR}__neon_ld2lane(
size_t g,
size_t nc,
size_t kc,
Expand Down
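2 changes: 1 addition & 1 deletion src/x32-packw/gen/x32-packw-x12-neon.c → src/x32-packw/gen/x32-packw-x12-neon-ld4lane.c
Original file line number Diff line number Diff line change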
Expand Up @@ -17,7 +17,7 @@
#include <xnnpack/packw.h>


void xnn_x32_packw_gemm_goi_ukernel_x12__neon(
void xnn_x32_packw_gemm_goi_ukernel_x12__neon_ld4lane(
size_t g,
size_t nc,
size_t kc,
Expand Down
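2 changes: 1 addition & 1 deletion src/x32-packw/gen/x32-packw-x2-neon.c → src/x32-packw/gen/x32-packw-x2-neon-ld2lane.c
Original file line number Diff line number Diff line change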
Expand Up @@ -18,7 +18,7 @@
#include <xnnpack/packw.h>


void xnn_x32_packw_gemm_goi_ukernel_x2__neon(
void xnn_x32_packw_gemm_goi_ukernel_x2__neon_ld2lane(
size_t g,
size_t nc,
size_t kc,
Expand Down
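2 changes: 1 addition & 1 deletion src/x32-packw/gen/x32-packw-x8-neon.c → src/x32-packw/gen/x32-packw-x8-neon-ld4lane.c
Original file line number Diff line number Diff line change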
Expand Up @@ -17,7 +17,7 @@
#include <xnnpack/packw.h>


void xnn_x32_packw_gemm_goi_ukernel_x8__neon(
void xnn_x32_packw_gemm_goi_ukernel_x8__neon_ld4lane(
size_t g,
size_t nc,
size_t kc,
Expand Down
2 changes: 1 addition & 1 deletion src/x32-packw/neon.c.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#include <xnnpack/packw.h>


void xnn_x32_packw_gemm_goi_ukernel_x${NR}__neon(
void xnn_x32_packw_gemm_goi_ukernel_x${NR}__neon_ld4lane(
size_t g,
size_t nc,
size_t kc,
Expand Down
6 changes: 3 additions & 3 deletions src/xnnpack/packw.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x4__s
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x2__scalar_float)
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x4__scalar_float)

DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x2__neon)
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x8__neon)
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x12__neon)
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x2__neon_ld2lane)
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x8__neon_ld4lane)
DECLARE_X32_PACKW_GEMM_GOI_UKERNEL_FUNCTION(xnn_x32_packw_gemm_goi_ukernel_x12__neon_ld4lane)


#ifdef __cplusplus
Expand Down
Loading

0 comments on commit 9ae8348

Please sign in to comment.