Skip to content

Commit

Permalink
*fixed bug: Crash in SimdGemm.h.
Browse files (browse the repository at this point in the history)
  • Loading branch information
ermig1979 committed Nov 19, 2020
1 parent f730832 commit c3f9a6b
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 70 deletions.
1 change: 1 addition & 0 deletions docs/2020.html
Original file line number | Diff line number | Diff line change
Expand Up @@ -47,6 +47,7 @@ <h5>Bug fixing</h5>
<li>Compilation error in file SimdNeonSynetConvolution8i.cpp.</li>
<li>Infinite loop in SynetConvolution32fNhwcDirect::OldReorderWeight.</li>
<li>Crash in SimdRuntime.h.</li>
<li>Crash in SimdGemm.h.</li>
</ul>

<h4>Test framework</h4>
Expand Down
14 changes: 7 additions & 7 deletions src/Simd/SimdAvx1Gemm32f.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -538,7 +538,7 @@ namespace Simd
if (M > 5) AddProduct(C + 5 * ldc, _alpha, c05, tail);
}

SIMD_INLINE Simd::GemmNN<float, size_t>::Tail GetGemmTail(size_t M, size_t N)
SIMD_INLINE Simd::GemmNN<float, F, size_t>::Tail GetGemmTail(size_t M, size_t N)
{
if (N <= 8)
{
Expand Down Expand Up @@ -799,7 +799,7 @@ namespace Simd

void Gemm32fNN(size_t M, size_t N, size_t K, const float * alpha, const float * A, size_t lda, const float * B, size_t ldb, const float * beta, float * C, size_t ldc)
{
typedef Simd::GemmNN<float, size_t> GemmNN;
typedef Simd::GemmNN<float, F, size_t> GemmNN;
GemmNN::Main kernelMM, kernelMT;
GemmNN::Tail kernelTM, kernelTT;
size_t microM, microN, L1, L2;
Expand Down Expand Up @@ -835,14 +835,14 @@ namespace Simd
GemmNN::PackA packA = NULL;
L1 = N > 4096 ? Base::AlgCacheL2() : Base::AlgCacheL1();
L2 = N > 4096 ? Base::AlgCacheL3() : Base::AlgCacheL2();
GemmNN gemmNN(M, N, K, microM, microN, L1, L2, Base::AlgCacheL3(), F,
GemmNN gemmNN(M, N, K, microM, microN, L1, L2, Base::AlgCacheL3(),
kernelMM, kernelMT, kernelTM, kernelTT, packA, Avx::GemmPackB, Avx::GemmScaleC, NULL);
gemmNN.Run(alpha, A, lda, B, ldb, beta, C, ldc);
}

//---------------------------------------------------------------------

typedef Simd::GemmNNcb<float, size_t> Gemm32fNNcb;
typedef Simd::GemmNNcb<float, F, size_t> Gemm32fNNcb;

SIMD_INLINE Gemm32fNNcb CreateGemm32fNNcb(size_t M, size_t N, size_t K, GemmKernelType type, bool compatibility)
{
Expand Down Expand Up @@ -890,7 +890,7 @@ namespace Simd
kernelTM = Avx::GetGemmTail(M%microM, microN);
kernelTT = Avx::GetGemmTail(M%microM, microN);
#endif
return Gemm32fNNcb(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F,
return Gemm32fNNcb(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(),
kernelMM, kernelMT, kernelTM, kernelTT, NULL, Avx::GemmPackB, Avx::GemmScaleC, NULL, compatibility);
}

Expand Down Expand Up @@ -1199,9 +1199,9 @@ namespace Simd

void Gemm32fNT(size_t M, size_t N, size_t K, const float * alpha, const float * A, size_t lda, const float * B, size_t ldb, const float * beta, float * C, size_t ldc)
{
typedef Simd::GemmNT<float> GemmNT;
typedef Simd::GemmNT<float, F> GemmNT;
#ifdef SIMD_X64_ENABLE
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F, Avx::GemmScaleC,
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), Avx::GemmScaleC,
Kernel1x1x8nt, Kernel1x4x8nt, Kernel2x1x8nt, Kernel2x4x8nt, Kernel3x1x8nt, Kernel3x4x8nt, NULL, NULL);
#else
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F, Sse::GemmScaleC,
Expand Down
14 changes: 7 additions & 7 deletions src/Simd/SimdAvx2Gemm32f.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -532,7 +532,7 @@ namespace Simd
if (M > 5) AddProduct(C + 5 * ldc, _alpha, c05, tail);
}

SIMD_INLINE Simd::GemmNN<float, size_t>::Tail GetGemmTail(size_t M, size_t N)
SIMD_INLINE Simd::GemmNN<float, F, size_t>::Tail GetGemmTail(size_t M, size_t N)
{
if (N <= 8)
{
Expand Down Expand Up @@ -576,7 +576,7 @@ namespace Simd
{
SIMD_PERF_BEGF(Simd::ToStr(M) + "-" + Simd::ToStr(N) + "-" + Simd::ToStr(K), M*N*K*2);

typedef Simd::GemmNN<float, size_t> GemmNN;
typedef Simd::GemmNN<float, F, size_t> GemmNN;
GemmNN::Main kernelMM, kernelMT;
GemmNN::Tail kernelTM, kernelTT;
size_t microM, microN, L1, L2;
Expand Down Expand Up @@ -612,14 +612,14 @@ namespace Simd
GemmNN::PackA packA = NULL;// K*M > 1024 * 1024 ? Avx::GemmPackA : NULL;
L1 = N > 4096 ? Base::AlgCacheL2() : Base::AlgCacheL1();
L2 = N > 4096 ? Base::AlgCacheL3() : Base::AlgCacheL2();
GemmNN gemmNN(M, N, K, microM, microN, L1, L2, Base::AlgCacheL3(), F,
GemmNN gemmNN(M, N, K, microM, microN, L1, L2, Base::AlgCacheL3(),
kernelMM, kernelMT, kernelTM, kernelTT, packA, Avx::GemmPackB, Avx::GemmScaleC, NULL);
gemmNN.Run(alpha, A, lda, B, ldb, beta, C, ldc);
}

//---------------------------------------------------------------------

typedef Simd::GemmNNcb<float, size_t> Gemm32fNNcb;
typedef Simd::GemmNNcb<float, F, size_t> Gemm32fNNcb;

SIMD_INLINE Gemm32fNNcb CreateGemm32fNNcb(size_t M, size_t N, size_t K, GemmKernelType type, bool compatibility)
{
Expand Down Expand Up @@ -669,7 +669,7 @@ namespace Simd
#endif
Gemm32fNNcb::PackA packA = (K >= 256 && M > 256) ? Avx::GemmPackA : NULL;
return Gemm32fNNcb(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(),
F, kernelMM, kernelMT, kernelTM, kernelTT, packA, Avx::GemmPackB, Avx::GemmScaleC, NULL, compatibility);
kernelMM, kernelMT, kernelTM, kernelTT, packA, Avx::GemmPackB, Avx::GemmScaleC, NULL, compatibility);
}

size_t Gemm32fNNcbBufferSize(size_t M, size_t N, size_t K, GemmKernelType type, bool compatibility)
Expand Down Expand Up @@ -979,9 +979,9 @@ namespace Simd
{
//SIMD_PERF_BEGF(Simd::ToStr(M) + "-" + Simd::ToStr(N) + "-" + Simd::ToStr(K), M*N*K * 2);

typedef Simd::GemmNT<float> GemmNT;
typedef Simd::GemmNT<float, F> GemmNT;
#ifdef SIMD_X64_ENABLE
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F, Avx::GemmScaleC,
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), Avx::GemmScaleC,
Kernel1x1x8nt, Kernel1x4x8nt, Kernel2x1x8nt, Kernel2x4x8nt, Kernel3x1x8nt, Kernel3x4x8nt, NULL, NULL);
#else
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F, Sse::GemmScaleC,
Expand Down
14 changes: 7 additions & 7 deletions src/Simd/SimdAvx512fGemm32f.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1838,7 +1838,7 @@ namespace Simd
if (M > 0xD) AddProduct(C, _alpha, c0D, mask), C += ldc;
}

SIMD_INLINE Simd::GemmNN<float, __mmask16>::Tail GetGemmTail(size_t M, size_t N)
SIMD_INLINE Simd::GemmNN<float, F, __mmask16>::Tail GetGemmTail(size_t M, size_t N)
{
if (N <= 16)
{
Expand Down Expand Up @@ -2506,7 +2506,7 @@ namespace Simd
{
SIMD_PERF_BEGF(Simd::ToStr(M) + "-" + Simd::ToStr(N) + "-" + Simd::ToStr(K), M*N*K * 2);

typedef Simd::GemmNN<float, __mmask16> GemmNN;
typedef Simd::GemmNN<float, F, __mmask16> GemmNN;
GemmNN::Main kernelMM, kernelMT;
GemmNN::Tail kernelTM, kernelTT;
size_t microM, microN;
Expand Down Expand Up @@ -2588,14 +2588,14 @@ namespace Simd
}
#endif
GemmNN::PackA packA = (microM > 6 && M*N*K > 700*700*700) ? Avx::GemmPackA : NULL;
GemmNN gemmNN(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F,
GemmNN gemmNN(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(),
kernelMM, kernelMT, kernelTM, kernelTT, packA, Avx512f::GemmPackB, Avx512f::GemmScaleC, TailMask16);
gemmNN.Run(alpha, A, lda, B, ldb, beta, C, ldc);
}

//---------------------------------------------------------------------

typedef Simd::GemmNNcb<float, __mmask16> Gemm32fNNcb;
typedef Simd::GemmNNcb<float, F, __mmask16> Gemm32fNNcb;

SIMD_INLINE Gemm32fNNcb CreateGemm32fNNcb(size_t M, size_t N, size_t K, GemmKernelType type, bool compatibility)
{
Expand Down Expand Up @@ -2728,7 +2728,7 @@ namespace Simd
kernelTT = Avx512f::GetGemmTail(M%microM, microN);
#endif
Gemm32fNNcb::PackA packA = ((M * 3 < N && N >= 512 && K >= 128 && M > 16) || (K >= 256 && M > 256)) ? Avx512f::GemmPackA : NULL;
return Gemm32fNNcb(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F,
return Gemm32fNNcb(M, N, K, microM, microN, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(),
kernelMM, kernelMT, kernelTM, kernelTT, packA, Avx512f::GemmPackB, Avx512f::GemmScaleC, TailMask16, compatibility);
}

Expand Down Expand Up @@ -3238,9 +3238,9 @@ namespace Simd
Avx2::Gemm32fNT(M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
return;
}
typedef Simd::GemmNT<float> GemmNT;
typedef Simd::GemmNT<float, F> GemmNT;
#if SIMD_ZMM_COUNT == 32
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F, Avx::GemmScaleC,
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), Avx::GemmScaleC,
Kernel1x1x16nt, Kernel1x4x16nt, Kernel2x1x16nt, Kernel2x4x16nt, Kernel3x1x16nt, Kernel3x4x16nt, Kernel6x1x16nt, Kernel6x4x16nt);
#elif defined(SIMD_X64_ENABLE)
GemmNT gemmNT(M, N, K, Base::AlgCacheL1(), Base::AlgCacheL2(), Base::AlgCacheL3(), F, Avx::GemmScaleC,
Expand Down
Loading

0 comments on commit c3f9a6b

Please sign in to comment.