From 43da394304fba820830da2cef2c0214fe292c037 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Mon, 23 Jun 2025 18:03:56 +0700 Subject: [PATCH 1/2] Properly fix AVX-512 CRC calculation bug The problem that led to the workaround in f83f14881c7 was not in fact a compiler bug, but a failure to zero the upper bits of the vector register containing the initial scalar CRC value. Fix that and revert the workaround. Diagnosed-by: Nathan Bossart Diagnosed-by: Raghuveer Devulapalli Tested-by: Andy Fan Tested-by: Soumyadeep Chakraborty Reviewed-by: Nathan Bossart Reviewed-by: Raghuveer Devulapalli Discussion: https://postgr.es/m/PH8PR11MB82866B07AA6758D12F699C00FB70A@PH8PR11MB8286.namprd11.prod.outlook.com --- src/port/pg_crc32c_sse42.c | 2 +- src/port/pg_crc32c_sse42_choose.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 9af3474a6ca95..1a7172553551a 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -123,7 +123,7 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len) __m512i k; k = _mm512_broadcast_i32x4(_mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0)); - x0 = _mm512_xor_si512(_mm512_castsi128_si512(_mm_cvtsi32_si128(crc0)), x0); + x0 = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(crc0)), x0); buf += 64; /* Main loop. */ diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 802e47788c10c..74d2421ba2be9 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -95,9 +95,7 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) __cpuidex(exx, 7, 0); #endif -#if defined(__clang__) && !defined(__OPTIMIZE__) - /* Some versions of clang are broken at -O0 */ -#elif defined(USE_AVX512_CRC32C_WITH_RUNTIME_CHECK) +#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK if (exx[2] & (1 << 10) && /* VPCLMULQDQ */ exx[1] & (1 << 31)) /* AVX512-VL */ pg_comp_crc32c = pg_comp_crc32c_avx512; From ccd5bc93fdfeae22c935f405b0687be5cfa9caa4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 23 Jun 2025 11:50:21 -0400 Subject: [PATCH 2/2] Include _mm512_zextsi128_si512() in AVX-512 configure probes. Commit 43da39430 added a dependency on this intrinsic to our AVX-512 CRC code. It turns out this intrinsic was added to gcc later than the other ones we were using, so that there are platforms where the new code fails to compile. Since only relatively old (pre-gcc-10) compilers are affected, it doesn't seem worth trying to make the AVX-512 CRC code actually work on these platforms. Just add the new intrinsic to the configure probe, so that we'll conclude the code can't be built. Author: Tom Lane Reviewed-by: Nathan Bossart Discussion: https://postgr.es/m/3350336.1750690281@sss.pgh.pa.us --- config/c-compiler.m4 | 1 + configure | 1 + meson.build | 1 + 3 files changed, 3 insertions(+) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 5f3e1d1faf930..da40bd6a64755 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -602,6 +602,7 @@ AC_CACHE_CHECK([for _mm512_clmulepi64_epi128], [Ac_cachevar], { __m128i z; + x = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(0)), x); y = _mm512_clmulepi64_epi128(x, y, 0); z = _mm_ternarylogic_epi64( _mm512_castsi512_si128(y), diff --git a/configure b/configure index 4f15347cc9503..3d3d3db97a456 100755 --- a/configure +++ b/configure @@ -18227,6 +18227,7 @@ else { __m128i z; + x = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(0)), x); y = _mm512_clmulepi64_epi128(x, y, 0); z = _mm_ternarylogic_epi64( _mm512_castsi512_si128(y), diff --git a/meson.build b/meson.build index 474763ad19f0c..6ffe7b4727556 100644 --- a/meson.build +++ b/meson.build @@ -2465,6 +2465,7 @@ int main(void) { __m128i z; + x = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(0)), x); y = _mm512_clmulepi64_epi128(x, y, 0); z = _mm_ternarylogic_epi64( _mm512_castsi512_si128(y),