Skip to content

Commit

Permalink
POPCNT runtime detection using CPUID for x86 CPUs (kimwalisch#148)
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch authored Apr 4, 2024
1 parent 5cbf2e2 commit 55cdcce
Show file tree
Hide file tree
Showing 15 changed files with 415 additions and 247 deletions.
40 changes: 11 additions & 29 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ if(NOT isMultiConfig AND NOT CMAKE_BUILD_TYPE)
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()

# Define ENABLE_ASSERT for Debug builds.
#
# A generator expression is used instead of testing CMAKE_BUILD_TYPE,
# because CMAKE_BUILD_TYPE is empty under multi-config generators
# (Visual Studio, Xcode, Ninja Multi-Config) and assertions would then
# never be enabled. The expression is evaluated per configuration at
# generate time and expands to nothing for non-Debug configurations.
#
# ENABLE_ASSERT must only be consumed by commands that evaluate
# generator expressions, e.g. target_compile_definitions().
set(ENABLE_ASSERT "$<$<CONFIG:Debug>:ENABLE_ASSERT>")

# primesieve binary source files #####################################

set(BIN_SRC src/app/CmdOptions.cpp
Expand Down Expand Up @@ -106,8 +110,10 @@ endif()
# Check if compiler supports x64 multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_popcnt_bmi.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")
if(multiarch_avx512_vbmi2)
set(MULTIARCH_AVX512 "MULTIARCH_AVX512")
endif()
endif()

# libprimesieve (shared library) #####################################
Expand All @@ -123,16 +129,7 @@ if(BUILD_SHARED_LIBS)
set_target_properties(libprimesieve PROPERTIES SOVERSION ${PRIMESIEVE_SOVERSION_MAJOR})
set_target_properties(libprimesieve PROPERTIES VERSION ${PRIMESIEVE_SOVERSION})
target_compile_options(libprimesieve PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})

if(multiarch_popcnt_bmi)
target_compile_definitions(libprimesieve PRIVATE "MULTIARCH_POPCNT_BMI")
endif()
if(multiarch_avx512)
target_compile_definitions(libprimesieve PRIVATE "MULTIARCH_AVX512")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_definitions(libprimesieve PRIVATE "ENABLE_ASSERT")
endif()
target_compile_definitions(libprimesieve PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")

if(WIN32_MSVC_COMPATIBLE)
# On Windows the shared library will be named primesieve.dll
Expand Down Expand Up @@ -170,16 +167,7 @@ if(BUILD_STATIC_LIBS)
set_target_properties(libprimesieve-static PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${LIBATOMIC})
target_compile_options(libprimesieve-static PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})

if(multiarch_popcnt_bmi)
target_compile_definitions(libprimesieve-static PRIVATE "MULTIARCH_POPCNT_BMI")
endif()
if(multiarch_avx512)
target_compile_definitions(libprimesieve-static PRIVATE "MULTIARCH_AVX512")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_definitions(libprimesieve-static PRIVATE "ENABLE_ASSERT")
endif()
target_compile_definitions(libprimesieve-static PRIVATE "${ENABLE_ASSERT}" "${MULTIARCH_AVX512}")

if(WITH_MSVC_CRT_STATIC)
set_target_properties(libprimesieve-static PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
Expand Down Expand Up @@ -234,16 +222,10 @@ endif()
if(BUILD_PRIMESIEVE)
add_executable(primesieve ${BIN_SRC})
target_link_libraries(primesieve primesieve::primesieve Threads::Threads)
target_compile_definitions(primesieve PRIVATE "${ENABLE_ASSERT}")
target_compile_features(primesieve PRIVATE cxx_auto_type)
install(TARGETS primesieve DESTINATION ${CMAKE_INSTALL_BINDIR})

if(multiarch_avx512)
target_compile_definitions(primesieve PRIVATE "MULTIARCH_AVX512")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_definitions(primesieve PRIVATE "ENABLE_ASSERT")
endif()

if(WITH_MSVC_CRT_STATIC)
set_target_properties(primesieve PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
endif()
Expand Down
7 changes: 7 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
Changes in version 12.3, 04/04/2024
===================================

* Add runtime POPCNT detection using CPUID for x86 CPUs.
* Improve GCC/Clang multiarch preprocessor logic.
* CMakeLists.txt: Remove POPCNT/BMI check for x86 CPUs.

Changes in version 12.2, 30/03/2024
===================================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@ check_cxx_source_compiles("
public:
__attribute__ ((target (\"default\")))
void fillNextPrimes(uint64_t* primes64);
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2,popcnt\")))
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void fillNextPrimes(uint64_t* primes64);
};
__attribute__ ((target (\"default\")))
void PrimeGenerator::fillNextPrimes(uint64_t* primes64)
{
primes64[0] = 2;
}
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2,popcnt\")))
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void PrimeGenerator::fillNextPrimes(uint64_t* primes64)
{
__m512i bytes_0_to_7 = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
Expand All @@ -36,4 +36,4 @@ check_cxx_source_compiles("
p.fillNextPrimes(primes);
return 0;
}
" multiarch_avx512)
" multiarch_avx512_vbmi2)
57 changes: 0 additions & 57 deletions cmake/multiarch_popcnt_bmi.cmake

This file was deleted.

94 changes: 94 additions & 0 deletions include/primesieve/CPUID.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
///
/// @file CPUID.hpp
/// @brief POPCNT detection for x86 and x86-64 CPUs.
///
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
///

#ifndef CPUID_HPP
#define CPUID_HPP

// Enable on x86 and x86-64 CPUs
#if defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86)

// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__ macro
// is also defined if the user compiles with other flags that
// enable POPCNT, such as -mavx or -march=native.
#if defined(__POPCNT__)
#define HAS_POPCNT
// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
#define HAS_POPCNT
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#endif

namespace {

inline void run_CPUID(int eax, int ecx, int* abcd)
{
#if defined(_MSC_VER)
__cpuidex(abcd, eax, ecx);
#else
int ebx = 0;
int edx = 0;

#if defined(__i386__) && \
defined(__PIC__)
/* in case of PIC under 32-bit EBX cannot be clobbered */
__asm__ ("movl %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=D" (ebx),
"+a" (eax),
"+c" (ecx),
"=d" (edx));
#else
__asm__ ("cpuid;"
: "+b" (ebx),
"+a" (eax),
"+c" (ecx),
"=d" (edx));
#endif

abcd[0] = eax;
abcd[1] = ebx;
abcd[2] = ecx;
abcd[3] = edx;
#endif
}

#if !defined(HAS_POPCNT)
#define ENABLE_CPUID_POPCNT

/// Query the CPU at runtime for POPCNT support.
/// Runs CPUID with EAX = 1, ECX = 0 and tests bit 23 of the
/// returned ECX register.
/// @return true if the POPCNT bit is set, false otherwise.
///
inline bool run_CPUID_POPCNT()
{
  // Bit 23 of %ecx is the POPCNT bit flag
  const int popcntMask = 1 << 23;
  int regs[4];

  run_CPUID(1, 0, regs);

  // regs[2] holds ECX. The mask has a single bit set, hence
  // testing for non-zero is the same as comparing to the mask.
  const int ecxResult = regs[2];
  return (ecxResult & popcntMask) != 0;
}

/// Initialized at startup
const bool HAS_CPUID_POPCNT = run_CPUID_POPCNT();

#endif // ENABLE_CPUID_POPCNT

} // namespace

#endif // x86 CPU

#endif
34 changes: 21 additions & 13 deletions include/primesieve/PrimeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
/// returns the primes. When there are no more primes left in
/// the vector PrimeGenerator generates new primes.
///
/// Copyright (C) 2023 Kim Walisch, <[email protected]>
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -23,6 +23,21 @@
#include <stdint.h>
#include <cstddef>

#if defined(MULTIARCH_AVX512)
// GCC/Clang function multiversioning for AVX512 is not needed if
// the user compiles with -mavx512f -mavx512vbmi -mavx512vbmi2.
// GCC/Clang function multiversioning generally causes a minor
// overhead, hence we disable it if it is not needed.
#if defined(__AVX512__) || (defined(__AVX512F__) && \
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__))
#undef MULTIARCH_AVX512
#else
#define MULTIARCH_TARGET_DEFAULT
#define MULTIARCH_TARGET_AVX512
#endif
#endif

namespace primesieve {

class PreSieve;
Expand All @@ -34,22 +49,15 @@ class PrimeGenerator : public Erat
void fillPrevPrimes(Vector<uint64_t>& primes, std::size_t* size);
static uint64_t maxCachedPrime();

#if defined(MULTIARCH_POPCNT_BMI)
#define MULTIARCH
__attribute__ ((target ("popcnt,bmi")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#if defined(MULTIARCH_TARGET_DEFAULT)
__attribute__ ((target ("default")))
#endif

#if defined(MULTIARCH_AVX512)
#define MULTIARCH
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2,popcnt")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#endif

#if defined(MULTIARCH)
__attribute__ ((target ("default")))
#endif
#if defined(MULTIARCH_TARGET_AVX512)
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2")))
void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size);
#endif

private:
bool isInit_ = false;
Expand Down
Loading

0 comments on commit 55cdcce

Please sign in to comment.