Skip to content

Commit

Permalink
Move x86 CPUID code from cpuid.hpp to cpuid.cpp (kimwalisch#150)
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch authored Jun 22, 2024
1 parent f2aa68c commit 863dc86
Show file tree
Hide file tree
Showing 26 changed files with 667 additions and 568 deletions.
37 changes: 21 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.4...3.27)
project(primesieve CXX)
set(PRIMESIEVE_VERSION "12.3")
set(PRIMESIEVE_SOVERSION "12.3.0")
set(PRIMESIEVE_VERSION "12.4")
set(PRIMESIEVE_SOVERSION "12.4.0")

# Build options ######################################################

Expand Down Expand Up @@ -47,7 +47,7 @@ if(NOT isMultiConfig AND NOT CMAKE_BUILD_TYPE)
endif()

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set(ENABLE_ASSERT "ENABLE_ASSERT")
list(APPEND PRIMESIEVE_COMPILE_DEFINITIONS "ENABLE_ASSERT")
endif()

# primesieve binary source files #####################################
Expand Down Expand Up @@ -82,6 +82,17 @@ set(LIB_SRC src/api-c.cpp
src/RiemannR.cpp
src/SievingPrimes.cpp)

# Check if compiler supports CPU multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_x86_popcnt.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")

if(multiarch_x86_popcnt OR multiarch_avx512_vbmi2)
set(LIB_SRC ${LIB_SRC} src/x86/cpuid.cpp)
endif()
endif()

# Required includes ##################################################

include(GNUInstallDirs)
Expand All @@ -107,26 +118,20 @@ if(WITH_AUTO_VECTORIZATION)
include("${PROJECT_SOURCE_DIR}/cmake/auto_vectorization.cmake")
endif()

# Check if compiler supports x64 multiarch ###########################

if(WITH_MULTIARCH)
include("${PROJECT_SOURCE_DIR}/cmake/multiarch_avx512_vbmi2.cmake")
endif()

# libprimesieve (shared library) #####################################

find_package(Threads REQUIRED QUIET)

if(BUILD_SHARED_LIBS)
add_library(libprimesieve SHARED ${LIB_SRC})
set_target_properties(libprimesieve PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve PRIVATE Threads::Threads ${LIBATOMIC})
target_link_libraries(libprimesieve PRIVATE Threads::Threads ${PRIMESIEVE_LINK_LIBRARIES})
string(REPLACE "." ";" SOVERSION_LIST ${PRIMESIEVE_SOVERSION})
list(GET SOVERSION_LIST 0 PRIMESIEVE_SOVERSION_MAJOR)
set_target_properties(libprimesieve PROPERTIES SOVERSION ${PRIMESIEVE_SOVERSION_MAJOR})
set_target_properties(libprimesieve PROPERTIES VERSION ${PRIMESIEVE_SOVERSION})
target_compile_options(libprimesieve PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})
target_compile_definitions(libprimesieve PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")
target_compile_options(libprimesieve PRIVATE ${PRIMESIEVE_COMPILE_OPTIONS})
target_compile_definitions(libprimesieve PRIVATE ${PRIMESIEVE_COMPILE_DEFINITIONS})

if(WIN32_MSVC_COMPATIBLE)
# On Windows the shared library will be named primesieve.dll
Expand Down Expand Up @@ -162,9 +167,9 @@ endif()
if(BUILD_STATIC_LIBS)
add_library(libprimesieve-static STATIC ${LIB_SRC})
set_target_properties(libprimesieve-static PROPERTIES OUTPUT_NAME primesieve)
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${LIBATOMIC})
target_compile_options(libprimesieve-static PRIVATE ${FTREE_VECTORIZE_FLAG} ${FVECT_COST_MODEL_FLAG})
target_compile_definitions(libprimesieve-static PRIVATE "${ENABLE_ASSERT}" "${ENABLE_MULTIARCH_AVX512}")
target_link_libraries(libprimesieve-static PRIVATE Threads::Threads ${PRIMESIEVE_LINK_LIBRARIES})
target_compile_options(libprimesieve-static PRIVATE ${PRIMESIEVE_COMPILE_OPTIONS})
target_compile_definitions(libprimesieve-static PRIVATE ${PRIMESIEVE_COMPILE_DEFINITIONS})

if(WITH_MSVC_CRT_STATIC)
set_target_properties(libprimesieve-static PROPERTIES MSVC_RUNTIME_LIBRARY "MultiThreaded")
Expand Down Expand Up @@ -219,7 +224,7 @@ endif()
if(BUILD_PRIMESIEVE)
add_executable(primesieve ${BIN_SRC})
target_link_libraries(primesieve primesieve::primesieve Threads::Threads)
target_compile_definitions(primesieve PRIVATE "${ENABLE_ASSERT}")
target_compile_definitions(primesieve PRIVATE ${PRIMESIEVE_COMPILE_DEFINITIONS})
target_compile_features(primesieve PRIVATE cxx_auto_type)
install(TARGETS primesieve DESTINATION ${CMAKE_INSTALL_BINDIR})

Expand Down
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
Changes in version 12.4, 22/06/2024
===================================

* Move x86 CPUID code from cpuid.hpp to src/x86/cpuid.cpp.
* multiarch_x86_popcnt.cmake: Detect x86 POPCNT support.
* CMakeLists.txt: Use CMake list for all compile time definitions.
* CMakeLists.txt: Use CMake list for all link libraries.

Changes in version 12.3, 15/04/2024
===================================

Expand Down
4 changes: 2 additions & 2 deletions cmake/auto_vectorization.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ check_cxx_compiler_flag(-ftree-vectorize ftree_vectorize)
cmake_pop_check_state()

if(ftree_vectorize)
set(FTREE_VECTORIZE_FLAG "-ftree-vectorize")
list(APPEND PRIMESIEVE_COMPILE_OPTIONS "-ftree-vectorize")

cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS -Werror)
check_cxx_compiler_flag(-fvect-cost-model=dynamic fvect_cost_model)
cmake_pop_check_state()

if(fvect_cost_model)
set(FVECT_COST_MODEL_FLAG "-fvect-cost-model=dynamic")
list(APPEND PRIMESIEVE_COMPILE_OPTIONS "-fvect-cost-model=dynamic")
endif()
endif()
5 changes: 4 additions & 1 deletion cmake/libatomic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ if(NOT atomic64)
}"
atomic64_with_libatomic)

if (NOT atomic64_with_libatomic)
if(atomic64_with_libatomic)
list(APPEND PRIMESIEVE_LINK_LIBRARIES "${LIBATOMIC}")
else()
set(LIBATOMIC "")
message(FATAL_ERROR "Failed to compile std::atomic, libatomic likely not found!")
endif()
endif()
Expand Down
14 changes: 7 additions & 7 deletions cmake/multiarch_avx512_vbmi2.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

cmake_push_check_state()
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}/include")
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}")

check_cxx_source_compiles("
// GCC/Clang function multiversioning for AVX512 is not needed if
Expand All @@ -20,19 +20,19 @@ check_cxx_source_compiles("
Error: AVX512VBMI2 multiarch not needed!
#endif
#include <primesieve/cpu_supports_avx512_vbmi2.hpp>
#include <src/x86/cpuid.cpp>
#include <immintrin.h>
#include <stdint.h>
class PrimeGenerator {
public:
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void fillNextPrimes_avx512(uint64_t* primes64);
void fillNextPrimes_avx512_vbmi2(uint64_t* primes64);
void fillNextPrimes_default(uint64_t* primes64);
void fillNextPrimes(uint64_t* primes64)
{
if (cpu_supports_avx512_vbmi2)
fillNextPrimes_avx512(primes64);
if (primesieve::has_cpuid_avx512_vbmi2())
fillNextPrimes_avx512_vbmi2(primes64);
else
fillNextPrimes_default(primes64);
}
Expand All @@ -44,7 +44,7 @@ check_cxx_source_compiles("
}
__attribute__ ((target (\"avx512f,avx512vbmi,avx512vbmi2\")))
void PrimeGenerator::fillNextPrimes_avx512(uint64_t* primes64)
void PrimeGenerator::fillNextPrimes_avx512_vbmi2(uint64_t* primes64)
{
__m512i bytes_0_to_7 = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
__m512i base = _mm512_set1_epi64(123);
Expand All @@ -64,7 +64,7 @@ check_cxx_source_compiles("
" multiarch_avx512_vbmi2)

if(multiarch_avx512_vbmi2)
set(ENABLE_MULTIARCH_AVX512 "ENABLE_MULTIARCH_AVX512")
list(APPEND PRIMESIEVE_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_AVX512_VBMI2")
endif()

cmake_pop_check_state()
53 changes: 53 additions & 0 deletions cmake/multiarch_x86_popcnt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Decides whether runtime CPUID detection of the x86 POPCNT instruction
# has to be compiled in.
#
# On x86 CPUs we need to enable the use of cpuid.cpp.
# If cpuid.cpp compiles we assume it is an x86 CPU.
#
# The test program below is written to fail compilation on purpose
# (through the bare "Error: ..." lines) in the cases where runtime
# detection is unnecessary:
#   * the target is not x86 / x86-64, or
#   * the compiler already defines __POPCNT__, or
#   * the compiler is MSVC and __AVX__ is defined.
#
# Result: the check outcome is stored in multiarch_x86_popcnt. When it
# is true, ENABLE_MULTIARCH_x86_POPCNT is appended to the
# PRIMESIEVE_COMPILE_DEFINITIONS list.

include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

# Save the current CMAKE_REQUIRED_* settings; they are restored by
# cmake_pop_check_state() at the end of this file.
cmake_push_check_state()
# The project root must be an include directory so that the test
# program can #include <src/x86/cpuid.cpp>.
set(CMAKE_REQUIRED_INCLUDES "${PROJECT_SOURCE_DIR}")

check_cxx_source_compiles("
// Enable CPUID for POPCNT on x86 and x86-64 CPUs.
// This is required because not all x86 and x86-64 CPUs
// support the POPCNT instruction.
#if !(defined(__x86_64__) || \
defined(__i386__) || \
defined(_M_X64) || \
defined(_M_IX86))
Error: x86 POPCNT multiarch not needed!
#endif
// Both GCC and Clang (even Clang on Windows) define the __POPCNT__
// macro if the user compiles with -mpopcnt. The __POPCNT__
// macro is even defined if the user compiles with other flags
// such as -mavx or -march=native.
#if defined(__POPCNT__)
Error: x86 POPCNT multiarch not needed!
// The MSVC compiler does not support a POPCNT macro, but if the user
// compiles with e.g. /arch:AVX or /arch:AVX512 then MSVC defines
// the __AVX__ macro and POPCNT is also supported.
#elif defined(_MSC_VER) && defined(__AVX__)
Error: x86 POPCNT multiarch not needed!
#endif
#include <src/x86/cpuid.cpp>
#include <iostream>
int main()
{
if (primesieve::has_cpuid_popcnt())
std::cout << \"CPU supports POPCNT!\" << std::endl;
else
std::cout << \"CPU does not support POPCNT!\" << std::endl;
return 0;
}
" multiarch_x86_popcnt)

# Only define the macro when the check succeeded, i.e. the target is
# x86 and POPCNT availability is not already known at compile time.
if(multiarch_x86_popcnt)
list(APPEND PRIMESIEVE_COMPILE_DEFINITIONS "ENABLE_MULTIARCH_x86_POPCNT")
endif()

cmake_pop_check_state()
4 changes: 2 additions & 2 deletions include/primesieve.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
#ifndef PRIMESIEVE_H
#define PRIMESIEVE_H

#define PRIMESIEVE_VERSION "12.3"
#define PRIMESIEVE_VERSION "12.4"
#define PRIMESIEVE_VERSION_MAJOR 12
#define PRIMESIEVE_VERSION_MINOR 3
#define PRIMESIEVE_VERSION_MINOR 4

#include <primesieve/iterator.h>

Expand Down
4 changes: 2 additions & 2 deletions include/primesieve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#ifndef PRIMESIEVE_HPP
#define PRIMESIEVE_HPP

#define PRIMESIEVE_VERSION "12.3"
#define PRIMESIEVE_VERSION "12.4"
#define PRIMESIEVE_VERSION_MAJOR 12
#define PRIMESIEVE_VERSION_MINOR 3
#define PRIMESIEVE_VERSION_MINOR 4

#include <primesieve/iterator.hpp>
#include <primesieve/primesieve_error.hpp>
Expand Down
3 changes: 1 addition & 2 deletions include/primesieve/CpuInfo.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
///
/// @file CpuInfo.hpp
///
/// Copyright (C) 2023 Kim Walisch, <[email protected]>
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -22,7 +22,6 @@ class CpuInfo
public:
CpuInfo();
bool hasCpuName() const;
bool hasAVX512() const;
bool hasLogicalCpuCores() const;
bool hasL1Cache() const;
bool hasL2Cache() const;
Expand Down
4 changes: 2 additions & 2 deletions include/primesieve/Erat.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
///
/// @file Erat.hpp
///
/// Copyright (C) 2023 Kim Walisch, <[email protected]>
/// Copyright (C) 2024 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -15,8 +15,8 @@
#include "EratMedium.hpp"
#include "EratBig.hpp"
#include "macros.hpp"
#include "intrinsics.hpp"
#include "Vector.hpp"
#include "ctz.hpp"

#include <stdint.h>

Expand Down
20 changes: 10 additions & 10 deletions include/primesieve/PrimeGenerator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
defined(__AVX512VBMI__) && \
defined(__AVX512VBMI2__) && \
__has_include(<immintrin.h>)
#define ENABLE_AVX512
#define ENABLE_AVX512_VBMI2

#elif defined(ENABLE_MULTIARCH_AVX512) && \
#elif defined(ENABLE_MULTIARCH_AVX512_VBMI2) && \
__has_include(<immintrin.h>)
#include "cpu_supports_avx512_vbmi2.hpp"
#define ENABLE_DEFAULT
Expand All @@ -50,11 +50,11 @@ class PrimeGenerator : public Erat

ALWAYS_INLINE void fillNextPrimes(Vector<uint64_t>& primes, std::size_t* size)
{
#if defined(ENABLE_AVX512)
fillNextPrimes_avx512(primes, size);
#elif defined(ENABLE_MULTIARCH_AVX512)
#if defined(ENABLE_AVX512_VBMI2)
fillNextPrimes_avx512_vbmi2(primes, size);
#elif defined(ENABLE_MULTIARCH_AVX512_VBMI2)
if (cpu_supports_avx512_vbmi2)
fillNextPrimes_avx512(primes, size);
fillNextPrimes_avx512_vbmi2(primes, size);
else
fillNextPrimes_default(primes, size);
#else
Expand All @@ -68,13 +68,13 @@ class PrimeGenerator : public Erat
void fillNextPrimes_default(Vector<uint64_t>& primes, std::size_t* size);
#endif

#if defined(ENABLE_AVX512) || \
defined(ENABLE_MULTIARCH_AVX512)
#if defined(ENABLE_AVX512_VBMI2) || \
defined(ENABLE_MULTIARCH_AVX512_VBMI2)

#if defined(ENABLE_MULTIARCH_AVX512)
#if defined(ENABLE_MULTIARCH_AVX512_VBMI2)
__attribute__ ((target ("avx512f,avx512vbmi,avx512vbmi2")))
#endif
void fillNextPrimes_avx512(Vector<uint64_t>& primes, std::size_t* size);
void fillNextPrimes_avx512_vbmi2(Vector<uint64_t>& primes, std::size_t* size);

#endif

Expand Down
Loading

0 comments on commit 863dc86

Please sign in to comment.