forked from llvm/llvm-test-suite
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MicroBenchmarks] Add initial loop vectorization benchmarks.
This patch adds initial micro-benchmarks with interesting loop-vectorization cases. To start with, it includes benchmarks using libm math functions. For each math function, there's a benchmark for the auto-vectorized version and a version with vectorization disabled. The auto-vec version of the benchmark also compares the results of the auto-vectorized functions to the scalar versions. Reviewed By: Meinersbur, lebedev.ri Differential Revision: https://reviews.llvm.org/D101844
- Loading branch information
Showing
4 changed files
with
153 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Run the benchmark executable from this directory's build tree.
llvm_test_run(WORKDIR ${CMAKE_CURRENT_BINARY_DIR})

# Only enable verification of results if neither 'benchmarking only' has been
# selected nor -ffast-math is passed.
# CMAKE_BUILD_TYPE is quoted because it may be empty (e.g. no build type was
# given); unquoted, string(TOUPPER) would then receive too few arguments.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)
set(COMBINED_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CPPFLAGS}")
# The combined flags are quoted so that if() always sees exactly one string
# operand, even when the value is empty or contains ';' (CPPFLAGS is a list).
if (NOT TEST_SUITE_BENCHMARKING_ONLY AND
    NOT "${COMBINED_CXX_FLAGS}" MATCHES "-ffast-math")
  list(APPEND CPPFLAGS -DBENCH_AND_VERIFY)
endif()

llvm_test_executable(LoopVectorizationBenchmarks
  main.cpp
  MathFunctions.cpp)

target_link_libraries(LoopVectorizationBenchmarks benchmark)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#include <iostream> | ||
#include <math.h> | ||
#include <memory> | ||
#include <random> | ||
|
||
#include "benchmark/benchmark.h" | ||
|
||
#define N 10000 | ||
|
||
// Compute C[i] = Fn(A[i]) + Fn(B[i]) for every i in [0, N). | ||
// No loop pragma is attached here, so the compiler's default loop | ||
// vectorization settings decide how this loop is compiled. | ||
// A and B are the input arrays, C receives the results and Fn is the | ||
// function applied to each input element; all arrays are indexed up to N - 1. | ||
template <typename T> static void run_fn_autovec(T *A, T *B, T *C, T (*Fn)(T)) { | ||
for (unsigned i = 0; i < N; i++) { | ||
C[i] = Fn(A[i]) + Fn(B[i]); | ||
} | ||
} | ||
|
||
// Compute C[i] = Fn(A[i]) + Fn(B[i]) for every i in [0, N), with loop | ||
// vectorization and interleaving explicitly disabled for the loop. | ||
// A and B are the input arrays, C receives the results and Fn is the | ||
// function applied to each input element; all arrays are indexed up to N - 1. | ||
template <typename T> static void run_fn_novec(T *A, T *B, T *C, T (*Fn)(T)) { | ||
// The pragma applies to the loop that immediately follows it. | ||
#pragma clang loop vectorize(disable) interleave(disable) | ||
for (unsigned i = 0; i < N; i++) { | ||
C[i] = Fn(A[i]) + Fn(B[i]); | ||
} | ||
} | ||
|
||
// Fill the arrays A, B and C (N elements each) with pseudo-random values
// drawn from a uniform real distribution constructed with bounds -100 and 100.
// The generator is seeded with a fixed constant, so every call produces the
// same sequence; for each index the values are drawn in the order A, B, C.
template <typename T> static void init_data(T *A, T *B, T *C) {
  std::mt19937 engine(12345);
  std::uniform_real_distribution<T> uniform(-100, 100);
  auto next_value = [&] { return uniform(engine); };

  for (unsigned idx = 0; idx != N; ++idx) {
    A[idx] = next_value();
    B[idx] = next_value();
    C[idx] = next_value();
  }
}
|
||
// Benchmark auto-vectorized version using Fn. | ||
template <typename T> | ||
static void __attribute__((always_inline)) | ||
benchmark_fn_autovec(benchmark::State &state, T (*Fn)(T)) { | ||
std::unique_ptr<T[]> A(new T[N]); | ||
std::unique_ptr<T[]> B(new T[N]); | ||
std::unique_ptr<T[]> C(new T[N]); | ||
init_data(&A[0], &B[0], &C[0]); | ||
|
||
#ifdef BENCH_AND_VERIFY | ||
// Verify the vectorized and un-vectorized versions produce the same results. | ||
{ | ||
std::unique_ptr<T[]> CNovec(new T[N]); | ||
for (unsigned i = 0; i < N; i++) | ||
CNovec[i] = C[i]; | ||
|
||
run_fn_novec(&A[0], &B[0], &CNovec[0], Fn); | ||
run_fn_autovec(&A[0], &B[0], &C[0], Fn); | ||
for (unsigned i = 0; i < N; i++) | ||
// If there's a value mismatch, fall back to fpclassify. | ||
if (C[i] != CNovec[i] && fpclassify(C[i]) != fpclassify(CNovec[i])) { | ||
std::cerr << "ERROR: autovec result different to scalar result " << C[i] | ||
<< " != " << CNovec[i] << " at index " << i << "\n"; | ||
exit(1); | ||
} | ||
} | ||
#endif | ||
|
||
for (auto _ : state) { | ||
run_fn_autovec(&A[0], &B[0], &C[0], Fn); | ||
benchmark::DoNotOptimize(A); | ||
benchmark::DoNotOptimize(B); | ||
benchmark::DoNotOptimize(C); | ||
benchmark::ClobberMemory(); | ||
} | ||
} | ||
|
||
// Benchmark version using Fn with vectorization disabled. | ||
template <typename T> | ||
static void __attribute__((always_inline)) | ||
benchmark_fn_novec(benchmark::State &state, T (*Fn)(T)) { | ||
std::unique_ptr<T[]> A(new T[N]); | ||
std::unique_ptr<T[]> B(new T[N]); | ||
std::unique_ptr<T[]> C(new T[N]); | ||
init_data(&A[0], &B[0], &C[0]); | ||
|
||
for (auto _ : state) { | ||
run_fn_novec(&A[0], &B[0], &C[0], Fn); | ||
benchmark::DoNotOptimize(A); | ||
benchmark::DoNotOptimize(B); | ||
benchmark::DoNotOptimize(C); | ||
} | ||
} | ||
|
||
// Add auto-vectorized and disabled-vectorization benchmarks for math
// function fn and type ty. Both benchmarks report their time in microseconds.
// The generated wrapper functions are static: they are only referenced by the
// BENCHMARK registrations in this file, like the other helpers here.
#define ADD_BENCHMARK(fn, ty)                                                  \
  static void BENCHMARK_##fn##_autovec_##ty##_(benchmark::State &state) {      \
    benchmark_fn_autovec<ty>(state, fn);                                       \
  }                                                                            \
  BENCHMARK(BENCHMARK_##fn##_autovec_##ty##_)->Unit(benchmark::kMicrosecond);  \
                                                                               \
  static void BENCHMARK_##fn##_novec_##ty##_(benchmark::State &state) {        \
    benchmark_fn_novec<ty>(state, fn);                                         \
  }                                                                            \
  BENCHMARK(BENCHMARK_##fn##_novec_##ty##_)->Unit(benchmark::kMicrosecond);
|
||
// Instantiate the benchmark pair (auto-vectorized and vectorization disabled) | ||
// for each math function, once for float and once for double. | ||
ADD_BENCHMARK(expf, float) | ||
ADD_BENCHMARK(exp, double) | ||
|
||
ADD_BENCHMARK(acosf, float) | ||
ADD_BENCHMARK(acos, double) | ||
|
||
ADD_BENCHMARK(asinf, float) | ||
ADD_BENCHMARK(asin, double) | ||
|
||
ADD_BENCHMARK(atanf, float) | ||
ADD_BENCHMARK(atan, double) | ||
|
||
ADD_BENCHMARK(cbrtf, float) | ||
ADD_BENCHMARK(cbrt, double) | ||
|
||
ADD_BENCHMARK(erff, float) | ||
ADD_BENCHMARK(erf, double) | ||
|
||
ADD_BENCHMARK(cosf, float) | ||
ADD_BENCHMARK(cos, double) | ||
|
||
ADD_BENCHMARK(sinf, float) | ||
ADD_BENCHMARK(sin, double) | ||
|
||
ADD_BENCHMARK(sinhf, float) | ||
ADD_BENCHMARK(sinh, double) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#include "benchmark/benchmark.h" | ||
|
||
// Entry point: let the benchmark library parse its command-line flags, reject
// any argument it did not recognize, then run the selected benchmarks.
int main(int argc, char *argv[]) {
  benchmark::Initialize(&argc, argv);
  // Initialize() consumes the flags it understands; whatever is left is a
  // misspelled or unsupported option, so fail instead of silently ignoring it.
  if (benchmark::ReportUnrecognizedArguments(argc, argv))
    return EXIT_FAILURE;

  benchmark::RunSpecifiedBenchmarks();
  return EXIT_SUCCESS;
}