Skip to content

Commit

Permalink
[MicroBenchmarks] Add initial loop vectorization benchmarks.
Browse files Browse the repository at this point in the history
This patch adds initial micro-benchmarks with interesting
loop-vectorization cases. To start with, it includes benchmarks using
libm math functions.

For each math function, there's a benchmark for the auto-vectorized
version and a version with vectorization disabled.

The auto-vec version of the benchmark also compares the results of the
auto-vectorized functions to the scalar versions.

Reviewed By: Meinersbur, lebedev.ri

Differential Revision: https://reviews.llvm.org/D101844
  • Loading branch information
fhahn committed May 17, 2021
1 parent fb038de commit 3af2314
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 0 deletions.
1 change: 1 addition & 0 deletions MicroBenchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ add_subdirectory(LCALS)
add_subdirectory(harris)
add_subdirectory(ImageProcessing)
add_subdirectory(LoopInterchange)
add_subdirectory(LoopVectorization)
add_subdirectory(MemFunctions)
16 changes: 16 additions & 0 deletions MicroBenchmarks/LoopVectorization/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
llvm_test_run(WORKDIR ${CMAKE_CURRENT_BINARY_DIR})

# Only enable verification of results if neither 'benchmarking only' has been
# selected nor -ffast-math is passed.
#
# The expansions below are quoted on purpose: CMAKE_BUILD_TYPE may be empty
# (string(TOUPPER) would then receive too few arguments), and CPPFLAGS is a
# list, so an unquoted expansion inside if() could be split into several
# arguments.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)
set(COMBINED_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CPPFLAGS}")
if (NOT TEST_SUITE_BENCHMARKING_ONLY AND
    NOT "${COMBINED_CXX_FLAGS}" MATCHES ".*-ffast-math.*")
  # BENCH_AND_VERIFY makes the auto-vectorized benchmarks compare their output
  # against the non-vectorized loop before timing.
  list(APPEND CPPFLAGS -DBENCH_AND_VERIFY)
endif()

llvm_test_executable(LoopVectorizationBenchmarks
  main.cpp
  MathFunctions.cpp)

target_link_libraries(LoopVectorizationBenchmarks benchmark)
128 changes: 128 additions & 0 deletions MicroBenchmarks/LoopVectorization/MathFunctions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#include <cstdlib>
#include <iostream>
#include <math.h>
#include <memory>
#include <random>

#include "benchmark/benchmark.h"

#define N 10000

// Computes C[i] = Fn(A[i]) + Fn(B[i]) for the first N elements. No loop
// pragmas are attached, so the compiler's default vectorization settings apply.
template <typename T> static void run_fn_autovec(T *A, T *B, T *C, T (*Fn)(T)) {
  for (unsigned Idx = 0; Idx < N; ++Idx) {
    const T FromA = Fn(A[Idx]);
    const T FromB = Fn(B[Idx]);
    C[Idx] = FromA + FromB;
  }
}

// Computes C[i] = Fn(A[i]) + Fn(B[i]) for the first N elements. The pragma
// directly above the loop asks clang to neither vectorize nor interleave it.
template <typename T> static void run_fn_novec(T *A, T *B, T *C, T (*Fn)(T)) {
#pragma clang loop vectorize(disable) interleave(disable)
  for (unsigned Idx = 0; Idx < N; ++Idx) {
    const T FromA = Fn(A[Idx]);
    const T FromB = Fn(B[Idx]);
    C[Idx] = FromA + FromB;
  }
}

// Fill the first N elements of A, B and C with pseudo-random values drawn
// uniformly from [-100, 100). The generator is seeded with a fixed constant,
// so every call produces the same data.
template <typename T> static void init_data(T *A, T *B, T *C) {
  std::mt19937 Gen(12345);
  std::uniform_real_distribution<T> Uniform(-100, 100);
  for (unsigned Idx = 0; Idx < N; ++Idx) {
    // One draw per array and index, taken in A, B, C order.
    A[Idx] = Uniform(Gen);
    B[Idx] = Uniform(Gen);
    C[Idx] = Uniform(Gen);
  }
}

// Benchmark the auto-vectorized loop for math function Fn.
//
// Allocates three N-element arrays of T, fills them with init_data() and times
// run_fn_autovec() on them. When BENCH_AND_VERIFY is defined, the result of
// run_fn_autovec() is first compared against run_fn_novec(); on a reported
// mismatch an error is printed to std::cerr and the process exits with
// status 1.
//
// state: google-benchmark state object driving the timed loop.
// Fn:    unary math function applied to every element of A and B.
template <typename T>
static void __attribute__((always_inline))
benchmark_fn_autovec(benchmark::State &state, T (*Fn)(T)) {
  std::unique_ptr<T[]> A(new T[N]);
  std::unique_ptr<T[]> B(new T[N]);
  std::unique_ptr<T[]> C(new T[N]);
  init_data(&A[0], &B[0], &C[0]);

#ifdef BENCH_AND_VERIFY
  // Verify the vectorized and un-vectorized versions produce the same results.
  {
    // Start the reference output from the same contents as C.
    std::unique_ptr<T[]> CNovec(new T[N]);
    for (unsigned i = 0; i < N; i++)
      CNovec[i] = C[i];

    run_fn_novec(&A[0], &B[0], &CNovec[0], Fn);
    run_fn_autovec(&A[0], &B[0], &C[0], Fn);
    for (unsigned i = 0; i < N; i++) {
      // If there's a value mismatch, fall back to fpclassify: elements that
      // compare unequal are only reported when they also belong to different
      // floating-point classes. Two NaNs (which never compare equal) are
      // therefore accepted, as are unequal values of the same class.
      if (C[i] != CNovec[i] && fpclassify(C[i]) != fpclassify(CNovec[i])) {
        std::cerr << "ERROR: autovec result different to scalar result " << C[i]
                  << " != " << CNovec[i] << " at index " << i << "\n";
        std::exit(1);
      }
    }
  }
#endif

  for (auto _ : state) {
    run_fn_autovec(&A[0], &B[0], &C[0], Fn);
    // Optimization barriers so the timed loop's work is not optimized away.
    benchmark::DoNotOptimize(A);
    benchmark::DoNotOptimize(B);
    benchmark::DoNotOptimize(C);
    benchmark::ClobberMemory();
  }
}

// Benchmark the loop for math function Fn with vectorization disabled.
//
// Allocates three N-element arrays of T, fills them with init_data() and times
// run_fn_novec() on them.
//
// state: google-benchmark state object driving the timed loop.
// Fn:    unary math function applied to every element of A and B.
template <typename T>
static void __attribute__((always_inline))
benchmark_fn_novec(benchmark::State &state, T (*Fn)(T)) {
  std::unique_ptr<T[]> A(new T[N]);
  std::unique_ptr<T[]> B(new T[N]);
  std::unique_ptr<T[]> C(new T[N]);
  init_data(&A[0], &B[0], &C[0]);

  for (auto _ : state) {
    run_fn_novec(&A[0], &B[0], &C[0], Fn);
    // Use the same optimization barriers as benchmark_fn_autovec so that the
    // two timed loops differ only in the vectorization setting.
    benchmark::DoNotOptimize(A);
    benchmark::DoNotOptimize(B);
    benchmark::DoNotOptimize(C);
    benchmark::ClobberMemory();
  }
}

// Define and register two benchmarks for math function fn and element type ty:
//   BENCHMARK_<fn>_autovec_<ty>_  calls benchmark_fn_autovec<ty>(state, fn)
//   BENCHMARK_<fn>_novec_<ty>_    calls benchmark_fn_novec<ty>(state, fn)
// Both are registered through BENCHMARK() with microseconds as the time unit.
#define ADD_BENCHMARK(fn, ty) \
void BENCHMARK_##fn##_autovec_##ty##_(benchmark::State &state) { \
benchmark_fn_autovec<ty>(state, fn); \
} \
BENCHMARK(BENCHMARK_##fn##_autovec_##ty##_)->Unit(benchmark::kMicrosecond); \
\
void BENCHMARK_##fn##_novec_##ty##_(benchmark::State &state) { \
benchmark_fn_novec<ty>(state, fn); \
} \
BENCHMARK(BENCHMARK_##fn##_novec_##ty##_)->Unit(benchmark::kMicrosecond);

// Benchmarked math functions. Each one is registered twice: the float variant
// (name ending in 'f') and the double variant.
ADD_BENCHMARK(expf, float)
ADD_BENCHMARK(exp, double)

ADD_BENCHMARK(acosf, float)
ADD_BENCHMARK(acos, double)

ADD_BENCHMARK(asinf, float)
ADD_BENCHMARK(asin, double)

ADD_BENCHMARK(atanf, float)
ADD_BENCHMARK(atan, double)

ADD_BENCHMARK(cbrtf, float)
ADD_BENCHMARK(cbrt, double)

ADD_BENCHMARK(erff, float)
ADD_BENCHMARK(erf, double)

ADD_BENCHMARK(cosf, float)
ADD_BENCHMARK(cos, double)

ADD_BENCHMARK(sinf, float)
ADD_BENCHMARK(sin, double)

ADD_BENCHMARK(sinhf, float)
ADD_BENCHMARK(sinh, double)
8 changes: 8 additions & 0 deletions MicroBenchmarks/LoopVectorization/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "benchmark/benchmark.h"

// Entry point of the benchmark executable: passes the command line to
// google-benchmark, runs the benchmarks it selects and reports success.
int main(int argc, char *argv[]) {
  // The command line is handed to the benchmark library before anything runs.
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();

  return EXIT_SUCCESS;
}

0 comments on commit 3af2314

Please sign in to comment.