-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathregularizationBench.cpp
93 lines (73 loc) · 2.88 KB
/
regularizationBench.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
// Copyright (c) 2019 Shapelets.io
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <benchmark/benchmark.h>
#include <khiva/regularization.h>
#include "khivaBenchmark.h"
// Benchmarks khiva::regularization::groupBy (aggregating with af::mean) on a
// two-column input whose key column is left in the order produced by tiling.
//
// Template parameters:
//   BE - ArrayFire backend selected via af::setBackend before any data is built.
//   D  - device index selected via af::setDevice on that backend.
//
// Benchmark arguments:
//   state.range(0) - n, the number of random f64 values in the value column.
//   state.range(1) - m, the size of the random f64 block that is tiled n / m
//                    times to form the key column.
//
// The tiled key column has m * (n / m) rows, which equals n only when m
// divides n; the registered ranges in this file are all powers of two.
template <af::Backend BE, int D>
void GroupByUnsorted(benchmark::State &state) {
    af::setBackend(BE);
    af::setDevice(D);

    const auto valueCount = state.range(0);
    const auto keyBlockSize = state.range(1);
    const auto repetitions = static_cast<unsigned>(valueCount / keyBlockSize);

    // Column 0: the random key block repeated back to back (no sorting applied).
    // Column 1: independent random values.
    const af::array keyBlock = af::randu(keyBlockSize, f64);
    const af::array keyColumn = af::tile(keyBlock, repetitions);
    auto input = af::join(1, keyColumn, af::randu(valueCount, f64));

    // Finish building the input before the timed loop starts.
    af::sync();

    while (state.KeepRunning()) {
        // NOTE(review): the meaning of the trailing `false` depends on the
        // groupBy signature in khiva/regularization.h, which is not visible
        // here - confirm it is not silently converted to an integer parameter.
        auto grouped = khiva::regularization::groupBy(input, af::mean, false);
        // Force evaluation and wait for it so the iteration covers the actual work.
        grouped.eval();
        af::sync();
    }

    addMemoryCounters(state);
}
// Benchmarks khiva::regularization::groupBy (aggregating with af::mean) on a
// two-column input whose key column has been passed through af::sort.
//
// Template parameters:
//   BE - ArrayFire backend selected via af::setBackend before any data is built.
//   D  - device index selected via af::setDevice on that backend.
//
// Benchmark arguments:
//   state.range(0) - n, the number of random f64 values in the value column.
//   state.range(1) - m, the size of the random f64 block that is tiled n / m
//                    times and then sorted to form the key column.
//
// The tiled key column has m * (n / m) rows, which equals n only when m
// divides n; the registered ranges in this file are all powers of two.
template <af::Backend BE, int D>
void GroupBySorted(benchmark::State &state) {
    af::setBackend(BE);
    af::setDevice(D);

    const auto valueCount = state.range(0);
    const auto keyBlockSize = state.range(1);
    const auto repetitions = static_cast<unsigned>(valueCount / keyBlockSize);

    // Column 0: the random key block repeated and then sorted.
    // Column 1: independent random values.
    const af::array keyBlock = af::randu(keyBlockSize, f64);
    const af::array sortedKeys = af::sort(af::tile(keyBlock, repetitions));
    auto input = af::join(1, sortedKeys, af::randu(valueCount, f64));

    // Finish building the input before the timed loop starts.
    af::sync();

    while (state.KeepRunning()) {
        auto grouped = khiva::regularization::groupBy(input, af::mean);
        // Force evaluation and wait for it so the iteration covers the actual work.
        grouped.eval();
        af::sync();
    }

    addMemoryCounters(state);
}
// Registers the GroupByUnsorted and GroupBySorted benchmarks for the CUDA
// backend on CUDA_BENCHMARKING_DEVICE (macro not defined in this file).
//
// Both benchmarks sweep the same argument grid: the first range feeds
// state.range(0) and the second feeds state.range(1), each stepped with a
// range multiplier of 2. Timings are reported in microseconds.
void cudaBenchmarks() {
    constexpr int kMinValues = 1 << 10;
    constexpr int kMaxValues = 512 << 10;
    constexpr int kMinKeyBlock = 16;
    constexpr int kMaxKeyBlock = 512;

    BENCHMARK_TEMPLATE(GroupByUnsorted, af::Backend::AF_BACKEND_CUDA, CUDA_BENCHMARKING_DEVICE)
        ->Unit(benchmark::TimeUnit::kMicrosecond)
        ->RangeMultiplier(2)
        ->Ranges({{kMinValues, kMaxValues}, {kMinKeyBlock, kMaxKeyBlock}});

    BENCHMARK_TEMPLATE(GroupBySorted, af::Backend::AF_BACKEND_CUDA, CUDA_BENCHMARKING_DEVICE)
        ->Unit(benchmark::TimeUnit::kMicrosecond)
        ->RangeMultiplier(2)
        ->Ranges({{kMinValues, kMaxValues}, {kMinKeyBlock, kMaxKeyBlock}});
}
// Registers the GroupByUnsorted and GroupBySorted benchmarks for the OpenCL
// backend on OPENCL_BENCHMARKING_DEVICE (macro not defined in this file).
//
// Both benchmarks sweep the same argument grid: the first range feeds
// state.range(0) and the second feeds state.range(1), each stepped with a
// range multiplier of 2. Timings are reported in microseconds.
void openclBenchmarks() {
    constexpr int kMinValues = 1 << 10;
    constexpr int kMaxValues = 512 << 10;
    constexpr int kMinKeyBlock = 16;
    constexpr int kMaxKeyBlock = 512;

    BENCHMARK_TEMPLATE(GroupByUnsorted, af::Backend::AF_BACKEND_OPENCL, OPENCL_BENCHMARKING_DEVICE)
        ->Unit(benchmark::TimeUnit::kMicrosecond)
        ->RangeMultiplier(2)
        ->Ranges({{kMinValues, kMaxValues}, {kMinKeyBlock, kMaxKeyBlock}});

    BENCHMARK_TEMPLATE(GroupBySorted, af::Backend::AF_BACKEND_OPENCL, OPENCL_BENCHMARKING_DEVICE)
        ->Unit(benchmark::TimeUnit::kMicrosecond)
        ->RangeMultiplier(2)
        ->Ranges({{kMinValues, kMaxValues}, {kMinKeyBlock, kMaxKeyBlock}});
}
// Registers the GroupByUnsorted and GroupBySorted benchmarks for the CPU
// backend on CPU_BENCHMARKING_DEVICE (macro not defined in this file).
//
// Both benchmarks sweep the same argument grid: the first range feeds
// state.range(0) and the second feeds state.range(1), each stepped with a
// range multiplier of 2. Timings are reported in microseconds.
void cpuBenchmarks() {
    constexpr int kMinValues = 1 << 10;
    constexpr int kMaxValues = 512 << 10;
    constexpr int kMinKeyBlock = 16;
    constexpr int kMaxKeyBlock = 512;

    BENCHMARK_TEMPLATE(GroupByUnsorted, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
        ->Unit(benchmark::TimeUnit::kMicrosecond)
        ->RangeMultiplier(2)
        ->Ranges({{kMinValues, kMaxValues}, {kMinKeyBlock, kMaxKeyBlock}});

    BENCHMARK_TEMPLATE(GroupBySorted, af::Backend::AF_BACKEND_CPU, CPU_BENCHMARKING_DEVICE)
        ->Unit(benchmark::TimeUnit::kMicrosecond)
        ->RangeMultiplier(2)
        ->Ranges({{kMinValues, kMaxValues}, {kMinKeyBlock, kMaxKeyBlock}});
}
// Passes the three per-backend registration functions defined in this file to
// KHIVA_BENCHMARK_MAIN.
// NOTE(review): the macro is not defined here (presumably it comes from
// khivaBenchmark.h and generates the program entry point) - confirm how it
// decides which of the registration functions are actually invoked.
KHIVA_BENCHMARK_MAIN(cudaBenchmarks, openclBenchmarks, cpuBenchmarks)