-
Notifications
You must be signed in to change notification settings - Fork 184
/
Copy pathreduce.cpp
106 lines (76 loc) · 3.14 KB
/
reduce.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#include <catch2/catch.hpp>
#include <stdexec/execution.hpp>
#include "nvexec/stream_context.cuh"
#include "common.cuh"
#include <thrust/device_vector.h>
#include <cub/thread/thread_operators.cuh>
#include <algorithm>
#include <span>
namespace ex = stdexec;
namespace {
struct minimum {
template <class T1, class T2>
constexpr auto
operator()(const T1& lhs, const T2& rhs) const -> _CUDA_VSTD::common_type_t<T1, T2> {
return (lhs < rhs) ? lhs : rhs;
}
};
// Compile-time check: piping a span through nvexec::reduce with only an init
// value yields a sender whose value channel is `int&`. No GPU work is run.
TEST_CASE("nvexec reduce returns a sender with single input", "[cuda][stream][adaptors][reduce]") {
  constexpr int element_count = 2048;
  int values[element_count] = {};
  for (int& v : values) {
    v = 1;
  }
  nvexec::stream_context ctx{};
  auto work = ex::transfer_just(ctx.get_scheduler(), std::span{values}) | nvexec::reduce(0);
  STATIC_REQUIRE(ex::sender_of<decltype(work), ex::set_value_t(int&)>);
  (void) work; // sender is never started; only its type is inspected
}
// Compile-time check: the two-argument form (init value plus explicit binary
// operator) also produces a sender of `int&`. No GPU work is run.
TEST_CASE("nvexec reduce returns a sender with two inputs", "[cuda][stream][adaptors][reduce]") {
  constexpr int element_count = 2048;
  int values[element_count] = {};
  std::fill(std::begin(values), std::end(values), 1);
  nvexec::stream_context ctx{};
  auto work = ex::transfer_just(ctx.get_scheduler(), std::span{values})
            | nvexec::reduce(0, cuda::std::plus{});
  STATIC_REQUIRE(ex::sender_of<decltype(work), ex::set_value_t(int&)>);
  (void) work; // sender is never started; only its type is inspected
}
// Runtime check: reduce with only an init value defaults to summation, so
// 2048 ones starting from 42 must yield 2048 + 42.
TEST_CASE("nvexec reduce uses sum as default", "[cuda][stream][adaptors][reduce]") {
  constexpr int element_count = 2048;
  constexpr int initial_value = 42;
  thrust::device_vector<int> values(element_count, 1);
  int* const data = thrust::raw_pointer_cast(values.data());
  nvexec::stream_context ctx{};
  auto work = ex::transfer_just(ctx.get_scheduler(), std::span{data, values.size()})
            | nvexec::reduce(initial_value);
  auto [total] = ex::sync_wait(std::move(work)).value();
  REQUIRE(total == element_count + initial_value);
}
// Runtime check: a caller-supplied operator (minimum) is honored — the
// minimum over 2048 ones with init 42 is 1, not the default sum.
TEST_CASE("nvexec reduce uses the passed function", "[cuda][stream][adaptors][reduce]") {
  constexpr int element_count = 2048;
  constexpr int initial_value = 42;
  thrust::device_vector<int> values(element_count, 1);
  int* const data = thrust::raw_pointer_cast(values.data());
  nvexec::stream_context ctx{};
  auto work = ex::transfer_just(ctx.get_scheduler(), std::span{data, values.size()})
            | nvexec::reduce(initial_value, minimum{});
  auto [smallest] = ex::sync_wait(std::move(work)).value();
  REQUIRE(smallest == 1);
}
// Runtime check: the reduction operator actually runs in device code. The
// operator sums only when nvexec::is_on_gpu() reports device execution and
// collapses to 0 otherwise, so the expected total is reached only on the GPU.
TEST_CASE("nvexec reduce executes on GPU", "[cuda][stream][adaptors][reduce]") {
  constexpr int element_count = 2048;
  constexpr int initial_value = 42;
  thrust::device_vector<int> values(element_count, 1);
  int* const data = thrust::raw_pointer_cast(values.data());
  auto gpu_only_sum = [](const int lhs, const int rhs) {
    if (nvexec::is_on_gpu()) {
      return lhs + rhs;
    }
    return 0;
  };
  nvexec::stream_context ctx{};
  auto work = ex::transfer_just(ctx.get_scheduler(), std::span{data, values.size()})
            | nvexec::reduce(initial_value, gpu_only_sum);
  auto [total] = ex::sync_wait(std::move(work)).value();
  REQUIRE(total == element_count + initial_value);
}
} // namespace