Skip to content

Commit

Permalink
Add deivce scan large indices support (ROCm#167)
Browse files Browse the repository at this point in the history
* Update num_items parameter to size_t

* Add large indices test to scan

* Remove size_limit from interface

* Fix build error

* Reduce memory usage in large indices tests

* Fix

* Fix
  • Loading branch information
neon60 authored Oct 14, 2021
1 parent 941db3c commit c7f4fa3
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 16 deletions.
35 changes: 19 additions & 16 deletions hipcub/include/hipcub/backend/rocprim/device/device_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
#ifndef HIPCUB_ROCPRIM_DEVICE_DEVICE_SCAN_HPP_
#define HIPCUB_ROCPRIM_DEVICE_DEVICE_SCAN_HPP_

#ifndef HIPCUB_DEVICE_SCAN_SIZE_LIMIT
#define HIPCUB_DEVICE_SCAN_SIZE_LIMIT size_t(std::numeric_limits<int>::max()) + 1
#endif


#include <iostream>
#include "../../../config.hpp"

Expand All @@ -50,15 +55,14 @@ class DeviceScan
size_t &temp_storage_bytes,
InputIteratorT d_in,
OutputIteratorT d_out,
int num_items,
size_t num_items,
hipStream_t stream = 0,
bool debug_synchronous = false,
size_t size_limit = size_t(std::numeric_limits<int>::max()) + 1)
bool debug_synchronous = false)
{
return InclusiveScan(
d_temp_storage, temp_storage_bytes,
d_in, d_out, ::hipcub::Sum(), num_items,
stream, debug_synchronous, size_limit
stream, debug_synchronous
);
}

Expand All @@ -73,16 +77,16 @@ class DeviceScan
InputIteratorT d_in,
OutputIteratorT d_out,
ScanOpT scan_op,
int num_items,
size_t num_items,
hipStream_t stream = 0,
bool debug_synchronous = false,
size_t size_limit = size_t(std::numeric_limits<int>::max()) + 1)
bool debug_synchronous = false)
{
return ::rocprim::inclusive_scan(
d_temp_storage, temp_storage_bytes,
d_in, d_out, num_items,
::hipcub::detail::convert_result_type<InputIteratorT, OutputIteratorT>(scan_op),
stream, debug_synchronous, size_limit
stream, debug_synchronous,
HIPCUB_DEVICE_SCAN_SIZE_LIMIT
);
}

Expand All @@ -95,16 +99,15 @@ class DeviceScan
size_t &temp_storage_bytes,
InputIteratorT d_in,
OutputIteratorT d_out,
int num_items,
size_t num_items,
hipStream_t stream = 0,
bool debug_synchronous = false,
size_t size_limit = size_t(std::numeric_limits<int>::max()) + 1)
bool debug_synchronous = false)
{
using T = typename std::iterator_traits<InputIteratorT>::value_type;
return ExclusiveScan(
d_temp_storage, temp_storage_bytes,
d_in, d_out, ::hipcub::Sum(), T(0), num_items,
stream, debug_synchronous, size_limit
stream, debug_synchronous
);
}

Expand All @@ -121,16 +124,16 @@ class DeviceScan
OutputIteratorT d_out,
ScanOpT scan_op,
InitValueT init_value,
int num_items,
size_t num_items,
hipStream_t stream = 0,
bool debug_synchronous = false,
size_t size_limit = size_t(std::numeric_limits<int>::max()) + 1)
bool debug_synchronous = false)
{
return ::rocprim::exclusive_scan(
d_temp_storage, temp_storage_bytes,
d_in, d_out, init_value, num_items,
::hipcub::detail::convert_result_type<InputIteratorT, OutputIteratorT>(scan_op),
stream, debug_synchronous, size_limit
stream, debug_synchronous,
HIPCUB_DEVICE_SCAN_SIZE_LIMIT
);
}
};
Expand Down
171 changes: 171 additions & 0 deletions test/hipcub/test_hipcub_device_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

// hipcub API
#include "hipcub/device/device_scan.hpp"
#include "hipcub/iterator/counting_input_iterator.hpp"

// Params for tests
template<
Expand Down Expand Up @@ -331,3 +332,173 @@ TYPED_TEST(HipcubDeviceScanTests, ExclusiveScan)
}
}
}

TEST(HipcubDeviceScanTests, LargeIndicesInclusiveScan)
{
using T = unsigned int;
using Iterator = typename hipcub::CountingInputIterator<T>;
const bool debug_synchronous = false;

const size_t size = (1ul << 31) + 1ul;

hipStream_t stream = 0; // default

unsigned int seed_value = rand();
SCOPED_TRACE(testing::Message() << "with seed= " << seed_value);

// Create CountingInputIterator<U> with random starting point
Iterator input_begin(test_utils::get_random_value<T>(0, 200, seed_value));

std::vector<T> output(size);
T * d_output;
HIP_CHECK(test_common_utils::hipMallocHelper(&d_output, output.size() * sizeof(T)));
HIP_CHECK(hipDeviceSynchronize());

// Calculate expected results on host
std::vector<T> expected(size);
test_utils::host_inclusive_scan(
input_begin,
input_begin + size,
expected.begin(),
::hipcub::Sum()
);

// temp storage
size_t temp_storage_size_bytes;
void * d_temp_storage = nullptr;

// Get temporary array size
HIP_CHECK(
hipcub::DeviceScan::InclusiveScan(
d_temp_storage, temp_storage_size_bytes,
input_begin, d_output,
::hipcub::Sum(), size,
stream, debug_synchronous
)
);

// temp_storage_size_bytes must be >0
ASSERT_GT(temp_storage_size_bytes, 0);

// allocate temporary storage
HIP_CHECK(test_common_utils::hipMallocHelper(&d_temp_storage, temp_storage_size_bytes));
HIP_CHECK(hipDeviceSynchronize());

// Run
HIP_CHECK(
hipcub::DeviceScan::InclusiveScan(
d_temp_storage, temp_storage_size_bytes,
input_begin, d_output,
::hipcub::Sum(), size,
stream, debug_synchronous
)
);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());

// Copy output to host
HIP_CHECK(
hipMemcpy(
output.data(), d_output,
output.size() * sizeof(T),
hipMemcpyDeviceToHost
)
);
HIP_CHECK(hipDeviceSynchronize());

// Validating results
for(size_t i = 0; i < output.size(); i++)
{
ASSERT_EQ(output[i], expected[i]) << "where index = " << i;
}

hipFree(d_output);
hipFree(d_temp_storage);
}

TEST(HipcubDeviceScanTests, LargeIndicesExclusiveScan)
{
using T = unsigned int;
using Iterator = typename hipcub::CountingInputIterator<T>;
const bool debug_synchronous = false;

const size_t size = (1ul << 31) + 1ul;

hipStream_t stream = 0; // default

unsigned int seed_value = rand();
SCOPED_TRACE(testing::Message() << "with seed= " << seed_value);

// Create CountingInputIterator<U> with random starting point
Iterator input_begin(test_utils::get_random_value<T>(0, 200, seed_value));
T initial_value = test_utils::get_random_value<T>(1, 10, seed_value);

std::vector<T> output(size);
T * d_output;
HIP_CHECK(test_common_utils::hipMallocHelper(&d_output, output.size() * sizeof(T)));
HIP_CHECK(hipDeviceSynchronize());

// Calculate expected results on host
std::vector<T> expected(size);
test_utils::host_exclusive_scan(
input_begin,
input_begin + size,
initial_value,
expected.begin(),
::hipcub::Sum()
);

// temp storage
size_t temp_storage_size_bytes;
void * d_temp_storage = nullptr;

// Get temporary array size
HIP_CHECK(
hipcub::DeviceScan::ExclusiveScan(
d_temp_storage, temp_storage_size_bytes,
input_begin, d_output,
::hipcub::Sum(),
initial_value, size,
stream, debug_synchronous
)
);

// temp_storage_size_bytes must be >0
ASSERT_GT(temp_storage_size_bytes, 0);

// allocate temporary storage
HIP_CHECK(test_common_utils::hipMallocHelper(&d_temp_storage, temp_storage_size_bytes));
HIP_CHECK(hipDeviceSynchronize());

// Run
HIP_CHECK(
hipcub::DeviceScan::ExclusiveScan(
d_temp_storage, temp_storage_size_bytes,
input_begin, d_output,
::hipcub::Sum(),
initial_value, size,
stream, debug_synchronous
)
);
HIP_CHECK(hipGetLastError());
HIP_CHECK(hipDeviceSynchronize());

// Copy output to host
HIP_CHECK(
hipMemcpy(
output.data(), d_output,
output.size() * sizeof(T),
hipMemcpyDeviceToHost
)
);
HIP_CHECK(hipDeviceSynchronize());

// Validating results
for(size_t i = 0; i < output.size(); i++)
{
ASSERT_EQ(output[i], expected[i]) << "where index = " << i;
}

hipFree(d_output);
hipFree(d_temp_storage);
}

0 comments on commit c7f4fa3

Please sign in to comment.