Skip to content

Commit

Permalink
[SYCL][HIP] Add coarse-grained memory advice for HIP on AMD (intel#12394
Browse files Browse the repository at this point in the history
)

Enables and tests coarse grained memory access via the memadvise
implementation for HIP platforms on AMD hardware.

See related UR changes for the adapter implementation:
oneapi-src/unified-runtime#1249

---------

Co-authored-by: Kenneth Benzie (Benie) <[email protected]>
Co-authored-by: aarongreig <[email protected]>
  • Loading branch information
3 people authored Feb 5, 2024
1 parent b781e6c commit ab86d0d
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 8 deletions.
5 changes: 4 additions & 1 deletion sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,10 @@
// 14.42 Added piextCommandBufferPrefetchUSM and piextCommandBufferAdviseUSM
// 15.43 Changed the signature of piextMemGetNativeHandle to also take a
// pi_device
// 15.44 Add coarse-grain memory advice flag for HIP.

#define _PI_H_VERSION_MAJOR 15
#define _PI_H_VERSION_MINOR 43
#define _PI_H_VERSION_MINOR 44

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -582,6 +583,8 @@ typedef enum {
PI_MEM_ADVICE_CUDA_UNSET_PREFERRED_LOCATION_HOST = 1 << 7,
PI_MEM_ADVICE_CUDA_SET_ACCESSED_BY_HOST = 1 << 8,
PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST = 1 << 9,
PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED = 1 << 10,
PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED = 1 << 11,
PI_MEM_ADVICE_UNKNOWN = 0x7FFFFFFF,
} _pi_mem_advice;

Expand Down
14 changes: 7 additions & 7 deletions sycl/plugins/unified_runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ endif()
if(SYCL_PI_UR_USE_FETCH_CONTENT)
include(FetchContent)

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
# commit 9363574db721d2388c7d76a10edb128764872352
# Merge: 553a6b82 5e513738
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit cd97e172cbbfc411fccb0b80e0fff6f9126574f4
# Merge: bd745d10 2a9ded6f
# Author: Kenneth Benzie (Benie) <[email protected]>
# Date: Thu Feb 1 11:50:16 2024 +0000
# Merge pull request #1302 from kbenzie/benie/cl-binary-type-intermediate
# [CL] Handle INTERMEDIATE binary type
set(UNIFIED_RUNTIME_TAG 9363574db721d2388c7d76a10edb128764872352)
# Date: Fri Feb 2 14:24:16 2024 +0000
# Merge pull request #1249 from GeorgeWeb/georgi/hip_memadvise_coarse_grained
# [HIP] Implement coarse-grained memory advice for the HIP adapter
set(UNIFIED_RUNTIME_TAG cd97e172cbbfc411fccb0b80e0fff6f9126574f4)

if(SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO)
set(UNIFIED_RUNTIME_REPO "${SYCL_PI_UR_OVERRIDE_FETCH_CONTENT_REPO}")
Expand Down
6 changes: 6 additions & 0 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3375,6 +3375,12 @@ inline pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr,
if (Advice & PI_MEM_ADVICE_CUDA_UNSET_ACCESSED_BY_HOST) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_ACCESSED_BY_HOST;
}
if (Advice & PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED) {
UrAdvice |= UR_USM_ADVICE_FLAG_SET_NON_COHERENT_MEMORY;
}
if (Advice & PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED) {
UrAdvice |= UR_USM_ADVICE_FLAG_CLEAR_NON_COHERENT_MEMORY;
}
if (Advice & PI_MEM_ADVICE_RESET) {
UrAdvice |= UR_USM_ADVICE_FLAG_DEFAULT;
}
Expand Down
2 changes: 2 additions & 0 deletions sycl/test-e2e/USM/memadvise_flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ int main() {
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_PREFERRED_LOCATION_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_ACCESSED_BY_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_ACCESSED_BY_HOST);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED);
valid_advices.emplace_back(PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED);
} else {
// Skip
return 0;
Expand Down
147 changes: 147 additions & 0 deletions sycl/test-e2e/USM/memory_coherency_hip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// RUN: %{build} -o %t1.out
// REQUIRES: hip_amd
// RUN: %{run} %t1.out

//==---- memory_coherency_hip.cpp -----------------------------------------==//
// USM coarse/fine grain memory coherency test for the HIP-AMD backend.
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <sycl/sycl.hpp>

#include <chrono>
#include <iostream>
#include <thread>

namespace kernels {
class SquareKrnl final {
int *mPtr;

public:
SquareKrnl(int *ptr) : mPtr{ptr} {}

void operator()(sycl::id<1>) const { *mPtr = (*mPtr) * (*mPtr); }
};

class CoherencyTestKrnl final {
int *mPtr;

public:
CoherencyTestKrnl(int *ptr) : mPtr{ptr} {}

void operator()(sycl::id<1>) const {
auto atm = sycl::atomic_ref<int, sycl::memory_order::relaxed,
sycl::memory_scope::device>(mPtr[0]);

// mPtr was initialized to 1 by the host, now set it to 2.
atm.fetch_add(1);

// spin until mPtr is 3, then change it to 4.
int expected{3};
int old = atm.load();
while (true) {
old = atm.load();
if (old == expected) {
if (atm.compare_exchange_strong(old, 4)) {
break;
}
}
}
}
};
} // namespace kernels

int main() {
sycl::queue q{};
sycl::device dev = q.get_device();
sycl::context ctx = q.get_context();
if (!dev.get_info<sycl::info::device::usm_shared_allocations>()) {
std::cout << "Shared USM is not supported. Skipping test.\n";
return 0;
}

bool coherent{false};

int *ptr = sycl::malloc_shared<int>(1, q);

// Coherency test 1
//
// The following test validates if memory access is fine with memory allocated
// using malloc_managed() and COARSE_GRAINED advice set via mem_advise().
//
// Coarse grained memory is only guaranteed to be coherent outside of GPU
// kernels that modify it. Changes applied to coarse-grained memory by a GPU
// kernel are only visible to the rest of the system (CPU or other GPUs) when
// the kernel has completed. A GPU kernel is only guaranteed to see changes
// applied to coarse grained memory by the rest of the system (CPU or other
// GPUs) if those changes were made before the kernel launched.

// Hint to use coarse-grain memory.
q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_SET_COARSE_GRAINED});

int init_val{9};
int expected{init_val * init_val};

*ptr = init_val;
q.parallel_for(sycl::range{1}, kernels::SquareKrnl{ptr});
// Synchronise the underlying stream.
q.wait();

// Check if caches are flushed correctly and same memory is between devices.
if (*ptr == expected) {
coherent = true;
} else {
std::cerr << "Coherency test failed. Value: " << *ptr
<< " (expected: " << expected << ")\n";
coherent = false;
}

// Coherency test 2
//
// The following test validates if fine-grain behavior is observed or not with
// memory allocated using malloc_managed().
//
// Fine grained memory allows CPUs and GPUs to synchronize (via atomics) and
// coherently communicate with each other while the GPU kernel is running.

// Hint to use fine-grain memory.
q.mem_advise(ptr, sizeof(int), int{PI_MEM_ADVICE_HIP_UNSET_COARSE_GRAINED});

init_val = 1;
expected = 4;

*ptr = init_val;
q.parallel_for(sycl::range{1}, kernels::CoherencyTestKrnl{ptr});

// wait until ptr is 2 from the kernel (or 3 seconds), then increment to 3.
while (*ptr == 2) {
using std::chrono_literals::operator""s;
std::this_thread::sleep_for(3s);
break;
}
*ptr += 1;

// Synchronise the underlying stream.
q.wait();

// Check if caches are flushed correctly and same memory is between devices.
if (*ptr == expected) {
coherent &= true;
} else {
std::cerr << "Coherency test failed. Value: " << *ptr
<< " (expected: " << expected << ")\n";
coherent = false;
}

// Cleanup
sycl::free(ptr, q);

// Check if all coherency tests passed.
assert(coherent);
// The above assert won't trigger with NDEBUG, so ensure the right exit code.
return coherent ? 0 : 1;
}

0 comments on commit ab86d0d

Please sign in to comment.