intel · steffenlarsen · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025 · Aug 13, 2025
@@ -49,8 +49,8 @@ This extension is implemented and fully supported by DPC++.
 This extension is currently implemented in DPC++ for all GPU devices and
 backends; however, only the CUDA, HIP and Level Zero backends allows peer to
 peer memory access. Other backends report false from the
-`ext_oneapi_can_access_peer` query. Peer-Peer memory access is only possible
-between two devices from the same backend.
+`ext_oneapi_can_access_peer` query, unless both devices are the same. Peer
+memory access is only possible between two devices from the same SYCL platform.
 
 == Overview
 
@@ -153,13 +153,17 @@ functions may access USM device allocations on the peer device subject to the
 normal rules about context as described in the core SYCL specification.
 If this device does not support peer access (as defined by
 `peer_access::access_supported`), throws an `exception` with the
-`errc::feature_not_supported` error code. If access is already enabled,
-throws an exception with the `errc::invalid` error code.
+`errc::feature_not_supported` error code.
+
+Calling this function with `peer` for which access has already been enabled will
+result in undefined behavior.
 
 
 |void ext_oneapi_disable_peer_access(const device &peer)
-|Disables access to the peer device's memory from this device. If peer access
-is not enabled, throws an `exception` with the `errc::invalid` error code.
+|Disables access to the peer device's memory from this device.
+
+Calling this function with `peer` for which access is not enabled will result in
+undefined behavior.
 
 |===
 
@@ -220,6 +220,9 @@ void device::ext_oneapi_enable_peer_access(const device &peer) {
   ur_device_handle_t Device = impl->getHandleRef();
   ur_device_handle_t Peer = peer.impl->getHandleRef();
   if (Device != Peer) {
+    if (!ext_oneapi_can_access_peer(peer))
+      throw sycl::exception(make_error_code(errc::feature_not_supported),
+                            "Peer access is not allowed between the devices.");
     detail::adapter_impl &Adapter = impl->getAdapter();
     Adapter.call<detail::UrApiKind::urUsmP2PEnablePeerAccessExp>(Device, Peer);
   }
@@ -255,9 +258,14 @@ bool device::ext_oneapi_can_access_peer(const device &peer,
   }();
   detail::adapter_impl &Adapter = impl->getAdapter();
   int value = 0;
-  Adapter.call<detail::UrApiKind::urUsmP2PPeerAccessGetInfoExp>(
-      Device, Peer, UrAttr, sizeof(int), &value, nullptr);
+  auto Err =
+      Adapter.call_nocheck<detail::UrApiKind::urUsmP2PPeerAccessGetInfoExp>(
+          Device, Peer, UrAttr, sizeof(int), &value, nullptr);
 
+  // If the backend doesn't support P2P access, neither does its devices.
+  if (Err == UR_RESULT_ERROR_UNSUPPORTED_FEATURE)
+    return false;
+  Adapter.checkUrResult(Err);
   return value == 1;
 }
 

@@ -124,6 +124,7 @@ inline namespace _V1 {
 #define SYCL_KHR_DEFAULT_CONTEXT 1
 #define SYCL_EXT_INTEL_EVENT_MODE 1
 #define SYCL_EXT_ONEAPI_TANGLE 1
+#define SYCL_EXT_ONEAPI_PEER_ACCESS 1
 
 // Unfinished KHR extensions. These extensions are only available if the
 // __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS macro is defined.

@@ -1,4 +1,3 @@
-// REQUIRES: cuda || hip || level_zero
 // RUN:  %{build} -o %t.out
 // RUN:  %{run} %t.out
 
@@ -10,8 +9,7 @@
 using namespace sycl;
 
 int main() {
-
-  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
+  auto Devs = platform().get_devices();
 
   if (Devs.size() < 2) {
     std::cout << "Cannot test P2P capabilities, at least two devices are "

@@ -1,4 +1,3 @@
-// REQUIRES: cuda || hip || level_zero
 // RUN:  %{build} %if target-nvidia %{ -Xsycl-target-backend=nvptx64-nvidia-cuda --cuda-gpu-arch=sm_61 %} -o %t.out
 // RUN:  %{run} %t.out
 
@@ -18,8 +17,7 @@ using namespace sycl;
 constexpr size_t N = 512;
 
 int main() {
-
-  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
+  auto Devs = platform().get_devices();
 
   if (Devs.size() < 2) {
     std::cout << "Cannot test P2P capabilities, at least two devices are "

@@ -1,4 +1,3 @@
-// REQUIRES: cuda || hip || level_zero
 // RUN:  %{build} -o %t.out
 // RUN:  %{run} %t.out
 
@@ -15,8 +14,7 @@ using namespace sycl;
 constexpr int N = 100;
 
 int main() {
-
-  auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);
+  auto Devs = platform().get_devices();
 
   if (Devs.size() < 2) {
     std::cout << "Cannot test P2P capabilities, at least two devices are "

@@ -13,16 +13,12 @@
 UR_APIEXPORT ur_result_t UR_APICALL
 urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice,
                             [[maybe_unused]] ur_device_handle_t peerDevice) {
-
-  die("Experimental P2P feature is not implemented for OpenCL adapter.");
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
 
 UR_APIEXPORT ur_result_t UR_APICALL
 urUsmP2PDisablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice,
                              [[maybe_unused]] ur_device_handle_t peerDevice) {
-
-  die("Experimental P2P feature is not implemented for OpenCL adapter.");
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }
 
@@ -32,7 +28,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp(
     [[maybe_unused]] ur_exp_peer_info_t propName,
     [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue,
     [[maybe_unused]] size_t *pPropSizeRet) {
-
-  die("Experimental P2P feature is not implemented for OpenCL adapter.");
   return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
 }