[NFC][SYCL] Add events_range helper #19608

Merged: 3 commits, Jul 29, 2025
7 changes: 3 additions & 4 deletions sycl/source/detail/async_alloc.cpp
@@ -22,11 +22,10 @@ inline namespace _V1 {
namespace ext::oneapi::experimental {

namespace {
std::vector<ur_event_handle_t>
getUrEvents(const std::vector<std::shared_ptr<detail::event_impl>> &DepEvents) {
std::vector<ur_event_handle_t> getUrEvents(detail::events_range DepEvents) {
std::vector<ur_event_handle_t> RetUrEvents;
for (const std::shared_ptr<detail::event_impl> &EventImpl : DepEvents) {
ur_event_handle_t Handle = EventImpl->getHandle();
for (detail::event_impl &Event : DepEvents) {
ur_event_handle_t Handle = Event.getHandle();
if (Handle != nullptr)
RetUrEvents.push_back(Handle);
}
15 changes: 15 additions & 0 deletions sycl/source/detail/event_impl.hpp
@@ -9,10 +9,12 @@
#pragma once

#include <detail/adapter_impl.hpp>
#include <detail/helpers.hpp>
#include <sycl/detail/cl.h>
#include <sycl/detail/common.hpp>
#include <sycl/detail/host_profiling_info.hpp>
#include <sycl/detail/ur.hpp>
#include <sycl/event.hpp>
#include <sycl/info/info_desc.hpp>

#include <atomic>
@@ -458,6 +460,19 @@ class event_impl {
bool MIsHostEvent = false;
};

using events_iterator =
variadic_iterator<event,
std::vector<std::shared_ptr<event_impl>>::const_iterator,
std::vector<event>::const_iterator,
std::vector<event_impl *>::const_iterator, event_impl *>;

class events_range : public iterator_range<events_iterator> {
private:
using Base = iterator_range<events_iterator>;

public:
using Base::Base;
};
} // namespace detail
} // namespace _V1
} // namespace sycl
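
For context on the new type above: events_range wraps the runtime's variadic_iterator and iterator_range utilities (from detail/helpers.hpp, not shown in this diff) so that a single parameter type can be fed from a vector of shared_ptr<event_impl>, a vector of sycl::event, a vector of event_impl pointers, or raw event_impl pointers used directly as iterators, while iteration always hands back plain event_impl references, as the range-based for loops in the other files show. The sketch below illustrates the same idea with std::variant; EventsRange, EventImpl, and the variant-based iterator are stand-ins, not the actual implementation.

// Illustrative sketch only: a range constructible from different event
// containers whose iterator always dereferences to a plain reference.
// The real events_range is built on variadic_iterator/iterator_range;
// everything below (EventImpl, EventsRange) is a stand-in.
#include <memory>
#include <variant>
#include <vector>

struct EventImpl {
  int Id = 0;
};

class EventsRange {
  using SharedIt = std::vector<std::shared_ptr<EventImpl>>::const_iterator;
  using RawIt = std::vector<EventImpl *>::const_iterator;
  using AnyIt = std::variant<SharedIt, RawIt>;

public:
  class iterator {
  public:
    explicit iterator(AnyIt It) : It(It) {}
    // Dereferencing always yields EventImpl&, no matter how the underlying
    // container stores its elements.
    EventImpl &operator*() const {
      return std::visit([](const auto &I) -> EventImpl & { return **I; }, It);
    }
    iterator &operator++() {
      std::visit([](auto &I) { ++I; }, It);
      return *this;
    }
    bool operator!=(const iterator &Other) const { return It != Other.It; }

  private:
    AnyIt It;
  };

  EventsRange(const std::vector<std::shared_ptr<EventImpl>> &V)
      : Begin(AnyIt{V.begin()}), End(AnyIt{V.end()}) {}
  EventsRange(const std::vector<EventImpl *> &V)
      : Begin(AnyIt{V.begin()}), End(AnyIt{V.end()}) {}

  iterator begin() const { return Begin; }
  iterator end() const { return End; }
  bool empty() const { return !(Begin != End); }
  EventImpl &front() const { return *Begin; }

private:
  iterator Begin, End;
};

// Any helper taking EventsRange now works with either container type:
int sumIds(EventsRange Events) {
  int Sum = 0;
  for (EventImpl &E : Events)
    Sum += E.Id;
  return Sum;
}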
21 changes: 12 additions & 9 deletions sycl/source/detail/memory_manager.cpp
@@ -118,15 +118,19 @@ void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
#endif
}

static void waitForEvents(const std::vector<EventImplPtr> &Events) {
static void waitForEvents(events_range Events) {
// Assuming all events will be on the same device or
// devices associated with the same Backend.
if (!Events.empty()) {
adapter_impl &Adapter = Events[0]->getAdapter();
adapter_impl &Adapter = Events.front().getAdapter();
std::vector<ur_event_handle_t> UrEvents(Events.size());
std::transform(
Events.begin(), Events.end(), UrEvents.begin(),
[](const EventImplPtr &EventImpl) { return EventImpl->getHandle(); });
std::transform(Events.begin(), Events.end(), UrEvents.begin(),
[](event_impl &Event) { return Event.getHandle(); });
// TODO: Why this condition??? Added during PI Removal in
// https://github.com/intel/llvm/pull/14145 with no explanation.
// Should we just filter out all `nullptr`, not only the one in the first
// element?
assert(!UrEvents.empty() && UrEvents[0]);
if (!UrEvents.empty() && UrEvents[0]) {
Adapter.call<UrApiKind::urEventWait>(UrEvents.size(), &UrEvents[0]);
}
@@ -251,8 +255,7 @@ void memUnmapHelper(adapter_impl &Adapter, ur_queue_handle_t Queue,
}

void MemoryManager::release(context_impl *TargetContext, SYCLMemObjI *MemObj,
void *MemAllocation,
std::vector<EventImplPtr> DepEvents,
void *MemAllocation, events_range DepEvents,
ur_event_handle_t &OutEvent) {
// There is no async API for memory releasing. Explicitly wait for all
// dependency events and return empty event.
@@ -281,7 +284,7 @@ void MemoryManager::releaseMemObj(context_impl *TargetContext,

void *MemoryManager::allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
bool InitFromUserData, void *HostPtr,
std::vector<EventImplPtr> DepEvents,
events_range DepEvents,
ur_event_handle_t &OutEvent) {
// There is no async API for memory allocation. Explicitly wait for all
// dependency events and return empty event.
@@ -432,7 +435,7 @@ void *MemoryManager::allocateMemImage(
void *MemoryManager::allocateMemSubBuffer(context_impl *TargetContext,
void *ParentMemObj, size_t ElemSize,
size_t Offset, range<3> Range,
std::vector<EventImplPtr> DepEvents,
events_range DepEvents,
ur_event_handle_t &OutEvent) {
waitForEvents(DepEvents);
OutEvent = nullptr;
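
The TODO added to waitForEvents above questions a condition carried over from the PI removal: only the first collected handle is checked before urEventWait is called. The snippet below sketches the alternative the comment raises, filtering out every null handle before waiting; UrEvent, Ev, and urEventWaitStub are placeholders rather than the real UR API, and this is only the option under discussion, not what the patch does.

// Sketch of the alternative raised by the TODO above: drop every null handle
// before waiting, instead of gating the wait on the first element only.
// UrEvent, Ev, and urEventWaitStub are placeholders, not the real UR API.
#include <cstddef>
#include <vector>

struct UrEvent {};                      // stands in for ur_event_handle_t
struct Ev {
  UrEvent *Handle = nullptr;            // stands in for detail::event_impl
  UrEvent *getHandle() const { return Handle; }
};

void urEventWaitStub(std::size_t /*Count*/, UrEvent *const * /*Events*/) {}

void waitFiltered(const std::vector<Ev> &Events) {
  std::vector<UrEvent *> Handles;
  Handles.reserve(Events.size());
  for (const Ev &E : Events)
    if (UrEvent *H = E.getHandle())     // keep only events with a native handle
      Handles.push_back(H);
  if (!Handles.empty())
    urEventWaitStub(Handles.size(), Handles.data());
}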
8 changes: 4 additions & 4 deletions sycl/source/detail/memory_manager.hpp
@@ -26,6 +26,7 @@ namespace detail {

class queue_impl;
class event_impl;
class events_range;
class context_impl;

using EventImplPtr = std::shared_ptr<detail::event_impl>;
@@ -38,22 +39,21 @@ class MemoryManager {
// The following method releases memory allocation of memory object.
// Depending on the context it releases memory on host or on device.
static void release(context_impl *TargetContext, SYCLMemObjI *MemObj,
void *MemAllocation, std::vector<EventImplPtr> DepEvents,
void *MemAllocation, events_range DepEvents,
ur_event_handle_t &OutEvent);

// The following method allocates memory allocation of memory object.
// Depending on the context it allocates memory on host or on device.
static void *allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
bool InitFromUserData, void *HostPtr,
std::vector<EventImplPtr> DepEvents,
ur_event_handle_t &OutEvent);
events_range DepEvents, ur_event_handle_t &OutEvent);

// The following method creates OpenCL sub buffer for specified
// offset, range, and memory object.
static void *allocateMemSubBuffer(context_impl *TargetContext,
void *ParentMemObj, size_t ElemSize,
size_t Offset, range<3> Range,
std::vector<EventImplPtr> DepEvents,
events_range DepEvents,
ur_event_handle_t &OutEvent);

// Allocates buffer in specified context taking into account situations such
7 changes: 4 additions & 3 deletions sycl/source/detail/queue_impl.hpp
@@ -726,8 +726,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
return false;

if (MDefaultGraphDeps.LastEventPtr != nullptr &&
!Scheduler::CheckEventReadiness(*MContext,
MDefaultGraphDeps.LastEventPtr))
!Scheduler::areEventsSafeForSchedulerBypass(
{*MDefaultGraphDeps.LastEventPtr}, *MContext))
return false;

MNoLastEventMode.store(true, std::memory_order_relaxed);
@@ -746,7 +746,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {

auto Event = parseEvent(Handler.finalize());

if (Event && !Scheduler::CheckEventReadiness(*MContext, Event)) {
if (Event &&
!Scheduler::areEventsSafeForSchedulerBypass({*Event}, *MContext)) {
MDefaultGraphDeps.LastEventPtr = Event;
MNoLastEventMode.store(false, std::memory_order_relaxed);
}
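
The call sites above pass {*MDefaultGraphDeps.LastEventPtr} and {*Event}: the shared pointer is dereferenced and the events_range parameter is brace-initialized from a single event, so no temporary vector is built. The event_impl * alternative of variadic_iterator is presumably what makes such a one-element range cheap; the standalone sketch below shows the single-element idea with placeholder types and assumed mechanics.

// Single-element range sketch (placeholder types, assumed mechanics): lets a
// caller write areEventsSafeForBypassStub({E}) for one event without building
// a vector first.
struct EventImpl {
  bool Ready = false;
};

class SingleEventRange {
public:
  SingleEventRange(EventImpl &E) : Ptr(&E) {}   // implicit, so {*EventPtr} works
  EventImpl *begin() const { return Ptr; }
  EventImpl *end() const { return Ptr + 1; }

private:
  EventImpl *Ptr;
};

bool areEventsSafeForBypassStub(SingleEventRange Events) {
  for (EventImpl &E : Events)
    if (!E.Ready)
      return false;
  return true;
}

// Usage: EventImpl E{true}; bool Ok = areEventsSafeForBypassStub({E});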
39 changes: 17 additions & 22 deletions sycl/source/detail/scheduler/commands.cpp
@@ -231,20 +231,20 @@ static std::string commandToName(Command::CommandType Type) {
}
#endif

std::vector<ur_event_handle_t>
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
queue_impl *CommandQueue, bool IsHostTaskCommand) {
std::vector<ur_event_handle_t> Command::getUrEvents(events_range Events,
queue_impl *CommandQueue,
bool IsHostTaskCommand) {
std::vector<ur_event_handle_t> RetUrEvents;
for (auto &EventImpl : EventImpls) {
auto Handle = EventImpl->getHandle();
for (event_impl &Event : Events) {
auto Handle = Event.getHandle();
if (Handle == nullptr)
continue;

// Do not add redundant event dependencies for in-order queues.
// At this stage dependency is definitely ur task and need to check if
// current one is a host task. In this case we should not skip ur event due
// to different sync mechanisms for different task types on in-order queue.
if (CommandQueue && EventImpl->getWorkerQueue().get() == CommandQueue &&
if (CommandQueue && Event.getWorkerQueue().get() == CommandQueue &&
CommandQueue->isInOrder() && !IsHostTaskCommand)
continue;

@@ -254,39 +254,34 @@ Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
return RetUrEvents;
}

std::vector<ur_event_handle_t>
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
return getUrEvents(EventImpls, MWorkerQueue.get(), isHostTask());
std::vector<ur_event_handle_t> Command::getUrEvents(events_range Events) const {
return getUrEvents(Events, MWorkerQueue.get(), isHostTask());
}

// This function is implemented (duplicating getUrEvents a lot) as short term
// solution for the issue that barrier with wait list could not
// handle empty ur event handles when kernel is enqueued on host task
// completion.
std::vector<ur_event_handle_t>
Command::getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
bool HasEventMode) const {
Command::getUrEventsBlocking(events_range Events, bool HasEventMode) const {
std::vector<ur_event_handle_t> RetUrEvents;
for (auto &EventImpl : EventImpls) {
for (event_impl &Event : Events) {
// Throwaway events created with empty constructor will not have a context
// (which is set lazily) calling getContextImpl() would set that
// context, which we wish to avoid as it is expensive.
// Skip host task and NOP events also.
if (EventImpl->isDefaultConstructed() || EventImpl->isHost() ||
EventImpl->isNOP())
if (Event.isDefaultConstructed() || Event.isHost() || Event.isNOP())
continue;

// If command has not been enqueued then we have to enqueue it.
// It may happen if async enqueue in a host task is involved.
// Interoperability events are special cases and they are not enqueued, as
// they don't have an associated queue and command.
if (!EventImpl->isInterop() && !EventImpl->isEnqueued()) {
if (!EventImpl->getCommand() ||
!EventImpl->getCommand()->producesPiEvent())
if (!Event.isInterop() && !Event.isEnqueued()) {
if (!Event.getCommand() || !Event.getCommand()->producesPiEvent())
continue;
std::vector<Command *> AuxCmds;
Scheduler::getInstance().enqueueCommandForCG(*EventImpl, AuxCmds,
BLOCKING);
Scheduler::getInstance().enqueueCommandForCG(Event, AuxCmds, BLOCKING);
}
// Do not add redundant event dependencies for in-order queues.
// At this stage dependency is definitely ur task and need to check if
@@ -296,11 +291,11 @@ Command::getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
// redundant events may still differ from the resulting event, so they are
// kept.
if (!HasEventMode && MWorkerQueue &&
EventImpl->getWorkerQueue() == MWorkerQueue &&
MWorkerQueue->isInOrder() && !isHostTask())
Event.getWorkerQueue() == MWorkerQueue && MWorkerQueue->isInOrder() &&
!isHostTask())
continue;

RetUrEvents.push_back(EventImpl->getHandle());
RetUrEvents.push_back(Event.getHandle());
}

return RetUrEvents;
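
Both getUrEvents overloads and getUrEventsBlocking keep the same in-order filtering rule while switching to reference iteration: an event produced by the same in-order queue as the consuming command is dropped, because that queue already guarantees ordering, unless the consumer is a host task (or, in the blocking variant, an explicit event mode is requested). A compact restatement of that predicate with stand-in types, for readability only:

// Stand-in types; this only restates the skip condition used above.
struct Queue {
  bool InOrder = false;
};
struct Ev {
  Queue *WorkerQueue = nullptr;
};

// Returns true when the dependency must still be passed to the backend.
bool needsExplicitUrDependency(const Ev &E, const Queue *CommandQueue,
                               bool IsHostTaskCommand) {
  bool SameInOrderQueue =
      CommandQueue && E.WorkerQueue == CommandQueue && CommandQueue->InOrder;
  // Same in-order queue: ordering is implicit, so the event can be skipped,
  // except for host tasks, which synchronize through a different mechanism.
  return !(SameInOrderQueue && !IsHostTaskCommand);
}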
24 changes: 11 additions & 13 deletions sycl/source/detail/scheduler/commands.hpp
@@ -236,21 +236,19 @@ class Command {
/// Returns true iff this command is ready to be submitted for cleanup.
virtual bool readyForCleanup() const;

/// Collect UR events from EventImpls and filter out some of them in case of
/// in order queue
std::vector<ur_event_handle_t>
getUrEvents(const std::vector<EventImplPtr> &EventImpls) const;
/// Collect UR events from Events and filter out some of them in case of
/// in order queue.
std::vector<ur_event_handle_t> getUrEvents(events_range Events) const;

static std::vector<ur_event_handle_t>
getUrEvents(const std::vector<EventImplPtr> &EventImpls,
queue_impl *CommandQueue, bool IsHostTaskCommand);
static std::vector<ur_event_handle_t> getUrEvents(events_range Events,
queue_impl *CommandQueue,
bool IsHostTaskCommand);

/// Collect UR events from EventImpls and filter out some of them in case of
/// in order queue. Does blocking enqueue if event is expected to produce ur
/// event but has empty native handle.
std::vector<ur_event_handle_t>
getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
bool HasEventMode) const;
std::vector<ur_event_handle_t> getUrEventsBlocking(events_range Events,
bool HasEventMode) const;

bool isHostTask() const;

@@ -275,9 +273,9 @@

void waitForPreparedHostEvents() const;

void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls) {
for (auto &EventImpl : EventImpls) {
EventImpl->flushIfNeeded(MWorkerQueue.get());
void flushCrossQueueDeps(events_range Events) {
for (event_impl &Event : Events) {
Event.flushIfNeeded(MWorkerQueue.get());
}
}

68 changes: 25 additions & 43 deletions sycl/source/detail/scheduler/scheduler.cpp
@@ -411,11 +411,11 @@ void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req,
EnqueueLeaves(Record->MWriteLeaves);
}

void Scheduler::enqueueUnblockedCommands(
const std::vector<EventImplPtr> &ToEnqueue, ReadLockT &GraphReadLock,
std::vector<Command *> &ToCleanUp) {
for (auto &Event : ToEnqueue) {
Command *Cmd = Event->getCommand();
void Scheduler::enqueueUnblockedCommands(events_range ToEnqueue,
ReadLockT &GraphReadLock,
std::vector<Command *> &ToCleanUp) {
for (event_impl &Event : ToEnqueue) {
Command *Cmd = Event.getCommand();
if (!Cmd)
continue;
EnqueueResultT Res;
@@ -684,46 +684,28 @@ EventImplPtr Scheduler::addCommandGraphUpdate(
return NewCmdEvent;
}

bool Scheduler::CheckEventReadiness(context_impl &Context,
const EventImplPtr &SyclEventImplPtr) {
// Events that don't have an initialized context are throwaway events that
// don't represent actual dependencies. Calling getContextImpl() would set
// their context, which we wish to avoid as it is expensive.
// NOP events also don't represent actual dependencies.
if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) {
return true;
}
if (SyclEventImplPtr->isHost()) {
return SyclEventImplPtr->isCompleted();
}
// Cross-context dependencies can't be passed to the backend directly.
if (&SyclEventImplPtr->getContextImpl() != &Context)
return false;

// A nullptr here means that the command does not produce a UR event or it
// hasn't been enqueued yet.
return SyclEventImplPtr->getHandle() != nullptr;
}

bool Scheduler::areEventsSafeForSchedulerBypass(
const std::vector<sycl::event> &DepEvents, context_impl &Context) {

return std::all_of(
DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) {
const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
return CheckEventReadiness(Context, SyclEventImplPtr);
});
}

bool Scheduler::areEventsSafeForSchedulerBypass(
const std::vector<EventImplPtr> &DepEvents, context_impl &Context) {
bool Scheduler::areEventsSafeForSchedulerBypass(events_range DepEvents,
context_impl &Context) {
return all_of(DepEvents, [&Context](sycl::detail::event_impl &Event) {
// Events that don't have an initialized context are throwaway events that
// don't represent actual dependencies. Calling getContextImpl() would set
// their context, which we wish to avoid as it is expensive.
// NOP events also don't represent actual dependencies.
if (Event.isDefaultConstructed() || Event.isNOP())
return true;

if (Event.isHost())
return Event.isCompleted();

// Cross-context dependencies can't be passed to the backend directly.
if (&Event.getContextImpl() != &Context)
return false;

return std::all_of(DepEvents.begin(), DepEvents.end(),
[&Context](const EventImplPtr &SyclEventImplPtr) {
return CheckEventReadiness(Context, SyclEventImplPtr);
});
// A nullptr here means that the command does not produce a UR event or it
// hasn't been enqueued yet.
return Event.getHandle() != nullptr;
});
}

} // namespace detail
} // namespace _V1
} // namespace sycl
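
With events_range available, the former CheckEventReadiness helper and the two areEventsSafeForSchedulerBypass overloads collapse into one all_of over event_impl references, and the readiness rules themselves are unchanged. The standalone predicate below restates those rules with placeholder fields, purely as a summary of the logic in the lambda above.

// Placeholder event type; mirrors the checks in the lambda above.
struct Ev {
  bool DefaultConstructed = false;
  bool NOP = false;
  bool Host = false;
  bool Completed = false;
  const void *Context = nullptr;   // stands in for context_impl*
  const void *Handle = nullptr;    // stands in for ur_event_handle_t
};

bool isSafeForSchedulerBypass(const Ev &E, const void *QueueContext) {
  // Throwaway and NOP events carry no real dependency.
  if (E.DefaultConstructed || E.NOP)
    return true;
  // Host events are safe only once they have completed.
  if (E.Host)
    return E.Completed;
  // Cross-context dependencies cannot be handed to the backend directly.
  if (E.Context != QueueContext)
    return false;
  // No handle means the command has not been enqueued or produces no UR event.
  return E.Handle != nullptr;
}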