[NFC][SYCL] Add events_range helper #19608

Merged: 3 commits, Jul 29, 2025
7 changes: 3 additions & 4 deletions sycl/source/detail/async_alloc.cpp
@@ -22,11 +22,10 @@ inline namespace _V1 {
namespace ext::oneapi::experimental {

namespace {
std::vector<ur_event_handle_t>
getUrEvents(const std::vector<std::shared_ptr<detail::event_impl>> &DepEvents) {
std::vector<ur_event_handle_t> getUrEvents(detail::events_range DepEvents) {
std::vector<ur_event_handle_t> RetUrEvents;
for (const std::shared_ptr<detail::event_impl> &EventImpl : DepEvents) {
ur_event_handle_t Handle = EventImpl->getHandle();
for (detail::event_impl &Event : DepEvents) {
ur_event_handle_t Handle = Event.getHandle();
if (Handle != nullptr)
RetUrEvents.push_back(Handle);
}
15 changes: 15 additions & 0 deletions sycl/source/detail/event_impl.hpp
@@ -9,10 +9,12 @@
#pragma once

#include <detail/adapter_impl.hpp>
#include <detail/helpers.hpp>
#include <sycl/detail/cl.h>
#include <sycl/detail/common.hpp>
#include <sycl/detail/host_profiling_info.hpp>
#include <sycl/detail/ur.hpp>
#include <sycl/event.hpp>
#include <sycl/info/info_desc.hpp>

#include <atomic>
@@ -458,6 +460,19 @@ class event_impl {
bool MIsHostEvent = false;
};

using events_iterator =
variadic_iterator<event,
std::vector<std::shared_ptr<event_impl>>::const_iterator,
std::vector<event>::const_iterator,
std::vector<event_impl *>::const_iterator, event_impl *>;

class events_range : public iterator_range<events_iterator> {
private:
using Base = iterator_range<events_iterator>;

public:
using Base::Base;
};
} // namespace detail
} // namespace _V1
} // namespace sycl
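
For context on the new type above: events_range wraps the runtime's variadic_iterator and iterator_range utilities (from detail/helpers.hpp, not shown in this diff) so that a single parameter type can be fed from a vector of shared_ptr<event_impl>, a vector of sycl::event, a vector of event_impl pointers, or raw event_impl pointers used directly as iterators, while iteration always hands back plain event_impl references, as the range-based for loops in the other files show. The sketch below illustrates the same idea with std::variant; EventsRange, EventImpl, and the variant-based iterator are stand-ins, not the actual implementation.

// Illustrative sketch only: a range constructible from different event
// containers whose iterator always dereferences to a plain reference.
// The real events_range is built on variadic_iterator/iterator_range;
// everything below (EventImpl, EventsRange) is a stand-in.
#include <memory>
#include <variant>
#include <vector>

struct EventImpl {
  int Id = 0;
};

class EventsRange {
  using SharedIt = std::vector<std::shared_ptr<EventImpl>>::const_iterator;
  using RawIt = std::vector<EventImpl *>::const_iterator;
  using AnyIt = std::variant<SharedIt, RawIt>;

public:
  class iterator {
  public:
    explicit iterator(AnyIt It) : It(It) {}
    // Dereferencing always yields EventImpl&, no matter how the underlying
    // container stores its elements.
    EventImpl &operator*() const {
      return std::visit([](const auto &I) -> EventImpl & { return **I; }, It);
    }
    iterator &operator++() {
      std::visit([](auto &I) { ++I; }, It);
      return *this;
    }
    bool operator!=(const iterator &Other) const { return It != Other.It; }

  private:
    AnyIt It;
  };

  EventsRange(const std::vector<std::shared_ptr<EventImpl>> &V)
      : Begin(AnyIt{V.begin()}), End(AnyIt{V.end()}) {}
  EventsRange(const std::vector<EventImpl *> &V)
      : Begin(AnyIt{V.begin()}), End(AnyIt{V.end()}) {}

  iterator begin() const { return Begin; }
  iterator end() const { return End; }
  bool empty() const { return !(Begin != End); }
  EventImpl &front() const { return *Begin; }

private:
  iterator Begin, End;
};

// Any helper taking EventsRange now works with either container type:
int sumIds(EventsRange Events) {
  int Sum = 0;
  for (EventImpl &E : Events)
    Sum += E.Id;
  return Sum;
}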
21 changes: 12 additions & 9 deletions sycl/source/detail/memory_manager.cpp
@@ -118,15 +118,19 @@ void emitMemReleaseEndTrace(uintptr_t ObjHandle, uintptr_t AllocPtr,
#endif
}

static void waitForEvents(const std::vector<EventImplPtr> &Events) {
static void waitForEvents(events_range Events) {
// Assuming all events will be on the same device or
// devices associated with the same Backend.
if (!Events.empty()) {
adapter_impl &Adapter = Events[0]->getAdapter();
adapter_impl &Adapter = Events.front().getAdapter();
std::vector<ur_event_handle_t> UrEvents(Events.size());
std::transform(
Events.begin(), Events.end(), UrEvents.begin(),
[](const EventImplPtr &EventImpl) { return EventImpl->getHandle(); });
std::transform(Events.begin(), Events.end(), UrEvents.begin(),
[](event_impl &Event) { return Event.getHandle(); });
// TODO: Why this condition??? Added during PI Removal in
// https://github.com/intel/llvm/pull/14145 with no explanation.
// Should we just filter out all `nullptr`, not only the one in the first
// element?
assert(!UrEvents.empty() && UrEvents[0]);
if (!UrEvents.empty() && UrEvents[0]) {
Adapter.call<UrApiKind::urEventWait>(UrEvents.size(), &UrEvents[0]);
}
@@ -251,8 +255,7 @@ void memUnmapHelper(adapter_impl &Adapter, ur_queue_handle_t Queue,
}

void MemoryManager::release(context_impl *TargetContext, SYCLMemObjI *MemObj,
void *MemAllocation,
std::vector<EventImplPtr> DepEvents,
void *MemAllocation, events_range DepEvents,
ur_event_handle_t &OutEvent) {
// There is no async API for memory releasing. Explicitly wait for all
// dependency events and return empty event.
@@ -281,7 +284,7 @@ void MemoryManager::releaseMemObj(context_impl *TargetContext,

void *MemoryManager::allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
bool InitFromUserData, void *HostPtr,
std::vector<EventImplPtr> DepEvents,
events_range DepEvents,
ur_event_handle_t &OutEvent) {
// There is no async API for memory allocation. Explicitly wait for all
// dependency events and return empty event.
@@ -432,7 +435,7 @@ void *MemoryManager::allocateMemImage(
void *MemoryManager::allocateMemSubBuffer(context_impl *TargetContext,
void *ParentMemObj, size_t ElemSize,
size_t Offset, range<3> Range,
std::vector<EventImplPtr> DepEvents,
events_range DepEvents,
ur_event_handle_t &OutEvent) {
waitForEvents(DepEvents);
OutEvent = nullptr;
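
The TODO added to waitForEvents above questions a condition carried over from the PI removal: only the first collected handle is checked before urEventWait is called. The snippet below sketches the alternative the comment raises, filtering out every null handle before waiting; UrEvent, Ev, and urEventWaitStub are placeholders rather than the real UR API, and this is only the option under discussion, not what the patch does.

// Sketch of the alternative raised by the TODO above: drop every null handle
// before waiting, instead of gating the wait on the first element only.
// UrEvent, Ev, and urEventWaitStub are placeholders, not the real UR API.
#include <cstddef>
#include <vector>

struct UrEvent {};                      // stands in for ur_event_handle_t
struct Ev {
  UrEvent *Handle = nullptr;            // stands in for detail::event_impl
  UrEvent *getHandle() const { return Handle; }
};

void urEventWaitStub(std::size_t /*Count*/, UrEvent *const * /*Events*/) {}

void waitFiltered(const std::vector<Ev> &Events) {
  std::vector<UrEvent *> Handles;
  Handles.reserve(Events.size());
  for (const Ev &E : Events)
    if (UrEvent *H = E.getHandle())     // keep only events with a native handle
      Handles.push_back(H);
  if (!Handles.empty())
    urEventWaitStub(Handles.size(), Handles.data());
}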
8 changes: 4 additions & 4 deletions sycl/source/detail/memory_manager.hpp
@@ -26,6 +26,7 @@ namespace detail {

class queue_impl;
class event_impl;
class events_range;
class context_impl;

using EventImplPtr = std::shared_ptr<detail::event_impl>;
@@ -38,22 +39,21 @@ class MemoryManager {
// The following method releases memory allocation of memory object.
// Depending on the context it releases memory on host or on device.
static void release(context_impl *TargetContext, SYCLMemObjI *MemObj,
void *MemAllocation, std::vector<EventImplPtr> DepEvents,
void *MemAllocation, events_range DepEvents,
ur_event_handle_t &OutEvent);

// The following method allocates memory allocation of memory object.
// Depending on the context it allocates memory on host or on device.
static void *allocate(context_impl *TargetContext, SYCLMemObjI *MemObj,
bool InitFromUserData, void *HostPtr,
std::vector<EventImplPtr> DepEvents,
ur_event_handle_t &OutEvent);
events_range DepEvents, ur_event_handle_t &OutEvent);

// The following method creates OpenCL sub buffer for specified
// offset, range, and memory object.
static void *allocateMemSubBuffer(context_impl *TargetContext,
void *ParentMemObj, size_t ElemSize,
size_t Offset, range<3> Range,
std::vector<EventImplPtr> DepEvents,
events_range DepEvents,
ur_event_handle_t &OutEvent);

// Allocates buffer in specified context taking into account situations such
7 changes: 4 additions & 3 deletions sycl/source/detail/queue_impl.hpp
@@ -726,8 +726,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
return false;

if (MDefaultGraphDeps.LastEventPtr != nullptr &&
!Scheduler::CheckEventReadiness(*MContext,
MDefaultGraphDeps.LastEventPtr))
!Scheduler::areEventsSafeForSchedulerBypass(
{*MDefaultGraphDeps.LastEventPtr}, *MContext))
return false;

MNoLastEventMode.store(true, std::memory_order_relaxed);
@@ -746,7 +746,8 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {

auto Event = parseEvent(Handler.finalize());

if (Event && !Scheduler::CheckEventReadiness(*MContext, Event)) {
if (Event &&
!Scheduler::areEventsSafeForSchedulerBypass({*Event}, *MContext)) {
MDefaultGraphDeps.LastEventPtr = Event;
MNoLastEventMode.store(false, std::memory_order_relaxed);
}
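
The call sites above pass {*MDefaultGraphDeps.LastEventPtr} and {*Event}: the shared pointer is dereferenced and the events_range parameter is brace-initialized from a single event, so no temporary vector is built. The event_impl * alternative of variadic_iterator is presumably what makes such a one-element range cheap; the standalone sketch below shows the single-element idea with placeholder types and assumed mechanics.

// Single-element range sketch (placeholder types, assumed mechanics): lets a
// caller write areEventsSafeForBypassStub({E}) for one event without building
// a vector first.
struct EventImpl {
  bool Ready = false;
};

class SingleEventRange {
public:
  SingleEventRange(EventImpl &E) : Ptr(&E) {}   // implicit, so {*EventPtr} works
  EventImpl *begin() const { return Ptr; }
  EventImpl *end() const { return Ptr + 1; }

private:
  EventImpl *Ptr;
};

bool areEventsSafeForBypassStub(SingleEventRange Events) {
  for (EventImpl &E : Events)
    if (!E.Ready)
      return false;
  return true;
}

// Usage: EventImpl E{true}; bool Ok = areEventsSafeForBypassStub({E});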
39 changes: 17 additions & 22 deletions sycl/source/detail/scheduler/commands.cpp
@@ -231,20 +231,20 @@ static std::string commandToName(Command::CommandType Type) {
}
#endif

std::vector<ur_event_handle_t>
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
queue_impl *CommandQueue, bool IsHostTaskCommand) {
std::vector<ur_event_handle_t> Command::getUrEvents(events_range Events,
queue_impl *CommandQueue,
bool IsHostTaskCommand) {
std::vector<ur_event_handle_t> RetUrEvents;
for (auto &EventImpl : EventImpls) {
auto Handle = EventImpl->getHandle();
for (event_impl &Event : Events) {
auto Handle = Event.getHandle();
if (Handle == nullptr)
continue;

// Do not add redundant event dependencies for in-order queues.
// At this stage dependency is definitely ur task and need to check if
// current one is a host task. In this case we should not skip ur event due
// to different sync mechanisms for different task types on in-order queue.
if (CommandQueue && EventImpl->getWorkerQueue().get() == CommandQueue &&
if (CommandQueue && Event.getWorkerQueue().get() == CommandQueue &&
CommandQueue->isInOrder() && !IsHostTaskCommand)
continue;

@@ -254,39 +254,34 @@ Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls,
return RetUrEvents;
}

std::vector<ur_event_handle_t>
Command::getUrEvents(const std::vector<EventImplPtr> &EventImpls) const {
return getUrEvents(EventImpls, MWorkerQueue.get(), isHostTask());
std::vector<ur_event_handle_t> Command::getUrEvents(events_range Events) const {
return getUrEvents(Events, MWorkerQueue.get(), isHostTask());
}

// This function is implemented (duplicating getUrEvents a lot) as short term
// solution for the issue that barrier with wait list could not
// handle empty ur event handles when kernel is enqueued on host task
// completion.
std::vector<ur_event_handle_t>
Command::getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
bool HasEventMode) const {
Command::getUrEventsBlocking(events_range Events, bool HasEventMode) const {
std::vector<ur_event_handle_t> RetUrEvents;
for (auto &EventImpl : EventImpls) {
for (event_impl &Event : Events) {
// Throwaway events created with empty constructor will not have a context
// (which is set lazily) calling getContextImpl() would set that
// context, which we wish to avoid as it is expensive.
// Skip host task and NOP events also.
if (EventImpl->isDefaultConstructed() || EventImpl->isHost() ||
EventImpl->isNOP())
if (Event.isDefaultConstructed() || Event.isHost() || Event.isNOP())
continue;

// If command has not been enqueued then we have to enqueue it.
// It may happen if async enqueue in a host task is involved.
// Interoperability events are special cases and they are not enqueued, as
// they don't have an associated queue and command.
if (!EventImpl->isInterop() && !EventImpl->isEnqueued()) {
if (!EventImpl->getCommand() ||
!EventImpl->getCommand()->producesPiEvent())
if (!Event.isInterop() && !Event.isEnqueued()) {
if (!Event.getCommand() || !Event.getCommand()->producesPiEvent())
continue;
std::vector<Command *> AuxCmds;
Scheduler::getInstance().enqueueCommandForCG(*EventImpl, AuxCmds,
BLOCKING);
Scheduler::getInstance().enqueueCommandForCG(Event, AuxCmds, BLOCKING);
}
// Do not add redundant event dependencies for in-order queues.
// At this stage dependency is definitely ur task and need to check if
@@ -296,11 +291,11 @@ Command::getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
// redundant events may still differ from the resulting event, so they are
// kept.
if (!HasEventMode && MWorkerQueue &&
EventImpl->getWorkerQueue() == MWorkerQueue &&
MWorkerQueue->isInOrder() && !isHostTask())
Event.getWorkerQueue() == MWorkerQueue && MWorkerQueue->isInOrder() &&
!isHostTask())
continue;

RetUrEvents.push_back(EventImpl->getHandle());
RetUrEvents.push_back(Event.getHandle());
}

return RetUrEvents;
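
Both getUrEvents overloads and getUrEventsBlocking keep the same in-order filtering rule while switching to reference iteration: an event produced by the same in-order queue as the consuming command is dropped, because that queue already guarantees ordering, unless the consumer is a host task (or, in the blocking variant, an explicit event mode is requested). A compact restatement of that predicate with stand-in types, for readability only:

// Stand-in types; this only restates the skip condition used above.
struct Queue {
  bool InOrder = false;
};
struct Ev {
  Queue *WorkerQueue = nullptr;
};

// Returns true when the dependency must still be passed to the backend.
bool needsExplicitUrDependency(const Ev &E, const Queue *CommandQueue,
                               bool IsHostTaskCommand) {
  bool SameInOrderQueue =
      CommandQueue && E.WorkerQueue == CommandQueue && CommandQueue->InOrder;
  // Same in-order queue: ordering is implicit, so the event can be skipped,
  // except for host tasks, which synchronize through a different mechanism.
  return !(SameInOrderQueue && !IsHostTaskCommand);
}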
24 changes: 11 additions & 13 deletions sycl/source/detail/scheduler/commands.hpp
@@ -236,21 +236,19 @@ class Command {
/// Returns true iff this command is ready to be submitted for cleanup.
virtual bool readyForCleanup() const;

/// Collect UR events from EventImpls and filter out some of them in case of
/// in order queue
std::vector<ur_event_handle_t>
getUrEvents(const std::vector<EventImplPtr> &EventImpls) const;
/// Collect UR events from Events and filter out some of them in case of
/// in order queue.
std::vector<ur_event_handle_t> getUrEvents(events_range Events) const;

static std::vector<ur_event_handle_t>
getUrEvents(const std::vector<EventImplPtr> &EventImpls,
queue_impl *CommandQueue, bool IsHostTaskCommand);
static std::vector<ur_event_handle_t> getUrEvents(events_range Events,
queue_impl *CommandQueue,
bool IsHostTaskCommand);

/// Collect UR events from EventImpls and filter out some of them in case of
/// in order queue. Does blocking enqueue if event is expected to produce ur
/// event but has empty native handle.
std::vector<ur_event_handle_t>
getUrEventsBlocking(const std::vector<EventImplPtr> &EventImpls,
bool HasEventMode) const;
std::vector<ur_event_handle_t> getUrEventsBlocking(events_range Events,
bool HasEventMode) const;

bool isHostTask() const;

@@ -275,9 +273,9 @@

void waitForPreparedHostEvents() const;

void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls) {
for (auto &EventImpl : EventImpls) {
EventImpl->flushIfNeeded(MWorkerQueue.get());
void flushCrossQueueDeps(events_range Events) {
for (event_impl &Event : Events) {
Event.flushIfNeeded(MWorkerQueue.get());
}
}

68 changes: 25 additions & 43 deletions sycl/source/detail/scheduler/scheduler.cpp
@@ -411,11 +411,11 @@ void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req,
EnqueueLeaves(Record->MWriteLeaves);
}

void Scheduler::enqueueUnblockedCommands(
const std::vector<EventImplPtr> &ToEnqueue, ReadLockT &GraphReadLock,
std::vector<Command *> &ToCleanUp) {
for (auto &Event : ToEnqueue) {
Command *Cmd = Event->getCommand();
void Scheduler::enqueueUnblockedCommands(events_range ToEnqueue,
ReadLockT &GraphReadLock,
std::vector<Command *> &ToCleanUp) {
for (event_impl &Event : ToEnqueue) {
Command *Cmd = Event.getCommand();
if (!Cmd)
continue;
EnqueueResultT Res;
@@ -684,46 +684,28 @@ EventImplPtr Scheduler::addCommandGraphUpdate(
return NewCmdEvent;
}

bool Scheduler::CheckEventReadiness(context_impl &Context,
const EventImplPtr &SyclEventImplPtr) {
// Events that don't have an initialized context are throwaway events that
// don't represent actual dependencies. Calling getContextImpl() would set
// their context, which we wish to avoid as it is expensive.
// NOP events also don't represent actual dependencies.
if (SyclEventImplPtr->isDefaultConstructed() || SyclEventImplPtr->isNOP()) {
return true;
}
if (SyclEventImplPtr->isHost()) {
return SyclEventImplPtr->isCompleted();
}
// Cross-context dependencies can't be passed to the backend directly.
if (&SyclEventImplPtr->getContextImpl() != &Context)
return false;

// A nullptr here means that the command does not produce a UR event or it
// hasn't been enqueued yet.
return SyclEventImplPtr->getHandle() != nullptr;
}

bool Scheduler::areEventsSafeForSchedulerBypass(
const std::vector<sycl::event> &DepEvents, context_impl &Context) {

return std::all_of(
DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) {
const EventImplPtr &SyclEventImplPtr = detail::getSyclObjImpl(Event);
return CheckEventReadiness(Context, SyclEventImplPtr);
});
}

bool Scheduler::areEventsSafeForSchedulerBypass(
const std::vector<EventImplPtr> &DepEvents, context_impl &Context) {
bool Scheduler::areEventsSafeForSchedulerBypass(events_range DepEvents,
context_impl &Context) {
return all_of(DepEvents, [&Context](sycl::detail::event_impl &Event) {
// Events that don't have an initialized context are throwaway events that
// don't represent actual dependencies. Calling getContextImpl() would set
// their context, which we wish to avoid as it is expensive.
// NOP events also don't represent actual dependencies.
if (Event.isDefaultConstructed() || Event.isNOP())
return true;

if (Event.isHost())
return Event.isCompleted();

// Cross-context dependencies can't be passed to the backend directly.
if (&Event.getContextImpl() != &Context)
return false;

return std::all_of(DepEvents.begin(), DepEvents.end(),
[&Context](const EventImplPtr &SyclEventImplPtr) {
return CheckEventReadiness(Context, SyclEventImplPtr);
});
// A nullptr here means that the command does not produce a UR event or it
// hasn't been enqueued yet.
return Event.getHandle() != nullptr;
});
}

} // namespace detail
} // namespace _V1
} // namespace sycl
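
With events_range available, the former CheckEventReadiness helper and the two areEventsSafeForSchedulerBypass overloads collapse into one all_of over event_impl references, and the readiness rules themselves are unchanged. The standalone predicate below restates those rules with placeholder fields, purely as a summary of the logic in the lambda above.

// Placeholder event type; mirrors the checks in the lambda above.
struct Ev {
  bool DefaultConstructed = false;
  bool NOP = false;
  bool Host = false;
  bool Completed = false;
  const void *Context = nullptr;   // stands in for context_impl*
  const void *Handle = nullptr;    // stands in for ur_event_handle_t
};

bool isSafeForSchedulerBypass(const Ev &E, const void *QueueContext) {
  // Throwaway and NOP events carry no real dependency.
  if (E.DefaultConstructed || E.NOP)
    return true;
  // Host events are safe only once they have completed.
  if (E.Host)
    return E.Completed;
  // Cross-context dependencies cannot be handed to the backend directly.
  if (E.Context != QueueContext)
    return false;
  // No handle means the command has not been enqueued or produces no UR event.
  return E.Handle != nullptr;
}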