From e5bb5a73c689f86e0ac12409b77a870a40f9ba6b Mon Sep 17 00:00:00 2001 From: Vasanth Tovinkere Date: Wed, 23 Jul 2025 08:51:09 -0700 Subject: [PATCH 1/2] [SYCL][XPTI] Minimize the use of xpti APIs - Re-organized the initialization so all streams are initialized in one place. Fixes issues where streams were not being registered - Minimized the use of xptiRegisterStream by using the global setup during initialization. This should improve performance for collectors Signed-off-by: Vasanth Tovinkere --- sycl/source/detail/buffer_impl.cpp | 3 - sycl/source/detail/event_impl.cpp | 10 +- sycl/source/detail/global_handler.cpp | 34 ++---- sycl/source/detail/global_handler.hpp | 5 - sycl/source/detail/graph/graph_impl.cpp | 19 ++-- sycl/source/detail/image_impl.cpp | 3 - sycl/source/detail/memory_manager.cpp | 5 - sycl/source/detail/queue_impl.cpp | 59 ++++++---- sycl/source/detail/scheduler/commands.cpp | 14 +-- sycl/source/detail/ur.cpp | 47 ++------ sycl/source/detail/usm/usm_impl.cpp | 50 ++++----- sycl/source/detail/xpti_registry.cpp | 36 +++++-- sycl/source/detail/xpti_registry.hpp | 120 +++++++++++++++------ sycl/source/handler.cpp | 18 ++-- sycl/unittests/xpti_trace/QueueIDCheck.cpp | 4 +- 15 files changed, 222 insertions(+), 205 deletions(-) diff --git a/sycl/source/detail/buffer_impl.cpp b/sycl/source/detail/buffer_impl.cpp index 54f0c70cf7bc1..0db81e3f02129 100644 --- a/sycl/source/detail/buffer_impl.cpp +++ b/sycl/source/detail/buffer_impl.cpp @@ -18,9 +18,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -uint8_t GBufferStreamID; -#endif void *buffer_impl::allocateMem(context_impl *Context, bool InitFromUserData, void *HostPtr, ur_event_handle_t &OutEventToWait) { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index c37600c9907cc..0694664c88fda 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -19,7 +19,6 @@ #include #ifdef XPTI_ENABLE_INSTRUMENTATION -#include "xpti/xpti_trace_framework.hpp" #include #include #include @@ -28,10 +27,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GSYCLGraphEvent; -#endif - // If we do not yet have a context, use the default one. void event_impl::initContextIfNeeded() { if (MContext || !MIsDefaultConstructed) @@ -293,8 +288,7 @@ void event_impl::wait(bool *Success) { void *TelemetryEvent = nullptr; uint64_t IId = 0; std::string Name; - xpti::stream_id_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME); - TelemetryEvent = instrumentationProlog(Name, StreamID, IId); + TelemetryEvent = instrumentationProlog(Name, GSYCLStreamID, IId); #endif auto EventHandle = getHandle(); @@ -306,7 +300,7 @@ void event_impl::wait(bool *Success) { detail::Scheduler::getInstance().waitForEvent(*this, Success); #ifdef XPTI_ENABLE_INSTRUMENTATION - instrumentationEpilog(TelemetryEvent, Name, StreamID, IId); + instrumentationEpilog(TelemetryEvent, Name, GSYCLStreamID, IId); #endif } diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 46509daf741b9..1f7e09de391bc 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -77,46 +77,28 @@ std::atomic_uint ObjectUsageCounter::MCounter{0}; GlobalHandler::GlobalHandler() = default; GlobalHandler::~GlobalHandler() = default; -void GlobalHandler::InitXPTI() { -#ifdef XPTI_ENABLE_INSTRUMENTATION - // Let subscribers know a new stream is being initialized - getXPTIRegistry().initializeStream(SYCL_STREAM_NAME, GMajVer, GMinVer, - GVerStr); - xpti::payload_t SYCLPayload("SYCL Runtime Exceptions"); - uint64_t SYCLInstanceNo; - GSYCLCallEvent = xptiMakeEvent("SYCL Try-catch Exceptions", &SYCLPayload, - xpti::trace_algorithm_event, xpti_at::active, - &SYCLInstanceNo); -#endif -} - void GlobalHandler::TraceEventXPTI(const char *Message) { if (!Message) return; #ifdef XPTI_ENABLE_INSTRUMENTATION - static std::once_flag InitXPTIFlag; + // This section is used to emit XPTI trace events when exceptions occur if (xptiTraceEnabled()) { - std::call_once(InitXPTIFlag, [&]() { InitXPTI(); }); + GlobalHandler::instance().getXPTIRegistry().initializeFrameworkOnce(); // We have to handle the cases where: (1) we may have just the code location // set and not UID and (2) UID set detail::tls_code_loc_t Tls; auto CodeLocation = Tls.query(); - // Creating a tracepoint will convert a CodeLocation to UID, if not set - xpti::framework::tracepoint_t TP( + // Creating a tracepoint using the stashed code location and notifying the + // subscriber with the diagnostic message + xpti::framework::tracepoint_scope_t TP( CodeLocation.fileName(), CodeLocation.functionName(), CodeLocation.lineNumber(), CodeLocation.columnNumber(), nullptr); - // The call to notify will have the signature of: - // (1) the stream defined in .stream() - // (2) The trace type equal to what is set by .trace_type() - // (3) Parent event set to NULL - // (4) Current event set to one created from CodeLocation and UID - // (5) An instance ID that records the number of times this code location - // has been seen (6) The message generated by the exception handler - TP.stream(SYCL_STREAM_NAME) - .trace_type(xpti::trace_point_type_t::diagnostics) + TP.stream(GSYCLStreamID) + .traceType(xpti::trace_point_type_t::diagnostics) + .parentEvent(GSYCLCallEvent) .notify(static_cast(Message)); } diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 605fd10fb77ab..f29e2f1368c9f 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -85,17 +85,12 @@ class GlobalHandler { void drainThreadPool(); void prepareSchedulerToRelease(bool Blocking); - void InitXPTI(); void TraceEventXPTI(const char *Message); // For testing purposes only void attachScheduler(Scheduler *Scheduler); private: -#ifdef XPTI_ENABLE_INSTRUMENTATION - void *GSYCLCallEvent = nullptr; -#endif - bool OkToDefer = true; friend void shutdown_early(); diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 6581bce0ef91f..4fb8ed79f09af 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -737,23 +737,23 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( #ifdef XPTI_ENABLE_INSTRUMENTATION const bool xptiEnabled = xptiTraceEnabled(); - auto StreamID = xpti::invalid_id; xpti_td *CmdTraceEvent = nullptr; uint64_t InstanceID = 0; if (xptiEnabled) { - StreamID = xptiRegisterStream(sycl::detail::SYCL_STREAM_NAME); sycl::detail::CGExecKernel *CGExec = static_cast(Node.MCommandGroup.get()); sycl::detail::code_location CodeLoc(CGExec->MFileName.c_str(), CGExec->MFunctionName.c_str(), CGExec->MLine, CGExec->MColumn); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( - StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, - CGExec->MKernelName.data(), CGExec->MKernelNameBasedCachePtr, nullptr, - CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs); + sycl::detail::GSYCLStreamID, CGExec->MSyclKernel, CodeLoc, + CGExec->MIsTopCodeLoc, CGExec->MKernelName.data(), + CGExec->MKernelNameBasedCachePtr, nullptr, CGExec->MNDRDesc, + CGExec->MKernelBundle.get(), CGExec->MArgs); if (CmdTraceEvent) - sycl::detail::emitInstrumentationGeneral( - StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); + sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID, + InstanceID, CmdTraceEvent, + xpti::trace_task_begin, nullptr); } #endif @@ -773,8 +773,9 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiEnabled && CmdTraceEvent) - sycl::detail::emitInstrumentationGeneral( - StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr); + sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID, + InstanceID, CmdTraceEvent, + xpti::trace_task_end, nullptr); #endif return NewSyncPoint; diff --git a/sycl/source/detail/image_impl.cpp b/sycl/source/detail/image_impl.cpp index 43568f7dfe6c0..cbeb07f297034 100644 --- a/sycl/source/detail/image_impl.cpp +++ b/sycl/source/detail/image_impl.cpp @@ -18,9 +18,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -uint8_t GImageStreamID; -#endif template static bool checkImageValueRange(const std::vector &Devices, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 3a5c615364f47..88ba698756ce8 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -35,11 +35,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -uint8_t GMemAllocStreamID; -xpti::trace_event_data_t *GMemAllocEvent; -#endif - uint64_t emitMemAllocBeginTrace(uintptr_t ObjHandle, size_t AllocSize, size_t GuardZone) { (void)ObjHandle; diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index e7b163ddd62e6..c7c59769ba9ad 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -18,7 +18,6 @@ #include #ifdef XPTI_ENABLE_INSTRUMENTATION -#include "xpti/xpti_trace_framework.hpp" #include #include #endif @@ -156,10 +155,23 @@ event queue_impl::memset(void *Ptr, int Value, size_t Count, // We need a code pointer value and we use the object ptr; if code location // information is available, we will have function name and source file // information - XPTIScope PrepareNotify((void *)this, - (uint16_t)xpti::trace_point_type_t::node_create, - SYCL_STREAM_NAME, "memory_transfer_node::memset"); - PrepareNotify.addMetadata([&](auto TEvent) { + const char *UserData = "memory_transfer_node::memset", *FuncName = nullptr; + // We have to get the stashed code location when not available + detail::tls_code_loc_t Tls; + auto CodeLocation = Tls.query(); + if (!CodeLocation.functionName()) + // If the code location is not available, we use the user data + FuncName = UserData; + else + FuncName = CodeLocation.functionName(); + xpti::framework::tracepoint_scope_t TP( + CodeLocation.fileName(), FuncName, CodeLocation.lineNumber(), + CodeLocation.columnNumber(), (void *)this); + TP.stream(GSYCLStreamID) + .traceType(xpti::trace_point_type_t::node_create) + .parentEvent(GSYCLGraphEvent); + + TP.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); @@ -167,13 +179,15 @@ event queue_impl::memset(void *Ptr, int Value, size_t Count, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + // Before we notifiy the subscribers, we broadcast the 'queue_id', which was a // metadata entry to TLS for use by callback handlers xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); - // Notify XPTI about the memset submission - PrepareNotify.notify(); + // Notify XPTI about the memset submission, which will create a memory object + // node + TP.notify(UserData); // Emit a begin/end scope for this call - PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); + TP.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin, UserData); #endif const std::vector Pattern{static_cast(Value)}; return submitMemOpHelper( @@ -202,10 +216,14 @@ event queue_impl::memcpy(void *Dest, const void *Src, size_t Count, // We need a code pointer value and we duse the object ptr; If code location // is available, we use the source file information along with the object // pointer. - XPTIScope PrepareNotify((void *)this, - (uint16_t)xpti::trace_point_type_t::node_create, - SYCL_STREAM_NAME, "memory_transfer_node::memcpy"); - PrepareNotify.addMetadata([&](auto TEvent) { + xpti::framework::tracepoint_scope_t TP( + CodeLoc.fileName(), CodeLoc.functionName(), CodeLoc.lineNumber(), + CodeLoc.columnNumber(), (void *)this); + TP.stream(GSYCLStreamID) + .traceType(xpti::trace_point_type_t::node_create) + .parentEvent(GSYCLGraphEvent); + const char *UserData = "memory_transfer_node::memcpy"; + TP.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "sycl_device", reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src)); @@ -214,11 +232,13 @@ event queue_impl::memcpy(void *Dest, const void *Src, size_t Count, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + // Before we notify the subscribers, we stash the 'queue_id', which was a + // metadata entry to TLS for use by callback handlers xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memcpy submission - PrepareNotify.notify(); + TP.notify(UserData); // Emit a begin/end scope for this call - PrepareNotify.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin); + TP.scopedNotify((uint16_t)xpti::trace_point_type_t::task_begin, UserData); #endif if ((!Src || !Dest) && Count != 0) { @@ -575,14 +595,12 @@ void queue_impl::instrumentationEpilog(void *TelemetryEvent, std::string &Name, void queue_impl::wait(const detail::code_location &CodeLoc) { (void)CodeLoc; #ifdef XPTI_ENABLE_INSTRUMENTATION - const bool xptiEnabled = xptiTraceEnabled(); + const bool xptiEnabled = xptiCheckTraceEnabled(GSYCLStreamID); void *TelemetryEvent = nullptr; uint64_t IId; std::string Name; - auto StreamID = xpti::invalid_id; if (xptiEnabled) { - StreamID = xptiRegisterStream(SYCL_STREAM_NAME); - TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId); + TelemetryEvent = instrumentationProlog(CodeLoc, Name, GSYCLStreamID, IId); } #endif @@ -664,7 +682,7 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiEnabled) { - instrumentationEpilog(TelemetryEvent, Name, StreamID, IId); + instrumentationEpilog(TelemetryEvent, Name, GSYCLStreamID, IId); } #endif } @@ -672,7 +690,8 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { void queue_impl::constructorNotification() { #if XPTI_ENABLE_INSTRUMENTATION if (xptiTraceEnabled()) { - MStreamID = xptiRegisterStream(SYCL_STREAM_NAME); + // Making it ABI compatible and not removing the member variable + MStreamID = GSYCLStreamID; constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); if (xptiCheckTraceEnabled(MStreamID, NotificationTraceType)) { diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6709bf0d9ac19..e0571d0210f3e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -45,7 +45,6 @@ #endif #ifdef XPTI_ENABLE_INSTRUMENTATION -#include "xpti/xpti_trace_framework.hpp" #include #endif @@ -78,8 +77,6 @@ ur_result_t callMemOpHelperRet(MemOpRet &MemOpResult, MemOpFuncT &MemOpFunc, } #ifdef XPTI_ENABLE_INSTRUMENTATION -// Global graph for the application -extern xpti::trace_event_data_t *GSYCLGraphEvent; static bool CurrentCodeLocationValid() { detail::tls_code_loc_t Tls; @@ -581,8 +578,9 @@ Command::Command( #ifdef XPTI_ENABLE_INSTRUMENTATION if (!xptiTraceEnabled()) return; - // Obtain the stream ID so all commands can emit traces to that stream - MStreamID = xptiRegisterStream(SYCL_STREAM_NAME); + // Obtain the stream ID so all commands can emit traces to that stream; + // copying it to the member variable to avoid ABI breakage + MStreamID = GSYCLStreamID; #endif } @@ -1818,8 +1816,7 @@ void EmptyCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#8d8f29\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "EMPTY NODE" - << "\\n"; + Stream << "EMPTY NODE" << "\\n"; Stream << "\"];" << std::endl; @@ -3866,8 +3863,7 @@ void UpdateCommandBufferCommand::printDot(std::ostream &Stream) const { Stream << "\"" << this << "\" [style=filled, fillcolor=\"#8d8f29\", label=\""; Stream << "ID = " << this << "\\n"; - Stream << "CommandBuffer Command Update" - << "\\n"; + Stream << "CommandBuffer Command Update" << "\\n"; Stream << "\"];" << std::endl; diff --git a/sycl/source/detail/ur.cpp b/sycl/source/detail/ur.cpp index 5262c663542ab..db4cd6c34a1b5 100644 --- a/sycl/source/detail/ur.cpp +++ b/sycl/source/detail/ur.cpp @@ -57,13 +57,6 @@ void contextSetExtendedDeleter(const sycl::context &context, } } // namespace pi -#ifdef XPTI_ENABLE_INSTRUMENTATION -// Global (to the SYCL runtime) graph handle that all command groups are a -// child of -/// Event to be used by graph related activities -xpti_td *GSYCLGraphEvent = nullptr; -#endif // XPTI_ENABLE_INSTRUMENTATION - template void *getAdapterOpaqueData([[maybe_unused]] void *OpaqueDataParam) { // This was formerly a call to piextAdapterGetOpaqueData, a deprecated PI @@ -93,8 +86,6 @@ bool trace(TraceLevel Level) { static void initializeAdapters(std::vector &Adapters, ur_loader_config_handle_t LoaderConfig); -bool XPTIInitDone = false; - // Initializes all available Adapters. std::vector & initializeUr(ur_loader_config_handle_t LoaderConfig) { @@ -249,36 +240,14 @@ static void initializeAdapters(std::vector &Adapters, } #ifdef XPTI_ENABLE_INSTRUMENTATION - GlobalHandler::instance().getXPTIRegistry().initializeFrameworkOnce(); - - if (!(xptiTraceEnabled() && !XPTIInitDone)) - return; - // Not sure this is the best place to initialize the framework; SYCL runtime - // team needs to advise on the right place, until then we piggy-back on the - // initialization of the UR layer. - - // Initialize the global events just once, in the case ur::initialize() is - // called multiple times - XPTIInitDone = true; - // Registers a new stream for 'sycl' and any application that wants to listen - // to this stream will register itself using this string or stream ID for - // this string. - uint8_t StreamID = xptiRegisterStream(SYCL_STREAM_NAME); - // Let all tool applications know that a stream by the name of 'sycl' has - // been initialized and will be generating the trace stream. - GlobalHandler::instance().getXPTIRegistry().initializeStream( - SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr); - // Create a tracepoint to indicate the graph creation - xpti::payload_t GraphPayload("application_graph"); - uint64_t GraphInstanceNo; - GSYCLGraphEvent = - xptiMakeEvent("application_graph", &GraphPayload, xpti::trace_graph_event, - xpti_at::active, &GraphInstanceNo); - if (GSYCLGraphEvent) { - // The graph event is a global event and will be used as the parent for - // all nodes (command groups) - xptiNotifySubscribers(StreamID, xpti::trace_graph_create, nullptr, - GSYCLGraphEvent, GraphInstanceNo, nullptr); + if (xptiTraceEnabled()) { + // Initialize the XPTI framework. + // Not sure this is the best place to initialize the framework; SYCL runtime + // team needs to advise on the right place, until then we piggy-back on the + // initialization of the UR layer. + + // This is done only once, even if multiple adapters are initialized. + GlobalHandler::instance().getXPTIRegistry().initializeFrameworkOnce(); } #endif #undef CHECK_UR_SUCCESS diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 4e6e9750c3484..c66c7c8ab257e 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -65,32 +65,31 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const sycl::context &Ctxt, auto [urCtx, Adapter] = get_ur_handles(Ctxt); ur_result_t Error = UR_RESULT_ERROR_INVALID_VALUE; - ur_usm_desc_t UsmDesc{}; - UsmDesc.align = Alignment; - - ur_usm_alloc_location_desc_t UsmLocationDesc{}; - UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; - - if (PropList.has_property< - sycl::ext::intel::experimental::property::usm::buffer_location>() && - Ctxt.get_platform().has_extension( - "cl_intel_mem_alloc_buffer_location")) { - UsmLocationDesc.location = static_cast( - PropList - .get_property() - .get_buffer_location()); - UsmDesc.pNext = &UsmLocationDesc; - } + ur_usm_desc_t UsmDesc{}; + UsmDesc.align = Alignment; + + ur_usm_alloc_location_desc_t UsmLocationDesc{}; + UsmLocationDesc.stype = UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC; + + if (PropList.has_property< + sycl::ext::intel::experimental::property::usm::buffer_location>() && + Ctxt.get_platform().has_extension("cl_intel_mem_alloc_buffer_location")) { + UsmLocationDesc.location = static_cast( + PropList + .get_property() + .get_buffer_location()); + UsmDesc.pNext = &UsmLocationDesc; + } - Error = Adapter->call_nocheck( - urCtx, &UsmDesc, - /* pool= */ nullptr, Size, &RetVal); + Error = Adapter->call_nocheck( + urCtx, &UsmDesc, + /* pool= */ nullptr, Size, &RetVal); - // Error is for debugging purposes. - // The spec wants a nullptr returned, not an exception. - if (Error != UR_RESULT_SUCCESS) - return nullptr; + // Error is for debugging purposes. + // The spec wants a nullptr returned, not an exception. + if (Error != UR_RESULT_SUCCESS) + return nullptr; #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::addMetadata(PrepareNotify.traceEvent(), "memory_ptr", reinterpret_cast(RetVal)); @@ -105,9 +104,6 @@ inline namespace _V1 { using alloc = sycl::usm::alloc; namespace detail { -#ifdef XPTI_ENABLE_INSTRUMENTATION -extern xpti::trace_event_data_t *GSYCLGraphEvent; -#endif namespace usm { void *alignedAllocInternal(size_t Alignment, size_t Size, diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index 78a1c66cb346d..5f35997639da8 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -18,21 +18,39 @@ namespace sycl { inline namespace _V1 { namespace detail { #ifdef XPTI_ENABLE_INSTRUMENTATION +// Declare the global variables used for XPTI streams +uint8_t GBufferStreamID = xpti::invalid_id; +uint8_t GImageStreamID = xpti::invalid_id; +uint8_t GMemAllocStreamID = xpti::invalid_id; +uint8_t GCudaCallStreamID = xpti::invalid_id; +uint8_t GCudaDebugStreamID = xpti::invalid_id; +uint8_t GSYCLStreamID = xpti::invalid_id; +uint8_t GUrCallStreamID = xpti::invalid_id; +uint8_t GUrApiStreamID = xpti::invalid_id; + +xpti::trace_event_data_t *GMemAllocEvent = nullptr; +xpti::trace_event_data_t *GSYCLGraphEvent = nullptr; +xpti::trace_event_data_t *GSYCLCallEvent = nullptr; +xpti::trace_event_data_t *GApiEvent = nullptr; + xpti::trace_event_data_t *XPTIRegistry::createTraceEvent( const void *Obj, const void *FuncPtr, uint64_t &IId, const detail::code_location &CodeLoc, uint16_t TraceEventType) { - xpti::utils::StringHelper NG; - auto Name = NG.nameWithAddress(CodeLoc.functionName(), - const_cast(FuncPtr)); - xpti::payload_t Payload(Name.c_str(), - (CodeLoc.fileName() ? CodeLoc.fileName() : ""), - CodeLoc.lineNumber(), CodeLoc.columnNumber(), Obj); - + (void)FuncPtr; + auto TP = xptiCreateTracepoint(CodeLoc.functionName(), CodeLoc.fileName(), + CodeLoc.lineNumber(), CodeLoc.columnNumber(), + const_cast(Obj)); + // Send the instance ID back to the caller + IId = TP->instance(); + + auto TPEvent = TP->event_ref(); + // Set the trace event type- see trace_event_type_t + if (TPEvent) + TPEvent->event_type = TraceEventType; // Calls could be at different user-code locations; We create a new event // based on the code location info and if this has been seen before, a // previously created event will be returned. - return xptiMakeEvent(Name.c_str(), &Payload, TraceEventType, xpti_at::active, - &IId); + return TPEvent; } #endif // XPTI_ENABLE_INSTRUMENTATION diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index c9b72a22ed626..d388500907c4e 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -24,77 +24,133 @@ namespace sycl { inline namespace _V1 { namespace detail { -// We define a sycl stream name and this will be used by the instrumentation -// framework -inline constexpr const char *SYCL_STREAM_NAME = "sycl"; -inline constexpr auto SYCL_MEM_ALLOC_STREAM_NAME = - "sycl.experimental.mem_alloc"; #ifdef XPTI_ENABLE_INSTRUMENTATION -extern uint8_t GBufferStreamID; -extern uint8_t GImageStreamID; -extern uint8_t GMemAllocStreamID; -extern xpti::trace_event_data_t *GMemAllocEvent; -extern xpti::trace_event_data_t *GSYCLGraphEvent; - -// We will pick a global constant so that the pointer in TLS never goes stale -inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id"; - #define STR(x) #x +#define TO_STRING(x) STR(x) #define SYCL_VERSION_STR \ - "sycl " STR(__LIBSYCL_MAJOR_VERSION) "." STR(__LIBSYCL_MINOR_VERSION) + "sycl " TO_STRING(__LIBSYCL_MAJOR_VERSION) "." TO_STRING( \ + __LIBSYCL_MINOR_VERSION) /// Constants being used as placeholder until one is able to reliably get the /// version of the SYCL runtime constexpr uint32_t GMajVer = __LIBSYCL_MAJOR_VERSION; constexpr uint32_t GMinVer = __LIBSYCL_MINOR_VERSION; constexpr const char *GVerStr = SYCL_VERSION_STR; -#endif +/// We define all the streams used the instrumentation framework here +inline constexpr const char *SYCL_STREAM_NAME = "sycl"; +inline constexpr auto SYCL_MEM_ALLOC_STREAM_NAME = + "sycl.experimental.mem_alloc"; +inline constexpr const char *CUDA_CALL_STREAM_NAME = + "sycl.experimental.cuda.call"; +inline constexpr const char *CUDA_DEBUG_STREAM_NAME = + "sycl.experimental.cuda.debug"; // Stream name being used to notify about buffer objects. inline constexpr const char *SYCL_BUFFER_STREAM_NAME = "sycl.experimental.buffer"; - // Stream name being used to notify about image objects. inline constexpr const char *SYCL_IMAGE_STREAM_NAME = "sycl.experimental.image"; +inline constexpr const char *UR_CALL_STREAM_NAME = "ur.call"; +inline constexpr const char *UR_API_STREAM_NAME = "ur.api"; + +extern uint8_t GBufferStreamID; +extern uint8_t GImageStreamID; +extern uint8_t GMemAllocStreamID; +extern uint8_t GCudaCallStreamID; +extern uint8_t GCudaDebugStreamID; +extern uint8_t GSYCLStreamID; +extern uint8_t GUrCallStreamID; +extern uint8_t GUrApiStreamID; + +extern xpti::trace_event_data_t *GMemAllocEvent; +extern xpti::trace_event_data_t *GSYCLGraphEvent; +extern xpti::trace_event_data_t *GSYCLCallEvent; +extern xpti::trace_event_data_t *GApiEvent; + +// We will pick a global constant so that the pointer in TLS never goes stale +inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id"; +#endif class XPTIRegistry { public: void initializeFrameworkOnce() { #ifdef XPTI_ENABLE_INSTRUMENTATION std::call_once(MInitialized, [this] { + if (!xptiTraceEnabled()) + // If tracing is not enabled, do not initialize the framework + return; + + // Initialize the XPTI framework xptiFrameworkInitialize(); + // Register the streams that we will be using + // SYCL events + GSYCLStreamID = + this->initializeStream(SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr); // SYCL buffer events - GBufferStreamID = xptiRegisterStream(SYCL_BUFFER_STREAM_NAME); - this->initializeStream(SYCL_BUFFER_STREAM_NAME, 0, 1, "0.1"); + GBufferStreamID = this->initializeStream(SYCL_BUFFER_STREAM_NAME, GMajVer, + GMinVer, GVerStr); // SYCL image events - GImageStreamID = xptiRegisterStream(SYCL_IMAGE_STREAM_NAME); - this->initializeStream(SYCL_IMAGE_STREAM_NAME, 0, 1, "0.1"); - + GImageStreamID = this->initializeStream(SYCL_IMAGE_STREAM_NAME, GMajVer, + GMinVer, GVerStr); // Memory allocation events - GMemAllocStreamID = xptiRegisterStream(SYCL_MEM_ALLOC_STREAM_NAME); - this->initializeStream(SYCL_MEM_ALLOC_STREAM_NAME, 0, 1, "0.1"); - xpti::payload_t MAPayload("SYCL Memory Allocations Layer"); - uint64_t MAInstanceNo = 0; - GMemAllocEvent = xptiMakeEvent("SYCL Memory Allocations", &MAPayload, - xpti::trace_algorithm_event, - xpti_at::active, &MAInstanceNo); + GMemAllocStreamID = this->initializeStream(SYCL_MEM_ALLOC_STREAM_NAME, + GMajVer, GMinVer, GVerStr); + // UR call events + GUrCallStreamID = this->initializeStream(UR_CALL_STREAM_NAME, GMajVer, + GMinVer, GVerStr); + // UR API events + GUrApiStreamID = + this->initializeStream(UR_API_STREAM_NAME, GMajVer, GMinVer, GVerStr); + // CUDA call events + GCudaCallStreamID = this->initializeStream(CUDA_CALL_STREAM_NAME, GMajVer, + GMinVer, GVerStr); + // CUDA debug events + GCudaDebugStreamID = this->initializeStream(CUDA_DEBUG_STREAM_NAME, + GMajVer, GMinVer, GVerStr); + + auto SYCLEventTP = xptiCreateTracepoint("sycl.application.graph", nullptr, + 0, 0, nullptr); + GSYCLGraphEvent = SYCLEventTP->event_ref(); + if (GSYCLGraphEvent) { + // The graph event is a global event and will be used as the parent for + // all nodes (command groups, memory allocations, etc) + xptiNotifySubscribers(GSYCLStreamID, xpti::trace_graph_create, nullptr, + GSYCLGraphEvent, GSYCLGraphEvent->instance_id, + nullptr); + } + auto MemAllocEventTP = + xptiCreateTracepoint("sycl.memory.alloc", nullptr, 0, 0, nullptr); + GMemAllocEvent = MemAllocEventTP->event_ref(); + + // We capture all API calls in a single event, so that we can minimize + // XPTI infra calls + auto APIEventTP = + xptiCreateTracepoint("api.function", nullptr, 0, 0, nullptr); + GApiEvent = APIEventTP->event_ref(); + + auto SYCLExceptionsTP = + xptiCreateTracepoint("sycl.exceptions", nullptr, 0, 0, nullptr); + GSYCLCallEvent = SYCLExceptionsTP->event_ref(); }); #endif } - /// Notifies XPTI subscribers about new stream. + /// Registers and notifies XPTI subscribers about new stream. /// /// \param StreamName is a name of newly initialized stream. /// \param MajVer is a stream major version. /// \param MinVer is a stream minor version. /// \param VerStr is a string of "MajVer.MinVer" format. - void initializeStream(const std::string &StreamName, uint32_t MajVer, - uint32_t MinVer, const std::string &VerStr) { + uint8_t initializeStream(const std::string &StreamName, uint32_t MajVer, + uint32_t MinVer, const std::string &VerStr) { + uint8_t StreamID = xpti::invalid_id; #ifdef XPTI_ENABLE_INSTRUMENTATION + StreamID = xptiRegisterStream(StreamName.c_str()); MActiveStreams.insert(StreamName); xptiInitialize(StreamName.c_str(), MajVer, MinVer, VerStr.c_str()); #endif // XPTI_ENABLE_INSTRUMENTATION + return StreamID; } ~XPTIRegistry() { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index dc5d2f9df6758..6b43006667ad9 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -40,6 +40,10 @@ #include #include +#ifdef XPTI_ENABLE_INSTRUMENTATION +#include +#endif + namespace sycl { inline namespace _V1 { @@ -576,21 +580,20 @@ event handler::finalize() { : detail::event_impl::create_device_event(impl->get_queue()); #ifdef XPTI_ENABLE_INSTRUMENTATION - const bool xptiEnabled = xptiTraceEnabled(); + // Only enable instrumentation if there are subscribes to the SYCL stream + const bool xptiEnabled = xptiCheckTraceEnabled(detail::GSYCLStreamID); #endif auto EnqueueKernel = [&]() { #ifdef XPTI_ENABLE_INSTRUMENTATION - auto StreamID = xpti::invalid_id; xpti_td *CmdTraceEvent = nullptr; uint64_t InstanceID = 0; if (xptiEnabled) { - StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( - StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, + detail::GSYCLStreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, MKernelName.data(), impl->MKernelNameBasedCachePtr, impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); - detail::emitInstrumentationGeneral(StreamID, InstanceID, + detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); } @@ -614,10 +617,11 @@ event handler::finalize() { // Emit signal only when event is created if (!DiscardEvent) { detail::emitInstrumentationGeneral( - StreamID, InstanceID, CmdTraceEvent, xpti::trace_signal, + detail::GSYCLStreamID, InstanceID, CmdTraceEvent, + xpti::trace_signal, static_cast(ResultEvent->getHandle())); } - detail::emitInstrumentationGeneral(StreamID, InstanceID, + detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr); } diff --git a/sycl/unittests/xpti_trace/QueueIDCheck.cpp b/sycl/unittests/xpti_trace/QueueIDCheck.cpp index e5002cba9135d..d0740436b0a37 100644 --- a/sycl/unittests/xpti_trace/QueueIDCheck.cpp +++ b/sycl/unittests/xpti_trace/QueueIDCheck.cpp @@ -136,9 +136,7 @@ TEST_F(QueueID, QueueCreationAndKernelWithDeps) { checkTaskBeginEnd(QueueIDSTr); } -// Re-enable this test after fixing -// https://github.com/intel/llvm/issues/12963 -TEST_F(QueueID, DISABLED_QueueCreationUSMOperations) { +TEST_F(QueueID, QueueCreationUSMOperations) { sycl::queue Q0; sycl::detail::queue_impl &Queue0Impl = *sycl::detail::getSyclObjImpl(Q0); auto QueueIDSTr = std::to_string(Queue0Impl.getQueueID()); From 00bbf8edadaafe71fca170c1e3614d23e6327e7a Mon Sep 17 00:00:00 2001 From: Vasanth Tovinkere Date: Fri, 25 Jul 2025 13:34:09 -0700 Subject: [PATCH 2/2] [SYCL][XPTI] Adjusting initialization to only include SYCL Signed-off-by: Vasanth Tovinkere --- sycl/source/detail/xpti_registry.hpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index d388500907c4e..6023189046121 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -42,25 +42,17 @@ constexpr const char *GVerStr = SYCL_VERSION_STR; inline constexpr const char *SYCL_STREAM_NAME = "sycl"; inline constexpr auto SYCL_MEM_ALLOC_STREAM_NAME = "sycl.experimental.mem_alloc"; -inline constexpr const char *CUDA_CALL_STREAM_NAME = - "sycl.experimental.cuda.call"; -inline constexpr const char *CUDA_DEBUG_STREAM_NAME = - "sycl.experimental.cuda.debug"; // Stream name being used to notify about buffer objects. inline constexpr const char *SYCL_BUFFER_STREAM_NAME = "sycl.experimental.buffer"; // Stream name being used to notify about image objects. inline constexpr const char *SYCL_IMAGE_STREAM_NAME = "sycl.experimental.image"; -inline constexpr const char *UR_CALL_STREAM_NAME = "ur.call"; inline constexpr const char *UR_API_STREAM_NAME = "ur.api"; extern uint8_t GBufferStreamID; extern uint8_t GImageStreamID; extern uint8_t GMemAllocStreamID; -extern uint8_t GCudaCallStreamID; -extern uint8_t GCudaDebugStreamID; extern uint8_t GSYCLStreamID; -extern uint8_t GUrCallStreamID; extern uint8_t GUrApiStreamID; extern xpti::trace_event_data_t *GMemAllocEvent; @@ -96,18 +88,9 @@ class XPTIRegistry { // Memory allocation events GMemAllocStreamID = this->initializeStream(SYCL_MEM_ALLOC_STREAM_NAME, GMajVer, GMinVer, GVerStr); - // UR call events - GUrCallStreamID = this->initializeStream(UR_CALL_STREAM_NAME, GMajVer, - GMinVer, GVerStr); // UR API events GUrApiStreamID = this->initializeStream(UR_API_STREAM_NAME, GMajVer, GMinVer, GVerStr); - // CUDA call events - GCudaCallStreamID = this->initializeStream(CUDA_CALL_STREAM_NAME, GMajVer, - GMinVer, GVerStr); - // CUDA debug events - GCudaDebugStreamID = this->initializeStream(CUDA_DEBUG_STREAM_NAME, - GMajVer, GMinVer, GVerStr); auto SYCLEventTP = xptiCreateTracepoint("sycl.application.graph", nullptr, 0, 0, nullptr); @@ -144,7 +127,8 @@ class XPTIRegistry { /// \param VerStr is a string of "MajVer.MinVer" format. uint8_t initializeStream(const std::string &StreamName, uint32_t MajVer, uint32_t MinVer, const std::string &VerStr) { - uint8_t StreamID = xpti::invalid_id; + // We need to return an invalid ID if XPTI is not enabled + uint8_t StreamID = std::numeric_limits::max(); #ifdef XPTI_ENABLE_INSTRUMENTATION StreamID = xptiRegisterStream(StreamName.c_str()); MActiveStreams.insert(StreamName);