diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp
index b26c3327c37ba..9744dfcb1a19d 100644
--- a/sycl/source/detail/graph/graph_impl.cpp
+++ b/sycl/source/detail/graph/graph_impl.cpp
@@ -866,7 +866,7 @@ exec_graph_impl::enqueueNode(ur_exp_command_buffer_handle_t CommandBuffer,
 
   sycl::detail::EventImplPtr Event =
       sycl::detail::Scheduler::getInstance().addCG(
-          Node->getCGCopy(), MQueueImpl,
+          Node->getCGCopy(), *MQueueImpl,
           /*EventNeeded=*/true, CommandBuffer, Deps);
 
   if (MIsUpdatable) {
@@ -1048,7 +1048,7 @@ EventImplPtr exec_graph_impl::enqueueHostTaskPartition(
           NodeCommandGroup->getType()));
 
   EventImplPtr SchedulerEvent = sycl::detail::Scheduler::getInstance().addCG(
-      std::move(CommandGroup), Queue.shared_from_this(), EventNeeded);
+      std::move(CommandGroup), Queue, EventNeeded);
 
   if (EventNeeded) {
     return SchedulerEvent;
@@ -1076,7 +1076,7 @@ EventImplPtr exec_graph_impl::enqueuePartitionWithScheduler(
           CommandBuffer, nullptr, std::move(CGData));
 
   EventImplPtr SchedulerEvent = sycl::detail::Scheduler::getInstance().addCG(
-      std::move(CommandGroup), Queue.shared_from_this(), EventNeeded);
+      std::move(CommandGroup), Queue, EventNeeded);
 
   if (EventNeeded) {
     SchedulerEvent->setEventFromSubmittedExecCommandBuffer(true);
@@ -1551,7 +1551,7 @@ void exec_graph_impl::update(
     // other scheduler commands
     auto UpdateEvent =
         sycl::detail::Scheduler::getInstance().addCommandGraphUpdate(
-            this, Nodes, MQueueImpl, std::move(UpdateRequirements),
+            this, Nodes, MQueueImpl.get(), std::move(UpdateRequirements),
             MSchedulerDependencies);
 
     MSchedulerDependencies.push_back(UpdateEvent);
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index cb3a17ceb79f8..a595f25780a52 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -118,10 +118,10 @@ queue_impl::get_backend_info<info::device::backend_version>() const {
 }
 #endif
 
-static event prepareSYCLEventAssociatedWithQueue(
-    const std::shared_ptr<detail::queue_impl> &QueueImpl) {
-  auto EventImpl = detail::event_impl::create_device_event(*QueueImpl);
-  EventImpl->setContextImpl(QueueImpl->getContextImpl());
+static event
+prepareSYCLEventAssociatedWithQueue(detail::queue_impl &QueueImpl) {
+  auto EventImpl = detail::event_impl::create_device_event(QueueImpl);
+  EventImpl->setContextImpl(QueueImpl.getContextImpl());
   EventImpl->setStateIncomplete();
   return detail::createSyclObjFromImpl<event>(EventImpl);
 }
@@ -464,7 +464,7 @@ event queue_impl::submitMemOpHelper(const std::vector<event> &DepEvents,
             event_impl::create_discarded_event());
       }
 
-      event ResEvent = prepareSYCLEventAssociatedWithQueue(shared_from_this());
+      event ResEvent = prepareSYCLEventAssociatedWithQueue(*this);
       const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
       {
         NestedCallsTracker tracker;
diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index d391e65c59c6a..3fbf72042e998 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -649,9 +649,6 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
   static ContextImplPtr getContext(queue_impl *Queue) {
     return Queue ? Queue->getContextImplPtr() : nullptr;
   }
-  static ContextImplPtr getContext(const QueueImplPtr &Queue) {
-    return getContext(Queue.get());
-  }
 
   // Must be called under MMutex protection
   void doUnenqueuedCommandCleanup(
@@ -688,7 +685,7 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
 protected:
   template <typename HandlerType = handler>
   EventImplPtr insertHelperBarrier(const HandlerType &Handler) {
-    auto &Queue = Handler.impl->get_queue();
+    queue_impl &Queue = Handler.impl->get_queue();
     auto ResEvent = detail::event_impl::create_device_event(Queue);
     ur_event_handle_t UREvent = nullptr;
     getAdapter()->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp
index abcff11f23ef5..3116ae898fba6 100644
--- a/sycl/source/detail/scheduler/graph_builder.cpp
+++ b/sycl/source/detail/scheduler/graph_builder.cpp
@@ -57,10 +57,6 @@ static bool isOnSameContext(const ContextImplPtr Context, queue_impl *Queue) {
   // contexts comparison.
   return Context == queue_impl::getContext(Queue);
 }
-static bool isOnSameContext(const ContextImplPtr Context,
-                            const QueueImplPtr &Queue) {
-  return isOnSameContext(Context, Queue.get());
-}
 
 /// Checks if the required access mode is allowed under the current one.
 static bool isAccessModeAllowed(access::mode Required, access::mode Current) {
@@ -183,7 +179,7 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) {
 }
 
 MemObjRecord *
-Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
+Scheduler::GraphBuilder::getOrInsertMemObjRecord(queue_impl *Queue,
                                                  const Requirement *Req) {
   SYCLMemObjI *MemObject = Req->MSYCLMemObj;
   MemObjRecord *Record = getMemObjRecord(MemObject);
@@ -231,8 +227,8 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue,
     MemObject->MRecord.reset(
         new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency});
     std::vector<Command *> ToEnqueue;
-    getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr,
-                            ToEnqueue);
+    getOrCreateAllocaForReq(MemObject->MRecord.get(), Req,
+                            InteropQueuePtr.get(), ToEnqueue);
     assert(ToEnqueue.empty() && "Creation of the first alloca for a record "
                                 "shouldn't lead to any enqueuing (no linked "
                                 "alloca or exceeding the leaf limit).");
@@ -274,14 +270,13 @@ void Scheduler::GraphBuilder::addNodeToLeaves(
 }
 
 UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd(
-    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
+    MemObjRecord *Record, Requirement *Req, queue_impl *Queue,
     std::vector<Command *> &ToEnqueue) {
   auto Context = queue_impl::getContext(Queue);
   AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Context);
   assert(AllocaCmd && "There must be alloca for requirement!");
   UpdateHostRequirementCommand *UpdateCommand =
-      new UpdateHostRequirementCommand(Queue.get(), *Req, AllocaCmd,
-                                       &Req->MData);
+      new UpdateHostRequirementCommand(Queue, *Req, AllocaCmd, &Req->MData);
   // Need copy of requirement because after host accessor destructor call
   // dependencies become invalid if requirement is stored by pointer.
   const Requirement *StoredReq = UpdateCommand->getRequirement();
@@ -330,9 +325,10 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc,
   return MapCmd;
 }
 
-Command *Scheduler::GraphBuilder::insertMemoryMove(
-    MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue,
-    std::vector<Command *> &ToEnqueue) {
+Command *
+Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record,
+                                          Requirement *Req, queue_impl *Queue,
+                                          std::vector<Command *> &ToEnqueue) {
   AllocaCommandBase *AllocaCmdDst =
       getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
   if (!AllocaCmdDst)
@@ -519,7 +515,7 @@ Scheduler::GraphBuilder::addHostAccessor(Requirement *Req,
     auto SYCLMemObj = static_cast<detail::SYCLMemObjT *>(Req->MSYCLMemObj);
     SYCLMemObj->handleWriteAccessorCreation();
   }
-  // Host accessor is not attached to any queue so no QueueImplPtr object to be
+  // Host accessor is not attached to any queue so no queue object to be
   // sent to getOrInsertMemObjRecord.
   MemObjRecord *Record = getOrInsertMemObjRecord(nullptr, Req);
   if (MPrintOptionsArray[BeforeAddHostAcc])
@@ -691,7 +687,7 @@ static bool checkHostUnifiedMemory(const ContextImplPtr &Ctx) {
 // Note, creation of new allocation command can lead to the current context
 // (Record->MCurContext) change.
 AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
-    MemObjRecord *Record, const Requirement *Req, const QueueImplPtr &Queue,
+    MemObjRecord *Record, const Requirement *Req, queue_impl *Queue,
     std::vector<Command *> &ToEnqueue) {
   auto Context = queue_impl::getContext(Queue);
   AllocaCommandBase *AllocaCmd =
@@ -710,8 +706,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
 
       auto *ParentAlloca =
           getOrCreateAllocaForReq(Record, &ParentRequirement, Queue, ToEnqueue);
-      AllocaCmd = new AllocaSubBufCommand(Queue.get(), *Req, ParentAlloca,
-                                          ToEnqueue, ToCleanUp);
+      AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca, ToEnqueue,
+                                          ToCleanUp);
     } else {
 
       const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
@@ -787,8 +783,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(
           }
       }
 
-      AllocaCmd = new AllocaCommand(Queue.get(), FullReq, InitFromUserData,
-                                    LinkedAllocaCmd);
+      AllocaCmd =
+          new AllocaCommand(Queue, FullReq, InitFromUserData, LinkedAllocaCmd);
 
       // Update linked command
       if (LinkedAllocaCmd) {
@@ -926,16 +922,16 @@ static void combineAccessModesOfReqs(std::vector<Requirement *> &Reqs) {
 }
 
 Command *Scheduler::GraphBuilder::addCG(
-    std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
+    std::unique_ptr<detail::CG> CommandGroup, queue_impl *Queue,
     std::vector<Command *> &ToEnqueue, bool EventNeeded,
     ur_exp_command_buffer_handle_t CommandBuffer,
     const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies) {
   std::vector<Requirement *> &Reqs = CommandGroup->getRequirements();
   std::vector<detail::EventImplPtr> &Events = CommandGroup->getEvents();
 
-  auto NewCmd = std::make_unique<ExecCGCommand>(
-      std::move(CommandGroup), Queue.get(), EventNeeded, CommandBuffer,
-      std::move(Dependencies));
+  auto NewCmd = std::make_unique<ExecCGCommand>(std::move(CommandGroup), Queue,
+                                                EventNeeded, CommandBuffer,
+                                                std::move(Dependencies));
 
   if (!NewCmd)
     throw exception(make_error_code(errc::memory_allocation),
@@ -958,9 +954,9 @@ Command *Scheduler::GraphBuilder::addCG(
     bool isSameCtx = false;
 
     {
-      const QueueImplPtr &QueueForAlloca =
+      queue_impl *QueueForAlloca =
           isInteropTask
-              ? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue
+              ? static_cast<detail::CGHostTask &>(NewCmd->getCG()).MQueue.get()
               : Queue;
 
       Record = getOrInsertMemObjRecord(QueueForAlloca, Req);
@@ -990,15 +986,15 @@ Command *Scheduler::GraphBuilder::addCG(
       // Cannot directly copy memory from OpenCL device to OpenCL device -
       // create two copies: device->host and host->device.
       bool NeedMemMoveToHost = false;
-      auto MemMoveTargetQueue = Queue;
+      queue_impl *MemMoveTargetQueue = Queue;
 
       if (isInteropTask) {
         const detail::CGHostTask &HT =
             static_cast<detail::CGHostTask &>(NewCmd->getCG());
 
-        if (!isOnSameContext(Record->MCurContext, HT.MQueue)) {
+        if (!isOnSameContext(Record->MCurContext, HT.MQueue.get())) {
           NeedMemMoveToHost = true;
-          MemMoveTargetQueue = HT.MQueue;
+          MemMoveTargetQueue = HT.MQueue.get();
         }
       } else if (Queue && Record->MCurContext)
         NeedMemMoveToHost = true;
@@ -1230,7 +1226,9 @@ Command *Scheduler::GraphBuilder::connectDepEvent(
   try {
     std::shared_ptr<detail::HostTask> HT(new detail::HostTask);
     std::unique_ptr<detail::CG> ConnectCG(new detail::CGHostTask(
-        std::move(HT), /* Queue = */ Cmd->getQueue(), /* Context = */ {},
+        std::move(HT),
+        /* Queue = */ Cmd->getQueue(),
+        /* Context = */ {},
         /* Args = */ {},
         detail::CG::StorageInitHelper(
             /* ArgsStorage = */ {}, /* AccStorage = */ {},
@@ -1281,11 +1279,11 @@ Command *Scheduler::GraphBuilder::addCommandGraphUpdate(
     ext::oneapi::experimental::detail::exec_graph_impl *Graph,
     std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
         Nodes,
-    const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
+    queue_impl *Queue, std::vector<Requirement *> Requirements,
     std::vector<detail::EventImplPtr> &Events,
     std::vector<Command *> &ToEnqueue) {
   auto NewCmd =
-      std::make_unique<UpdateCommandBufferCommand>(Queue.get(), Graph, Nodes);
+      std::make_unique<UpdateCommandBufferCommand>(Queue, Graph, Nodes);
   // If there are multiple requirements for the same memory object, its
   // AllocaCommand creation will be dependent on the access mode of the first
   // requirement. Combine these access modes to take all of them into account.
diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp
index c48597df603e7..9c09ea5c77579 100644
--- a/sycl/source/detail/scheduler/scheduler.cpp
+++ b/sycl/source/detail/scheduler/scheduler.cpp
@@ -103,7 +103,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
 }
 
 EventImplPtr Scheduler::addCG(
-    std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
+    std::unique_ptr<detail::CG> CommandGroup, queue_impl &Queue,
     bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer,
     const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies) {
   EventImplPtr NewEvent = nullptr;
@@ -128,7 +128,7 @@ EventImplPtr Scheduler::addCG(
       break;
     }
     default:
-      NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue),
+      NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), &Queue,
                                    AuxiliaryCmds, EventNeeded, CommandBuffer,
                                    std::move(Dependencies));
     }
@@ -646,7 +646,7 @@ EventImplPtr Scheduler::addCommandGraphUpdate(
     ext::oneapi::experimental::detail::exec_graph_impl *Graph,
     std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
         Nodes,
-    const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
+    queue_impl *Queue, std::vector<Requirement *> Requirements,
     std::vector<detail::EventImplPtr> &Events) {
   std::vector<Command *> AuxiliaryCmds;
   EventImplPtr NewCmdEvent = nullptr;
diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp
index 5ac668878bb9b..856738d324da7 100644
--- a/sycl/source/detail/scheduler/scheduler.hpp
+++ b/sycl/source/detail/scheduler/scheduler.hpp
@@ -187,7 +187,6 @@ class DispatchHostTask;
 
 using ContextImplPtr = std::shared_ptr<detail::context_impl>;
 using EventImplPtr = std::shared_ptr<detail::event_impl>;
-using QueueImplPtr = std::shared_ptr<detail::queue_impl>;
 using StreamImplPtr = std::shared_ptr<detail::stream_impl>;
 
 using CommandPtr = std::unique_ptr<Command>;
@@ -379,7 +378,7 @@ class Scheduler {
   /// \return an event object to wait on for command group completion. It can
   /// be a discarded event.
   EventImplPtr addCG(
-      std::unique_ptr<detail::CG> CommandGroup, const QueueImplPtr &Queue,
+      std::unique_ptr<detail::CG> CommandGroup, queue_impl &Queue,
       bool EventNeeded, ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
       const std::vector<ur_exp_command_buffer_sync_point_t> &Dependencies = {});
 
@@ -477,7 +476,7 @@ class Scheduler {
       ext::oneapi::experimental::detail::exec_graph_impl *Graph,
       std::vector<std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
           Nodes,
-      const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
+      queue_impl *Queue, std::vector<Requirement *> Requirements,
       std::vector<detail::EventImplPtr> &Events);
 
   static bool CheckEventReadiness(context_impl &Context,
@@ -560,9 +559,8 @@ class Scheduler {
     /// \return a command that represents command group execution and a bool
     /// indicating whether this command should be enqueued to the graph
     /// processor right away or not.
-    Command *addCG(std::unique_ptr<detail::CG> CommandGroup,
-                   const QueueImplPtr &Queue, std::vector<Command *> &ToEnqueue,
-                   bool EventNeeded,
+    Command *addCG(std::unique_ptr<detail::CG> CommandGroup, queue_impl *Queue,
+                   std::vector<Command *> &ToEnqueue, bool EventNeeded,
                    ur_exp_command_buffer_handle_t CommandBuffer = nullptr,
                    const std::vector<ur_exp_command_buffer_sync_point_t>
                        &Dependencies = {});
@@ -600,7 +598,7 @@ class Scheduler {
     /// used when the user provides a "secondary" queue to the submit method
     /// which may be used when the command fails to enqueue/execute in the
     /// primary queue.
-    void rescheduleCommand(Command *Cmd, const QueueImplPtr &Queue);
+    void rescheduleCommand(Command *Cmd, queue_impl *Queue);
 
     /// \return a pointer to the corresponding memory object record for the
     /// SYCL memory object provided, or nullptr if it does not exist.
@@ -608,7 +606,7 @@ class Scheduler {
 
     /// \return a pointer to MemObjRecord for pointer to memory object. If the
     /// record is not found, nullptr is returned.
-    MemObjRecord *getOrInsertMemObjRecord(const QueueImplPtr &Queue,
+    MemObjRecord *getOrInsertMemObjRecord(queue_impl *Queue,
                                           const Requirement *Req);
 
     /// Decrements leaf counters for all leaves of the record.
@@ -656,7 +654,7 @@ class Scheduler {
         std::vector<
             std::shared_ptr<ext::oneapi::experimental::detail::node_impl>>
             Nodes,
-        const QueueImplPtr &Queue, std::vector<Requirement *> Requirements,
+        queue_impl *Queue, std::vector<Requirement *> Requirements,
         std::vector<detail::EventImplPtr> &Events,
         std::vector<Command *> &ToEnqueue);
 
@@ -673,7 +671,7 @@ class Scheduler {
     /// \param Req is a Requirement describing destination.
     /// \param Queue is a queue that is bound to target context.
     Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req,
-                              const QueueImplPtr &Queue,
+                              queue_impl *Queue,
                               std::vector<Command *> &ToEnqueue);
 
     // Inserts commands required to remap the memory object to its current host
@@ -684,7 +682,7 @@ class Scheduler {
 
     UpdateHostRequirementCommand *
     insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req,
-                           const QueueImplPtr &Queue,
+                           queue_impl *Queue,
                            std::vector<Command *> &ToEnqueue);
 
     /// Finds dependencies for the requirement.
@@ -717,7 +715,7 @@ class Scheduler {
     /// If none found, creates new one.
     AllocaCommandBase *
     getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req,
-                            const QueueImplPtr &Queue,
+                            queue_impl *Queue,
                             std::vector<Command *> &ToEnqueue);
 
     void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req);
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index 469b9021025bb..e6b646a1343e9 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -936,7 +936,7 @@ event handler::finalize() {
                       CommandGroup->getRequirements().size() == 0;
 
   detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG(
-      std::move(CommandGroup), Queue->shared_from_this(), !DiscardEvent);
+      std::move(CommandGroup), *Queue, !DiscardEvent);
 
 #ifdef __INTEL_PREVIEW_BREAKING_CHANGES
   MLastEvent = DiscardEvent ? nullptr : Event;
diff --git a/sycl/unittests/buffer/BufferReleaseBase.cpp b/sycl/unittests/buffer/BufferReleaseBase.cpp
index 8b0840300d235..fdaa16710243d 100644
--- a/sycl/unittests/buffer/BufferReleaseBase.cpp
+++ b/sycl/unittests/buffer/BufferReleaseBase.cpp
@@ -218,19 +218,19 @@ TEST_F(BufferDestructionCheck, ReadyToReleaseLogic) {
   sycl::buffer<int, 1> Buf(1);
   sycl::detail::Requirement MockReq = getMockRequirement(Buf);
   sycl::detail::MemObjRecord *Rec = MockSchedulerPtr->getOrInsertMemObjRecord(
-      sycl::detail::getSyclObjImpl(Q), &MockReq);
+      sycl::detail::getSyclObjImpl(Q).get(), &MockReq);
 
   std::shared_ptr<sycl::detail::context_impl> CtxImpl =
       sycl::detail::getSyclObjImpl(Context);
   MockCmdWithReleaseTracking *ReadCmd = nullptr;
   MockCmdWithReleaseTracking *WriteCmd = nullptr;
   ReadCmd =
-      new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
+      new MockCmdWithReleaseTracking(*sycl::detail::getSyclObjImpl(Q), MockReq);
   // These dummy handles are automatically cleaned up by the runtime
   ReadCmd->getEvent()->setHandle(reinterpret_cast<ur_event_handle_t>(
       mock::createDummyHandle<ur_event_handle_t>()));
   WriteCmd =
-      new MockCmdWithReleaseTracking(sycl::detail::getSyclObjImpl(Q), MockReq);
+      new MockCmdWithReleaseTracking(*sycl::detail::getSyclObjImpl(Q), MockReq);
   WriteCmd->getEvent()->setHandle(reinterpret_cast<ur_event_handle_t>(
       mock::createDummyHandle<ur_event_handle_t>()));
   ReadCmd->MEnqueueStatus = sycl::detail::EnqueueResultT::SyclEnqueueSuccess;
diff --git a/sycl/unittests/buffer/BufferReleaseBase.hpp b/sycl/unittests/buffer/BufferReleaseBase.hpp
index 504c2d4047de6..ae32fa0d1ea90 100644
--- a/sycl/unittests/buffer/BufferReleaseBase.hpp
+++ b/sycl/unittests/buffer/BufferReleaseBase.hpp
@@ -24,13 +24,13 @@
 class MockCmdWithReleaseTracking : public MockCommand {
 public:
   MockCmdWithReleaseTracking(
-      sycl::detail::QueueImplPtr Queue, sycl::detail::Requirement Req,
+      sycl::detail::queue_impl &Queue, sycl::detail::Requirement Req,
       sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
-      : MockCommand(Queue.get(), Req, Type) {};
+      : MockCommand(&Queue, Req, Type) {};
   MockCmdWithReleaseTracking(
-      sycl::detail::QueueImplPtr Queue,
+      sycl::detail::queue_impl &Queue,
       sycl::detail::Command::CommandType Type = sycl::detail::Command::RUN_CG)
-      : MockCommand(Queue.get(), Type) {};
+      : MockCommand(&Queue, Type) {};
   ~MockCmdWithReleaseTracking() { Release(); }
   MOCK_METHOD0(Release, void());
 };
@@ -54,9 +54,9 @@ class BufferDestructionCheckCommon : public ::testing::Test {
   MockCmdWithReleaseTracking *addCommandToBuffer(Buffer &Buf, sycl::queue &Q) {
     sycl::detail::Requirement MockReq = getMockRequirement(Buf);
     sycl::detail::MemObjRecord *Rec = MockSchedulerPtr->getOrInsertMemObjRecord(
-        sycl::detail::getSyclObjImpl(Q), &MockReq);
+        sycl::detail::getSyclObjImpl(Q).get(), &MockReq);
     MockCmdWithReleaseTracking *MockCmd = new MockCmdWithReleaseTracking(
-        sycl::detail::getSyclObjImpl(Q), MockReq);
+        *sycl::detail::getSyclObjImpl(Q), MockReq);
     std::vector<sycl::detail::Command *> ToEnqueue;
     MockSchedulerPtr->addNodeToLeaves(Rec, MockCmd, sycl::access::mode::write,
                                       ToEnqueue);
diff --git a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp
index 0c004dc876826..4ced36fbf47e6 100644
--- a/sycl/unittests/scheduler/AccessorDefaultCtor.cpp
+++ b/sycl/unittests/scheduler/AccessorDefaultCtor.cpp
@@ -17,7 +17,7 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) {
   queue QueueDev(context(Plt), default_selector_v);
   MockScheduler MS;
 
-  detail::QueueImplPtr QueueDevImpl = detail::getSyclObjImpl(QueueDev);
+  detail::queue_impl &QueueDevImpl = *detail::getSyclObjImpl(QueueDev);
 
   std::vector<detail::Command *> ToEnqueue;
 
@@ -35,7 +35,7 @@ TEST_F(SchedulerTest, AccDefaultCtorDoesntAffectDepGraph) {
 
   std::unique_ptr<sycl::detail::CG> CmdGroup = MockCGH.finalize();
 
-  detail::Command *NewCmd = MS.addCG(std::move(CmdGroup), QueueDevImpl,
+  detail::Command *NewCmd = MS.addCG(std::move(CmdGroup), &QueueDevImpl,
                                      ToEnqueue, /*EventNeeded=*/true);
 
   // if MDeps is empty, accessor built from default ctor does not affect
diff --git a/sycl/unittests/scheduler/AllocaLinking.cpp b/sycl/unittests/scheduler/AllocaLinking.cpp
index b60a5740ab9e0..2c02aed1b896f 100644
--- a/sycl/unittests/scheduler/AllocaLinking.cpp
+++ b/sycl/unittests/scheduler/AllocaLinking.cpp
@@ -50,7 +50,7 @@ TEST_F(SchedulerTest, AllocaLinking) {
   sycl::queue Q{sycl::platform().get_devices()[0]};
   mock::getCallbacks().set_after_callback("urDeviceGetInfo",
                                           &redefinedDeviceGetInfoAfter);
-  sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q);
+  sycl::detail::queue_impl &QImpl = *detail::getSyclObjImpl(Q);
 
   MockScheduler MS;
   // Should not be linked w/o host unified memory or pinned host memory
@@ -58,10 +58,10 @@ TEST_F(SchedulerTest, AllocaLinking) {
     buffer<int, 1> Buf(range<1>(1));
     detail::Requirement Req = getMockRequirement(Buf);
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+        MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     detail::AllocaCommandBase *HostAllocaCmd =
         MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds);
 
@@ -74,10 +74,10 @@ TEST_F(SchedulerTest, AllocaLinking) {
         range<1>(1), {ext::oneapi::property::buffer::use_pinned_host_memory()});
     detail::Requirement Req = getMockRequirement(Buf);
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+        MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     detail::AllocaCommandBase *HostAllocaCmd =
         MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds);
 
@@ -90,10 +90,10 @@ TEST_F(SchedulerTest, AllocaLinking) {
     buffer<int, 1> Buf(range<1>(1));
     detail::Requirement Req = getMockRequirement(Buf);
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+        MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     detail::AllocaCommandBase *HostAllocaCmd =
         MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds);
 
diff --git a/sycl/unittests/scheduler/Commands.cpp b/sycl/unittests/scheduler/Commands.cpp
index 05325f969dc46..a5b00fa100b32 100644
--- a/sycl/unittests/scheduler/Commands.cpp
+++ b/sycl/unittests/scheduler/Commands.cpp
@@ -55,7 +55,7 @@ TEST_F(SchedulerTest, WaitEmptyEventWithBarrier) {
       &redefineEnqueueEventsWaitWithBarrierExt);
 
   queue Queue{Plt.get_devices()[0]};
-  sycl::detail::QueueImplPtr QueueImpl = detail::getSyclObjImpl(Queue);
+  detail::queue_impl &QueueImpl = *detail::getSyclObjImpl(Queue);
 
   queue_global_context =
       detail::getSyclObjImpl(Queue.get_context())->getHandleRef();
diff --git a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp
index e6157c033f686..b938d38f7883b 100644
--- a/sycl/unittests/scheduler/CommandsWaitForEvents.cpp
+++ b/sycl/unittests/scheduler/CommandsWaitForEvents.cpp
@@ -137,12 +137,10 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) {
     sycl::unittest::UrMock<> Mock;
     sycl::platform Plt = sycl::platform();
     sycl::queue Q(Plt.get_devices()[0]);
-    std::shared_ptr<sycl::detail::queue_impl> QueueImpl =
-        detail::getSyclObjImpl(Q);
+    auto &QueueImpl =
+        static_cast<QueueImplProxyT &>(*detail::getSyclObjImpl(Q));
 
-    auto QueueImplProxy = std::static_pointer_cast<QueueImplProxyT>(QueueImpl);
-
-    ASSERT_TRUE(QueueImplProxy->MStreamsServiceEvents.empty())
+    ASSERT_TRUE(QueueImpl.MStreamsServiceEvents.empty())
         << "No stream service events are expected at the beggining";
 
     event Event = Q.submit([&](handler &CGH) {
@@ -151,7 +149,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) {
           [=]() { Out << "Hello, World!" << endl; });
     });
 
-    ASSERT_TRUE(QueueImplProxy->MStreamsServiceEvents.size() == 1)
+    ASSERT_TRUE(QueueImpl.MStreamsServiceEvents.size() == 1)
         << "Expected 1 service stream event";
 
     std::shared_ptr<sycl::detail::event_impl> EventImpl =
@@ -164,7 +162,7 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) {
 
     Q.wait();
 
-    ASSERT_TRUE(QueueImplProxy->MStreamsServiceEvents.empty())
+    ASSERT_TRUE(QueueImpl.MStreamsServiceEvents.empty())
         << "No stream service events are expected to left after wait";
   }
 
@@ -172,22 +170,19 @@ TEST_F(SchedulerTest, StreamAUXCmdsWait) {
     sycl::unittest::UrMock<> Mock;
     sycl::platform Plt = sycl::platform();
     sycl::queue Q(Plt.get_devices()[0]);
-    std::shared_ptr<sycl::detail::queue_impl> QueueImpl =
-        detail::getSyclObjImpl(Q);
+    auto &QueueImpl =
+        static_cast<QueueImplProxyT &>(*detail::getSyclObjImpl(Q));
 
     mock::getCallbacks().set_before_callback("urEventWait",
                                              &urEventsWaitRedefineCheckCalled);
 
-    auto QueueImplProxy = std::static_pointer_cast<QueueImplProxyT>(QueueImpl);
-
     ur_event_handle_t UREvent = mock::createDummyHandle<ur_event_handle_t>();
 
-    auto EventImpl = sycl::detail::event_impl::create_device_event(*QueueImpl);
+    auto EventImpl = sycl::detail::event_impl::create_device_event(QueueImpl);
     EventImpl->setHandle(UREvent);
 
-    QueueImplProxy->registerStreamServiceEvent(EventImpl);
-
-    QueueImplProxy->wait();
+    QueueImpl.registerStreamServiceEvent(EventImpl);
+    QueueImpl.wait();
 
     ASSERT_TRUE(GpiEventsWaitRedefineCalled)
         << "No stream service events are expected to left after wait";
diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp
index 8c5bd97eb2ae6..22ba0d3bd2cab 100644
--- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp
+++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp
@@ -56,7 +56,7 @@ class DependsOnTests : public ::testing::Test {
     std::vector<detail::Command *> ToEnqueue;
 
     // Emulating processing of command group function
-    MockHandlerCustomFinalize MockCGH(QueueDevImpl,
+    MockHandlerCustomFinalize MockCGH(*QueueDevImpl,
                                       /*CallerNeedsEvent=*/true);
 
     for (auto EventImpl : Events)
@@ -80,7 +80,7 @@ class DependsOnTests : public ::testing::Test {
 
     detail::Command *NewCmd =
         MS.addCG(std::move(CmdGroup),
-                 Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl,
+                 Type == TestCGType::HOST_TASK ? nullptr : QueueDevImpl.get(),
                  ToEnqueue, /*EventNeeded=*/true);
     EXPECT_EQ(ToEnqueue.size(), 0u);
     return NewCmd;
@@ -154,7 +154,7 @@ class DependsOnTests : public ::testing::Test {
       detail::SYCLConfig<detail::SYCL_DISABLE_EXECUTION_GRAPH_CLEANUP>::reset};
   MockScheduler MS;
 
-  detail::QueueImplPtr QueueDevImpl;
+  std::shared_ptr<detail::queue_impl> QueueDevImpl;
 
   std::mutex m;
   std::function<void()> CustomHostLambda = [&]() {
diff --git a/sycl/unittests/scheduler/GraphCleanup.cpp b/sycl/unittests/scheduler/GraphCleanup.cpp
index 7ac490c7f76f6..4a69bf1cf89ff 100644
--- a/sycl/unittests/scheduler/GraphCleanup.cpp
+++ b/sycl/unittests/scheduler/GraphCleanup.cpp
@@ -65,17 +65,16 @@ static void verifyCleanup(detail::MemObjRecord *Record,
 // Check that any non-leaf commands enqueued as part of high level scheduler
 // calls are cleaned up.
 static void checkCleanupOnEnqueue(MockScheduler &MS,
-                                  detail::QueueImplPtr &QueueImplPtr,
+                                  detail::queue_impl &QueueImpl,
                                   buffer<int, 1> &Buf,
                                   detail::Requirement &MockReq) {
-  detail::queue_impl &QueueImpl = *QueueImplPtr;
   bool CommandDeleted = false;
   std::vector<detail::Command *> ToCleanUp;
   std::vector<detail::Command *> ToEnqueue;
   detail::MemObjRecord *Record =
-      MS.getOrInsertMemObjRecord(QueueImplPtr, &MockReq);
+      MS.getOrInsertMemObjRecord(&QueueImpl, &MockReq);
   detail::AllocaCommandBase *AllocaCmd =
-      MS.getOrCreateAllocaForReq(Record, &MockReq, QueueImplPtr, ToEnqueue);
+      MS.getOrCreateAllocaForReq(Record, &MockReq, &QueueImpl, ToEnqueue);
   std::function<void()> Callback = [&CommandDeleted]() {
     CommandDeleted = true;
   };
@@ -99,7 +98,7 @@ static void checkCleanupOnEnqueue(MockScheduler &MS,
                              /*Requirements*/ {&MockReq},
                              /*Events*/ {}))};
   detail::EventImplPtr Event =
-      MS.addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true);
+      MS.addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true);
   auto *Cmd = static_cast<detail::Command *>(Event->getCommand());
   verifyCleanup(Record, AllocaCmd, MockCmd, CommandDeleted);
 
@@ -164,7 +163,7 @@ static void checkCleanupOnEnqueue(MockScheduler &MS,
 }
 
 static void checkCleanupOnLeafUpdate(
-    MockScheduler &MS, detail::QueueImplPtr QueueImpl, buffer<int, 1> &Buf,
+    MockScheduler &MS, detail::queue_impl *QueueImpl, buffer<int, 1> &Buf,
     detail::Requirement &MockReq,
     std::function<void(detail::MemObjRecord *)> SchedulerCall) {
   bool CommandDeleted = false;
@@ -180,7 +179,7 @@ static void checkCleanupOnLeafUpdate(
 
   // Add a mock command as a leaf and enqueue it.
   MockCommand *MockCmd =
-      new MockCommandWithCallback(QueueImpl.get(), MockReq, Callback);
+      new MockCommandWithCallback(QueueImpl, MockReq, Callback);
   (void)MockCmd->addDep(detail::DepDesc(AllocaCmd, &MockReq, nullptr),
                         ToCleanUp);
   EXPECT_TRUE(ToCleanUp.empty());
@@ -211,7 +210,7 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) {
 
   context Ctx{Plt};
   queue Queue{Ctx, default_selector_v};
-  detail::QueueImplPtr QueueImpl = detail::getSyclObjImpl(Queue);
+  detail::queue_impl &QueueImpl = *detail::getSyclObjImpl(Queue);
   MockScheduler MS;
 
   buffer<int, 1> Buf{range<1>(1)};
@@ -222,37 +221,36 @@ TEST_F(SchedulerTest, PostEnqueueCleanup) {
 
   checkCleanupOnEnqueue(MS, QueueImpl, Buf, MockReq);
   std::vector<detail::Command *> ToEnqueue;
-  checkCleanupOnLeafUpdate(MS, QueueImpl, Buf, MockReq,
+  checkCleanupOnLeafUpdate(MS, &QueueImpl, Buf, MockReq,
                            [&](detail::MemObjRecord *Record) {
                              MS.decrementLeafCountersForRecord(Record);
                            });
   checkCleanupOnLeafUpdate(
-      MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) {
-        MS.insertMemoryMove(Record, &MockReq, QueueImpl, ToEnqueue);
+      MS, &QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) {
+        MS.insertMemoryMove(Record, &MockReq, &QueueImpl, ToEnqueue);
       });
-  checkCleanupOnLeafUpdate(MS, QueueImpl, Buf, MockReq,
+  checkCleanupOnLeafUpdate(MS, &QueueImpl, Buf, MockReq,
                            [&](detail::MemObjRecord *Record) {
                              Record->MMemModified = true;
                              MS.addCopyBack(&MockReq, ToEnqueue);
                            });
   checkCleanupOnLeafUpdate(
-      MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) {
+      MS, &QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) {
         detail::Command *Leaf = *Record->MWriteLeaves.begin();
         MS.addEmptyCmd(Leaf, {&MockReq}, detail::Command::BlockReason::HostTask,
                        ToEnqueue);
       });
   checkCleanupOnLeafUpdate(
       MS, nullptr, Buf, MockReq, [&](detail::MemObjRecord *Record) {
-        MS.getOrCreateAllocaForReq(Record, &MockReq, QueueImpl, ToEnqueue);
+        MS.getOrCreateAllocaForReq(Record, &MockReq, &QueueImpl, ToEnqueue);
       });
   // Check cleanup on exceeding leaf limit.
   checkCleanupOnLeafUpdate(
-      MS, QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) {
+      MS, &QueueImpl, Buf, MockReq, [&](detail::MemObjRecord *Record) {
         std::vector<std::unique_ptr<MockCommand>> Leaves;
         for (std::size_t I = 0;
              I < Record->MWriteLeaves.genericCommandsCapacity(); ++I)
-          Leaves.push_back(
-              std::make_unique<MockCommand>(QueueImpl.get(), MockReq));
+          Leaves.push_back(std::make_unique<MockCommand>(&QueueImpl, MockReq));
 
         detail::AllocaCommandBase *AllocaCmd = Record->MAllocaCommands[0];
         std::vector<detail::Command *> ToCleanUp;
@@ -314,17 +312,17 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) {
   platform Plt = sycl::platform();
   context Ctx{Plt};
   queue Queue{Ctx, default_selector_v};
-  detail::QueueImplPtr QueueImplPtr = detail::getSyclObjImpl(Queue);
+  detail::queue_impl &QueueImpl = *detail::getSyclObjImpl(Queue);
 
   MockScheduler *MSPtr = new MockScheduler();
   AttachSchedulerWrapper AttachScheduler{MSPtr};
   detail::EventImplPtr EventImplPtr;
   {
-    MockHandlerCustomFinalize MockCGH(QueueImplPtr,
+    MockHandlerCustomFinalize MockCGH(QueueImpl,
                                       /*CallerNeedsEvent=*/true);
     kernel_bundle KernelBundle =
         sycl::get_kernel_bundle<sycl::bundle_state::input>(
-            QueueImplPtr->get_context());
+            QueueImpl.get_context());
     auto ExecBundle = sycl::build(KernelBundle);
     MockCGH.use_kernel_bundle(ExecBundle);
     stream Stream{1, 1, MockCGH};
@@ -332,8 +330,7 @@ TEST_F(SchedulerTest, StreamBufferDeallocation) {
     MockCGH.single_task<TestKernel<>>([] {});
     std::unique_ptr<detail::CG> CG = MockCGH.finalize();
 
-    EventImplPtr =
-        MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true);
+    EventImplPtr = MSPtr->addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true);
   }
 
   // The buffers should have been released with graph cleanup once the work is
@@ -377,18 +374,18 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) {
   platform Plt = sycl::platform();
   context Ctx{Plt};
   queue Queue{Ctx, default_selector_v};
-  detail::QueueImplPtr QueueImplPtr = detail::getSyclObjImpl(Queue);
+  detail::queue_impl &QueueImpl = *detail::getSyclObjImpl(Queue);
 
   MockScheduler *MSPtr = new MockScheduler();
   AttachSchedulerWrapper AttachScheduler{MSPtr};
   detail::EventImplPtr EventImplPtr;
   bool MockAuxResourceDeleted = false;
   {
-    MockHandlerCustomFinalize MockCGH(QueueImplPtr,
+    MockHandlerCustomFinalize MockCGH(QueueImpl,
                                       /*CallerNeedsEvent=*/true);
     kernel_bundle KernelBundle =
         sycl::get_kernel_bundle<sycl::bundle_state::input>(
-            QueueImplPtr->get_context());
+            QueueImpl.get_context());
     auto ExecBundle = sycl::build(KernelBundle);
     auto MockAuxResourcePtr =
         std::make_shared<MockAuxResource>(MockAuxResourceDeleted);
@@ -397,15 +394,14 @@ TEST_F(SchedulerTest, AuxiliaryResourcesDeallocation) {
     auto BufPtr = std::make_shared<buffer<char, 1>>(
         MockAuxResourcePtr->getDataPtr(), range<1>{1});
     detail::Requirement MockReq = getMockRequirement(*BufPtr);
-    MSPtr->getOrInsertMemObjRecord(QueueImplPtr, &MockReq);
+    MSPtr->getOrInsertMemObjRecord(&QueueImpl, &MockReq);
     MockCGH.use_kernel_bundle(ExecBundle);
     MockCGH.addReduction(std::move(MockAuxResourcePtr));
     MockCGH.addReduction(std::move(BufPtr));
     MockCGH.single_task<TestKernel<>>([] {});
     std::unique_ptr<detail::CG> CG = MockCGH.finalize();
 
-    EventImplPtr =
-        MSPtr->addCG(std::move(CG), QueueImplPtr, /*EventNeeded=*/true);
+    EventImplPtr = MSPtr->addCG(std::move(CG), QueueImpl, /*EventNeeded=*/true);
   }
 
   EventCompleted = false;
diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp
index 5d7b75feb1bf4..e511ce3cba2e8 100644
--- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp
+++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp
@@ -58,8 +58,7 @@ TEST_F(SchedulerTest, InOrderQueueDeps) {
 
   context Ctx{Plt.get_devices()[0]};
   queue InOrderQueue{Ctx, default_selector_v, property::queue::in_order()};
-  sycl::detail::QueueImplPtr InOrderQueueImpl =
-      detail::getSyclObjImpl(InOrderQueue);
+  detail::queue_impl &InOrderQueueImpl = *detail::getSyclObjImpl(InOrderQueue);
 
   MockScheduler MS;
 
@@ -68,9 +67,9 @@ TEST_F(SchedulerTest, InOrderQueueDeps) {
   detail::Requirement Req = getMockRequirement(Buf);
 
   detail::MemObjRecord *Record =
-      MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req);
+      MS.getOrInsertMemObjRecord(&InOrderQueueImpl, &Req);
   std::vector<detail::Command *> AuxCmds;
-  MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl, AuxCmds);
+  MS.getOrCreateAllocaForReq(Record, &Req, &InOrderQueueImpl, AuxCmds);
   MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds);
 
   // Check that sequential memory movements submitted to the same in-order
@@ -79,7 +78,7 @@ TEST_F(SchedulerTest, InOrderQueueDeps) {
   detail::EnqueueResultT Res;
   auto ReadLock = MS.acquireGraphReadLock();
   MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
-  Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl, AuxCmds);
+  Cmd = MS.insertMemoryMove(Record, &Req, &InOrderQueueImpl, AuxCmds);
   MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
   Cmd = MS.insertMemoryMove(Record, &Req, nullptr, AuxCmds);
   MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING);
diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp
index e169b4b89a016..34e4795009d78 100644
--- a/sycl/unittests/scheduler/LeafLimit.cpp
+++ b/sycl/unittests/scheduler/LeafLimit.cpp
@@ -45,8 +45,7 @@ TEST_F(SchedulerTest, LeafLimit) {
   detail::Requirement MockReq = getMockRequirement(Buf);
 
   MockDepCmd = std::make_unique<MockCommand>(&QueueImpl, MockReq);
-  detail::MemObjRecord *Rec =
-      MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Q), &MockReq);
+  detail::MemObjRecord *Rec = MS.getOrInsertMemObjRecord(&QueueImpl, &MockReq);
 
   // Create commands that will be added as leaves exceeding the limit by 1
   for (std::size_t i = 0; i < Rec->MWriteLeaves.genericCommandsCapacity() + 1;
diff --git a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp
index 4af81fd2da891..1a172181fdb52 100644
--- a/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp
+++ b/sycl/unittests/scheduler/LeafLimitDiffContexts.cpp
@@ -53,13 +53,14 @@ TEST_F(SchedulerTest, LeafLimitDiffContexts) {
 
     void InitializeUtils(detail::Requirement &MockReq, MockScheduler &MS) {
 
-      Rec = MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Queue), &MockReq);
+      Rec = MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Queue).get(),
+                                       &MockReq);
       // Creating Alloca on both - device and host contexts (will be created in
       // real case in insertMemMove for example) It is done to avoid extra
       // AllocCmd insertion during ConnectCmd insertion
       std::vector<detail::Command *> ToEnqueue;
       AllocaCmd = MS.getOrCreateAllocaForReq(
-          Rec, &MockReq, detail::getSyclObjImpl(Queue), ToEnqueue);
+          Rec, &MockReq, detail::getSyclObjImpl(Queue).get(), ToEnqueue);
       std::ignore =
           MS.getOrCreateAllocaForReq(Rec, &MockReq, nullptr, ToEnqueue);
       DepCmd = std::make_unique<MockCommand>(
diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp
index 712f3b3e4e66e..99c06bb5cdcc0 100644
--- a/sycl/unittests/scheduler/LeavesCollection.cpp
+++ b/sycl/unittests/scheduler/LeavesCollection.cpp
@@ -31,9 +31,8 @@ class LeavesCollectionTest : public ::testing::Test {
   };
 };
 
-std::shared_ptr<Command>
-createGenericCommand(const std::shared_ptr<queue_impl> &Q) {
-  return std::shared_ptr<Command>{new MockCommand(Q.get(), Command::RUN_CG)};
+std::shared_ptr<Command> createGenericCommand(queue_impl &Q) {
+  return std::shared_ptr<Command>{new MockCommand(&Q, Command::RUN_CG)};
 }
 
 std::shared_ptr<Command> createEmptyCommand(const Requirement &Req) {
@@ -66,7 +65,7 @@ TEST_F(LeavesCollectionTest, PushBack) {
     TimesGenericWasFull = 0;
 
     for (size_t Idx = 0; Idx < GenericCmdsCapacity * 2; ++Idx) {
-      Cmds.push_back(createGenericCommand(getSyclObjImpl(Q)));
+      Cmds.push_back(createGenericCommand(*getSyclObjImpl(Q)));
 
       LE.push_back(Cmds.back().get(), ToEnqueue);
     }
@@ -94,7 +93,7 @@ TEST_F(LeavesCollectionTest, PushBack) {
     TimesGenericWasFull = 0;
 
     for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) {
-      auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q))
+      auto Cmd = Idx % 2 ? createGenericCommand(*getSyclObjImpl(Q))
                          : createEmptyCommand(MockReq);
       Cmds.push_back(Cmd);
 
@@ -134,7 +133,7 @@ TEST_F(LeavesCollectionTest, Remove) {
     std::vector<std::shared_ptr<Command>> Cmds;
 
     for (size_t Idx = 0; Idx < GenericCmdsCapacity * 4; ++Idx) {
-      auto Cmd = Idx % 2 ? createGenericCommand(getSyclObjImpl(Q))
+      auto Cmd = Idx % 2 ? createGenericCommand(*getSyclObjImpl(Q))
                          : createEmptyCommand(MockReq);
       Cmds.push_back(Cmd);
 
diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp
index 81dafa6b5ef43..dc82c9b13139c 100644
--- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp
+++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp
@@ -63,7 +63,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) {
   // Commands are linked only if the device supports host unified memory.
 
   sycl::queue Queue1{Dev};
-  sycl::detail::QueueImplPtr Q1 = sycl::detail::getSyclObjImpl(Queue1);
+  sycl::detail::queue_impl &Q1 = *sycl::detail::getSyclObjImpl(Queue1);
 
   auto AllocaDep = [](sycl::detail::Command *, sycl::detail::Command *,
                       sycl::detail::MemObjRecord *,
@@ -87,7 +87,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) {
 
   MockScheduler MS;
   sycl::detail::Command *AllocaCmd2 =
-      MS.getOrCreateAllocaForReq(Record.get(), &Req, Q1, ToEnqueue);
+      MS.getOrCreateAllocaForReq(Record.get(), &Req, &Q1, ToEnqueue);
 
   ASSERT_TRUE(!!AllocaCmd1.MLinkedAllocaCmd)
       << "No link appeared in existing command";
diff --git a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp
index 935c13a8beca9..31fcfbecd3b3b 100644
--- a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp
+++ b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp
@@ -26,7 +26,7 @@ TEST_F(SchedulerTest, MemObjCommandCleanupAllocaUsers) {
   detail::Requirement MockReqA = getMockRequirement(BufA);
   detail::Requirement MockReqB = getMockRequirement(BufB);
   detail::MemObjRecord *RecA =
-      MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Q), &MockReqA);
+      MS.getOrInsertMemObjRecord(&QueueImpl, &MockReqA);
 
   // Create 2 fake allocas, one of which will be cleaned up
   detail::AllocaCommand *MockAllocaA =
@@ -68,7 +68,7 @@ TEST_F(SchedulerTest, MemObjCommandCleanupAllocaDeps) {
   buffer<int, 1> Buf(range<1>(1));
   detail::Requirement MockReq = getMockRequirement(Buf);
   detail::MemObjRecord *MemObjRec =
-      MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(Q), &MockReq);
+      MS.getOrInsertMemObjRecord(&QueueImpl, &MockReq);
 
   // Create a fake alloca.
   detail::AllocaCommand *MockAllocaCmd =
diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp
index 4c242a0c538df..26988bc3c2ee8 100644
--- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp
+++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp
@@ -86,7 +86,7 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
                                           &redefinedMemGetInfoAfter);
   mock::getCallbacks().set_before_callback("urMemBufferCreateWithNativeHandle",
                                            &redefinedMemCreateWithNativeHandle);
-  sycl::detail::QueueImplPtr QImpl = detail::getSyclObjImpl(Q);
+  sycl::detail::queue_impl &QImpl = *detail::getSyclObjImpl(Q);
 
   MockScheduler MS;
   // Check non-host alloca with non-discard access mode
@@ -95,10 +95,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     buffer<int, 1> Buf(&val, range<1>(1));
     detail::Requirement Req = getMockRequirement(Buf);
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+        MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
 
     // Both non-host and host allocations should be created in this case in
     // order to perform a memory move.
@@ -110,7 +110,7 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     EXPECT_TRUE(Record->MCurContext == nullptr);
 
     detail::Command *MemoryMove =
-        MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds);
+        MS.insertMemoryMove(Record, &Req, &QImpl, AuxCmds);
     EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY);
   }
   // Check non-host alloca with discard access modes
@@ -124,9 +124,9 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
 
     // No need to create a host allocation in this case since the data can be
     // discarded.
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
-    MS.getOrCreateAllocaForReq(Record, &DiscardReq, QImpl, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &DiscardReq, &QImpl, AuxCmds);
     EXPECT_EQ(Record->MAllocaCommands.size(), 1U);
   }
   // Check non-host alloca without user pointer
@@ -136,9 +136,9 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
 
     // No need to create a host allocation in this case since there's no data to
     // initialize the buffer with.
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
-    MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     EXPECT_EQ(Record->MAllocaCommands.size(), 1U);
   }
   // Check host -> non-host alloca
@@ -155,13 +155,13 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
         MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds);
     EXPECT_EQ(Record->MAllocaCommands.size(), 1U);
     detail::AllocaCommandBase *NonHostAllocaCmd =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+        MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     EXPECT_EQ(Record->MAllocaCommands.size(), 2U);
     EXPECT_TRUE(!HostAllocaCmd->MLinkedAllocaCmd);
     EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd);
 
     detail::Command *MemoryMove =
-        MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds);
+        MS.insertMemoryMove(Record, &Req, &QImpl, AuxCmds);
     EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY);
   }
   // Check that memory movement operations work correctly with/after discard
@@ -174,9 +174,9 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     detail::Requirement DiscardReq = getMockRequirement(Buf);
     DiscardReq.MAccessMode = access::mode::discard_read_write;
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
-    MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+    MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     MS.getOrCreateAllocaForReq(Record, &Req, nullptr, AuxCmds);
 
     // Memory movement operations should be omitted for discard access modes.
@@ -203,10 +203,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) {
     detail::Requirement Req = getMockRequirement();
     Req.MSYCLMemObj = BufI.get();
 
-    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req);
+    detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(&QImpl, &Req);
     std::vector<detail::Command *> AuxCmds;
     detail::AllocaCommandBase *InteropAlloca =
-        MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds);
+        MS.getOrCreateAllocaForReq(Record, &Req, &QImpl, AuxCmds);
     detail::EnqueueResultT Res;
     MockScheduler::enqueueCommand(InteropAlloca, Res, detail::BLOCKING);
 
diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp
index 0acbcc119e290..61c7a82c764ae 100644
--- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp
+++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp
@@ -75,10 +75,10 @@ class MockCommand : public sycl::detail::Command {
   ur_result_t MRetVal = UR_RESULT_SUCCESS;
 
   void waitForEventsCall(
-      std::shared_ptr<sycl::detail::queue_impl> Queue,
+      sycl::detail::queue_impl *Queue,
       std::vector<std::shared_ptr<sycl::detail::event_impl>> &RawEvents,
       ur_event_handle_t &Event) {
-    Command::waitForEvents(Queue.get(), RawEvents, Event);
+    Command::waitForEvents(Queue, RawEvents, Event);
   }
 
   std::shared_ptr<sycl::detail::event_impl> getEvent() { return MEvent; }
@@ -109,7 +109,7 @@ class MockScheduler : public sycl::detail::Scheduler {
   using sycl::detail::Scheduler::MDeferredMemObjRelease;
 
   sycl::detail::MemObjRecord *
-  getOrInsertMemObjRecord(const sycl::detail::QueueImplPtr &Queue,
+  getOrInsertMemObjRecord(sycl::detail::queue_impl *Queue,
                           sycl::detail::Requirement *Req) {
     return MGraphBuilder.getOrInsertMemObjRecord(Queue, Req);
   }
@@ -153,7 +153,7 @@ class MockScheduler : public sycl::detail::Scheduler {
   sycl::detail::AllocaCommandBase *
   getOrCreateAllocaForReq(sycl::detail::MemObjRecord *Record,
                           const sycl::detail::Requirement *Req,
-                          sycl::detail::QueueImplPtr Queue,
+                          sycl::detail::queue_impl *Queue,
                           std::vector<sycl::detail::Command *> &ToEnqueue) {
     return MGraphBuilder.getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue);
   }
@@ -167,7 +167,7 @@ class MockScheduler : public sycl::detail::Scheduler {
   sycl::detail::Command *
   insertMemoryMove(sycl::detail::MemObjRecord *Record,
                    sycl::detail::Requirement *Req,
-                   const sycl::detail::QueueImplPtr &Queue,
+                   sycl::detail::queue_impl *Queue,
                    std::vector<sycl::detail::Command *> &ToEnqueue) {
     return MGraphBuilder.insertMemoryMove(Record, Req, Queue, ToEnqueue);
   }
@@ -181,7 +181,7 @@ class MockScheduler : public sycl::detail::Scheduler {
   sycl::detail::UpdateHostRequirementCommand *
   insertUpdateHostReqCmd(sycl::detail::MemObjRecord *Record,
                          sycl::detail::Requirement *Req,
-                         const sycl::detail::QueueImplPtr &Queue,
+                         sycl::detail::queue_impl *Queue,
                          std::vector<sycl::detail::Command *> &ToEnqueue) {
     return MGraphBuilder.insertUpdateHostReqCmd(Record, Req, Queue, ToEnqueue);
   }
@@ -195,7 +195,7 @@ class MockScheduler : public sycl::detail::Scheduler {
   }
 
   sycl::detail::Command *addCG(std::unique_ptr<sycl::detail::CG> CommandGroup,
-                               sycl::detail::QueueImplPtr Queue,
+                               sycl::detail::queue_impl *Queue,
                                std::vector<sycl::detail::Command *> &ToEnqueue,
                                bool EventNeeded) {
     return MGraphBuilder.addCG(std::move(CommandGroup), Queue, ToEnqueue,
@@ -220,9 +220,8 @@ sycl::detail::Requirement getMockRequirement(const MemObjT &MemObj) {
 
 class MockHandler : public sycl::handler {
 public:
-  MockHandler(std::shared_ptr<sycl::detail::queue_impl> &Queue,
-              bool CallerNeedsEvent)
-      : sycl::handler(Queue, CallerNeedsEvent) {}
+  MockHandler(sycl::detail::queue_impl &Queue, bool CallerNeedsEvent)
+      : sycl::handler(Queue.shared_from_this(), CallerNeedsEvent) {}
   // Methods
   using sycl::handler::addReduction;
   using sycl::handler::getType;
@@ -260,9 +259,7 @@ class MockHandler : public sycl::handler {
   std::shared_ptr<sycl::detail::HostTask> &getHostTask() {
     return impl->MHostTask;
   }
-  const std::shared_ptr<sycl::detail::queue_impl> getQueue() {
-    return impl->get_queue().shared_from_this();
-  }
+  sycl::detail::queue_impl *getQueue() { return impl->get_queue_or_null(); }
 
   void setType(sycl::detail::CGType Type) { impl->MCGType = Type; }
 
@@ -291,7 +288,7 @@ class MockHandler : public sycl::handler {
 
 class MockHandlerCustomFinalize : public MockHandler {
 public:
-  MockHandlerCustomFinalize(std::shared_ptr<sycl::detail::queue_impl> &Queue,
+  MockHandlerCustomFinalize(sycl::detail::queue_impl &Queue,
                             bool CallerNeedsEvent)
       : MockHandler(Queue, CallerNeedsEvent) {}
 
@@ -313,9 +310,8 @@ class MockHandlerCustomFinalize : public MockHandler {
     }
     case sycl::detail::CGType::CodeplayHostTask: {
       CommandGroup.reset(new sycl::detail::CGHostTask(
-          std::move(getHostTask()), getQueue().get(),
-          getQueue()->getContextImplPtr(), getArgs(), std::move(CGData),
-          getType(), getCodeLoc()));
+          std::move(getHostTask()), getQueue(), getQueue()->getContextImplPtr(),
+          getArgs(), std::move(CGData), getType(), getCodeLoc()));
       break;
     }
     default:
diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
index f7814562abee7..d038004b1e1e4 100644
--- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
+++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp
@@ -21,8 +21,7 @@ inline constexpr auto DisableCleanupName =
 
 class MockHandlerStreamInit : public MockHandler {
 public:
-  MockHandlerStreamInit(std::shared_ptr<detail::queue_impl> Queue,
-                        bool CallerNeedsEvent)
+  MockHandlerStreamInit(detail::queue_impl &Queue, bool CallerNeedsEvent)
       : MockHandler(Queue, CallerNeedsEvent) {}
   std::unique_ptr<detail::CG> finalize() {
     std::unique_ptr<detail::CG> CommandGroup;