From 986ed0662c024f0772b2aeb8725c751276d5d2b1 Mon Sep 17 00:00:00 2001 From: psucien Date: Fri, 5 Jul 2024 21:21:30 +0200 Subject: [PATCH 01/12] gnmdriver, amdgpu: added gpu idle IRQ; submission lock logic improved --- src/core/libraries/gnmdriver/gnmdriver.cpp | 35 +++++++++++++--------- src/core/platform.h | 1 + src/video_core/amdgpu/liverpool.cpp | 13 +------- src/video_core/amdgpu/liverpool.h | 9 ------ 4 files changed, 23 insertions(+), 35 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index b2c5b752e0d..cf1778d1eab 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -3,6 +3,7 @@ #include "common/assert.h" #include "common/config.h" +#include "common/debug.h" #include "common/logging/log.h" #include "common/path_util.h" #include "common/slot_vector.h" @@ -264,6 +265,7 @@ static_assert(CtxInitSequence400.size() == 0x61); // In case if `submitDone` is issued we need to block submissions until GPU idle static u32 submission_lock{}; +std::condition_variable cv_lock{}; static std::mutex m_submission{}; static u64 frames_submitted{}; // frame counter static bool send_init_packet{true}; // initialize HW state before first game's submit in a frame @@ -277,6 +279,18 @@ struct AscQueueInfo { static Common::SlotVector asc_queues{}; static constexpr VAddr tessellation_factors_ring_addr = 0xFF0000000ULL; +static void ResetSubmissionLock(Platform::InterruptId irq) { + std::unique_lock lock{m_submission}; + submission_lock = 0; + cv_lock.notify_all(); +} + +static void WaitGpuIdle() { + HLE_TRACE; + std::unique_lock lock{m_submission}; + cv_lock.wait(lock, [] { return submission_lock == 0; }); +} + static void DumpCommandList(std::span cmd_list, const std::string& postfix) { using namespace Common::FS; const auto dump_dir = GetUserPath(PathType::PM4Dir); @@ -465,14 +479,9 @@ void PS4_SYSV_ABI sceGnmDingDong(u32 gnm_vqid, u32 next_offs_dw) { return; } - std::unique_lock lock{m_submission}; - if (submission_lock != 0) { - liverpool->WaitGpuIdle(); - - // Suspend logic goes here + WaitGpuIdle(); - submission_lock = 0; - } + /* Suspend logic goes here */ auto vqid = gnm_vqid - 1; auto& asc_queue = asc_queues[{vqid}]; @@ -1930,13 +1939,9 @@ s32 PS4_SYSV_ABI sceGnmSubmitCommandBuffers(u32 count, const u32* dcb_gpu_addrs[ } } - if (submission_lock != 0) { - liverpool->WaitGpuIdle(); + WaitGpuIdle(); - // Suspend logic goes here - - submission_lock = 0; - } + /* Suspend logic goes here */ if (send_init_packet) { if (sdk_version <= 0x1ffffffu) { @@ -1990,7 +1995,6 @@ int PS4_SYSV_ABI sceGnmSubmitDone() { if (!liverpool->IsGpuIdle()) { submission_lock = true; } - liverpool->NotifySubmitDone(); send_init_packet = true; ++frames_submitted; return ORBIS_OK; @@ -2471,6 +2475,9 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) { sdk_version = 0; } + Platform::IrqC::Instance()->Register(Platform::InterruptId::GpuIdle, ResetSubmissionLock, + nullptr); + LIB_FUNCTION("b0xyllnVY-I", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAddEqEvent); LIB_FUNCTION("b08AgtPlHPg", "libSceGnmDriver", 1, "libSceGnmDriver", 1, 1, sceGnmAreSubmitsAllowed); diff --git a/src/core/platform.h b/src/core/platform.h index d8064c52af9..2c38dfd3913 100644 --- a/src/core/platform.h +++ b/src/core/platform.h @@ -26,6 +26,7 @@ enum class InterruptId : u32 { Compute6RelMem = 6u, GfxEop = 7u, GfxFlip = 8u, + GpuIdle = 9u, }; using IrqHandler = std::function; diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 2c3590fe6da..4bc73c671f0 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -66,21 +66,10 @@ void Liverpool::Process(std::stop_token stoken) { } } - if (submit_done) { - std::scoped_lock lk{submit_mutex}; - submit_cv.notify_all(); - submit_done = false; - } + Platform::IrqC::Instance()->Signal(Platform::InterruptId::GpuIdle); } } -void Liverpool::WaitGpuIdle() { - RENDERER_TRACE; - - std::unique_lock lk{submit_mutex}; - submit_cv.wait(lk, [this] { return num_submits == 0; }); -} - Liverpool::Task Liverpool::ProcessCeUpdate(std::span ccb) { TracyFiberEnter(ccb_task_name); diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index d18482f6e11..e12c626d9ab 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -937,18 +937,10 @@ struct Liverpool { void SubmitGfx(std::span dcb, std::span ccb); void SubmitAsc(u32 vqid, std::span acb); - void WaitGpuIdle(); - bool IsGpuIdle() const { return num_submits == 0; } - void NotifySubmitDone() { - std::scoped_lock lk{submit_mutex}; - submit_done = true; - submit_cv.notify_all(); - } - void BindRasterizer(Vulkan::Rasterizer* rasterizer_) { rasterizer = rasterizer_; } @@ -1017,7 +1009,6 @@ struct Liverpool { u32 num_submits{}; std::mutex submit_mutex; std::condition_variable_any submit_cv; - std::atomic submit_done{}; }; static_assert(GFX6_3D_REG_INDEX(ps_program) == 0x2C08); From cfbe8b9e6d413cea9138386428866fd282d9a041 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 6 Jul 2024 17:01:43 +0200 Subject: [PATCH 02/12] renderer: added support for instance step rates --- .../spirv/emit_spirv_context_get_set.cpp | 34 ++++++++++---- .../backend/spirv/spirv_emit_context.cpp | 45 ++++++++++++++++--- .../backend/spirv/spirv_emit_context.h | 3 ++ .../frontend/translate/translate.cpp | 21 +++++++-- src/shader_recompiler/runtime_info.h | 4 +- src/video_core/amdgpu/liverpool.h | 7 ++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 22 ++++++++- .../renderer_vulkan/vk_graphics_pipeline.h | 12 +++-- .../renderer_vulkan/vk_rasterizer.cpp | 7 +++ 9 files changed, 129 insertions(+), 26 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 8d8a1488529..157023b6946 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -135,15 +135,33 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) { if (IR::IsParam(attr)) { const u32 index{u32(attr) - u32(IR::Attribute::Param0)}; const auto& param{ctx.input_params.at(index)}; - if (!ValidId(param.id)) { - // Attribute is disabled or varying component is not written - return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); - } - if (param.num_components > 1) { - const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; - return ctx.OpLoad(param.component_type, pointer); + if (param.buffer_handle < 0) { + if (!ValidId(param.id)) { + // Attribute is disabled or varying component is not written + return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f); + } + + if (param.num_components > 1) { + const Id pointer{ + ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))}; + return ctx.OpLoad(param.component_type, pointer); + } else { + return ctx.OpLoad(param.component_type, param.id); + } } else { - return ctx.OpLoad(param.component_type, param.id); + const auto rate_idx = param.id.value == 0 ? ctx.u32_zero_value : ctx.u32_one_value; + const auto step_rate = ctx.OpLoad( + ctx.U32[1], + ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]), + ctx.instance_step_rates, rate_idx)); + const auto offset = ctx.OpIAdd( + ctx.U32[1], + ctx.OpIMul( + ctx.U32[1], + ctx.OpUDiv(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.instance_id), step_rate), + ctx.ConstU32(param.num_components)), + ctx.ConstU32(comp)); + return EmitReadConstBuffer(ctx, param.buffer_handle, offset); } } switch (attr) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 6d9b25470b7..617458dde79 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -171,17 +171,47 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) { void EmitContext::DefineInputs(const Info& info) { switch (stage) { - case Stage::Vertex: + case Stage::Vertex: { vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input); base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input); + instance_id = DefineVariable(U32[1], spv::BuiltIn::InstanceIndex, spv::StorageClass::Input); + + // Create push constants block for instance steps rates + const Id struct_type{Name(TypeStruct(U32[1], U32[1]), "instance_step_rates")}; + Decorate(struct_type, spv::Decoration::Block); + MemberName(struct_type, 0, "sr0"); + MemberName(struct_type, 1, "sr1"); + MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); + MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); + instance_step_rates = DefineVar(struct_type, spv::StorageClass::PushConstant); + Name(instance_step_rates, "step_rates"); + interfaces.push_back(instance_step_rates); + for (const auto& input : info.vs_inputs) { const Id type{GetAttributeType(*this, input.fmt)}; - const Id id{DefineInput(type, input.binding)}; - Name(id, fmt::format("vs_in_attr{}", input.binding)); - input_params[input.binding] = GetAttributeInfo(input.fmt, id); - interfaces.push_back(id); + if (input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate1) { + + const u32 rate_idx = + input.instance_step_rate == Info::VsInput::InstanceIdType::OverStepRate0 ? 0 + : 1; + // Note that we pass index rather than Id + input_params[input.binding] = { + rate_idx, input_u32, U32[1], input.num_components, input.instance_data_buf, + }; + } else { + Id id{DefineInput(type, input.binding)}; + if (input.instance_step_rate == Info::VsInput::InstanceIdType::Plain) { + Name(id, fmt::format("vs_instance_attr{}", input.binding)); + } else { + Name(id, fmt::format("vs_in_attr{}", input.binding)); + } + input_params[input.binding] = GetAttributeInfo(input.fmt, id); + interfaces.push_back(id); + } } break; + } case Stage::Fragment: if (info.uses_group_quad) { subgroup_local_invocation_id = DefineVariable( @@ -276,7 +306,10 @@ void EmitContext::DefineBuffers(const Info& info) { if (std::ranges::find(type_ids, record_array_type.value, &Id::value) == type_ids.end()) { Decorate(record_array_type, spv::Decoration::ArrayStride, 4); const auto name = - fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); + buffer.is_instance_data + ? fmt::format("{}_instance_data{}_{}{}", stage, i, 'f', + sizeof(float) * CHAR_BIT) + : fmt::format("{}_cbuf_block_{}{}", stage, 'f', sizeof(float) * CHAR_BIT); Name(struct_type, name); Decorate(struct_type, spv::Decoration::Block); MemberName(struct_type, 0, "data"); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index d143be4ba13..b51edd6397e 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -165,6 +165,8 @@ class EmitContext final : public Sirit::Module { Id output_position{}; Id vertex_index{}; + Id instance_id{}; + Id instance_step_rates{}; Id base_vertex{}; Id frag_coord{}; Id front_facing{}; @@ -214,6 +216,7 @@ class EmitContext final : public Sirit::Module { Id pointer_type; Id component_type; u32 num_components; + s32 buffer_handle{-1}; }; std::array input_params{}; std::array output_params{}; diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index cb6d16c34fe..ec5790ac826 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -235,9 +235,21 @@ void Translator::EmitFetch(const GcnInst& inst) { ir.SetVectorReg(dst_reg++, comp); } - if (attrib.instance_data == 2 || attrib.instance_data == 3) { - LOG_WARNING(Render_Recompiler, "Unsupported instance step rate = {}", - attrib.instance_data); + // In case of programmable step rates we need to fallback to instance data pulling in + // shader, so VBs should be bound as regular data buffers + s32 instance_buf_handle = -1; + const auto step_rate = static_cast(attrib.instance_data); + if (step_rate == Info::VsInput::OverStepRate0 || + step_rate == Info::VsInput::OverStepRate1) { + info.buffers.push_back({ + .sgpr_base = attrib.sgpr_base, + .dword_offset = attrib.dword_offset, + .stride = buffer.GetStride(), + .num_records = buffer.num_records, + .used_types = IR::Type::F32, + .is_instance_data = true, + }); + instance_buf_handle = s32(info.buffers.size() - 1); } const u32 num_components = AmdGpu::NumComponents(buffer.GetDataFmt()); @@ -247,7 +259,8 @@ void Translator::EmitFetch(const GcnInst& inst) { .num_components = std::min(attrib.num_elements, num_components), .sgpr_base = attrib.sgpr_base, .dword_offset = attrib.dword_offset, - .instance_step_rate = static_cast(attrib.instance_data), + .instance_step_rate = step_rate, + .instance_data_buf = instance_buf_handle, }); } } diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 66d32d4d7eb..6cf5c632a3a 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -77,7 +77,8 @@ struct BufferResource { u32 num_records; IR::Type used_types; AmdGpu::Buffer inline_cbuf; - bool is_storage; + bool is_storage{false}; + bool is_instance_data{false}; constexpr AmdGpu::Buffer GetVsharp(const Info& info) const noexcept; }; @@ -116,6 +117,7 @@ struct Info { u8 sgpr_base; u8 dword_offset; InstanceIdType instance_step_rate; + s32 instance_data_buf; }; boost::container::static_vector vs_inputs{}; diff --git a/src/video_core/amdgpu/liverpool.h b/src/video_core/amdgpu/liverpool.h index e12c626d9ab..536167ff13f 100644 --- a/src/video_core/amdgpu/liverpool.h +++ b/src/video_core/amdgpu/liverpool.h @@ -887,7 +887,10 @@ struct Liverpool { IndexBufferType index_buffer_type; INSERT_PADDING_WORDS(0xA2A1 - 0xA29E - 2); u32 enable_primitive_id; - INSERT_PADDING_WORDS(0xA2DF - 0xA2A1 - 1); + INSERT_PADDING_WORDS(0xA2A8 - 0xA2A1 - 1); + u32 vgt_instance_step_rate_0; + u32 vgt_instance_step_rate_1; + INSERT_PADDING_WORDS(0xA2DF - 0xA2A9 - 1); PolygonOffset poly_offset; INSERT_PADDING_WORDS(0xA2F8 - 0xA2DF - 5); AaConfig aa_config; @@ -1046,6 +1049,8 @@ static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207); static_assert(GFX6_3D_REG_INDEX(index_size) == 0xA29D); static_assert(GFX6_3D_REG_INDEX(index_buffer_type) == 0xA29F); static_assert(GFX6_3D_REG_INDEX(enable_primitive_id) == 0xA2A1); +static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_0) == 0xA2A8); +static_assert(GFX6_3D_REG_INDEX(vgt_instance_step_rate_1) == 0xA2A9); static_assert(GFX6_3D_REG_INDEX(poly_offset) == 0xA2DF); static_assert(GFX6_3D_REG_INDEX(aa_config) == 0xA2F8); static_assert(GFX6_3D_REG_INDEX(color_buffers[0].base_address) == 0xA318); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 7b7eda442f4..01901836924 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -30,12 +30,19 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul stages[i] = *infos[i]; } BuildDescSetLayout(); + + const vk::PushConstantRange push_constants = { + .stageFlags = vk::ShaderStageFlagBits::eVertex, + .offset = 0, + .size = 2 * sizeof(u32), + }; + const vk::DescriptorSetLayout set_layout = *desc_layout; const vk::PipelineLayoutCreateInfo layout_info = { .setLayoutCount = 1U, .pSetLayouts = &set_layout, - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constants, }; pipeline_layout = instance.GetDevice().createPipelineLayoutUnique(layout_info); @@ -43,6 +50,12 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul boost::container::static_vector attributes; const auto& vs_info = stages[0]; for (const auto& input : vs_info.vs_inputs) { + if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + // Skip attribute binding as the data will be pulled by shader + continue; + } + const auto buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); attributes.push_back({ .location = input.binding, @@ -420,6 +433,11 @@ void GraphicsPipeline::BindVertexBuffers(StreamBuffer& staging) const { // Calculate buffers memory overlaps boost::container::static_vector ranges{}; for (const auto& input : vs_info.vs_inputs) { + if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 || + input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) { + continue; + } + const auto& buffer = vs_info.ReadUd(input.sgpr_base, input.dword_offset); if (buffer.GetSize() == 0) { continue; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 060a269536b..ab8be78fba6 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -67,20 +67,24 @@ class GraphicsPipeline { void BindResources(Core::MemoryManager* memory, StreamBuffer& staging, VideoCore::TextureCache& texture_cache) const; - [[nodiscard]] vk::Pipeline Handle() const noexcept { + vk::Pipeline Handle() const noexcept { return *pipeline; } - [[nodiscard]] bool IsEmbeddedVs() const noexcept { + vk::PipelineLayout GetLayout() const { + return *pipeline_layout; + } + + bool IsEmbeddedVs() const noexcept { static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f; return key.stage_hashes[0] == EmbeddedVsHash; } - [[nodiscard]] auto GetWriteMasks() const { + auto GetWriteMasks() const { return key.write_masks; } - [[nodiscard]] bool IsDepthEnabled() const { + bool IsDepthEnabled() const { return key.depth.depth_enable.Value(); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6440ebc7612..f7059f17c6a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -54,6 +54,13 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) { UpdateDynamicState(*pipeline); cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + + const u32 step_rates[] = { + regs.vgt_instance_step_rate_0, + regs.vgt_instance_step_rate_1, + }; + cmdbuf.pushConstants(pipeline->GetLayout(), vk::ShaderStageFlagBits::eVertex, 0u, + sizeof(step_rates), &step_rates); if (is_indexed) { cmdbuf.drawIndexed(num_indices, regs.num_instances.NumInstances(), 0, 0, 0); } else { From 1d608427ed996b6491037b5de0d1a8ea57b8eb69 Mon Sep 17 00:00:00 2001 From: psucien Date: Sat, 6 Jul 2024 20:13:12 +0200 Subject: [PATCH 03/12] renderer: don't let unbound color attachment affect pass's rendering area --- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 50274604049..bf4bbc1030c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -183,7 +183,7 @@ void PipelineCache::RefreshGraphicsKey() { int remapped_cb{}; for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; - if (!col_buf || skip_cb_binding) { + if (skip_cb_binding || !col_buf || !regs.color_target_mask.GetMask(cb)) { continue; } const auto base_format = diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f7059f17c6a..997fcead280 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -106,6 +106,12 @@ void Rasterizer::BeginRendering() { continue; } + // If the color buffer is still bound but rendering to it is disabled by the target mask, + // we need to prevent the render area from being affected by unbound render target extents. + if (!regs.color_target_mask.GetMask(col_buf_id)) { + continue; + } + const auto& hint = liverpool->last_cb_extent[col_buf_id]; const auto& image_view = texture_cache.RenderTarget(col_buf, hint); const auto& image = texture_cache.GetImage(image_view.image_id); From bf4bf4ccb2ec5996d31e52ad570e6da00a9d6b04 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 7 Jul 2024 13:00:52 +0200 Subject: [PATCH 04/12] recompiler: fix for gather4 components return --- src/shader_recompiler/frontend/translate/vector_memory.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 21f3abcace6..f0ef85b3bb8 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -165,13 +165,14 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) { if (!flags.test(MimgModifier::Pcf)) { return ir.ImageGather(handle, body, offset, {}, info); } + ASSERT(mimg.dmask & 1); // should be always 1st (R) component return ir.ImageGatherDref(handle, body, offset, {}, dref, info); }(); + // For gather4 instructions dmask selects which component to read and must have + // only one bit set to 1 + ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask"); for (u32 i = 0; i < 4; i++) { - if (((mimg.dmask >> i) & 1) == 0) { - continue; - } const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)}; ir.SetVectorReg(dest_reg++, value); } From 19c85c78cf7cdb7097222b2c4fb113188ad5a911 Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 7 Jul 2024 13:08:39 +0200 Subject: [PATCH 05/12] recompiler: switch instance data to storage buffers --- src/shader_recompiler/frontend/translate/translate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index ec5790ac826..f289933b786 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -247,6 +247,7 @@ void Translator::EmitFetch(const GcnInst& inst) { .stride = buffer.GetStride(), .num_records = buffer.num_records, .used_types = IR::Type::F32, + .is_storage = true, // we may not fit into UBO with large meshes .is_instance_data = true, }); instance_buf_handle = s32(info.buffers.size() - 1); From c7af8df2ad925540f597173e47c5ff637f15ab7a Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 7 Jul 2024 13:47:01 +0200 Subject: [PATCH 06/12] gnmdriver: added `sceGnmFindResourcesPublic` stub --- src/core/libraries/gnmdriver/gnmdriver.cpp | 10 +++++----- src/core/libraries/gnmdriver/gnmdriver.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/core/libraries/gnmdriver/gnmdriver.cpp b/src/core/libraries/gnmdriver/gnmdriver.cpp index cf1778d1eab..653607af54a 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.cpp +++ b/src/core/libraries/gnmdriver/gnmdriver.cpp @@ -872,9 +872,9 @@ int PS4_SYSV_ABI sceGnmEndWorkload() { return ORBIS_OK; } -int PS4_SYSV_ABI sceGnmFindResourcesPublic() { - LOG_ERROR(Lib_GnmDriver, "(STUBBED) called"); - return ORBIS_OK; +s32 PS4_SYSV_ABI sceGnmFindResourcesPublic() { + LOG_TRACE(Lib_GnmDriver, "called"); + return ORBIS_GNM_ERROR_FAILURE; // not available in retail FW } void PS4_SYSV_ABI sceGnmFlushGarlic() { @@ -1330,7 +1330,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedPsShader(u32* cmdbuf, u32 size, u32 shader_id, if (shader_id > 1) { LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id); - return 0x8eee00ff; + return ORBIS_GNM_ERROR_FAILURE; } // clang-format off @@ -1400,7 +1400,7 @@ s32 PS4_SYSV_ABI sceGnmSetEmbeddedVsShader(u32* cmdbuf, u32 size, u32 shader_id, if (shader_id != 0) { LOG_ERROR(Lib_GnmDriver, "Unknown shader id {}", shader_id); - return 0x8eee00ff; + return ORBIS_GNM_ERROR_FAILURE; } // A fullscreen triangle with one uv set diff --git a/src/core/libraries/gnmdriver/gnmdriver.h b/src/core/libraries/gnmdriver/gnmdriver.h index 08099bccf02..808cdf51a76 100644 --- a/src/core/libraries/gnmdriver/gnmdriver.h +++ b/src/core/libraries/gnmdriver/gnmdriver.h @@ -75,7 +75,7 @@ int PS4_SYSV_ABI sceGnmDriverInternalVirtualQuery(); int PS4_SYSV_ABI sceGnmDriverTraceInProgress(); int PS4_SYSV_ABI sceGnmDriverTriggerCapture(); int PS4_SYSV_ABI sceGnmEndWorkload(); -int PS4_SYSV_ABI sceGnmFindResourcesPublic(); +s32 PS4_SYSV_ABI sceGnmFindResourcesPublic(); void PS4_SYSV_ABI sceGnmFlushGarlic(); int PS4_SYSV_ABI sceGnmGetCoredumpAddress(); int PS4_SYSV_ABI sceGnmGetCoredumpMode(); From 6dbb842bec3a63c68a22abb516818d7539961d2f Mon Sep 17 00:00:00 2001 From: psucien Date: Sun, 7 Jul 2024 14:20:12 +0200 Subject: [PATCH 07/12] renderer: a bit more formats to support --- .../backend/spirv/spirv_emit_context.cpp | 8 ++++++++ src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 10 ++++++++++ src/video_core/texture_cache/tile_manager.cpp | 4 ++++ 3 files changed, 22 insertions(+) diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 617458dde79..61ed29d5a77 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -350,6 +350,14 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) { image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::Rg32f; } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { + return spv::ImageFormat::Rg32ui; + } + if (image.GetDataFmt() == AmdGpu::DataFormat::Format32_32_32_32 && + image.GetNumberFmt() == AmdGpu::NumberFormat::Uint) { + return spv::ImageFormat::Rgba32ui; + } if (image.GetDataFmt() == AmdGpu::DataFormat::Format16 && image.GetNumberFmt() == AmdGpu::NumberFormat::Float) { return spv::ImageFormat::R16f; diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index dca7ff3d225..64a3b7f0ce3 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -321,6 +321,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::FormatBc4 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc4UnormBlock; } + if (data_format == AmdGpu::DataFormat::FormatBc5 && num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eBc5UnormBlock; + } if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && num_format == AmdGpu::NumberFormat::Sint) { return vk::Format::eR16G16B16A16Sint; @@ -366,6 +369,9 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eR8G8Unorm; } + if (data_format == AmdGpu::DataFormat::Format8_8 && num_format == AmdGpu::NumberFormat::Snorm) { + return vk::Format::eR8G8Snorm; + } if (data_format == AmdGpu::DataFormat::FormatBc7 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eBc7UnormBlock; } @@ -429,6 +435,10 @@ vk::Format SurfaceFormat(AmdGpu::DataFormat data_format, AmdGpu::NumberFormat nu if (data_format == AmdGpu::DataFormat::Format16 && num_format == AmdGpu::NumberFormat::Unorm) { return vk::Format::eR16Unorm; } + if (data_format == AmdGpu::DataFormat::Format16_16_16_16 && + num_format == AmdGpu::NumberFormat::Unorm) { + return vk::Format::eR16G16B16A16Unorm; + } UNREACHABLE_MSG("Unknown data_format={} and num_format={}", u32(data_format), u32(num_format)); } diff --git a/src/video_core/texture_cache/tile_manager.cpp b/src/video_core/texture_cache/tile_manager.cpp index 51af4ad8064..c780ca60167 100644 --- a/src/video_core/texture_cache/tile_manager.cpp +++ b/src/video_core/texture_cache/tile_manager.cpp @@ -189,10 +189,14 @@ vk::Format DemoteImageFormatForDetiling(vk::Format format) { case vk::Format::eR32Uint: return vk::Format::eR32Uint; case vk::Format::eBc1RgbaUnormBlock: + case vk::Format::eBc4UnormBlock: case vk::Format::eR32G32Sfloat: return vk::Format::eR32G32Uint; + case vk::Format::eBc2SrgbBlock: + case vk::Format::eBc2UnormBlock: case vk::Format::eBc3SrgbBlock: case vk::Format::eBc3UnormBlock: + case vk::Format::eBc5UnormBlock: case vk::Format::eBc7SrgbBlock: case vk::Format::eBc7UnormBlock: return vk::Format::eR32G32B32A32Uint; From a9f482cdfcfa13dd36feb33e8d6eb9f3dcb82678 Mon Sep 17 00:00:00 2001 From: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Sun, 7 Jul 2024 22:30:52 +0300 Subject: [PATCH 08/12] config: Disable splash by default --- src/common/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index f23b9d6524e..a577b143a97 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -18,7 +18,7 @@ static std::string logFilter; static std::string logType = "sync"; static bool isDebugDump = false; static bool isLibc = true; -static bool isShowSplash = true; +static bool isShowSplash = false; static bool isNullGpu = false; static bool shouldDumpShaders = false; static bool shouldDumpPM4 = false; From 0619af24a7059cb8140a9c003bc0059f513beee3 Mon Sep 17 00:00:00 2001 From: offtkp Date: Mon, 8 Jul 2024 01:17:42 +0300 Subject: [PATCH 09/12] Eliminate compiler warning --- src/video_core/renderer_vulkan/liverpool_to_vk.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp index 64a3b7f0ce3..4280e6e69fd 100644 --- a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp +++ b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp @@ -222,6 +222,8 @@ vk::CompareOp DepthCompare(AmdGpu::DepthCompare comp) { return vk::CompareOp::eGreaterOrEqual; case AmdGpu::DepthCompare::Always: return vk::CompareOp::eAlways; + default: + UNREACHABLE(); } } From 574b8a458cb4fee161ade36f7311886685a87f76 Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Mon, 8 Jul 2024 01:29:01 -0300 Subject: [PATCH 10/12] Fix sceAudioOutOpen not handling audio param attributes and returning error incorrectly when some mask is applied --- src/audio_core/sdl_audio.cpp | 27 +++++++++--------- src/audio_core/sdl_audio.h | 2 +- src/core/libraries/audio/audioout.cpp | 40 +++++++++++++++++++++------ src/core/libraries/audio/audioout.h | 22 +++++++++++++-- 4 files changed, 66 insertions(+), 25 deletions(-) diff --git a/src/audio_core/sdl_audio.cpp b/src/audio_core/sdl_audio.cpp index 6a2a70e3101..0d494707dc8 100644 --- a/src/audio_core/sdl_audio.cpp +++ b/src/audio_core/sdl_audio.cpp @@ -11,8 +11,8 @@ namespace Audio { int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq, - Libraries::AudioOut::OrbisAudioOutParam format) { - using Libraries::AudioOut::OrbisAudioOutParam; + Libraries::AudioOut::OrbisAudioOutParamFormat format) { + using Libraries::AudioOut::OrbisAudioOutParamFormat; std::scoped_lock lock{m_mutex}; for (int id = 0; id < portsOut.size(); id++) { auto& port = portsOut[id]; @@ -24,42 +24,42 @@ int SDLAudio::AudioOutOpen(int type, u32 samples_num, u32 freq, port.format = format; SDL_AudioFormat sampleFormat; switch (format) { - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO: sampleFormat = SDL_AUDIO_S16; port.channels_num = 1; port.sample_size = 2; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_MONO: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_MONO: sampleFormat = SDL_AUDIO_F32; port.channels_num = 1; port.sample_size = 4; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_STEREO: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_STEREO: sampleFormat = SDL_AUDIO_S16; port.channels_num = 2; port.sample_size = 2; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_STEREO: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_STEREO: sampleFormat = SDL_AUDIO_F32; port.channels_num = 2; port.sample_size = 4; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH: sampleFormat = SDL_AUDIO_S16; port.channels_num = 8; port.sample_size = 2; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH: sampleFormat = SDL_AUDIO_F32; port.channels_num = 8; port.sample_size = 4; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD: sampleFormat = SDL_AUDIO_S16; port.channels_num = 8; port.sample_size = 2; break; - case OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD: + case OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD: sampleFormat = SDL_AUDIO_F32; port.channels_num = 8; port.sample_size = 4; @@ -108,7 +108,7 @@ s32 SDLAudio::AudioOutOutput(s32 handle, const void* ptr) { } bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) { - using Libraries::AudioOut::OrbisAudioOutParam; + using Libraries::AudioOut::OrbisAudioOutParamFormat; std::scoped_lock lock{m_mutex}; auto& port = portsOut[handle - 1]; if (!port.isOpen) { @@ -119,8 +119,9 @@ bool SDLAudio::AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume) { if (bit == 1) { int src_index = i; - if (port.format == OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD || - port.format == OrbisAudioOutParam::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD) { + if (port.format == + OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD || + port.format == OrbisAudioOutParamFormat::ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH_STD) { switch (i) { case 4: src_index = 6; diff --git a/src/audio_core/sdl_audio.h b/src/audio_core/sdl_audio.h index 7d0b0b802f8..d20c445598b 100644 --- a/src/audio_core/sdl_audio.h +++ b/src/audio_core/sdl_audio.h @@ -15,7 +15,7 @@ class SDLAudio { virtual ~SDLAudio() = default; int AudioOutOpen(int type, u32 samples_num, u32 freq, - Libraries::AudioOut::OrbisAudioOutParam format); + Libraries::AudioOut::OrbisAudioOutParamFormat format); s32 AudioOutOutput(s32 handle, const void* ptr); bool AudioOutSetVolume(s32 handle, s32 bitflag, s32* volume); bool AudioOutGetStatus(s32 handle, int* type, int* channels_num); diff --git a/src/core/libraries/audio/audioout.cpp b/src/core/libraries/audio/audioout.cpp index 0ec4fe15fcb..cc7ce342a34 100644 --- a/src/core/libraries/audio/audioout.cpp +++ b/src/core/libraries/audio/audioout.cpp @@ -33,7 +33,7 @@ static std::string_view GetAudioOutPort(u32 port) { } } -static std::string_view GetAudioOutParam(u32 param) { +static std::string_view GetAudioOutParamFormat(OrbisAudioOutParamFormat param) { switch (param) { case ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO: return "S16_MONO"; @@ -56,6 +56,19 @@ static std::string_view GetAudioOutParam(u32 param) { } } +static std::string_view GetAudioOutParamAttr(OrbisAudioOutParamAttr attr) { + switch (attr) { + case ORBIS_AUDIO_OUT_PARAM_ATTR_NONE: + return "NONE"; + case ORBIS_AUDIO_OUT_PARAM_ATTR_RESTRICTED: + return "RESTRICTED"; + case ORBIS_AUDIO_OUT_PARAM_ATTR_MIX_TO_MAIN: + return "MIX_TO_MAIN"; + default: + return "INVALID"; + } +} + int PS4_SYSV_ABI sceAudioOutDeviceIdOpen() { LOG_ERROR(Lib_AudioOut, "(STUBBED) called"); return ORBIS_OK; @@ -259,12 +272,14 @@ int PS4_SYSV_ABI sceAudioOutMbusInit() { s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, OrbisAudioOutPort port_type, s32 index, u32 length, - u32 sample_rate, OrbisAudioOutParam param_type) { + u32 sample_rate, + OrbisAudioOutParamExtendedInformation param_type) { LOG_INFO(Lib_AudioOut, "AudioOutOpen id = {} port_type = {} index = {} lenght= {} sample_rate = {} " - "param_type = {}", + "param_type = {} attr = {}", user_id, GetAudioOutPort(port_type), index, length, sample_rate, - GetAudioOutParam(param_type)); + GetAudioOutParamFormat(param_type.data_format), + GetAudioOutParamAttr(param_type.attributes)); if ((port_type < 0 || port_type > 4) && (port_type != 127)) { LOG_ERROR(Lib_AudioOut, "Invalid port type"); return ORBIS_AUDIO_OUT_ERROR_INVALID_PORT_TYPE; @@ -273,10 +288,6 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, LOG_ERROR(Lib_AudioOut, "Invalid sample rate"); return ORBIS_AUDIO_OUT_ERROR_INVALID_SAMPLE_FREQ; } - if (param_type < 0 || param_type > 7) { - LOG_ERROR(Lib_AudioOut, "Invalid format"); - return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; - } if (length != 256 && length != 512 && length != 768 && length != 1024 && length != 1280 && length != 1536 && length != 1792 && length != 2048) { LOG_ERROR(Lib_AudioOut, "Invalid length"); @@ -285,7 +296,18 @@ s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, if (index != 0) { LOG_ERROR(Lib_AudioOut, "index is not valid !=0 {}", index); } - int result = audio->AudioOutOpen(port_type, length, sample_rate, param_type); + OrbisAudioOutParamFormat format = param_type.data_format; + if (format < 0 || format > 7) { + LOG_ERROR(Lib_AudioOut, "Invalid format"); + return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; + } + OrbisAudioOutParamAttr attr = param_type.attributes; + if (attr < 0 || attr > 2) { + // TODO Handle attributes in output audio device + LOG_ERROR(Lib_AudioOut, "Invalid format attribute"); + return ORBIS_AUDIO_OUT_ERROR_INVALID_FORMAT; + } + int result = audio->AudioOutOpen(port_type, length, sample_rate, format); if (result == -1) { LOG_ERROR(Lib_AudioOut, "Audio ports are full"); return ORBIS_AUDIO_OUT_ERROR_PORT_FULL; diff --git a/src/core/libraries/audio/audioout.h b/src/core/libraries/audio/audioout.h index c5a62dc69ab..d29eea7c367 100644 --- a/src/core/libraries/audio/audioout.h +++ b/src/core/libraries/audio/audioout.h @@ -3,6 +3,8 @@ #pragma once +#include "common/bit_field.h" + #include "core/libraries/system/userservice.h" namespace Libraries::AudioOut { @@ -18,7 +20,7 @@ enum OrbisAudioOutPort { ORBIS_AUDIO_OUT_PORT_TYPE_AUX = 127 }; -enum OrbisAudioOutParam { +enum OrbisAudioOutParamFormat { ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_MONO = 0, ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_STEREO = 1, ORBIS_AUDIO_OUT_PARAM_FORMAT_S16_8CH = 2, @@ -29,6 +31,22 @@ enum OrbisAudioOutParam { ORBIS_AUDIO_OUT_PARAM_FORMAT_FLOAT_8CH_STD = 7 }; +enum OrbisAudioOutParamAttr { + ORBIS_AUDIO_OUT_PARAM_ATTR_NONE = 0, + ORBIS_AUDIO_OUT_PARAM_ATTR_RESTRICTED = 1, + ORBIS_AUDIO_OUT_PARAM_ATTR_MIX_TO_MAIN = 2, +}; + +struct OrbisAudioOutParamExtendedInformation { + union { + BitField<0, 8, OrbisAudioOutParamFormat> data_format; + BitField<8, 8, u32> reserve0; + BitField<16, 4, OrbisAudioOutParamAttr> attributes; + BitField<20, 10, u32> reserve1; + BitField<31, 1, u32> unused; + }; +}; + struct OrbisAudioOutOutputParam { s32 handle; const void* ptr; @@ -80,7 +98,7 @@ int PS4_SYSV_ABI sceAudioOutMasteringTerm(); int PS4_SYSV_ABI sceAudioOutMbusInit(); s32 PS4_SYSV_ABI sceAudioOutOpen(UserService::OrbisUserServiceUserId user_id, OrbisAudioOutPort port_type, s32 index, u32 length, - u32 sample_rate, OrbisAudioOutParam param_type); + u32 sample_rate, OrbisAudioOutParamExtendedInformation param_type); int PS4_SYSV_ABI sceAudioOutOpenEx(); s32 PS4_SYSV_ABI sceAudioOutOutput(s32 handle, const void* ptr); s32 PS4_SYSV_ABI sceAudioOutOutputs(OrbisAudioOutOutputParam* param, u32 num); From 2620919f0bc89822dec2fd30bc8dbe3f8bc63709 Mon Sep 17 00:00:00 2001 From: Stolas Date: Mon, 8 Jul 2024 19:24:12 +1000 Subject: [PATCH 11/12] Added Legacy Min/Max ops (#266) * Forwarding V_MAX_LEGACY_F32 to V_MAX3_F32. Fixes Translation error in Geometry Wars 3. * Forwarded to correct op * Implemented Legacy Max/Min using NMax/NMin * Added extra argument to Min/Max op codes * Removed extra translator functions, replaced with bool * Formatting --- .../backend/spirv/emit_spirv_floating_point.cpp | 12 ++++++++++-- .../backend/spirv/emit_spirv_instructions.h | 4 ++-- .../frontend/translate/translate.cpp | 6 ++++++ .../frontend/translate/translate.h | 4 ++-- .../frontend/translate/vector_alu.cpp | 8 ++++---- src/shader_recompiler/ir/ir_emitter.cpp | 15 +++++++++++---- src/shader_recompiler/ir/ir_emitter.h | 4 ++-- src/shader_recompiler/ir/opcodes.inc | 4 ++-- 8 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp index 04b0b96e1b9..ce95b37094b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp @@ -51,7 +51,11 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) { return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c)); } -Id EmitFPMax32(EmitContext& ctx, Id a, Id b) { +Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy) { + if (is_legacy) { + return ctx.OpNMax(ctx.F32[1], a, b); + } + return ctx.OpFMax(ctx.F32[1], a, b); } @@ -59,7 +63,11 @@ Id EmitFPMax64(EmitContext& ctx, Id a, Id b) { return ctx.OpFMax(ctx.F64[1], a, b); } -Id EmitFPMin32(EmitContext& ctx, Id a, Id b) { +Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy) { + if (is_legacy) { + return ctx.OpNMin(ctx.F32[1], a, b); + } + return ctx.OpFMin(ctx.F32[1], a, b); } diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index acbaf9969f9..495ada5de0d 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -165,9 +165,9 @@ Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c); -Id EmitFPMax32(EmitContext& ctx, Id a, Id b); +Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false); Id EmitFPMax64(EmitContext& ctx, Id a, Id b); -Id EmitFPMin32(EmitContext& ctx, Id a, Id b); +Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false); Id EmitFPMin64(EmitContext& ctx, Id a, Id b); Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b); Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index f289933b786..4ec4128a720 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -639,6 +639,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::V_MIN3_F32: translator.V_MIN3_F32(inst); break; + case Opcode::V_MIN_LEGACY_F32: + translator.V_MIN_F32(inst, true); + break; case Opcode::V_MADMK_F32: translator.V_MADMK_F32(inst); break; @@ -889,6 +892,9 @@ void Translate(IR::Block* block, u32 block_base, std::span inst_l case Opcode::V_MAD_LEGACY_F32: translator.V_MAD_F32(inst); break; + case Opcode::V_MAX_LEGACY_F32: + translator.V_MAX_F32(inst, true); + break; case Opcode::V_RSQ_LEGACY_F32: case Opcode::V_RSQ_CLAMP_F32: translator.V_RSQ_F32(inst); diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index e1f72e5f8fc..f8ea773b751 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -111,14 +111,14 @@ class Translator { void V_RCP_F32(const GcnInst& inst); void V_FMA_F32(const GcnInst& inst); void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst); - void V_MAX_F32(const GcnInst& inst); + void V_MAX_F32(const GcnInst& inst, bool is_legacy = false); void V_MAX_U32(bool is_signed, const GcnInst& inst); void V_RSQ_F32(const GcnInst& inst); void V_SIN_F32(const GcnInst& inst); void V_LOG_F32(const GcnInst& inst); void V_EXP_F32(const GcnInst& inst); void V_SQRT_F32(const GcnInst& inst); - void V_MIN_F32(const GcnInst& inst); + void V_MIN_F32(const GcnInst& inst, bool is_legacy = false); void V_MIN3_F32(const GcnInst& inst); void V_MADMK_F32(const GcnInst& inst); void V_CUBEMA_F32(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index a434567a0b7..7bf24346a88 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -203,10 +203,10 @@ void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) { } } -void Translator::V_MAX_F32(const GcnInst& inst) { +void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) { const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::F32 src1{GetSrc(inst.src[1], true)}; - SetDst(inst.dst[0], ir.FPMax(src0, src1)); + SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy)); } void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) { @@ -240,10 +240,10 @@ void Translator::V_SQRT_F32(const GcnInst& inst) { SetDst(inst.dst[0], ir.FPSqrt(src0)); } -void Translator::V_MIN_F32(const GcnInst& inst) { +void Translator::V_MIN_F32(const GcnInst& inst, bool is_legacy) { const IR::F32 src0{GetSrc(inst.src[0], true)}; const IR::F32 src1{GetSrc(inst.src[1], true)}; - SetDst(inst.dst[0], ir.FPMin(src0, src1)); + SetDst(inst.dst[0], ir.FPMin(src0, src1, is_legacy)); } void Translator::V_MIN3_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index 7f0fa741e21..44128f23678 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -865,28 +865,35 @@ U1 IREmitter::FPUnordered(const F32F64& lhs, const F32F64& rhs) { return LogicalOr(FPIsNan(lhs), FPIsNan(rhs)); } -F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs) { +F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy) { if (lhs.Type() != rhs.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } + switch (lhs.Type()) { case Type::F32: - return Inst(Opcode::FPMax32, lhs, rhs); + return Inst(Opcode::FPMax32, lhs, rhs, is_legacy); case Type::F64: + if (is_legacy) { + UNREACHABLE_MSG("F64 cannot be used with LEGACY ops"); + } return Inst(Opcode::FPMax64, lhs, rhs); default: ThrowInvalidType(lhs.Type()); } } -F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs) { +F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy) { if (lhs.Type() != rhs.Type()) { UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); } switch (lhs.Type()) { case Type::F32: - return Inst(Opcode::FPMin32, lhs, rhs); + return Inst(Opcode::FPMin32, lhs, rhs, is_legacy); case Type::F64: + if (is_legacy) { + UNREACHABLE_MSG("F64 cannot be used with LEGACY ops"); + } return Inst(Opcode::FPMin64, lhs, rhs); default: ThrowInvalidType(lhs.Type()); diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index c3342530bb9..51ab9d03001 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -149,8 +149,8 @@ class IREmitter { [[nodiscard]] U1 FPIsInf(const F32F64& value); [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs); [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs); - [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs); - [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs); + [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false); + [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] Value IAddCary(const U32& a, const U32& b); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index 47bc4248ad8..c22db3e0761 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -154,9 +154,9 @@ OPCODE(FPAdd64, F64, F64, OPCODE(FPSub32, F32, F32, F32, ) OPCODE(FPFma32, F32, F32, F32, F32, ) OPCODE(FPFma64, F64, F64, F64, F64, ) -OPCODE(FPMax32, F32, F32, F32, ) +OPCODE(FPMax32, F32, F32, F32, U1, ) OPCODE(FPMax64, F64, F64, F64, ) -OPCODE(FPMin32, F32, F32, F32, ) +OPCODE(FPMin32, F32, F32, F32, U1, ) OPCODE(FPMin64, F64, F64, F64, ) OPCODE(FPMul32, F32, F32, F32, ) OPCODE(FPMul64, F64, F64, F64, ) From 64e283c948c8f3c6dfd310007621f685de76298e Mon Sep 17 00:00:00 2001 From: TreezZ <105643013+BigTreezZ@users.noreply.github.com> Date: Mon, 8 Jul 2024 10:25:04 +0100 Subject: [PATCH 12/12] Update building-linux.md with full instructions (#260) * Update building-linux.md with full instructions * Update documents/building-linux.md Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> * Update documents/building-linux.md Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> * Update building-linux.md with build-essentials * Update building-linux.md --------- Co-authored-by: TheTurtle <47210458+raphaelthegreat@users.noreply.github.com> --- documents/building-linux.md | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/documents/building-linux.md b/documents/building-linux.md index 9124fd9d73a..0fa22ebf4d4 100644 --- a/documents/building-linux.md +++ b/documents/building-linux.md @@ -5,6 +5,35 @@ SPDX-License-Identifier: GPL-2.0-or-later ## Build shadPS4 for Linux +### Install the necessary tools to build shadPS4: + +#### Debian & Ubuntu +``` +sudo apt-get install build-essential libasound2-dev libpulse-dev libopenal-dev zlib1g-dev libedit-dev libvulkan-dev libudev-dev git libevdev-dev libsdl2-2.0 libsdl2-dev libjack-dev libsndio-dev +``` + +#### Fedora +``` +sudo dnf install alsa-lib-devel cmake libatomic libevdev-devel libudev-devel openal-devel qt6-qtbase-devel qt6-qtbase-private-devel vulkan-devel pipewire-jack-audio-connection-kit-devel qt6-qtmultimedia-devel qt6-qtsvg-devel +``` + +#### Arch Linux +``` +sudo pacman -S openal cmake vulkan-validation-layers qt6-base qt6-declarative qt6-multimedia sdl2 sndio jack2 base-devel +``` + +#### OpenSUSE +``` +sudo zypper install git cmake libasound2 libpulse-devel openal-soft-devel zlib-devel libedit-devel vulkan-devel libudev-devel libqt6-qtbase-devel libqt6-qtmultimedia-devel libqt6-qtsvg-devel libQt6Gui-private-headers-devel libevdev-devel libsndio7_1 libjack-devel +``` +### Cloning and compiling: + +Clone the repository recursively: +``` +git clone --recursive https://github.com/shadps4-emu/shadPS4.git +cd shadPS4 +``` + Generate the build directory in the shadPS4 directory: ``` cmake -S . -B build/ @@ -17,5 +46,11 @@ cd build/ Use make to build the project: ``` -make -j$(nproc) +cmake --build . --parallel +``` + +Now run the emulator: + +``` +./shadps4 /"PATH"/"TO"/"GAME"/"FOLDER"/eboot.bin ```