Skip to content

Commit

Permalink
vk_pipeline_cache: Avoid recompiling new shaders on each new PL (shad…
Browse files Browse the repository at this point in the history
…ps4-emu#480)

* cfg: Add one more divergence case

* Seen in RDR shaders

* renderer_vulkan: Reduce number of compiled shaders

* vk_pipeline_cache: Remove some unnecessary checks
  • Loading branch information
raphaelthegreat authored Aug 20, 2024
1 parent 32cb364 commit 3f9c86a
Show file tree
Hide file tree
Showing 10 changed files with 125 additions and 86 deletions.
7 changes: 7 additions & 0 deletions src/core/libraries/kernel/time_management.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,20 @@ int PS4_SYSV_ABI sceKernelGettimeofday(OrbisKernelTimeval* tp) {
return ORBIS_KERNEL_ERROR_EFAULT;
}

#ifdef _WIN64
auto now = std::chrono::system_clock::now();
auto duration = now.time_since_epoch();
auto seconds = std::chrono::duration_cast<std::chrono::seconds>(duration);
auto microsecs = std::chrono::duration_cast<std::chrono::microseconds>(duration - seconds);

tp->tv_sec = seconds.count();
tp->tv_usec = microsecs.count();
#else
timeval tv;
gettimeofday(&tv, nullptr);
tp->tv_sec = tv.tv_sec;
tp->tv_usec = tv.tv_usec;
#endif
return ORBIS_OK;
}

Expand Down
5 changes: 3 additions & 2 deletions src/shader_recompiler/frontend/control_flow_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ static IR::Condition MakeCondition(Opcode opcode) {
return IR::Condition::Execnz;
case Opcode::S_AND_SAVEEXEC_B64:
case Opcode::S_ANDN2_B64:
case Opcode::V_CMPX_NE_U32:
return IR::Condition::Execnz;
default:
return IR::Condition::True;
Expand Down Expand Up @@ -93,7 +94,7 @@ void CFG::EmitDivergenceLabels() {
// While this instruction does not save EXEC it is often used paired
// with SAVEEXEC to mask the threads that didn't pass the condition
// of initial branch.
inst.opcode == Opcode::S_ANDN2_B64;
inst.opcode == Opcode::S_ANDN2_B64 || inst.opcode == Opcode::V_CMPX_NE_U32;
};
const auto is_close_scope = [](const GcnInst& inst) {
// Closing an EXEC scope can be either a branch instruction
Expand Down Expand Up @@ -187,7 +188,7 @@ void CFG::LinkBlocks() {
const auto end_inst{block.end_inst};
// Handle divergence block inserted here.
if (end_inst.opcode == Opcode::S_AND_SAVEEXEC_B64 ||
end_inst.opcode == Opcode::S_ANDN2_B64) {
end_inst.opcode == Opcode::S_ANDN2_B64 || end_inst.opcode == Opcode::V_CMPX_NE_U32) {
// Blocks are stored ordered by address in the set
auto next_it = std::next(it);
auto* target_block = &(*next_it);
Expand Down
27 changes: 14 additions & 13 deletions src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,19 @@
namespace Vulkan {

ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler_,
vk::PipelineCache pipeline_cache, const Shader::Info* info_,
u64 compute_key_, vk::ShaderModule module)
: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_}, info{*info_} {
vk::PipelineCache pipeline_cache, u64 compute_key_,
const Program* program)
: instance{instance_}, scheduler{scheduler_}, compute_key{compute_key_},
info{&program->pgm.info} {
const vk::PipelineShaderStageCreateInfo shader_ci = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = module,
.module = program->module,
.pName = "main",
};

u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
for (const auto& buffer : info.buffers) {
for (const auto& buffer : info->buffers) {
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
Expand All @@ -32,7 +33,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& image : info.images) {
for (const auto& image : info->images) {
bindings.push_back({
.binding = binding++,
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
Expand All @@ -41,7 +42,7 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
.stageFlags = vk::ShaderStageFlagBits::eCompute,
});
}
for (const auto& sampler : info.samplers) {
for (const auto& sampler : info->samplers) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampler,
Expand Down Expand Up @@ -96,8 +97,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
Shader::PushData push_data{};
u32 binding{};

for (const auto& buffer : info.buffers) {
const auto vsharp = buffer.GetVsharp(info);
for (const auto& buffer : info->buffers) {
const auto vsharp = buffer.GetVsharp(*info);
const VAddr address = vsharp.base_address;
// Most of the time when a metadata is updated with a shader it gets cleared. It means we
// can skip the whole dispatch and update the tracked state instead. Also, it is not
Expand Down Expand Up @@ -139,9 +140,9 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
});
}

for (const auto& image_desc : info.images) {
for (const auto& image_desc : info->images) {
const auto tsharp =
info.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
info->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
VideoCore::ImageInfo image_info{tsharp};
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
Expand All @@ -161,8 +162,8 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (texture)");
}
}
for (const auto& sampler : info.samplers) {
const auto ssharp = sampler.GetSsharp(info);
for (const auto& sampler : info->samplers) {
const auto ssharp = sampler.GetSsharp(*info);
const auto vk_sampler = texture_cache.GetSampler(ssharp);
image_infos.emplace_back(vk_sampler, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
set_writes.push_back({
Expand Down
14 changes: 11 additions & 3 deletions src/video_core/renderer_vulkan/vk_compute_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#pragma once

#include "shader_recompiler/ir/program.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_common.h"

Expand All @@ -16,11 +17,18 @@ namespace Vulkan {
class Instance;
class Scheduler;

/// Bundle of everything produced for one shader, passed to pipeline
/// constructors as a single `const Program*`.
///
/// Lifetime: ComputePipeline and GraphicsPipeline store raw pointers to
/// `pgm.info` rather than copies, so a Program must outlive every pipeline
/// created from it and must not be relocated while such a pipeline exists.
struct Program {
    /// Shader IR program; pipelines read resource descriptions (buffers,
    /// images, samplers, vertex inputs) through `pgm.info`.
    Shader::IR::Program pgm;
    /// NOTE(review): presumably the SPIR-V words generated for `pgm` -- confirm
    /// against the code that fills this struct.
    std::vector<u32> spv;
    /// Vulkan shader module handle used as `.module` in the pipeline's
    /// vk::PipelineShaderStageCreateInfo.
    vk::ShaderModule module;
    /// NOTE(review): presumably the descriptor binding index that follows this
    /// program's last resource binding -- confirm against the pipeline cache.
    /// Zero-initialized so a default-constructed Program never exposes an
    /// indeterminate value.
    u32 end_binding{};
};

class ComputePipeline {
public:
explicit ComputePipeline(const Instance& instance, Scheduler& scheduler,
vk::PipelineCache pipeline_cache, const Shader::Info* info,
u64 compute_key, vk::ShaderModule module);
vk::PipelineCache pipeline_cache, u64 compute_key,
const Program* program);
~ComputePipeline();

[[nodiscard]] vk::Pipeline Handle() const noexcept {
Expand All @@ -37,7 +45,7 @@ class ComputePipeline {
vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout;
u64 compute_key;
Shader::Info info{};
const Shader::Info* info;
};

} // namespace Vulkan
61 changes: 33 additions & 28 deletions src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@ namespace Vulkan {
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
const GraphicsPipelineKey& key_,
vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules)
std::span<const Program*, MaxShaderStages> programs)
: instance{instance_}, scheduler{scheduler_}, key{key_} {
const vk::Device device = instance.GetDevice();
for (u32 i = 0; i < MaxShaderStages; i++) {
if (!infos[i]) {
if (!programs[i]) {
continue;
}
stages[i] = *infos[i];
stages[i] = &programs[i]->pgm.info;
}
BuildDescSetLayout();

Expand All @@ -49,14 +48,14 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
boost::container::static_vector<vk::VertexInputBindingDescription, 32> bindings;
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> attributes;
const auto& vs_info = stages[u32(Shader::Stage::Vertex)];
for (const auto& input : vs_info.vs_inputs) {
for (const auto& input : vs_info->vs_inputs) {
if (input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate0 ||
input.instance_step_rate == Shader::Info::VsInput::InstanceIdType::OverStepRate1) {
// Skip attribute binding as the data will be pulled by shader
continue;
}

const auto buffer = vs_info.ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
const auto buffer = vs_info->ReadUd<AmdGpu::Buffer>(input.sgpr_base, input.dword_offset);
attributes.push_back({
.location = input.binding,
.binding = input.binding,
Expand Down Expand Up @@ -192,21 +191,21 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
.maxDepthBounds = key.depth_bounds_max,
};

u32 shader_count{};
auto stage = u32(Shader::Stage::Vertex);
std::array<vk::PipelineShaderStageCreateInfo, MaxShaderStages> shader_stages;
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
boost::container::static_vector<vk::PipelineShaderStageCreateInfo, MaxShaderStages>
shader_stages;
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eVertex,
.module = modules[stage],
.module = programs[stage]->module,
.pName = "main",
};
});
stage = u32(Shader::Stage::Fragment);
if (modules[stage]) {
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
if (programs[stage]) {
shader_stages.emplace_back(vk::PipelineShaderStageCreateInfo{
.stage = vk::ShaderStageFlagBits::eFragment,
.module = modules[stage],
.module = programs[stage]->module,
.pName = "main",
};
});
}

const auto it = std::ranges::find(key.color_formats, vk::Format::eUndefined);
Expand Down Expand Up @@ -280,7 +279,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul

const vk::GraphicsPipelineCreateInfo pipeline_info = {
.pNext = &pipeline_rendering_ci,
.stageCount = shader_count,
.stageCount = static_cast<u32>(shader_stages.size()),
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
Expand All @@ -306,8 +305,11 @@ GraphicsPipeline::~GraphicsPipeline() = default;
void GraphicsPipeline::BuildDescSetLayout() {
u32 binding{};
boost::container::small_vector<vk::DescriptorSetLayoutBinding, 32> bindings;
for (const auto& stage : stages) {
for (const auto& buffer : stage.buffers) {
for (const auto* stage : stages) {
if (!stage) {
continue;
}
for (const auto& buffer : stage->buffers) {
bindings.push_back({
.binding = binding++,
.descriptorType = buffer.is_storage ? vk::DescriptorType::eStorageBuffer
Expand All @@ -316,7 +318,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
});
}
for (const auto& image : stage.images) {
for (const auto& image : stage->images) {
bindings.push_back({
.binding = binding++,
.descriptorType = image.is_storage ? vk::DescriptorType::eStorageImage
Expand All @@ -325,7 +327,7 @@ void GraphicsPipeline::BuildDescSetLayout() {
.stageFlags = vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eFragment,
});
}
for (const auto& sampler : stage.samplers) {
for (const auto& sampler : stage->samplers) {
bindings.push_back({
.binding = binding++,
.descriptorType = vk::DescriptorType::eSampler,
Expand All @@ -352,13 +354,16 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
Shader::PushData push_data{};
u32 binding{};

for (const auto& stage : stages) {
if (stage.uses_step_rates) {
for (const auto* stage : stages) {
if (!stage) {
continue;
}
if (stage->uses_step_rates) {
push_data.step0 = regs.vgt_instance_step_rate_0;
push_data.step1 = regs.vgt_instance_step_rate_1;
}
for (const auto& buffer : stage.buffers) {
const auto vsharp = buffer.GetVsharp(stage);
for (const auto& buffer : stage->buffers) {
const auto vsharp = buffer.GetVsharp(*stage);
if (vsharp) {
const VAddr address = vsharp.base_address;
if (texture_cache.IsMeta(address)) {
Expand Down Expand Up @@ -391,9 +396,9 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
}

boost::container::static_vector<AmdGpu::Image, 16> tsharps;
for (const auto& image_desc : stage.images) {
for (const auto& image_desc : stage->images) {
const auto& tsharp = tsharps.emplace_back(
stage.ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset));
stage->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset));
VideoCore::ImageInfo image_info{tsharp};
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
Expand All @@ -413,8 +418,8 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a PS shader (texture)");
}
}
for (const auto& sampler : stage.samplers) {
auto ssharp = sampler.GetSsharp(stage);
for (const auto& sampler : stage->samplers) {
auto ssharp = sampler.GetSsharp(*stage);
if (sampler.disable_aniso) {
const auto& tsharp = tsharps[sampler.associated_image];
if (tsharp.base_level == 0 && tsharp.last_level == 0) {
Expand Down
9 changes: 4 additions & 5 deletions src/video_core/renderer_vulkan/vk_graphics_pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

#include <xxhash.h>
#include "common/types.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"

namespace VideoCore {
class BufferCache;
Expand Down Expand Up @@ -58,8 +58,7 @@ class GraphicsPipeline {
public:
explicit GraphicsPipeline(const Instance& instance, Scheduler& scheduler,
const GraphicsPipelineKey& key, vk::PipelineCache pipeline_cache,
std::span<const Shader::Info*, MaxShaderStages> infos,
std::array<vk::ShaderModule, MaxShaderStages> modules);
std::span<const Program*, MaxShaderStages> programs);
~GraphicsPipeline();

void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
Expand All @@ -74,7 +73,7 @@ class GraphicsPipeline {
}

const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
return stages[u32(stage)];
return *stages[u32(stage)];
}

bool IsEmbeddedVs() const noexcept {
Expand All @@ -99,7 +98,7 @@ class GraphicsPipeline {
vk::UniquePipeline pipeline;
vk::UniquePipelineLayout pipeline_layout;
vk::UniqueDescriptorSetLayout desc_layout;
std::array<Shader::Info, MaxShaderStages> stages{};
std::array<const Shader::Info*, MaxShaderStages> stages{};
GraphicsPipelineKey key;
};

Expand Down
Loading

0 comments on commit 3f9c86a

Please sign in to comment.