Skip to content

Commit

Permalink
vk: Add synchronous compute pipelines
Browse files Browse the repository at this point in the history
- Compute shaders now assist with some parts of blit operations, since Vulkan, unlike OpenGL, does not perform format conversions implicitly
- TODO: Integrate this into all types of GPU memory conversion operations instead of downloading to the CPU and converting there
  • Loading branch information
kd-11 committed Jun 18, 2018
1 parent 0d5c071 commit 2afcf36
Show file tree
Hide file tree
Showing 17 changed files with 568 additions and 129 deletions.
3 changes: 2 additions & 1 deletion rpcs3/Emu/RSX/Common/GLSLCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ namespace glsl
enum program_domain
{
glsl_vertex_program = 0,
glsl_fragment_program = 1
glsl_fragment_program = 1,
glsl_compute_program = 2
};

enum glsl_rules
Expand Down
3 changes: 2 additions & 1 deletion rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,8 @@ namespace vk

bool compile_glsl_to_spv(std::string& shader, program_domain domain, std::vector<u32>& spv)
{
EShLanguage lang = (domain == glsl_fragment_program) ? EShLangFragment : EShLangVertex;
EShLanguage lang = (domain == glsl_fragment_program) ? EShLangFragment :
(domain == glsl_vertex_program)? EShLangVertex : EShLangCompute;

glslang::TProgram program;
glslang::TShader shader_object(lang);
Expand Down
256 changes: 256 additions & 0 deletions rpcs3/Emu/RSX/VK/VKCompute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#pragma once
#include "VKHelpers.h"

namespace vk
{
// Base class for a compute job built from a single GLSL compute shader.
// It owns the shader, the compute pipeline (via m_program), a descriptor pool,
// and a descriptor/pipeline layout exposing one storage buffer at binding 0.
// Derived tasks fill m_src with GLSL and override bind_resources().
struct compute_task
{
	// Number of storage-buffer descriptors reserved in the pool. load_program()
	// consumes one per call until free_resources() resets the pool.
	static constexpr u32 max_descriptors = 120;

	std::string m_src;
	vk::glsl::shader m_shader;
	std::unique_ptr<vk::glsl::program> m_program;

	vk::descriptor_pool m_descriptor_pool;
	VkDescriptorSet m_descriptor_set = VK_NULL_HANDLE;
	VkDescriptorSetLayout m_descriptor_layout = VK_NULL_HANDLE;
	VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE;
	u32 m_used_descriptors = 0;

	bool initialized = false;
	u32 optimal_group_size = 64;

	// Tasks are stored and deleted through std::unique_ptr<vk::compute_task>
	// (see g_compute_tasks), so the destructor has to be virtual.
	// Vulkan objects are NOT released here; call destroy() explicitly.
	virtual ~compute_task() = default;

	// Creates the descriptor pool, the single-binding descriptor set layout
	// and the pipeline layout used by every dispatch of this task.
	void init_descriptors()
	{
		VkDescriptorPoolSize descriptor_pool_sizes[1] =
		{
			{ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, max_descriptors },
		};

		//Reserve descriptor pools
		m_descriptor_pool.create(*get_current_renderer(), descriptor_pool_sizes, 1);

		std::vector<VkDescriptorSetLayoutBinding> bindings(1);

		bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		bindings[0].descriptorCount = 1;
		bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
		bindings[0].binding = 0;
		bindings[0].pImmutableSamplers = nullptr;

		VkDescriptorSetLayoutCreateInfo infos = {};
		infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
		infos.pBindings = bindings.data();
		infos.bindingCount = static_cast<u32>(bindings.size());

		CHECK_RESULT(vkCreateDescriptorSetLayout(*get_current_renderer(), &infos, nullptr, &m_descriptor_layout));

		VkPipelineLayoutCreateInfo layout_info = {};
		layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
		layout_info.setLayoutCount = 1;
		layout_info.pSetLayouts = &m_descriptor_layout;

		CHECK_RESULT(vkCreatePipelineLayout(*get_current_renderer(), &layout_info, nullptr, &m_pipeline_layout));
	}

	// One-time setup: builds the descriptor objects and picks the workgroup
	// size for the current driver vendor. Repeated calls are no-ops.
	void create()
	{
		if (initialized)
			return;

		init_descriptors();

		switch (vk::get_driver_vendor())
		{
		case vk::driver_vendor::unknown:
			// Probably intel
		case vk::driver_vendor::NVIDIA:
			optimal_group_size = 32;
			break;
		default:
			// Every other vendor keeps the default of 64
			break;
		}

		initialized = true;
	}

	// Releases everything create()/load_program() built and returns the task
	// to its pristine state so that create() can be called again.
	void destroy()
	{
		if (!initialized)
			return;

		m_shader.destroy();
		m_program.reset();

		vkDestroyDescriptorSetLayout(*get_current_renderer(), m_descriptor_layout, nullptr);
		vkDestroyPipelineLayout(*get_current_renderer(), m_pipeline_layout, nullptr);
		m_descriptor_pool.destroy();

		// Do not keep dangling handles or a stale allocation count around
		m_descriptor_layout = VK_NULL_HANDLE;
		m_pipeline_layout = VK_NULL_HANDLE;
		m_descriptor_set = VK_NULL_HANDLE;
		m_used_descriptors = 0;

		initialized = false;
	}

	// Returns every descriptor set handed out by load_program() to the pool.
	void free_resources()
	{
		if (m_used_descriptors == 0)
			return;

		vkResetDescriptorPool(*get_current_renderer(), m_descriptor_pool, 0);
		m_used_descriptors = 0;
	}

	// Hook for derived tasks: write their buffers into m_descriptor_set.
	// Called by load_program() after a fresh set has been allocated.
	virtual void bind_resources()
	{}

	// Lazily compiles m_src into a compute pipeline, allocates a new descriptor
	// set, lets the derived task bind its resources and records the pipeline
	// and descriptor set binds into cmd.
	void load_program(const vk::command_buffer& cmd)
	{
		if (!m_program)
		{
			m_shader.create(::glsl::program_domain::glsl_compute_program, m_src);
			auto handle = m_shader.compile();

			VkPipelineShaderStageCreateInfo shader_stage{};
			shader_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
			shader_stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
			shader_stage.module = handle;
			shader_stage.pName = "main";

			VkComputePipelineCreateInfo info{};
			info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
			info.stage = shader_stage;
			info.layout = m_pipeline_layout;
			info.basePipelineIndex = -1;
			info.basePipelineHandle = VK_NULL_HANDLE;

			// Pipeline creation can fail (e.g. out of memory); never continue
			// with an indeterminate handle.
			VkPipeline pipeline = VK_NULL_HANDLE;
			CHECK_RESULT(vkCreateComputePipelines(*get_current_renderer(), VK_NULL_HANDLE, 1, &info, nullptr, &pipeline));

			std::vector<vk::glsl::program_input> inputs;
			m_program = std::make_unique<vk::glsl::program>(*get_current_renderer(), pipeline, inputs, inputs);
		}

		// The pool only holds max_descriptors descriptors between resets
		verify(HERE), m_used_descriptors < max_descriptors;

		VkDescriptorSetAllocateInfo alloc_info = {};
		alloc_info.descriptorPool = m_descriptor_pool;
		alloc_info.descriptorSetCount = 1;
		alloc_info.pSetLayouts = &m_descriptor_layout;
		alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;

		CHECK_RESULT(vkAllocateDescriptorSets(*get_current_renderer(), &alloc_info, &m_descriptor_set));
		m_used_descriptors++;

		bind_resources();

		vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_program->pipeline);
		vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout, 0, 1, &m_descriptor_set, 0, nullptr);
	}

	// Records the task into cmd. num_invocations is passed to vkCmdDispatch as
	// the X workgroup count (Y and Z are 1).
	virtual void run(const vk::command_buffer& cmd, u32 num_invocations)
	{
		load_program(cmd);
		vkCmdDispatch(cmd, num_invocations, 1, 1);
	}
};

struct cs_shuffle_base : compute_task
{
vk::buffer* m_data;
u32 kernel_size = 1;

void build(const char* function_name, u32 _kernel_size)
{
kernel_size = _kernel_size;

m_src =
{
"#version 430\n"
"layout(local_size_x=%ws, local_size_y=1, local_size_z=1) in;\n"
"layout(std430, set=0, binding=0) buffer ssbo{ uint data[]; };\n\n"
"\n"
"#define KERNEL_SIZE %ks\n"
"#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8\n"
"#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24\n"
"#define bswap_u16_u32(bits) (bits & 0xFFFF) << 16 | (bits & 0xFFFF0000) >> 16\n"
"\n"
"void main()\n"
"{\n"
" uint index = gl_GlobalInvocationID.x * KERNEL_SIZE;\n"
" for (uint loop = 0; loop < KERNEL_SIZE; ++loop)\n"
" {\n"
" uint value = data[index];\n"
" data[index] = %f(value);\n"
" index++;\n"
" }\n"
"}\n"
};

const std::pair<std::string, std::string> syntax_replace[] =
{
{ "%ws", std::to_string(optimal_group_size) },
{ "%ks", std::to_string(kernel_size) },
{ "%f", function_name }
};

m_src = fmt::replace_all(m_src, syntax_replace);
}

void bind_resources() override
{
m_program->bind_buffer({ m_data->value, 0, VK_WHOLE_SIZE }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set);
}

void run(const vk::command_buffer& cmd, vk::buffer* data, u32 mem_size)
{
m_data = data;

const auto num_bytes_per_invocation = optimal_group_size * kernel_size * 4;
const auto num_invocations = align(mem_size, 256) / num_bytes_per_invocation;
compute_task::run(cmd, num_invocations);
}
};

// Shuffle kernel using bswap_u16: swaps the two bytes of each 16-bit half of
// every 32-bit word (byteswap ushort).
// The base class already provides m_data; it must not be redeclared here,
// otherwise an unused, uninitialized pointer shadows the one actually bound.
struct cs_shuffle_16 : cs_shuffle_base
{
	cs_shuffle_16()
	{
		// 32 words are processed per shader invocation
		cs_shuffle_base::build("bswap_u16", 32);
	}
};

struct cs_shuffle_32 : cs_shuffle_base
{
// byteswap_ulong
cs_shuffle_32()
{
cs_shuffle_base::build("bswap_u32", 32);
}
};

struct cs_shuffle_32_16 : cs_shuffle_base
{
// byteswap_ulong + byteswap_ushort
cs_shuffle_32_16()
{
cs_shuffle_base::build("bswap_u16_u32", 32);
}
};

// TODO: Replace with a proper manager
extern std::unordered_map<u32, std::unique_ptr<vk::compute_task>> g_compute_tasks;

template<class T>
T* get_compute_task()
{
u32 index = id_manager::typeinfo::get_index<T>();
auto &e = g_compute_tasks[index];

if (!e)
{
e = std::make_unique<T>();
e->create();
}

return static_cast<T*>(e.get());
}

void reset_compute_tasks();
}
32 changes: 7 additions & 25 deletions rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,11 @@ VKFragmentProgram::~VKFragmentProgram()
void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
{
u32 size;
VKFragmentDecompilerThread decompiler(shader, parr, prog, size, *this);
std::string source;
VKFragmentDecompilerThread decompiler(source, parr, prog, size, *this);
decompiler.Task();

shader.create(::glsl::program_domain::glsl_fragment_program, source);

for (const ParamType& PT : decompiler.m_parr.params[PF_PARAM_UNIFORM])
{
Expand All @@ -384,34 +387,13 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
void VKFragmentProgram::Compile()
{
fs::create_path(fs::get_config_dir() + "/shaderlog");
fs::file(fs::get_config_dir() + "shaderlog/FragmentProgram" + std::to_string(id) + ".spirv", fs::rewrite).write(shader);

std::vector<u32> spir_v;
if (!vk::compile_glsl_to_spv(shader, glsl::glsl_fragment_program, spir_v))
fmt::throw_exception("Failed to compile fragment shader" HERE);

//Create the object and compile
VkShaderModuleCreateInfo fs_info;
fs_info.codeSize = spir_v.size() * sizeof(u32);
fs_info.pNext = nullptr;
fs_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
fs_info.pCode = (uint32_t*)spir_v.data();
fs_info.flags = 0;

VkDevice dev = (VkDevice)*vk::get_current_renderer();
vkCreateShaderModule(dev, &fs_info, nullptr, &handle);
fs::file(fs::get_config_dir() + "shaderlog/FragmentProgram" + std::to_string(id) + ".spirv", fs::rewrite).write(shader.get_source());
handle = shader.compile();
}

void VKFragmentProgram::Delete()
{
shader.clear();

if (handle)
{
VkDevice dev = (VkDevice)*vk::get_current_renderer();
vkDestroyShaderModule(dev, handle, NULL);
handle = nullptr;
}
shader.destroy();
}

void VKFragmentProgram::SetInputs(std::vector<vk::glsl::program_input>& inputs)
Expand Down
2 changes: 1 addition & 1 deletion rpcs3/Emu/RSX/VK/VKFragmentProgram.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class VKFragmentProgram
ParamArray parr;
VkShaderModule handle = nullptr;
u32 id;
std::string shader;
vk::glsl::shader shader;
std::vector<size_t> FragmentConstantOffsetCache;

std::array<u32, 4> output_color_masks{ {} };
Expand Down
2 changes: 2 additions & 0 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2024,6 +2024,8 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
m_overlay_manager->dispose(uids_to_dispose);
}

vk::reset_compute_tasks();

m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
m_ui_renderer->free_resources();
Expand Down
Loading

0 comments on commit 2afcf36

Please sign in to comment.