Skip to content

Commit

Permalink
Baggins update stop relying on GNMX HsConstants struct. Change runtim…
Browse files Browse the repository at this point in the history
…e_info.hs_info and some regs
  • Loading branch information
diegolix29 committed Dec 13, 2024
1 parent 11c648f commit 9cdc33c
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 75 deletions.
14 changes: 4 additions & 10 deletions src/core/libraries/gnmdriver/gnmdriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1659,15 +1659,9 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
// This is wrong but just stash them here for now
// Should read the tess constants buffer instead, which is bound as V#, into runtime_info.
// HsConstants member of HsProgram is used to derive TessellationDataConstantBuffer, its members
// dont correspond to real registers
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x11cu, hs_regs[4], hs_regs[5], hs_regs[6], hs_regs[7],
hs_regs[8], hs_regs[9], hs_regs[10], hs_regs[11], hs_regs[12],
hs_regs[13]); // TODO comment
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
hs_regs[6]); // VGT_HOS_MAX_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u,
hs_regs[5], // VGT_HOS_MAX_TESS_LEVEL
hs_regs[6]); // VGT_HOS_MIN_TESS_LEVEL
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG

Expand Down Expand Up @@ -3252,4 +3246,4 @@ void RegisterlibSceGnmDriver(Core::Loader::SymbolsResolver* sym) {
Func_BADE7B4C199140DD);
};

} // namespace Libraries::GnmDriver
} // namespace Libraries::GnmDriver
4 changes: 2 additions & 2 deletions src/shader_recompiler/backend/spirv/emit_spirv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ void DefineEntryPoint(const Info& info, EmitContext& ctx, Id main) {
execution_model = spv::ExecutionModel::TessellationControl;
ctx.AddCapability(spv::Capability::Tessellation);
ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
ctx.runtime_info.hs_info.output_control_points);
ctx.runtime_info.hs_info.NumOutputControlPoints());
break;
case LogicalStage::TessellationEval: {
execution_model = spv::ExecutionModel::TessellationEvaluation;
Expand Down Expand Up @@ -489,4 +489,4 @@ void EmitSetM0(EmitContext& ctx) {
UNREACHABLE_MSG("Unreachable instruction");
}

} // namespace Shader::Backend::SPIRV
} // namespace Shader::Backend::SPIRV
5 changes: 3 additions & 2 deletions src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

Expand Down Expand Up @@ -487,8 +488,8 @@ void EmitContext::DefineOutputs() {
if (num_attrs > 0) {
const Id per_vertex_type{TypeArray(F32[4], ConstU32(num_attrs))};
// The input vertex count isn't statically known, so make length 32 (what glslang does)
const Id patch_array_type{
TypeArray(per_vertex_type, ConstU32(runtime_info.hs_info.output_control_points))};
const Id patch_array_type{TypeArray(
per_vertex_type, ConstU32(runtime_info.hs_info.NumOutputControlPoints()))};
output_attr_array = DefineOutput(patch_array_type, 0);
Name(output_attr_array, "out_attrs");
}
Expand Down
28 changes: 20 additions & 8 deletions src/shader_recompiler/ir/passes/hull_shader_transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ static IR::F32 ReadTessInputComponent(IR::U32 addr, const u32 stride, IR::IREmit
} // namespace

void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {
Info& info = program.info;
const Info& info = program.info;

for (IR::Block* block : program.blocks) {
for (IR::Inst& inst : block->Instructions()) {
Expand Down Expand Up @@ -375,19 +375,31 @@ void HullShaderTransform(IR::Program& program, RuntimeInfo& runtime_info) {

const IR::Value data = inst.Arg(2);
auto get_factor_attr = [&](u32 gcn_factor_idx) -> IR::Patch {
ASSERT(gcn_factor_idx * 4 < runtime_info.hs_info.tess_factor_stride);

switch (runtime_info.hs_info.tess_factor_stride) {
case 24:
// The hull outputs tess factors in different formats depending on the shader.
// For triangle domains, it seems to pack the entries into 4 consecutive floats,
// with the 3 edge factors followed by the 1 interior factor.
// For quads, it does the expected 4 edge factors then 2 interior.
// There is a tess factor stride member of the GNMX hull constants struct in
// a hull program shader binary archive, but this doesn't seem to be
// communicated to the driver. The fixed function tessellator would need to know
// this somehow. It's probably implied by the type of the abstract domain. If
// this is causing problems, good idea to check the hs_regs argument to
// sceGnmSetHsShader. The memory containing the tess factor stride probably
// follows the memory for hs_regs if the app is providing a pointer into the
// program they loaded from disk
switch (runtime_info.hs_info.tess_type) {
case AmdGpu::TessellationType::Quad:
ASSERT(gcn_factor_idx < 6);
return IR::PatchFactor(gcn_factor_idx);
case 16:
case AmdGpu::TessellationType::Triangle:
ASSERT(gcn_factor_idx < 4);
if (gcn_factor_idx == 3) {
return IR::Patch::TessellationLodInteriorU;
}
return IR::PatchFactor(gcn_factor_idx);

default:
UNREACHABLE_MSG("Unhandled tess factor stride");
// TODO point domain types haven't been seen so far
UNREACHABLE_MSG("Unhandled tess type");
}
};

Expand Down
44 changes: 23 additions & 21 deletions src/shader_recompiler/runtime_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,15 @@ enum class Stage : u32 {
};
constexpr u32 MaxStageTypes = 7;

// Vertex intentionally comes after TCS/TES because in a tess pipeline,
// we need to find the ls_stride from tess constants V# (in order to define an output attribute
// array with correct length), and finding the tess constant V# requires analysis while compiling
// TCS/TES (for now)
// Vertex intentionally comes after TCS/TES due to order of compilation
enum class LogicalStage : u32 {
Fragment,
TessellationControl,
TessellationEval,
Vertex,
Geometry,
GsCopy,
GsCopy, // TODO delete this, but causes crash somehow (probably wrong use of Shader::Stage
// somewhere)
Compute,
};

Expand Down Expand Up @@ -106,36 +104,40 @@ struct VertexRuntimeInfo {

struct HullRuntimeInfo {
// from registers
u32 output_control_points;

// from HullStateConstants in HsProgram (TODO dont rely on this)
u32 tess_factor_stride;
u32 num_input_control_points;
u32 num_threads;
AmdGpu::TessellationType tess_type;

// from tess constants buffer
u32 ls_stride;
u32 hs_output_cp_stride;
u32 hs_num_patch;
u32 hs_output_base;
u32 patch_const_size;
u32 patch_const_base;
u32 patch_output_size;
u32 first_edge_tess_factor_index;

auto operator<=>(const HullRuntimeInfo&) const noexcept = default;

// It might be possible for a non-passthrough TCS to have these conditions, in some
// dumb situation.
// In that case, it should be fine to assume passthrough and declare some extra
// output control points and attributes that shouldnt be read by the TES anyways
//
// TODO now having patch control points dynamic state is kind of pointless if we depend on
// num_input_control_points
bool IsPassthrough() const {
return hs_output_base == 0;
return hs_output_base == 0 && num_threads == 1;
};

// regs.ls_hs_config.hs_output_control_points contains the number of threads, which
// isn't exactly the number of output control points.
// For passthrough shaders, the register field is set to 1, so use the number of
// input control points
u32 NumOutputControlPoints() const {
return IsPassthrough() ? num_input_control_points : num_threads;
}

void InitFromTessConstants(Shader::TessellationDataConstantBuffer& tess_constants) {
ls_stride = tess_constants.m_lsStride;
hs_output_cp_stride = tess_constants.m_hsCpStride;
hs_num_patch = tess_constants.m_hsNumPatch;
hs_output_base = tess_constants.m_hsOutputBase;
patch_const_size = tess_constants.m_patchConstSize;
patch_const_base = tess_constants.m_patchConstBase;
patch_output_size = tess_constants.m_patchOutputSize;
first_edge_tess_factor_index = tess_constants.m_firstEdgeTessFactorIndex;
}
};

Expand Down Expand Up @@ -255,4 +257,4 @@ struct RuntimeInfo {
}
};

} // namespace Shader
} // namespace Shader
36 changes: 11 additions & 25 deletions src/video_core/amdgpu/liverpool.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

Expand Down Expand Up @@ -143,20 +144,11 @@ struct Liverpool {
}
};

struct HsStageRegisters {
u32 vgt_tf_param;
u32 vgt_hos_max_tess_level;
u32 vgt_hos_min_tess_level;
};

struct HsConstants {
u32 num_input_cp;
u32 num_output_cp;
u32 num_patch_const;
u32 cp_stride;
u32 num_threads;
u32 tess_factor_stride;
u32 first_edge_tess_factor_index;
struct HsTessFactorClamp {
// I've only seen min=0.0, max=1.0 so far.
// TODO why is max set to 1.0? Makes no sense
float hs_max_tess;
float hs_min_tess;
};

struct ComputeProgram {
Expand Down Expand Up @@ -1098,9 +1090,6 @@ struct Liverpool {
};

union TessFactorMemoryBase {
// TODO: was going to use this to check against UD used in tcs shader
// but only seen set to 0
// Remove this and other added regs if they end up unused
u32 base;

u64 MemoryBase() const {
Expand Down Expand Up @@ -1158,11 +1147,7 @@ struct Liverpool {
ShaderProgram es_program;
INSERT_PADDING_WORDS(0x2C);
ShaderProgram hs_program;
// TODO delete. These don't actually correspond to real registers, but I'll stash them
// here to debug
HsStageRegisters hs_registers;
HsConstants hs_constants;
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20 - 3 - 7);
INSERT_PADDING_WORDS(0x2D48 - 0x2d08 - 20);
ShaderProgram ls_program;
INSERT_PADDING_WORDS(0xA4);
ComputeProgram cs_program;
Expand Down Expand Up @@ -1229,7 +1214,9 @@ struct Liverpool {
PolygonControl polygon_control;
ViewportControl viewport_control;
VsOutputControl vs_output_control;
INSERT_PADDING_WORDS(0xA290 - 0xA207 - 1);
INSERT_PADDING_WORDS(0xA287 - 0xA207 - 1);
HsTessFactorClamp hs_clamp;
INSERT_PADDING_WORDS(0xA290 - 0xA287 - 2);
GsMode vgt_gs_mode;
INSERT_PADDING_WORDS(1);
ModeControl mode_control;
Expand Down Expand Up @@ -1449,8 +1436,6 @@ static_assert(GFX6_3D_REG_INDEX(vs_program.user_data) == 0x2C4C);
static_assert(GFX6_3D_REG_INDEX(gs_program) == 0x2C88);
static_assert(GFX6_3D_REG_INDEX(es_program) == 0x2CC8);
static_assert(GFX6_3D_REG_INDEX(hs_program) == 0x2D08);
static_assert(GFX6_3D_REG_INDEX(hs_registers) == 0x2D1C);
static_assert(GFX6_3D_REG_INDEX(hs_constants) == 0x2D1F);
static_assert(GFX6_3D_REG_INDEX(ls_program) == 0x2D48);
static_assert(GFX6_3D_REG_INDEX(cs_program) == 0x2E00);
static_assert(GFX6_3D_REG_INDEX(cs_program.dim_z) == 0x2E03);
Expand Down Expand Up @@ -1489,6 +1474,7 @@ static_assert(GFX6_3D_REG_INDEX(color_control) == 0xA202);
static_assert(GFX6_3D_REG_INDEX(clipper_control) == 0xA204);
static_assert(GFX6_3D_REG_INDEX(viewport_control) == 0xA206);
static_assert(GFX6_3D_REG_INDEX(vs_output_control) == 0xA207);
static_assert(GFX6_3D_REG_INDEX(hs_clamp) == 0xA287);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_mode) == 0xA290);
static_assert(GFX6_3D_REG_INDEX(mode_control) == 0xA292);
static_assert(GFX6_3D_REG_INDEX(vgt_gs_out_prim_type) == 0xA29B);
Expand Down
10 changes: 3 additions & 7 deletions src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,9 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Stage stage, LogicalStage l_
}
case Stage::Hull: {
BuildCommon(regs.hs_program);
// TODO: ls_hs_config.output_control_points seems to be == 1 when doing passthrough
// instead of the real number which matches the input patch topology
// info.hs_info.output_control_points = regs.ls_hs_config.hs_output_control_points.Value();

// TODO dont rely on HullStateConstants
info.hs_info.output_control_points = regs.hs_constants.num_output_cp;
info.hs_info.tess_factor_stride = regs.hs_constants.tess_factor_stride;
info.hs_info.num_input_control_points = regs.ls_hs_config.hs_input_control_points.Value();
info.hs_info.num_threads = regs.ls_hs_config.hs_output_control_points.Value();
info.hs_info.tess_type = regs.tess_config.type;

// We need to initialize most hs_info fields after finding the V# with tess constants
break;
Expand Down

0 comments on commit 9cdc33c

Please sign in to comment.