Skip to content

Commit

Permalink
shader_recompiler: Implement data share append and consume operations (
Browse files Browse the repository at this point in the history
…shadps4-emu#814)

* shader_recompiler: Add more format swap modes

* texture_cache: Handle stencil texture reads

* emulator: Support loading font library

* readme: Add thanks section

* shader_recompiler: Constant buffers as integers

* shader_recompiler: Typed buffers as integers

* shader_recompiler: Separate thread bit scalars

* We can assume guest shaders never mix them with normal SGPRs. This helps avoid errors where SSA could view an SGPR write as dominating a thread-bit read because of how control flow is structurized, even though that is not possible in the actual control flow.

* shader_recompiler: Implement data append/consume operations

* clang format

* buffer_cache: Simplify invalidation scheme

* video_core: Remove some invalidation remnants

* adjust
  • Loading branch information
raphaelthegreat authored Sep 6, 2024
1 parent 649527a commit 13743b2
Show file tree
Hide file tree
Showing 34 changed files with 506 additions and 266 deletions.
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,20 @@ Open a PR and we'll check it :)
<img src="https://contrib.rocks/image?repo=shadps4-emu/shadPS4&max=15">
</a>


# Special Thanks

A few noteworthy teams/projects who've helped us along the way are:

- [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat. They have been incredibly helpful in understanding and solving problems that came up from natively executing the x64 code of PS4 binaries.

- [**fpPS4**](https://github.com/red-prig/fpPS4): The fpPS4 team has assisted massively with understanding some of the more complex parts of the PS4 operating system and libraries, by helping with reverse engineering work and research.

- **yuzu**: Our shader compiler has been designed with yuzu's Hades compiler as a blueprint. This allowed us to focus on the challenges of emulating a modern AMD GPU while having a high-quality optimizing shader compiler implementation as a base.

- [**hydra**](https://github.com/hydra-emu/hydra): A multisystem, multiplatform emulator (chip-8, GB, NES, N64) from Paris.


# Sister Projects

- [**Panda3DS**](https://github.com/wheremyfoodat/Panda3DS): A multiplatform 3DS emulator from our co-author wheremyfoodat.
Expand Down
5 changes: 3 additions & 2 deletions src/emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ void Emulator::Run(const std::filesystem::path& file) {
}

void Emulator::LoadSystemModules(const std::filesystem::path& file) {
constexpr std::array<SysModules, 9> ModulesToLoad{
constexpr std::array<SysModules, 10> ModulesToLoad{
{{"libSceNgs2.sprx", &Libraries::Ngs2::RegisterlibSceNgs2},
{"libSceFiber.sprx", nullptr},
{"libSceUlt.sprx", nullptr},
Expand All @@ -204,7 +204,8 @@ void Emulator::LoadSystemModules(const std::filesystem::path& file) {
{"libSceLibcInternal.sprx", &Libraries::LibcInternal::RegisterlibSceLibcInternal},
{"libSceDiscMap.sprx", &Libraries::DiscMap::RegisterlibSceDiscMap},
{"libSceRtc.sprx", &Libraries::Rtc::RegisterlibSceRtc},
{"libSceJpegEnc.sprx", nullptr}},
{"libSceJpegEnc.sprx", nullptr},
{"libSceFont.sprx", nullptr}},
};

std::vector<std::filesystem::path> found_modules;
Expand Down
16 changes: 16 additions & 0 deletions src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,20 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
return ImageAtomicU32(ctx, inst, handle, coords, value, &Sirit::Module::OpAtomicExchange);
}

/// Emits an atomic increment on one u32 element of a bound buffer.
///
/// @param ctx      Emit context that owns the buffer table and the SPIR-V module.
/// @param gds_addr Constant used directly as the second access-chain index.
/// @param binding  Index into ctx.buffers selecting the target buffer.
/// @return Id of the OpAtomicIIncrement result (per the SPIR-V specification, the value
///         stored at the pointer before the increment).
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
    const auto& target = ctx.buffers[binding];
    // Access chain indices are (0, gds_addr).
    const Id element_index = ctx.ConstU32(gds_addr);
    const Id counter_ptr =
        ctx.OpAccessChain(target.pointer_type, target.id, ctx.u32_zero_value, element_index);
    const auto [mem_scope, mem_semantics] = AtomicArgs(ctx);
    return ctx.OpAtomicIIncrement(ctx.U32[1], counter_ptr, mem_scope, mem_semantics);
}

/// Emits an atomic decrement on one u32 element of a bound buffer.
///
/// @param ctx      Emit context that owns the buffer table and the SPIR-V module.
/// @param gds_addr Constant used directly as the second access-chain index.
/// @param binding  Index into ctx.buffers selecting the target buffer.
/// @return Id of the OpAtomicIDecrement result (per the SPIR-V specification, the value
///         stored at the pointer before the decrement).
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
    const auto& target = ctx.buffers[binding];
    // Access chain indices are (0, gds_addr).
    const Id element_index = ctx.ConstU32(gds_addr);
    const Id counter_ptr =
        ctx.OpAccessChain(target.pointer_type, target.id, ctx.u32_zero_value, element_index);
    const auto [mem_scope, mem_semantics] = AtomicArgs(ctx);
    return ctx.OpAtomicIDecrement(ctx.U32[1], counter_ptr, mem_scope, mem_semantics);
}

} // namespace Shader::Backend::SPIRV
50 changes: 19 additions & 31 deletions src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,6 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
return ctx.OpLoad(buffer.data_types->Get(1), ptr);
}

Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index) {
return ctx.OpBitcast(ctx.U32[1], EmitReadConstBuffer(ctx, handle, index));
}

Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
return ctx.OpLoad(
ctx.U32[1], ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
Expand Down Expand Up @@ -222,12 +218,8 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
ctx.OpStore(pointer, ctx.OpBitcast(ctx.F32[1], value));
}

Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
return EmitLoadBufferF32(ctx, inst, handle, address);
}

template <u32 N>
static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
static Id EmitLoadBufferU32xN(EmitContext& ctx, u32 handle, Id address) {
auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
Expand All @@ -246,20 +238,20 @@ static Id EmitLoadBufferF32xN(EmitContext& ctx, u32 handle, Id address) {
}
}

Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<1>(ctx, handle, address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<1>(ctx, handle, address);
}

Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<2>(ctx, handle, address);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<2>(ctx, handle, address);
}

Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<3>(ctx, handle, address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<3>(ctx, handle, address);
}

Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferF32xN<4>(ctx, handle, address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
return EmitLoadBufferU32xN<4>(ctx, handle, address);
}

Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
Expand All @@ -275,7 +267,7 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
}

template <u32 N>
static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
static void EmitStoreBufferU32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
auto& buffer = ctx.buffers[handle];
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
Expand All @@ -287,29 +279,25 @@ static void EmitStoreBufferF32xN(EmitContext& ctx, u32 handle, Id address, Id va
const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
const Id ptr =
ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index_i);
ctx.OpStore(ptr, ctx.OpCompositeExtract(ctx.F32[1], value, i));
ctx.OpStore(ptr, ctx.OpCompositeExtract(buffer.data_types->Get(1), value, i));
}
}
}

void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
}

void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<2>(ctx, handle, address, value);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<1>(ctx, handle, address, value);
}

void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<3>(ctx, handle, address, value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<2>(ctx, handle, address, value);
}

void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<4>(ctx, handle, address, value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<3>(ctx, handle, address, value);
}

void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferF32xN<1>(ctx, handle, address, value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
EmitStoreBufferU32xN<4>(ctx, handle, address, value);
}

void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
Expand Down
28 changes: 10 additions & 18 deletions src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,25 +64,16 @@ void EmitGetGotoVariable(EmitContext& ctx);
void EmitSetScc(EmitContext& ctx);
Id EmitReadConst(EmitContext& ctx);
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
Id EmitReadConstBufferU32(EmitContext& ctx, u32 handle, Id index);
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicIAdd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicSMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Id EmitBufferAtomicUMin32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
Expand Down Expand Up @@ -406,12 +397,13 @@ Id EmitImageAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);

Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
Id EmitReadFirstLane(EmitContext& ctx, Id value);
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding);
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding);

} // namespace Shader::Backend::SPIRV
18 changes: 18 additions & 0 deletions src/shader_recompiler/frontend/translate/data_share.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ void Translator::EmitDataShare(const GcnInst& inst) {
return DS_MIN_U32(inst, false, true);
case Opcode::DS_MAX_RTN_U32:
return DS_MAX_U32(inst, false, true);
case Opcode::DS_APPEND:
return DS_APPEND(inst);
case Opcode::DS_CONSUME:
return DS_CONSUME(inst);
default:
LogMissingOpcode(inst);
}
Expand Down Expand Up @@ -192,4 +196,18 @@ void Translator::V_WRITELANE_B32(const GcnInst& inst) {
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
}

void Translator::DS_APPEND(const GcnInst& inst) {
const u32 inst_offset = inst.control.ds.offset0;
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
const IR::U32 prev = ir.DataAppend(gds_offset);
SetDst(inst.dst[0], prev);
}

void Translator::DS_CONSUME(const GcnInst& inst) {
const u32 inst_offset = inst.control.ds.offset0;
const IR::U32 gds_offset = ir.IAdd(ir.GetM0(), ir.Imm32(inst_offset));
const IR::U32 prev = ir.DataConsume(gds_offset);
SetDst(inst.dst[0], prev);
}

} // namespace Shader::Gcn
6 changes: 6 additions & 0 deletions src/shader_recompiler/frontend/translate/export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ void Translator::EmitExport(const GcnInst& inst) {
case MrtSwizzle::Alt:
static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3};
return AltSwizzle[comp];
case MrtSwizzle::Reverse:
static constexpr std::array<u32, 4> RevSwizzle = {3, 2, 1, 0};
return RevSwizzle[comp];
case MrtSwizzle::ReverseAlt:
static constexpr std::array<u32, 4> AltRevSwizzle = {3, 0, 1, 2};
return AltRevSwizzle[comp];
default:
UNREACHABLE();
}
Expand Down
16 changes: 10 additions & 6 deletions src/shader_recompiler/frontend/translate/scalar_alu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,13 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
case Opcode::S_SUB_I32:
return S_SUB_U32(inst);
case Opcode::S_MIN_U32:
return S_MIN_U32(inst);
return S_MIN_U32(false, inst);
case Opcode::S_MIN_I32:
return S_MIN_U32(true, inst);
case Opcode::S_MAX_U32:
return S_MAX_U32(inst);
return S_MAX_U32(false, inst);
case Opcode::S_MAX_I32:
return S_MAX_U32(true, inst);
case Opcode::S_WQM_B64:
break;
default:
Expand Down Expand Up @@ -533,18 +537,18 @@ void Translator::S_ADDC_U32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
}

void Translator::S_MAX_U32(const GcnInst& inst) {
void Translator::S_MAX_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result = ir.UMax(src0, src1);
const IR::U32 result = ir.IMax(src0, src1, is_signed);
SetDst(inst.dst[0], result);
ir.SetScc(ir.IEqual(result, src0));
}

void Translator::S_MIN_U32(const GcnInst& inst) {
void Translator::S_MIN_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 result = ir.UMin(src0, src1);
const IR::U32 result = ir.IMin(src0, src1, is_signed);
SetDst(inst.dst[0], result);
ir.SetScc(ir.IEqual(result, src0));
}
Expand Down
8 changes: 5 additions & 3 deletions src/shader_recompiler/frontend/translate/translate.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ class Translator {
void S_ADDC_U32(const GcnInst& inst);
void S_MULK_I32(const GcnInst& inst);
void S_ADDK_I32(const GcnInst& inst);
void S_MAX_U32(const GcnInst& inst);
void S_MIN_U32(const GcnInst& inst);
void S_MAX_U32(bool is_signed, const GcnInst& inst);
void S_MIN_U32(bool is_signed, const GcnInst& inst);
void S_CMPK(ConditionOp cond, bool is_signed, const GcnInst& inst);

// Scalar Memory
Expand Down Expand Up @@ -173,7 +173,7 @@ class Translator {
void V_BCNT_U32_B32(const GcnInst& inst);
void V_COS_F32(const GcnInst& inst);
void V_MAX3_F32(const GcnInst& inst);
void V_MAX3_U32(const GcnInst& inst);
void V_MAX3_U32(bool is_signed, const GcnInst& inst);
void V_CVT_I32_F32(const GcnInst& inst);
void V_MIN_I32(const GcnInst& inst);
void V_MUL_LO_U32(const GcnInst& inst);
Expand Down Expand Up @@ -217,6 +217,8 @@ class Translator {
void V_READFIRSTLANE_B32(const GcnInst& inst);
void V_READLANE_B32(const GcnInst& inst);
void V_WRITELANE_B32(const GcnInst& inst);
void DS_APPEND(const GcnInst& inst);
void DS_CONSUME(const GcnInst& inst);
void S_BARRIER();

// MIMG
Expand Down
35 changes: 26 additions & 9 deletions src/shader_recompiler/frontend/translate/vector_alu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,9 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_MAX3_F32:
return V_MAX3_F32(inst);
case Opcode::V_MAX3_U32:
return V_MAX3_U32(inst);
return V_MAX3_U32(false, inst);
case Opcode::V_MAX3_I32:
    // V_MAX3_I32 is the signed counterpart of V_MAX3_U32 and has three sources, so it must
    // go through the three-operand handler (which reads src[0..2]) with is_signed = true.
    return V_MAX3_U32(true, inst);
case Opcode::V_TRUNC_F32:
return V_TRUNC_F32(inst);
case Opcode::V_CEIL_F32:
Expand Down Expand Up @@ -831,11 +833,11 @@ void Translator::V_MAX3_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMax(src0, ir.FPMax(src1, src2)));
}

void Translator::V_MAX3_U32(const GcnInst& inst) {
void Translator::V_MAX3_U32(bool is_signed, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 src2{GetSrc(inst.src[2])};
SetDst(inst.dst[0], ir.UMax(src0, ir.UMax(src1, src2)));
SetDst(inst.dst[0], ir.IMax(src0, ir.IMax(src1, src2, is_signed), is_signed));
}

void Translator::V_CVT_I32_F32(const GcnInst& inst) {
Expand Down Expand Up @@ -967,14 +969,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
}

void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
if (!is_low) {
ASSERT(src0.IsImmediate() && src0.U32() == ~0U && src1.IsImmediate() && src1.U32() == 0U);
return;
// v_mbcnt_hi_u32_b32 v2, -1, 0
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193 &&
inst.src[1].field == OperandField::ConstZero) {
return;
}
// v_mbcnt_hi_u32_b32 vX, exec_hi, 0
if (inst.src[0].field == OperandField::ExecHi &&
inst.src[1].field == OperandField::ConstZero) {
return;
}
} else {
// v_mbcnt_lo_u32_b32 v2, -1, vX
// Used in combination with the above to fetch the lane id in non-compute stages.
if (inst.src[0].field == OperandField::SignedConstIntNeg && inst.src[0].code == 193) {
SetDst(inst.dst[0], ir.LaneId());
}
// v_mbcnt_lo_u32_b32 v20, exec_lo, vX
// Used in combination with the above for append buffer indexing.
if (inst.src[0].field == OperandField::ExecLo) {
SetDst(inst.dst[0], ir.Imm32(0));
}
}
ASSERT(src0.IsImmediate() && src0.U32() == ~0U);
SetDst(inst.dst[0], ir.LaneId());
}

void Translator::V_BFM_B32(const GcnInst& inst) {
Expand Down
Loading

0 comments on commit 13743b2

Please sign in to comment.