Skip to content

Commit

Permalink
Merge branch 'doitsujin:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
allfoxwy authored Aug 14, 2024
2 parents 72077ba + 159f540 commit 28fb7c7
Show file tree
Hide file tree
Showing 17 changed files with 149 additions and 124 deletions.
23 changes: 0 additions & 23 deletions dxvk.conf
Original file line number Diff line number Diff line change
Expand Up @@ -517,29 +517,6 @@
# d3d9.forceSwapchainMSAA = -1


# Long Mad
#
# Should Mad instructions be emitted as a single FFma, or the long way as a
# separate FMul and FAdd? The long form solves some rendering bugs in games
# whose z-pass shaders do not exactly match the regular vertex shader.
#
# Supported values:
# - True/False

# d3d11.longMad = False
# d3d9.longMad = False


# Long Dot
#
# Whether to emit dot products as an FMA chain or as a plain SPIR-V dot product.
#
# Supported values:
# - True/False

# d3d11.longDot = False


# Device Local Constant Buffers
#
# Enables using device local, host accessible memory for constant buffers in D3D9.
Expand Down
2 changes: 0 additions & 2 deletions src/d3d11/d3d11_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ namespace dxvk {
this->numBackBuffers = config.getOption<int32_t>("dxgi.numBackBuffers", 0);
this->maxFrameLatency = config.getOption<int32_t>("dxgi.maxFrameLatency", 0);
this->exposeDriverCommandLists = config.getOption<bool>("d3d11.exposeDriverCommandLists", true);
this->longMad = config.getOption<bool>("d3d11.longMad", false);
this->longDot = config.getOption<bool>("d3d11.longDot", false);
this->reproducibleCommandStream = config.getOption<bool>("d3d11.reproducibleCommandStream", false);

// Clamp LOD bias so that people don't abuse this in unintended ways
Expand Down
6 changes: 0 additions & 6 deletions src/d3d11/d3d11_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,6 @@ namespace dxvk {
/// Shader dump path
std::string shaderDumpPath;

/// Translate Mad/Dfma to separate FMul+FAdd
bool longMad;

/// Translate DpX to a precise FMul+FFma chain
bool longDot;

/// Ensure that for the same D3D commands the output VK commands
/// don't change between runs. Useful for comparative benchmarking,
/// can negatively affect performance.
Expand Down
1 change: 0 additions & 1 deletion src/d3d9/d3d9_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ namespace dxvk {
this->forceSampleRateShading = config.getOption<bool> ("d3d9.forceSampleRateShading", false);
this->forceAspectRatio = config.getOption<std::string> ("d3d9.forceAspectRatio", "");
this->enumerateByDisplays = config.getOption<bool> ("d3d9.enumerateByDisplays", true);
this->longMad = config.getOption<bool> ("d3d9.longMad", false);
this->cachedDynamicBuffers = config.getOption<bool> ("d3d9.cachedDynamicBuffers", false);
this->deviceLocalConstantBuffers = config.getOption<bool> ("d3d9.deviceLocalConstantBuffers", false);
this->allowDirectBufferMapping = config.getOption<bool> ("d3d9.allowDirectBufferMapping", true);
Expand Down
5 changes: 0 additions & 5 deletions src/d3d9/d3d9_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,6 @@ namespace dxvk {
/// Enumerate adapters by displays
bool enumerateByDisplays;

/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
/// This solves some rendering bugs in games that have z-pass shaders which
/// don't match entirely to the regular vertex shader in this way.
bool longMad;

/// Cached dynamic buffers: Maps all buffers in cached memory.
bool cachedDynamicBuffers;

Expand Down
50 changes: 20 additions & 30 deletions src/dxbc/dxbc_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1623,7 +1623,9 @@ namespace dxvk {

case DxbcOpcode::Mad:
case DxbcOpcode::DFma:
if (likely(!m_moduleInfo.options.longMad)) {
if (ins.controls.precise()) {
// FXC only emits precise mad if the shader explicitly uses
// the HLSL mad()/fma() intrinsics, let's preserve that.
dst.id = m_module.opFFma(typeId,
src.at(0).id, src.at(1).id, src.at(2).id);
} else {
Expand Down Expand Up @@ -2046,37 +2048,25 @@ namespace dxvk {
dst.type.ccount = 1;
dst.id = 0;

if (!m_moduleInfo.options.longDot) {
dst.id = m_module.opDot(
getVectorTypeId(dst.type),
src.at(0).id,
src.at(1).id);
uint32_t componentType = getVectorTypeId(dst.type);
uint32_t componentCount = srcMask.popCount();

if (ins.controls.precise() || m_precise)
m_module.decorate(dst.id, spv::DecorationNoContraction);
} else {
uint32_t componentType = getVectorTypeId(dst.type);
uint32_t componentCount = srcMask.popCount();

for (uint32_t i = 1; i <= componentCount; i++) {
uint32_t idx = componentCount - i;

if (dst.id) {
dst.id = m_module.opFFma(componentType,
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx),
dst.id);
} else {
dst.id = m_module.opFMul(componentType,
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &idx),
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &idx));
}

// Unconditionally mark as precise since the exact order of operation
// matters for some games, even if the instruction itself is not marked
// as precise.
m_module.decorate(dst.id, spv::DecorationNoContraction);
for (uint32_t i = 0; i < componentCount; i++) {
if (dst.id) {
dst.id = m_module.opFFma(componentType,
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &i),
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &i),
dst.id);
} else {
dst.id = m_module.opFMul(componentType,
m_module.opCompositeExtract(componentType, src.at(0).id, 1, &i),
m_module.opCompositeExtract(componentType, src.at(1).id, 1, &i));
}

// Unconditionally mark as precise since the exact order of operation
// matters for some games, even if the instruction itself is not marked
// as precise.
m_module.decorate(dst.id, spv::DecorationNoContraction);
}

dst = emitDstOperandModifiers(dst, ins.modifiers);
Expand Down
2 changes: 0 additions & 2 deletions src/dxbc/dxbc_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ namespace dxvk {
disableMsaa = options.disableMsaa;
forceSampleRateShading = options.forceSampleRateShading;
enableSampleShadingInterlock = device->features().extFragmentShaderInterlock.fragmentShaderSampleInterlock;
longMad = options.longMad;
longDot = options.longDot;

// Figure out float control flags to match D3D11 rules
if (options.floatControls) {
Expand Down
6 changes: 0 additions & 6 deletions src/dxbc/dxbc_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@ namespace dxvk {

/// Minimum storage buffer alignment
VkDeviceSize minSsboAlignment = 0;

/// Translate Mad/Dfma to separate FMul+FAdd
bool longMad;

/// Translate DpX to a precise FMul+FFma chain
bool longDot;
};

}
20 changes: 6 additions & 14 deletions src/dxso/dxso_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1932,21 +1932,13 @@ namespace dxvk {
emitRegisterLoad(src[1], mask).id);
break;
case DxsoOpcode::Mad:
if (!m_moduleInfo.options.longMad) {
result.id = emitFma(
emitRegisterLoad(src[0], mask),
emitRegisterLoad(src[1], mask),
emitRegisterLoad(src[2], mask)).id;
}
else {
result.id = emitMul(
emitRegisterLoad(src[0], mask),
emitRegisterLoad(src[1], mask)).id;
result.id = emitMul(
emitRegisterLoad(src[0], mask),
emitRegisterLoad(src[1], mask)).id;

result.id = m_module.opFAdd(typeId,
result.id,
emitRegisterLoad(src[2], mask).id);
}
result.id = m_module.opFAdd(typeId,
result.id,
emitRegisterLoad(src[2], mask).id);
break;
case DxsoOpcode::Mul:
result.id = emitMul(
Expand Down
1 change: 0 additions & 1 deletion src/dxso/dxso_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ namespace dxvk {

vertexFloatConstantBufferAsSSBO = pDevice->GetVertexConstantLayout().floatSize() > devInfo.core.properties.limits.maxUniformBufferRange;

longMad = options.longMad;
robustness2Supported = devFeatures.extRobustness2.robustBufferAccess2;

drefScaling = options.drefScaling;
Expand Down
5 changes: 0 additions & 5 deletions src/dxso/dxso_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@ namespace dxvk {
/// Should the SWVP float constant buffer be a SSBO (because of the size on NV)
bool vertexFloatConstantBufferAsSSBO;

/// Should we make our Mads a FFma or do it the long way with an FMul and an FAdd?
/// This solves some rendering bugs in games that have z-pass shaders which
/// don't match entirely to the regular vertex shader in this way.
bool longMad;

/// Whether or not we can rely on robustness2 to handle oob constant access
bool robustness2Supported;

Expand Down
2 changes: 1 addition & 1 deletion src/dxvk/dxvk_instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ namespace dxvk {
appInfo.pApplicationName = appName.c_str();
appInfo.applicationVersion = flags.raw();
appInfo.pEngineName = "DXVK";
appInfo.engineVersion = VK_MAKE_API_VERSION(0, 2, 4, 0);
appInfo.engineVersion = VK_MAKE_API_VERSION(0, 2, 4, 1);
appInfo.apiVersion = VK_MAKE_API_VERSION(0, 1, 3, 0);

VkInstanceCreateInfo info = { VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO };
Expand Down
12 changes: 12 additions & 0 deletions src/spirv/spirv_code_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,18 @@ namespace dxvk {
}


void SpirvCodeBuffer::append(const SpirvInstruction& ins) {
  // Number of words reported by the instruction; every one of them
  // is copied to the tail of the code array.
  const auto wordCount = ins.length();
  const size_t base = m_code.size();

  // Grow the array once, then fill in the newly added slots
  m_code.resize(base + wordCount);

  uint32_t word = 0;

  while (word < wordCount) {
    m_code[base + word] = ins.arg(word);
    word += 1;
  }

  // Advance m_ptr by the number of words that were added
  m_ptr += wordCount;
}


void SpirvCodeBuffer::append(const SpirvCodeBuffer& other) {
if (other.size() != 0) {
const size_t size = m_code.size();
Expand Down
8 changes: 8 additions & 0 deletions src/spirv/spirv_code_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ namespace dxvk {
*/
uint32_t allocId();

/**
* \brief Appends an instruction
*
* Slightly faster than individually adding words.
* \param [in] ins Instruction
*/
void append(const SpirvInstruction& ins);

/**
* \brief Merges two code buffers
*
Expand Down
97 changes: 95 additions & 2 deletions src/spirv/spirv_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace dxvk {
}


SpirvCodeBuffer SpirvModule::compile() const {
SpirvCodeBuffer SpirvModule::compile() {
SpirvCodeBuffer result;
result.putHeader(m_version, m_id);
result.append(m_capabilities);
Expand All @@ -28,7 +28,35 @@ namespace dxvk {
result.append(m_annotations);
result.append(m_typeConstDefs);
result.append(m_variables);
result.append(m_code);

// Perform some crude dead code elimination. In some cases, our compilers
// may emit invalid code, such as an unreachable block branching to a loop's
// continue block, but those cases cannot be reasonably detected up-front.
std::unordered_set<uint32_t> reachableBlocks;
std::unordered_set<uint32_t> mergeBlocks;

classifyBlocks(reachableBlocks, mergeBlocks);

bool reachable = true;

for (auto ins : m_code) {
if (ins.opCode() == spv::OpFunctionEnd) {
reachable = true;
result.append(ins);
} else if (ins.opCode() == spv::OpLabel) {
uint32_t labelId = ins.arg(1);

if ((reachable = reachableBlocks.find(labelId) != reachableBlocks.end())) {
result.append(ins);
} else if (mergeBlocks.find(labelId) != mergeBlocks.end()) {
result.append(ins);
result.putIns(spv::OpUnreachable, 1);
}
} else if (reachable) {
result.append(ins);
}
}

return result;
}

Expand Down Expand Up @@ -3905,4 +3933,69 @@ namespace dxvk {
}
}


void SpirvModule::classifyBlocks(
        std::unordered_set<uint32_t>& reachableBlocks,
        std::unordered_set<uint32_t>& mergeBlocks) {
  // Branch edges keyed by the ID of the block that contains the branch.
  // Key 0 acts as a pseudo-root: the first label encountered after each
  // OpFunction is recorded as one of its targets.
  std::unordered_multimap<uint32_t, uint32_t> edges;

  // ID of the block currently being scanned, 0 while no label
  // has been seen yet inside the current function.
  uint32_t currentBlock = 0u;

  for (auto ins : m_code) {
    const auto op = ins.opCode();

    if (op == spv::OpFunction) {
      // New function, the next label is its entry block
      currentBlock = 0u;
    } else if (op == spv::OpLabel) {
      const uint32_t labelId = ins.arg(1);

      if (currentBlock == 0u)
        edges.insert({ 0u, labelId });

      currentBlock = labelId;
    } else if (op == spv::OpBranch) {
      edges.insert({ currentBlock, ins.arg(1) });
    } else if (op == spv::OpBranchConditional) {
      // Both branch targets are potential successors
      edges.insert({ currentBlock, ins.arg(2) });
      edges.insert({ currentBlock, ins.arg(3) });
    } else if (op == spv::OpSwitch) {
      // Argument 2 is always a target; further targets are
      // read from every second word starting at index 4.
      edges.insert({ currentBlock, ins.arg(2) });

      for (uint32_t word = 4; word < ins.length(); word += 2)
        edges.insert({ currentBlock, ins.arg(word) });
    } else if (op == spv::OpSelectionMerge || op == spv::OpLoopMerge) {
      // Remember the block named by the first argument
      mergeBlocks.insert(ins.arg(1));
    }
  }

  // Walk the edge list from the pseudo-root using a FIFO work list.
  // A block is queued only the first time it is added to the set.
  std::queue<uint32_t> pending;
  pending.push(0u);

  while (!pending.empty()) {
    const uint32_t source = pending.front();
    pending.pop();

    const auto successors = edges.equal_range(source);

    for (auto it = successors.first; it != successors.second; ++it) {
      const uint32_t target = it->second;

      if (reachableBlocks.insert(target).second)
        pending.push(target);
    }
  }
}

}
8 changes: 7 additions & 1 deletion src/spirv/spirv_module.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#pragma once

#include <queue>
#include <unordered_map>
#include <unordered_set>

#include "spirv_code_buffer.h"
Expand Down Expand Up @@ -59,7 +61,7 @@ namespace dxvk {

~SpirvModule();

SpirvCodeBuffer compile() const;
SpirvCodeBuffer compile();

size_t getInsertionPtr() {
return m_code.getInsertionPtr();
Expand Down Expand Up @@ -1326,6 +1328,10 @@ namespace dxvk {
bool isInterfaceVar(
spv::StorageClass sclass) const;

void classifyBlocks(
std::unordered_set<uint32_t>& reachableBlocks,
std::unordered_set<uint32_t>& mergeBlocks);

};

}
Loading

0 comments on commit 28fb7c7

Please sign in to comment.