Skip to content

Commit

Permalink
Add id descriptor feature to SPIR-V
Browse files Browse the repository at this point in the history
Id descriptors are computed as a recursive hash of all instructions used
to define an id. Descriptors are invariant to actual id values, so
similar code in different files would produce the same descriptors.

Multiple ids can have the same descriptor. For example
%1 = OpConstant %u32 1
%2 = OpConstant %u32 1
would produce two ids with the same descriptor. But
%3 = OpConstant %s32 1
%4 = OpConstant %u32 2
would have descriptors different from %1 and %2.

Descriptors will be used as handles of move-to-front sequences in SPIR-V
compression.
  • Loading branch information
Andrey Tuganov authored and dneto0 committed Aug 10, 2017
1 parent b0310a4 commit 78cf861
Show file tree
Hide file tree
Showing 9 changed files with 316 additions and 13 deletions.
2 changes: 2 additions & 0 deletions source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ set(SPIRV_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/enum_string_mapping.h
${CMAKE_CURRENT_SOURCE_DIR}/ext_inst.h
${CMAKE_CURRENT_SOURCE_DIR}/extensions.h
${CMAKE_CURRENT_SOURCE_DIR}/id_descriptor.h
${CMAKE_CURRENT_SOURCE_DIR}/instruction.h
${CMAKE_CURRENT_SOURCE_DIR}/macro.h
${CMAKE_CURRENT_SOURCE_DIR}/name_mapper.h
Expand Down Expand Up @@ -234,6 +235,7 @@ set(SPIRV_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/enum_string_mapping.cpp
${CMAKE_CURRENT_SOURCE_DIR}/ext_inst.cpp
${CMAKE_CURRENT_SOURCE_DIR}/extensions.cpp
${CMAKE_CURRENT_SOURCE_DIR}/id_descriptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/libspirv.cpp
${CMAKE_CURRENT_SOURCE_DIR}/message.cpp
${CMAKE_CURRENT_SOURCE_DIR}/name_mapper.cpp
Expand Down
78 changes: 78 additions & 0 deletions source/id_descriptor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Copyright (c) 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "id_descriptor.h"

#include <cassert>
#include <iostream>

#include "opcode.h"
#include "operand.h"

namespace libspirv {

namespace {

// Hashes an array of 32-bit words into a single 32-bit value.
//
// Every word is offset by its index plus a fixed bias of 123, multiplied by
// Knuth's multiplicative hashing constant, and the products are summed with
// 32-bit wraparound. An empty array hashes to 0.
//
// NOTE(review): the multiplication distributes over the wrapping sum, so the
// result equals kKnuthMulHash * (sum of words + sum of indices + 123 * count)
// modulo 2^32. Permuting the words therefore does NOT change the hash, despite
// the index being mixed in. The formula is left untouched because changing it
// would change every descriptor value derived from it.
uint32_t HashU32Array(const std::vector<uint32_t>& words) {
  const uint32_t kKnuthMulHash = 2654435761;
  uint32_t hash = 0;
  uint32_t position = 0;
  for (const uint32_t word : words) {
    hash += (word + position + 123) * kKnuthMulHash;
    ++position;
  }
  return hash;
}

} // namespace

// Computes the descriptor for the result id of |inst|, records it in
// id_to_descriptor_ and returns it. Returns 0 without recording anything when
// the instruction has no result id.
//
// The hash input is the instruction's first word followed by, for each
// operand in order: the already registered descriptor when the operand is an
// id, or the operand's raw words otherwise.
uint32_t IdDescriptorCollection::ProcessInstruction(
    const spv_parsed_instruction_t& inst) {
  // Only instructions that define a result id receive a descriptor.
  if (inst.result_id == 0) return 0;

  // The scratch buffer must have been left empty by the previous call.
  assert(words_.empty());
  words_.push_back(inst.words[0]);

  const size_t operand_count = inst.num_operands;
  for (size_t i = 0; i != operand_count; ++i) {
    const auto& operand = inst.operands[i];

    if (!spvIsIdType(operand.type)) {
      // Non-id operands contribute every one of their words verbatim.
      const size_t first_word = operand.offset;
      const size_t end_word = first_word + operand.num_words;
      for (size_t w = first_word; w != end_word; ++w) {
        words_.push_back(inst.words[w]);
      }
      continue;
    }

    // Id operands contribute the descriptor registered for that id. Ids with
    // no registered descriptor (e.g. forward declared ids) are left out of
    // the hash.
    const auto known = id_to_descriptor_.find(inst.words[operand.offset]);
    if (known != id_to_descriptor_.end()) {
      words_.push_back(known->second);
    }
  }

  const uint32_t descriptor = HashU32Array(words_);
  // 0 doubles as the "no descriptor" value, so a zero hash is treated as a
  // programming error. NOTE(review): this is only checked when assertions are
  // enabled.
  assert(descriptor);
  words_.clear();

  // Each result id is expected to be registered exactly once.
  const auto inserted = id_to_descriptor_.emplace(inst.result_id, descriptor);
  assert(inserted.second);
  (void)inserted;
  return descriptor;
}

} // namespace libspirv
59 changes: 59 additions & 0 deletions source/id_descriptor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (c) 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef LIBSPIRV_ID_DESCRIPTOR_H_
#define LIBSPIRV_ID_DESCRIPTOR_H_

#include <unordered_map>
#include <vector>

#include "spirv-tools/libspirv.hpp"

namespace libspirv {

// Computes id descriptors and keeps a mapping from id to descriptor.
//
// A descriptor is a hash of the words of the instruction defining the id,
// where id operands are replaced by their previously computed descriptors.
class IdDescriptorCollection {
 public:
  // Pre-sizes the scratch buffer so hashing typical instructions does not
  // need to grow it.
  IdDescriptorCollection() { words_.reserve(16); }

  // Computes the descriptor for the result id of |inst|, stores it in
  // id_to_descriptor_ and returns it.
  // Must be called sequentially for every instruction in the module.
  uint32_t ProcessInstruction(const spv_parsed_instruction_t& inst);

  // Returns the descriptor previously computed for |id|, or 0 if no
  // descriptor has been registered for it.
  uint32_t GetDescriptor(uint32_t id) const {
    const auto found = id_to_descriptor_.find(id);
    return found != id_to_descriptor_.end() ? found->second : 0;
  }

 private:
  // Result id -> descriptor.
  std::unordered_map<uint32_t, uint32_t> id_to_descriptor_;

  // Scratch buffer holding the words to hash. Kept as a member so its
  // allocation is reused across calls.
  std::vector<uint32_t> words_;
};

} // namespace libspirv

#endif // LIBSPIRV_ID_DESCRIPTOR_H_

36 changes: 35 additions & 1 deletion source/spirv_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "enum_string_mapping.h"
#include "extensions.h"
#include "instruction.h"
#include "id_descriptor.h"
#include "opcode.h"
#include "operand.h"
#include "spirv-tools/libspirv.h"
Expand All @@ -35,6 +36,7 @@
#include "val/instruction.h"
#include "val/validation_state.h"

using libspirv::IdDescriptorCollection;
using libspirv::Instruction;
using libspirv::SpirvStats;
using libspirv::ValidationState_t;
Expand Down Expand Up @@ -77,10 +79,41 @@ class StatsAggregator {
ProcessEnums();
ProcessLiteralStrings();
ProcessNonIdWords();
ProcessIdDescriptors();

return SPV_SUCCESS;
}

// Collects statistics of descriptors generated by IdDescriptorCollection.
//
// Registers the descriptor of the current instruction's result id (if any),
// records a debug label for descriptors seen for the first time, and then
// counts, for every id operand with a known descriptor, one occurrence in
// both the global histogram and the per-(opcode, operand index) histogram.
void ProcessIdDescriptors() {
  const Instruction& inst = GetCurrentInstruction();

  // Must happen before the operand loop below so that the instruction's own
  // result id is already registered when its operands are counted.
  const uint32_t new_descriptor =
      id_descriptors_.ProcessInstruction(inst.c_inst());

  // Only the first label recorded for a descriptor is kept (emplace never
  // overwrites), so skip formatting the label when one already exists.
  if (new_descriptor &&
      stats_->id_descriptor_labels.count(new_descriptor) == 0) {
    std::stringstream ss;
    ss << spvOpcodeString(inst.opcode());
    for (size_t i = 1; i < inst.words().size(); ++i) {
      ss << " " << inst.word(i);
    }
    stats_->id_descriptor_labels.emplace(new_descriptor, ss.str());
  }

  // |index| is the operand slot and advances for every operand, id or not.
  uint32_t index = 0;
  for (const auto& operand : inst.operands()) {
    if (spvIsIdType(operand.type)) {
      const uint32_t descriptor =
          id_descriptors_.GetDescriptor(inst.word(operand.offset));
      // A zero descriptor means none is registered for this id; such
      // operands are not counted.
      if (descriptor) {
        ++stats_->id_descriptor_hist[descriptor];
        ++stats_->operand_slot_id_descriptor_hist[
            std::pair<uint32_t, uint32_t>(inst.opcode(), index)][descriptor];
      }
    }
    ++index;
  }
}

// Collects statistics of enum words for operands of specific types.
void ProcessEnums() {
const Instruction& inst = GetCurrentInstruction();
Expand Down Expand Up @@ -135,7 +168,7 @@ class StatsAggregator {
uint32_t index = 0;
for (const auto& operand : inst.operands()) {
if (operand.num_words == 1 && !spvIsIdType(operand.type)) {
++stats_->non_id_words_hist[std::pair<uint32_t, uint32_t>(
++stats_->operand_slot_non_id_words_hist[std::pair<uint32_t, uint32_t>(
inst.opcode(), index)][inst.word(operand.offset)];
}
++index;
Expand Down Expand Up @@ -240,6 +273,7 @@ class StatsAggregator {
SpirvStats* stats_;
spv_validator_options_t validator_options_;
std::unique_ptr<ValidationState_t> vstate_;
IdDescriptorCollection id_descriptors_;
};

spv_result_t ProcessHeader(
Expand Down
14 changes: 13 additions & 1 deletion source/spirv_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,19 @@ struct SpirvStats {
// This is a generalization of enum_hist, also includes literal integers and
// masks.
std::map<std::pair<uint32_t, uint32_t>,
std::map<uint32_t, uint32_t>> non_id_words_hist;
std::map<uint32_t, uint32_t>> operand_slot_non_id_words_hist;

// Histogram of descriptors generated by IdDescriptorCollection.
// Descriptor -> count.
std::unordered_map<uint32_t, uint32_t> id_descriptor_hist;

// Debug labels for id descriptors, descriptor -> label.
std::unordered_map<uint32_t, std::string> id_descriptor_labels;

// Histogram of descriptors generated by IdDescriptorCollection for every
// operand slot. pair<opcode, operand index> -> descriptor -> count.
std::map<std::pair<uint32_t, uint32_t>,
std::map<uint32_t, uint32_t>> operand_slot_id_descriptor_hist;

// Histogram of literal strings, sharded by opcodes, opcode -> string -> count.
// This is suboptimal if an opcode has multiple literal string operands,
Expand Down
51 changes: 51 additions & 0 deletions test/stats/stats_aggregate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,4 +433,55 @@ OpMemoryModel Logical GLSL450
EXPECT_EQ(1u, stats.s64_constant_hist.at(-64));
}

// Aggregates two modules into the same SpirvStats object and checks the id
// descriptor histogram after each one. The modules declare the float and int
// types in opposite order, and the expected counts use the same descriptor
// keys for both modules.
TEST(AggregateStats, IdDescriptor) {
  const std::string first_module = R"(
OpCapability Addresses
OpCapability Kernel
OpCapability GenericPointer
OpCapability Linkage
OpMemoryModel Physical32 OpenCL
%u32 = OpTypeInt 32 0
%f32 = OpTypeFloat 32
%1 = OpConstant %f32 1
%2 = OpConstant %f32 1
%3 = OpConstant %u32 32
)";

  const std::string second_module = R"(
OpCapability Shader
OpCapability Linkage
OpMemoryModel Logical GLSL450
%f32 = OpTypeFloat 32
%u32 = OpTypeInt 32 0
%1 = OpConstant %f32 1
%2 = OpConstant %f32 3
%3 = OpConstant %u32 32
)";

  // Expected descriptor values for the types and constants declared above.
  const uint32_t kF32 = 1951208733;
  const uint32_t kU32 = 2430404313;
  const uint32_t kF32_1 = 296981500;
  const uint32_t kF32_3 = 1450415100;
  const uint32_t kU32_32 = 827246872;

  SpirvStats stats;

  // Counts after the first module only.
  CompileAndAggregateStats(first_module, &stats);
  const std::unordered_map<uint32_t, uint32_t> expected_after_first = {
      {kF32, 3}, {kU32, 2}, {kF32_1, 2}, {kU32_32, 1}};
  EXPECT_EQ(expected_after_first, stats.id_descriptor_hist);

  // The second module is aggregated into the same stats, so the counts
  // accumulate on top of the first module's.
  CompileAndAggregateStats(second_module, &stats);
  const std::unordered_map<uint32_t, uint32_t> expected_after_both = {
      {kF32, 6}, {kU32, 4}, {kF32_1, 3}, {kF32_3, 1}, {kU32_32, 2}};
  EXPECT_EQ(expected_after_both, stats.id_descriptor_hist);
}

} // namespace
35 changes: 25 additions & 10 deletions tools/stats/stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ find . -name "*.spv" -print0 | xargs -0 -s 2000000 %s
Output generated C++ code for Huffman codecs for
single-word non-id slots.
This flag disables non-C++ output.
--codegen_id_descriptor_huffman_codecs
Output generated C++ code for Huffman codecs for
common id descriptors.
This flag disables non-C++ output.
)",
argv0, argv0, argv0);
}
Expand Down Expand Up @@ -113,6 +118,7 @@ int main(int argc, char** argv) {
bool codegen_opcode_and_num_operands_markov_huffman_codecs = false;
bool codegen_literal_string_huffman_codecs = false;
bool codegen_non_id_word_huffman_codecs = false;
bool codegen_id_descriptor_huffman_codecs = false;

std::vector<const char*> paths;
const char* output_path = nullptr;
Expand All @@ -128,27 +134,31 @@ int main(int argc, char** argv) {
codegen_opcode_hist = true;
export_text = false;
} else if (0 == strcmp(cur_arg,
"--codegen_opcode_and_num_operands_hist")) {
"--codegen_opcode_and_num_operands_hist")) {
codegen_opcode_and_num_operands_hist = true;
export_text = false;
} else if (strcmp(
"--codegen_opcode_and_num_operands_markov_huffman_codecs",
cur_arg) == 0) {
"--codegen_opcode_and_num_operands_markov_huffman_codecs",
cur_arg) == 0) {
codegen_opcode_and_num_operands_markov_huffman_codecs = true;
export_text = false;
} else if (0 == strcmp(cur_arg,
"--codegen_literal_string_huffman_codecs")) {
"--codegen_literal_string_huffman_codecs")) {
codegen_literal_string_huffman_codecs = true;
export_text = false;
} else if (0 == strcmp(cur_arg,
"--codegen_non_id_word_huffman_codecs")) {
"--codegen_non_id_word_huffman_codecs")) {
codegen_non_id_word_huffman_codecs = true;
export_text = false;
} else if (0 == strcmp(cur_arg,
"--codegen_id_descriptor_huffman_codecs")) {
codegen_id_descriptor_huffman_codecs = true;
export_text = false;
} else if (0 == strcmp(cur_arg, "--verbose") ||
0 == strcmp(cur_arg, "-v")) {
0 == strcmp(cur_arg, "-v")) {
verbose = true;
} else if (0 == strcmp(cur_arg, "--output") ||
0 == strcmp(cur_arg, "-o")) {
0 == strcmp(cur_arg, "-o")) {
expect_output_path = true;
} else {
PrintUsage(argv[0]);
Expand All @@ -157,10 +167,10 @@ int main(int argc, char** argv) {
}
} else {
if (expect_output_path) {
output_path = cur_arg;
expect_output_path = false;
output_path = cur_arg;
expect_output_path = false;
} else {
paths.push_back(cur_arg);
paths.push_back(cur_arg);
}
}
}
Expand Down Expand Up @@ -255,5 +265,10 @@ int main(int argc, char** argv) {
analyzer.WriteCodegenNonIdWordHuffmanCodecs(out);
}

if (codegen_id_descriptor_huffman_codecs) {
out << std::endl;
analyzer.WriteCodegenIdDescriptorHuffmanCodecs(out);
}

return 0;
}
Loading

0 comments on commit 78cf861

Please sign in to comment.