Add id descriptor feature to SPIR-V

Id descriptors are computed as a recursive hash of all instructions used to define an id. Descriptors are invarint of actual id values and the similar code in different files would produce the same descriptors. Multiple ids can have the same descriptor. For example %1 = OpConstant %u32 1 %2 = OpConstant %u32 1 would produce two ids with the same descriptor. But %3 = OpConstant %s32 1 %4 = OpConstant %u32 2 would have descriptors different from %1 and %2. Descriptors will be used as handles of move-to-front sequences in SPIR-V compression.
2024-11-23 23:39:40 +00:00 · 2017-08-02 16:47:25 -04:00 · 2017-08-02 16:47:25 -04:00 · 78cf86150e
commit 78cf86150e
parent b0310a4156
9 changed files with 316 additions and 13 deletions
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@ -207,6 +207,7 @@ set(SPIRV_SOURCES
  ${CMAKE_CURRENT_SOURCE_DIR}/enum_string_mapping.h
  ${CMAKE_CURRENT_SOURCE_DIR}/ext_inst.h
  ${CMAKE_CURRENT_SOURCE_DIR}/extensions.h
+  ${CMAKE_CURRENT_SOURCE_DIR}/id_descriptor.h
  ${CMAKE_CURRENT_SOURCE_DIR}/instruction.h
  ${CMAKE_CURRENT_SOURCE_DIR}/macro.h
  ${CMAKE_CURRENT_SOURCE_DIR}/name_mapper.h
@ -234,6 +235,7 @@ set(SPIRV_SOURCES
  ${CMAKE_CURRENT_SOURCE_DIR}/enum_string_mapping.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/ext_inst.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/extensions.cpp
+  ${CMAKE_CURRENT_SOURCE_DIR}/id_descriptor.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/libspirv.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/message.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/name_mapper.cpp
--- a/source/id_descriptor.cpp
+++ b/source/id_descriptor.cpp
@ -0,0 +1,78 @@
+// Copyright (c) 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "id_descriptor.h"
+
+#include <cassert>
+#include <iostream>
+
+#include "opcode.h"
+#include "operand.h"
+
+namespace libspirv {
+
+namespace {
+
+// Hashes an array of words. Order of words is important.
+uint32_t HashU32Array(const std::vector<uint32_t>& words) {
+  // The hash function is a sum of hashes of each word seeded by word index.
+  // Knuth's multiplicative hash is used to hash the words.
+  const uint32_t kKnuthMulHash = 2654435761;
+  uint32_t val = 0;
+  for (uint32_t i = 0; i < words.size(); ++i) {
+    val += (words[i] + i + 123) * kKnuthMulHash;
+  }
+  return val;
+}
+
+}  // namespace
+
+uint32_t IdDescriptorCollection::ProcessInstruction(
+    const spv_parsed_instruction_t& inst) {
+  if (!inst.result_id)
+    return 0;
+
+  assert(words_.empty());
+  words_.push_back(inst.words[0]);
+
+  for (size_t operand_index = 0; operand_index < inst.num_operands;
+       ++operand_index) {
+    const auto &operand = inst.operands[operand_index];
+    if (spvIsIdType(operand.type)) {
+      const uint32_t id = inst.words[operand.offset];
+      const auto it = id_to_descriptor_.find(id);
+      // Forward declared ids are not hashed.
+      if (it != id_to_descriptor_.end()) {
+        words_.push_back(it->second);
+      }
+    } else {
+      for (size_t operand_word_index = 0;
+           operand_word_index < operand.num_words; ++operand_word_index) {
+        words_.push_back(inst.words[operand.offset + operand_word_index]);
+      }
+    }
+  }
+
+  const uint32_t descriptor = HashU32Array(words_);
+  assert(descriptor);
+
+  words_.clear();
+
+  const auto result = id_to_descriptor_.emplace(inst.result_id, descriptor);
+  assert(result.second);
+  (void)result;
+  return descriptor;
+}
+
+}  // namespace libspirv
--- a/source/id_descriptor.h
+++ b/source/id_descriptor.h
@ -0,0 +1,59 @@
+// Copyright (c) 2017 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_ID_DESCRIPTOR_H_
+#define LIBSPIRV_ID_DESCRIPTOR_H_
+
+#include <unordered_map>
+#include <vector>
+
+#include "spirv-tools/libspirv.hpp"
+
+namespace libspirv {
+
+// Computes and stores id descriptors.
+//
+// Descriptors are computed as hash of all words in the instruction where ids
+// were substituted with previously computed descriptors.
+class IdDescriptorCollection {
+ public:
+  IdDescriptorCollection() {
+    words_.reserve(16);
+  }
+
+  // Computes descriptor for the result id of the given instruction and
+  // registers it in id_to_descriptor_. Returns the computed descriptor.
+  // This function needs to be sequentially called for every instruction in the
+  // module.
+  uint32_t ProcessInstruction(const spv_parsed_instruction_t& inst);
+
+  // Returns a previously computed descriptor id.
+  uint32_t GetDescriptor(uint32_t id) const {
+    const auto it = id_to_descriptor_.find(id);
+    if (it == id_to_descriptor_.end())
+      return 0;
+    return it->second;
+  }
+
+ private:
+  std::unordered_map<uint32_t, uint32_t> id_to_descriptor_;
+
+  // Scratch buffer used for hashing. Class member to optimize on allocation.
+  std::vector<uint32_t> words_;
+};
+
+}  // namespace libspirv
+
+#endif  // LIBSPIRV_ID_DESCRIPTOR_H_
+
--- a/source/spirv_stats.cpp
+++ b/source/spirv_stats.cpp
@ -26,6 +26,7 @@
 #include "enum_string_mapping.h"
 #include "extensions.h"
 #include "instruction.h"
+#include "id_descriptor.h"
 #include "opcode.h"
 #include "operand.h"
 #include "spirv-tools/libspirv.h"
@ -35,6 +36,7 @@
 #include "val/instruction.h"
 #include "val/validation_state.h"

+using libspirv::IdDescriptorCollection;
 using libspirv::Instruction;
 using libspirv::SpirvStats;
 using libspirv::ValidationState_t;
@ -77,10 +79,41 @@ class StatsAggregator {
    ProcessEnums();
    ProcessLiteralStrings();
    ProcessNonIdWords();
+    ProcessIdDescriptors();

    return SPV_SUCCESS;
  }

+  // Collects statistics of descriptors generated by IdDescriptorCollection.
+  void ProcessIdDescriptors() {
+    const Instruction& inst = GetCurrentInstruction();
+    const uint32_t new_descriptor =
+        id_descriptors_.ProcessInstruction(inst.c_inst());
+
+    if (new_descriptor) {
+      std::stringstream ss;
+      ss << spvOpcodeString(inst.opcode());
+      for (size_t i = 1; i < inst.words().size(); ++i) {
+        ss << " " << inst.word(i);
+      }
+      stats_->id_descriptor_labels.emplace(new_descriptor, ss.str());
+    }
+
+    uint32_t index = 0;
+    for (const auto& operand : inst.operands()) {
+      if (spvIsIdType(operand.type)) {
+        const uint32_t descriptor =
+            id_descriptors_.GetDescriptor(inst.word(operand.offset));
+        if (descriptor) {
+          ++stats_->id_descriptor_hist[descriptor];
+          ++stats_->operand_slot_id_descriptor_hist[
+              std::pair<uint32_t, uint32_t>(inst.opcode(), index)][descriptor];
+        }
+      }
+      ++index;
+    }
+  }
+
  // Collects statistics of enum words for operands of specific types.
  void ProcessEnums() {
    const Instruction& inst = GetCurrentInstruction();
@ -135,7 +168,7 @@ class StatsAggregator {
    uint32_t index = 0;
    for (const auto& operand : inst.operands()) {
      if (operand.num_words == 1 && !spvIsIdType(operand.type)) {
-          ++stats_->non_id_words_hist[std::pair<uint32_t, uint32_t>(
+          ++stats_->operand_slot_non_id_words_hist[std::pair<uint32_t, uint32_t>(
              inst.opcode(), index)][inst.word(operand.offset)];
      }
      ++index;
@ -240,6 +273,7 @@ class StatsAggregator {
  SpirvStats* stats_;
  spv_validator_options_t validator_options_;
  std::unique_ptr<ValidationState_t> vstate_;
+  IdDescriptorCollection id_descriptors_;
 };

 spv_result_t ProcessHeader(
--- a/source/spirv_stats.h
+++ b/source/spirv_stats.h
@ -77,7 +77,19 @@ struct SpirvStats {
  // This is a generalization of enum_hist, also includes literal integers and
  // masks.
  std::map<std::pair<uint32_t, uint32_t>,
-      std::map<uint32_t, uint32_t>> non_id_words_hist;
+      std::map<uint32_t, uint32_t>> operand_slot_non_id_words_hist;
+
+  // Historgam of descriptors generated by IdDescriptorCollection.
+  // Descriptor -> count.
+  std::unordered_map<uint32_t, uint32_t> id_descriptor_hist;
+
+  // Debut labels for id descriptors, descriptor -> label.
+  std::unordered_map<uint32_t, std::string> id_descriptor_labels;
+
+  // Historgam of descriptors generated by IdDescriptorCollection for every
+  // operand slot. pair<opcode, operand index> -> descriptor -> count.
+  std::map<std::pair<uint32_t, uint32_t>,
+      std::map<uint32_t, uint32_t>> operand_slot_id_descriptor_hist;

  // Histogram of literal strings, sharded by opcodes, opcode -> string -> count.
  // This is suboptimal if an opcode has multiple literal string operands,
--- a/test/stats/stats_aggregate_test.cpp
+++ b/test/stats/stats_aggregate_test.cpp
@ -433,4 +433,55 @@ OpMemoryModel Logical GLSL450
  EXPECT_EQ(1u, stats.s64_constant_hist.at(-64));
 }

+TEST(AggregateStats, IdDescriptor) {
+  const std::string code1 = R"(
+OpCapability Addresses
+OpCapability Kernel
+OpCapability GenericPointer
+OpCapability Linkage
+OpMemoryModel Physical32 OpenCL
+%u32 = OpTypeInt 32 0
+%f32 = OpTypeFloat 32
+%1 = OpConstant %f32 1
+%2 = OpConstant %f32 1
+%3 = OpConstant %u32 32
+)";
+
+  const std::string code2 = R"(
+OpCapability Shader
+OpCapability Linkage
+OpMemoryModel Logical GLSL450
+%f32 = OpTypeFloat 32
+%u32 = OpTypeInt 32 0
+%1 = OpConstant %f32 1
+%2 = OpConstant %f32 3
+%3 = OpConstant %u32 32
+)";
+
+  const uint32_t kF32 = 1951208733;
+  const uint32_t kU32 = 2430404313;
+  const uint32_t kF32_1 = 296981500;
+  const uint32_t kF32_3 = 1450415100;
+  const uint32_t kU32_32 = 827246872;
+
+  SpirvStats stats;
+
+  CompileAndAggregateStats(code1, &stats);
+
+  {
+    const std::unordered_map<uint32_t, uint32_t> expected = {
+      {kF32, 3}, {kU32, 2}, {kF32_1, 2}, {kU32_32, 1}
+    };
+    EXPECT_EQ(expected, stats.id_descriptor_hist);
+  }
+
+  CompileAndAggregateStats(code2, &stats);
+  {
+    const std::unordered_map<uint32_t, uint32_t> expected = {
+      {kF32, 6}, {kU32, 4}, {kF32_1, 3}, {kF32_3, 1}, {kU32_32, 2}
+    };
+    EXPECT_EQ(expected, stats.id_descriptor_hist);
+  }
+}
+
 }  // namespace
--- a/tools/stats/stats.cpp
+++ b/tools/stats/stats.cpp
@ -73,6 +73,11 @@ Options:
                   Output generated C++ code for Huffman codecs for
                   single-word non-id slots.
                   This flag disables non-C++ output.
+
+  --codegen_id_descriptor_huffman_codecs
+                   Output generated C++ code for Huffman codecs for
+                   common id descriptors.
+                   This flag disables non-C++ output.
 )",
      argv0, argv0, argv0);
 }
@ -113,6 +118,7 @@ int main(int argc, char** argv) {
  bool codegen_opcode_and_num_operands_markov_huffman_codecs = false;
  bool codegen_literal_string_huffman_codecs = false;
  bool codegen_non_id_word_huffman_codecs = false;
+  bool codegen_id_descriptor_huffman_codecs = false;

  std::vector<const char*> paths;
  const char* output_path = nullptr;
@ -128,27 +134,31 @@ int main(int argc, char** argv) {
        codegen_opcode_hist = true;
        export_text = false;
      } else if (0 == strcmp(cur_arg,
-			     "--codegen_opcode_and_num_operands_hist")) {
+                             "--codegen_opcode_and_num_operands_hist")) {
        codegen_opcode_and_num_operands_hist = true;
        export_text = false;
      } else if (strcmp(
-	  "--codegen_opcode_and_num_operands_markov_huffman_codecs",
-	  cur_arg) == 0) {
+          "--codegen_opcode_and_num_operands_markov_huffman_codecs",
+          cur_arg) == 0) {
        codegen_opcode_and_num_operands_markov_huffman_codecs = true;
        export_text = false;
      } else if (0 == strcmp(cur_arg,
-			     "--codegen_literal_string_huffman_codecs")) {
+                             "--codegen_literal_string_huffman_codecs")) {
        codegen_literal_string_huffman_codecs = true;
        export_text = false;
      } else if (0 == strcmp(cur_arg,
-			     "--codegen_non_id_word_huffman_codecs")) {
+                             "--codegen_non_id_word_huffman_codecs")) {
        codegen_non_id_word_huffman_codecs = true;
        export_text = false;
+      } else if (0 == strcmp(cur_arg,
+                             "--codegen_id_descriptor_huffman_codecs")) {
+        codegen_id_descriptor_huffman_codecs = true;
+        export_text = false;
      } else if (0 == strcmp(cur_arg, "--verbose") ||
-		 0 == strcmp(cur_arg, "-v")) {
+                 0 == strcmp(cur_arg, "-v")) {
        verbose = true;
      } else if (0 == strcmp(cur_arg, "--output") ||
-		 0 == strcmp(cur_arg, "-o")) {
+                 0 == strcmp(cur_arg, "-o")) {
        expect_output_path = true;
      } else {
        PrintUsage(argv[0]);
@ -157,10 +167,10 @@ int main(int argc, char** argv) {
      }
    } else {
      if (expect_output_path) {
-	output_path = cur_arg;
-	expect_output_path = false;
+        output_path = cur_arg;
+        expect_output_path = false;
      } else {
-	paths.push_back(cur_arg);
+        paths.push_back(cur_arg);
      }
    }
  }
@ -255,5 +265,10 @@ int main(int argc, char** argv) {
    analyzer.WriteCodegenNonIdWordHuffmanCodecs(out);
  }

+  if (codegen_id_descriptor_huffman_codecs) {
+    out << std::endl;
+    analyzer.WriteCodegenIdDescriptorHuffmanCodecs(out);
+  }
+
  return 0;
 }
--- a/tools/stats/stats_analyzer.cpp
+++ b/tools/stats/stats_analyzer.cpp
@ -726,7 +726,7 @@ void StatsAnalyzer::WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out) {
      << "  std::map<std::pair<uint32_t, uint32_t>, "
      << "std::unique_ptr<HuffmanCodec<uint64_t>>> codecs;\n";

-  for (const auto& kv : stats_.non_id_words_hist) {
+  for (const auto& kv : stats_.operand_slot_non_id_words_hist) {
    const auto& opcode_and_index = kv.first;
    const uint32_t opcode = opcode_and_index.first;
    const uint32_t index = opcode_and_index.second;
@ -764,3 +764,50 @@ void StatsAnalyzer::WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out) {

  out << "  return codecs;\n}\n";
 }
+
+void StatsAnalyzer::WriteCodegenIdDescriptorHuffmanCodecs(
+    std::ostream& out) {
+  out << "std::map<std::pair<uint32_t, uint32_t>, "
+      << "std::unique_ptr<HuffmanCodec<uint64_t>>>\n"
+      << "GetIdDescriptorHuffmanCodecs() {\n"
+      << "  std::map<std::pair<uint32_t, uint32_t>, "
+      << "std::unique_ptr<HuffmanCodec<uint64_t>>> codecs;\n";
+
+  for (const auto& kv : stats_.operand_slot_id_descriptor_hist) {
+    const auto& opcode_and_index = kv.first;
+    const uint32_t opcode = opcode_and_index.first;
+    const uint32_t index = opcode_and_index.second;
+
+    const double kOpcodeFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[opcode] < kOpcodeFrequentEnoughToAnalyze) continue;
+
+    const std::map<uint32_t, uint32_t>& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << "  {\n";
+    out << "    std::unique_ptr<HuffmanCodec<uint64_t>> "
+        << "codec(new HuffmanCodec<uint64_t>({\n";
+    for (const auto& pair : hist) {
+      const uint32_t descriptor = pair.first;
+      const uint32_t count = pair.second;
+      const double freq = double(count) / double(total);
+      const double kDescriptorFrequentEnoughToAnalyze = 0.005;
+      if (freq < kDescriptorFrequentEnoughToAnalyze) continue;
+      out << "      { " << descriptor << ", " << count << " },\n";
+    }
+
+    out << "      { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+
+    out << "    }));\n" << std::endl;
+    out << "    codecs.emplace(std::pair<uint32_t, uint32_t>(SpvOp"
+        << spvOpcodeString(SpvOp(opcode))
+        << ", " << index << "), std::move(codec));\n";
+    out << "  }\n\n";
+  }
+
+  out << "  return codecs;\n}\n";
+}
--- a/tools/stats/stats_analyzer.h
+++ b/tools/stats/stats_analyzer.h
@ -60,6 +60,11 @@ class StatsAnalyzer {
  // specific operand slot (opcode and operand number).
  void WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out);

+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for common id descriptors. Each Huffman codec is created for a
+  // specific operand slot (opcode and operand number).
+  void WriteCodegenIdDescriptorHuffmanCodecs(std::ostream& out);
+
 private:
  const libspirv::SpirvStats& stats_;