diff --git a/source/spirv_stats.cpp b/source/spirv_stats.cpp
index 2186e0d7..0c628d5d 100644
--- a/source/spirv_stats.cpp
+++ b/source/spirv_stats.cpp
@@ -74,10 +74,74 @@ class StatsAggregator {
     ProcessCapability();
     ProcessExtension();
     ProcessConstant();
+    ProcessEnums();
+    ProcessLiteralStrings();
+    ProcessNonIdWords();
 
     return SPV_SUCCESS;
   }
 
+  // Collects statistics of enum words for operands of specific types.
+  void ProcessEnums() {
+    const Instruction& inst = GetCurrentInstruction();
+    for (const auto& operand : inst.operands()) {
+      switch (operand.type) {
+        case SPV_OPERAND_TYPE_SOURCE_LANGUAGE:
+        case SPV_OPERAND_TYPE_EXECUTION_MODEL:
+        case SPV_OPERAND_TYPE_ADDRESSING_MODEL:
+        case SPV_OPERAND_TYPE_MEMORY_MODEL:
+        case SPV_OPERAND_TYPE_EXECUTION_MODE:
+        case SPV_OPERAND_TYPE_STORAGE_CLASS:
+        case SPV_OPERAND_TYPE_DIMENSIONALITY:
+        case SPV_OPERAND_TYPE_SAMPLER_ADDRESSING_MODE:
+        case SPV_OPERAND_TYPE_SAMPLER_FILTER_MODE:
+        case SPV_OPERAND_TYPE_SAMPLER_IMAGE_FORMAT:
+        case SPV_OPERAND_TYPE_IMAGE_CHANNEL_ORDER:
+        case SPV_OPERAND_TYPE_IMAGE_CHANNEL_DATA_TYPE:
+        case SPV_OPERAND_TYPE_FP_ROUNDING_MODE:
+        case SPV_OPERAND_TYPE_LINKAGE_TYPE:
+        case SPV_OPERAND_TYPE_ACCESS_QUALIFIER:
+        case SPV_OPERAND_TYPE_FUNCTION_PARAMETER_ATTRIBUTE:
+        case SPV_OPERAND_TYPE_DECORATION:
+        case SPV_OPERAND_TYPE_BUILT_IN:
+        case SPV_OPERAND_TYPE_GROUP_OPERATION:
+        case SPV_OPERAND_TYPE_KERNEL_ENQ_FLAGS:
+        case SPV_OPERAND_TYPE_KERNEL_PROFILING_INFO:
+        case SPV_OPERAND_TYPE_CAPABILITY: {
+          ++stats_->enum_hist[operand.type][inst.word(operand.offset)];
+          break;
+        }
+        default:
+          break;
+      }
+    }
+  }
+
+  // Collects statistics of literal strings used by opcodes.
+  void ProcessLiteralStrings() {
+    const Instruction& inst = GetCurrentInstruction();
+    for (const auto& operand : inst.operands()) {
+      if (operand.type == SPV_OPERAND_TYPE_LITERAL_STRING) {
+        const std::string str =
+            reinterpret_cast<const char*>(&inst.words()[operand.offset]);
+        ++stats_->literal_strings_hist[inst.opcode()][str];
+      }
+    }
+  }
+
+  // Collects statistics of all single word non-id operand slots.
+  void ProcessNonIdWords() {
+    const Instruction& inst = GetCurrentInstruction();
+    uint32_t index = 0;
+    for (const auto& operand : inst.operands()) {
+      if (operand.num_words == 1 && !spvIsIdType(operand.type)) {
+        ++stats_->non_id_words_hist[std::pair<uint32_t, uint32_t>(
+            inst.opcode(), index)][inst.word(operand.offset)];
+      }
+      ++index;
+    }
+  }
+
   // Collects OpCapability statistics.
   void ProcessCapability() {
     const Instruction& inst = GetCurrentInstruction();
@@ -100,7 +164,18 @@ class StatsAggregator {
       const SpvOp opcode = inst_it->opcode();
       ++stats_->opcode_hist[opcode];
 
+      const uint32_t opcode_and_num_operands =
+          (uint32_t(inst_it->operands().size()) << 16) | uint32_t(opcode);
+      ++stats_->opcode_and_num_operands_hist[opcode_and_num_operands];
+
       ++inst_it;
+
+      if (inst_it != vstate_->ordered_instructions().rend()) {
+        const SpvOp prev_opcode = inst_it->opcode();
+        ++stats_->opcode_and_num_operands_markov_hist[prev_opcode][
+            opcode_and_num_operands];
+      }
+
       auto step_it = stats_->opcode_markov_hist.begin();
       for (; inst_it != vstate_->ordered_instructions().rend() &&
              step_it != stats_->opcode_markov_hist.end(); ++inst_it, ++step_it) {
diff --git a/source/spirv_stats.h b/source/spirv_stats.h
index 9c7a41aa..4b169514 100644
--- a/source/spirv_stats.h
+++ b/source/spirv_stats.h
@@ -15,6 +15,7 @@
 #ifndef LIBSPIRV_SPIRV_STATS_H_
 #define LIBSPIRV_SPIRV_STATS_H_
 
+#include <map>
 #include
 #include
 #include
@@ -39,6 +40,10 @@ struct SpirvStats {
   // Opcode histogram, SpvOpXXX -> count.
std::unordered_map opcode_hist; + // Histogram of words combining opcode and number of operands, + // opcode | (num_operands << 16) -> count. + std::unordered_map opcode_and_num_operands_hist; + // OpConstant u16 histogram, value -> count. std::unordered_map u16_constant_hist; @@ -63,6 +68,29 @@ struct SpirvStats { // OpConstant f64 histogram, value -> count. std::unordered_map f64_constant_hist; + // Enum histogram, operand type -> operand value -> count. + std::unordered_map> enum_hist; + + // Histogram of all non-id single words. + // pair -> value -> count. + // This is a generalization of enum_hist, also includes literal integers and + // masks. + std::map, + std::map> non_id_words_hist; + + // Histogram of literal strings, sharded by opcodes, opcode -> string -> count. + // This is suboptimal if an opcode has multiple literal string operands, + // as it wouldn't differentiate between operands. + std::unordered_map> + literal_strings_hist; + + // Markov chain histograms: + // opcode -> next(opcode | (num_operands << 16)) -> count. + // See also opcode_and_num_operands_hist, which collects global statistics. + std::unordered_map> + opcode_and_num_operands_markov_hist; + // Used to collect statistics on opcodes triggering other opcodes. // Container scheme: gap between instructions -> cue opcode -> later opcode // -> count. diff --git a/tools/stats/stats.cpp b/tools/stats/stats.cpp index 51d61834..6e6878d3 100644 --- a/tools/stats/stats.cpp +++ b/tools/stats/stats.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -43,8 +44,35 @@ TIP: In order to collect statistics from all .spv files under current dir use find . -name "*.spv" -print0 | xargs -0 -s 2000000 %s Options: - -h, --help Print this help. - -v, --verbose Print additional info to stderr. + -h, --help + Print this help. + + -v, --verbose + Print additional info to stderr. + + --codegen_opcode_hist + Output generated C++ code for opcode histogram. + This flag disables non-C++ output. 
+ + --codegen_opcode_and_num_operands_hist + Output generated C++ code for opcode_and_num_operands + histogram. + This flag disables non-C++ output. + + --codegen_opcode_and_num_operands_markov_huffman_codecs + Output generated C++ code for Huffman codecs of + opcode_and_num_operands Markov chain. + This flag disables non-C++ output. + + --codegen_literal_string_huffman_codecs + Output generated C++ code for Huffman codecs for + literal strings. + This flag disables non-C++ output. + + --codegen_non_id_word_huffman_codecs + Output generated C++ code for Huffman codecs for + single-word non-id slots. + This flag disables non-C++ output. )", argv0, argv0, argv0); } @@ -77,9 +105,17 @@ int main(int argc, char** argv) { bool continue_processing = true; int return_code = 0; + bool expect_output_path = false; bool verbose = false; + bool export_text = true; + bool codegen_opcode_hist = false; + bool codegen_opcode_and_num_operands_hist = false; + bool codegen_opcode_and_num_operands_markov_huffman_codecs = false; + bool codegen_literal_string_huffman_codecs = false; + bool codegen_non_id_word_huffman_codecs = false; std::vector paths; + const char* output_path = nullptr; for (int argi = 1; continue_processing && argi < argc; ++argi) { const char* cur_arg = argv[argi]; @@ -88,15 +124,44 @@ int main(int argc, char** argv) { PrintUsage(argv[0]); continue_processing = false; return_code = 0; - } else if (0 == strcmp(cur_arg, "--verbose") || 0 == strcmp(cur_arg, "-v")) { + } else if (0 == strcmp(cur_arg, "--codegen_opcode_hist")) { + codegen_opcode_hist = true; + export_text = false; + } else if (0 == strcmp(cur_arg, + "--codegen_opcode_and_num_operands_hist")) { + codegen_opcode_and_num_operands_hist = true; + export_text = false; + } else if (strcmp( + "--codegen_opcode_and_num_operands_markov_huffman_codecs", + cur_arg) == 0) { + codegen_opcode_and_num_operands_markov_huffman_codecs = true; + export_text = false; + } else if (0 == strcmp(cur_arg, + 
"--codegen_literal_string_huffman_codecs")) { + codegen_literal_string_huffman_codecs = true; + export_text = false; + } else if (0 == strcmp(cur_arg, + "--codegen_non_id_word_huffman_codecs")) { + codegen_non_id_word_huffman_codecs = true; + export_text = false; + } else if (0 == strcmp(cur_arg, "--verbose") || + 0 == strcmp(cur_arg, "-v")) { verbose = true; + } else if (0 == strcmp(cur_arg, "--output") || + 0 == strcmp(cur_arg, "-o")) { + expect_output_path = true; } else { PrintUsage(argv[0]); continue_processing = false; return_code = 1; } } else { - paths.push_back(cur_arg); + if (expect_output_path) { + output_path = cur_arg; + expect_output_path = false; + } else { + paths.push_back(cur_arg); + } } } @@ -133,26 +198,62 @@ int main(int argc, char** argv) { StatsAnalyzer analyzer(stats); - std::ostream& out = std::cout; + std::ofstream fout; + if (output_path) { + fout.open(output_path); + if (!fout.is_open()) { + std::cerr << "error: Failed to open " << output_path << std::endl; + return 1; + } + } - out << std::endl; - analyzer.WriteVersion(out); - analyzer.WriteGenerator(out); + std::ostream& out = fout.is_open() ? 
fout : std::cout; - out << std::endl; - analyzer.WriteCapability(out); + if (export_text) { + out << std::endl; + analyzer.WriteVersion(out); + analyzer.WriteGenerator(out); - out << std::endl; - analyzer.WriteExtension(out); + out << std::endl; + analyzer.WriteCapability(out); - out << std::endl; - analyzer.WriteOpcode(out); + out << std::endl; + analyzer.WriteExtension(out); - out << std::endl; - analyzer.WriteOpcodeMarkov(out); + out << std::endl; + analyzer.WriteOpcode(out); - out << std::endl; - analyzer.WriteConstantLiterals(out); + out << std::endl; + analyzer.WriteOpcodeMarkov(out); + + out << std::endl; + analyzer.WriteConstantLiterals(out); + } + + if (codegen_opcode_hist) { + out << std::endl; + analyzer.WriteCodegenOpcodeHist(out); + } + + if (codegen_opcode_and_num_operands_hist) { + out << std::endl; + analyzer.WriteCodegenOpcodeAndNumOperandsHist(out); + } + + if (codegen_opcode_and_num_operands_markov_huffman_codecs) { + out << std::endl; + analyzer.WriteCodegenOpcodeAndNumOperandsMarkovHuffmanCodecs(out); + } + + if (codegen_literal_string_huffman_codecs) { + out << std::endl; + analyzer.WriteCodegenLiteralStringHuffmanCodecs(out); + } + + if (codegen_non_id_word_huffman_codecs) { + out << std::endl; + analyzer.WriteCodegenNonIdWordHuffmanCodecs(out); + } return 0; } diff --git a/tools/stats/stats_analyzer.cpp b/tools/stats/stats_analyzer.cpp index 9e248a42..e196e49a 100644 --- a/tools/stats/stats_analyzer.cpp +++ b/tools/stats/stats_analyzer.cpp @@ -15,19 +15,340 @@ #include "stats_analyzer.h" #include +#include +#include #include #include #include +#include "spirv/1.2/spirv.h" #include "source/enum_string_mapping.h" #include "source/opcode.h" +#include "source/operand.h" #include "source/spirv_constant.h" -#include "spirv/1.1/spirv.h" using libspirv::SpirvStats; namespace { +// Returns all SPIR-V v1.2 opcodes. 
+std::vector<uint32_t> GetAllOpcodes() {
+  return std::vector<uint32_t>({
+    SpvOpNop,
+    SpvOpUndef,
+    SpvOpSourceContinued,
+    SpvOpSource,
+    SpvOpSourceExtension,
+    SpvOpName,
+    SpvOpMemberName,
+    SpvOpString,
+    SpvOpLine,
+    SpvOpExtension,
+    SpvOpExtInstImport,
+    SpvOpExtInst,
+    SpvOpMemoryModel,
+    SpvOpEntryPoint,
+    SpvOpExecutionMode,
+    SpvOpCapability,
+    SpvOpTypeVoid,
+    SpvOpTypeBool,
+    SpvOpTypeInt,
+    SpvOpTypeFloat,
+    SpvOpTypeVector,
+    SpvOpTypeMatrix,
+    SpvOpTypeImage,
+    SpvOpTypeSampler,
+    SpvOpTypeSampledImage,
+    SpvOpTypeArray,
+    SpvOpTypeRuntimeArray,
+    SpvOpTypeStruct,
+    SpvOpTypeOpaque,
+    SpvOpTypePointer,
+    SpvOpTypeFunction,
+    SpvOpTypeEvent,
+    SpvOpTypeDeviceEvent,
+    SpvOpTypeReserveId,
+    SpvOpTypeQueue,
+    SpvOpTypePipe,
+    SpvOpTypeForwardPointer,
+    SpvOpConstantTrue,
+    SpvOpConstantFalse,
+    SpvOpConstant,
+    SpvOpConstantComposite,
+    SpvOpConstantSampler,
+    SpvOpConstantNull,
+    SpvOpSpecConstantTrue,
+    SpvOpSpecConstantFalse,
+    SpvOpSpecConstant,
+    SpvOpSpecConstantComposite,
+    SpvOpSpecConstantOp,
+    SpvOpFunction,
+    SpvOpFunctionParameter,
+    SpvOpFunctionEnd,
+    SpvOpFunctionCall,
+    SpvOpVariable,
+    SpvOpImageTexelPointer,
+    SpvOpLoad,
+    SpvOpStore,
+    SpvOpCopyMemory,
+    SpvOpCopyMemorySized,
+    SpvOpAccessChain,
+    SpvOpInBoundsAccessChain,
+    SpvOpPtrAccessChain,
+    SpvOpArrayLength,
+    SpvOpGenericPtrMemSemantics,
+    SpvOpInBoundsPtrAccessChain,
+    SpvOpDecorate,
+    SpvOpMemberDecorate,
+    SpvOpDecorationGroup,
+    SpvOpGroupDecorate,
+    SpvOpGroupMemberDecorate,
+    SpvOpVectorExtractDynamic,
+    SpvOpVectorInsertDynamic,
+    SpvOpVectorShuffle,
+    SpvOpCompositeConstruct,
+    SpvOpCompositeExtract,
+    SpvOpCompositeInsert,
+    SpvOpCopyObject,
+    SpvOpTranspose,
+    SpvOpSampledImage,
+    SpvOpImageSampleImplicitLod,
+    SpvOpImageSampleExplicitLod,
+    SpvOpImageSampleDrefImplicitLod,
+    SpvOpImageSampleDrefExplicitLod,
+    SpvOpImageSampleProjImplicitLod,
+    SpvOpImageSampleProjExplicitLod,
+    SpvOpImageSampleProjDrefImplicitLod,
+    SpvOpImageSampleProjDrefExplicitLod,
+    SpvOpImageFetch,
+    SpvOpImageGather,
+    SpvOpImageDrefGather,
+    SpvOpImageRead,
+    SpvOpImageWrite,
+    SpvOpImage,
+    SpvOpImageQueryFormat,
+    SpvOpImageQueryOrder,
+    SpvOpImageQuerySizeLod,
+    SpvOpImageQuerySize,
+    SpvOpImageQueryLod,
+    SpvOpImageQueryLevels,
+    SpvOpImageQuerySamples,
+    SpvOpConvertFToU,
+    SpvOpConvertFToS,
+    SpvOpConvertSToF,
+    SpvOpConvertUToF,
+    SpvOpUConvert,
+    SpvOpSConvert,
+    SpvOpFConvert,
+    SpvOpQuantizeToF16,
+    SpvOpConvertPtrToU,
+    SpvOpSatConvertSToU,
+    SpvOpSatConvertUToS,
+    SpvOpConvertUToPtr,
+    SpvOpPtrCastToGeneric,
+    SpvOpGenericCastToPtr,
+    SpvOpGenericCastToPtrExplicit,
+    SpvOpBitcast,
+    SpvOpSNegate,
+    SpvOpFNegate,
+    SpvOpIAdd,
+    SpvOpFAdd,
+    SpvOpISub,
+    SpvOpFSub,
+    SpvOpIMul,
+    SpvOpFMul,
+    SpvOpUDiv,
+    SpvOpSDiv,
+    SpvOpFDiv,
+    SpvOpUMod,
+    SpvOpSRem,
+    SpvOpSMod,
+    SpvOpFRem,
+    SpvOpFMod,
+    SpvOpVectorTimesScalar,
+    SpvOpMatrixTimesScalar,
+    SpvOpVectorTimesMatrix,
+    SpvOpMatrixTimesVector,
+    SpvOpMatrixTimesMatrix,
+    SpvOpOuterProduct,
+    SpvOpDot,
+    SpvOpIAddCarry,
+    SpvOpISubBorrow,
+    SpvOpUMulExtended,
+    SpvOpSMulExtended,
+    SpvOpAny,
+    SpvOpAll,
+    SpvOpIsNan,
+    SpvOpIsInf,
+    SpvOpIsFinite,
+    SpvOpIsNormal,
+    SpvOpSignBitSet,
+    SpvOpLessOrGreater,
+    SpvOpOrdered,
+    SpvOpUnordered,
+    SpvOpLogicalEqual,
+    SpvOpLogicalNotEqual,
+    SpvOpLogicalOr,
+    SpvOpLogicalAnd,
+    SpvOpLogicalNot,
+    SpvOpSelect,
+    SpvOpIEqual,
+    SpvOpINotEqual,
+    SpvOpUGreaterThan,
+    SpvOpSGreaterThan,
+    SpvOpUGreaterThanEqual,
+    SpvOpSGreaterThanEqual,
+    SpvOpULessThan,
+    SpvOpSLessThan,
+    SpvOpULessThanEqual,
+    SpvOpSLessThanEqual,
+    SpvOpFOrdEqual,
+    SpvOpFUnordEqual,
+    SpvOpFOrdNotEqual,
+    SpvOpFUnordNotEqual,
+    SpvOpFOrdLessThan,
+    SpvOpFUnordLessThan,
+    SpvOpFOrdGreaterThan,
+    SpvOpFUnordGreaterThan,
+    SpvOpFOrdLessThanEqual,
+    SpvOpFUnordLessThanEqual,
+    SpvOpFOrdGreaterThanEqual,
+    SpvOpFUnordGreaterThanEqual,
+    SpvOpShiftRightLogical,
+    SpvOpShiftRightArithmetic,
+    SpvOpShiftLeftLogical,
+    SpvOpBitwiseOr,
+    SpvOpBitwiseXor,
+    SpvOpBitwiseAnd,
+    SpvOpNot,
+    SpvOpBitFieldInsert,
+    SpvOpBitFieldSExtract,
+    SpvOpBitFieldUExtract,
+    SpvOpBitReverse,
+    SpvOpBitCount,
+    SpvOpDPdx,
+    SpvOpDPdy,
+    SpvOpFwidth,
+    SpvOpDPdxFine,
+    SpvOpDPdyFine,
+    SpvOpFwidthFine,
+    SpvOpDPdxCoarse,
+    SpvOpDPdyCoarse,
+    SpvOpFwidthCoarse,
+    SpvOpEmitVertex,
+    SpvOpEndPrimitive,
+    SpvOpEmitStreamVertex,
+    SpvOpEndStreamPrimitive,
+    SpvOpControlBarrier,
+    SpvOpMemoryBarrier,
+    SpvOpAtomicLoad,
+    SpvOpAtomicStore,
+    SpvOpAtomicExchange,
+    SpvOpAtomicCompareExchange,
+    SpvOpAtomicCompareExchangeWeak,
+    SpvOpAtomicIIncrement,
+    SpvOpAtomicIDecrement,
+    SpvOpAtomicIAdd,
+    SpvOpAtomicISub,
+    SpvOpAtomicSMin,
+    SpvOpAtomicUMin,
+    SpvOpAtomicSMax,
+    SpvOpAtomicUMax,
+    SpvOpAtomicAnd,
+    SpvOpAtomicOr,
+    SpvOpAtomicXor,
+    SpvOpPhi,
+    SpvOpLoopMerge,
+    SpvOpSelectionMerge,
+    SpvOpLabel,
+    SpvOpBranch,
+    SpvOpBranchConditional,
+    SpvOpSwitch,
+    SpvOpKill,
+    SpvOpReturn,
+    SpvOpReturnValue,
+    SpvOpUnreachable,
+    SpvOpLifetimeStart,
+    SpvOpLifetimeStop,
+    SpvOpGroupAsyncCopy,
+    SpvOpGroupWaitEvents,
+    SpvOpGroupAll,
+    SpvOpGroupAny,
+    SpvOpGroupBroadcast,
+    SpvOpGroupIAdd,
+    SpvOpGroupFAdd,
+    SpvOpGroupFMin,
+    SpvOpGroupUMin,
+    SpvOpGroupSMin,
+    SpvOpGroupFMax,
+    SpvOpGroupUMax,
+    SpvOpGroupSMax,
+    SpvOpReadPipe,
+    SpvOpWritePipe,
+    SpvOpReservedReadPipe,
+    SpvOpReservedWritePipe,
+    SpvOpReserveReadPipePackets,
+    SpvOpReserveWritePipePackets,
+    SpvOpCommitReadPipe,
+    SpvOpCommitWritePipe,
+    SpvOpIsValidReserveId,
+    SpvOpGetNumPipePackets,
+    SpvOpGetMaxPipePackets,
+    SpvOpGroupReserveReadPipePackets,
+    SpvOpGroupReserveWritePipePackets,
+    SpvOpGroupCommitReadPipe,
+    SpvOpGroupCommitWritePipe,
+    SpvOpEnqueueMarker,
+    SpvOpEnqueueKernel,
+    SpvOpGetKernelNDrangeSubGroupCount,
+    SpvOpGetKernelNDrangeMaxSubGroupSize,
+    SpvOpGetKernelWorkGroupSize,
+    SpvOpGetKernelPreferredWorkGroupSizeMultiple,
+    SpvOpRetainEvent,
+    SpvOpReleaseEvent,
+    SpvOpCreateUserEvent,
+    SpvOpIsValidEvent,
+    SpvOpSetUserEventStatus,
+    SpvOpCaptureEventProfilingInfo,
+    SpvOpGetDefaultQueue,
+    SpvOpBuildNDRange,
+    SpvOpImageSparseSampleImplicitLod,
+    SpvOpImageSparseSampleExplicitLod,
+    SpvOpImageSparseSampleDrefImplicitLod,
+    SpvOpImageSparseSampleDrefExplicitLod,
+    SpvOpImageSparseSampleProjImplicitLod,
+    SpvOpImageSparseSampleProjExplicitLod,
+    SpvOpImageSparseSampleProjDrefImplicitLod,
+    SpvOpImageSparseSampleProjDrefExplicitLod,
+    SpvOpImageSparseFetch,
+    SpvOpImageSparseGather,
+    SpvOpImageSparseDrefGather,
+    SpvOpImageSparseTexelsResident,
+    SpvOpNoLine,
+    SpvOpAtomicFlagTestAndSet,
+    SpvOpAtomicFlagClear,
+    SpvOpImageSparseRead,
+    SpvOpSizeOf,
+    SpvOpTypePipeStorage,
+    SpvOpConstantPipeStorage,
+    SpvOpCreatePipeFromPipeStorage,
+    SpvOpGetKernelLocalSizeForSubgroupCount,
+    SpvOpGetKernelMaxNumSubgroups,
+    SpvOpTypeNamedBarrier,
+    SpvOpNamedBarrierInitialize,
+    SpvOpMemoryNamedBarrier,
+    SpvOpModuleProcessed,
+    SpvOpExecutionModeId,
+    SpvOpDecorateId,
+    SpvOpSubgroupBallotKHR,
+    SpvOpSubgroupFirstInvocationKHR,
+    SpvOpSubgroupAllKHR,
+    SpvOpSubgroupAnyKHR,
+    SpvOpSubgroupAllEqualKHR,
+    SpvOpSubgroupReadInvocationKHR,
+  });
+}
+
 std::string GetVersionString(uint32_t word) {
   std::stringstream ss;
   ss << "Version " << SPV_SPIRV_VERSION_MAJOR_PART(word)
@@ -239,3 +560,260 @@ void StatsAnalyzer::WriteOpcodeMarkov(std::ostream& out) {
     }
   }
 }
+
+namespace {
+
+// Returns |str| escaped so that it can be emitted between double quotes as a
+// C++ string literal in generated code.
+std::string EscapeForCppStringLiteral(const std::string& str) {
+  std::string escaped;
+  escaped.reserve(str.size());
+  for (const char c : str) {
+    const unsigned char uc = static_cast<unsigned char>(c);
+    if (c == '"' || c == '\\') {
+      escaped += '\\';
+      escaped += c;
+    } else if (uc < 0x20 || uc >= 0x7F) {
+      // Fixed-width octal escapes cannot absorb a following digit the way
+      // hexadecimal escapes would.
+      escaped += '\\';
+      escaped += static_cast<char>('0' + ((uc >> 6) & 7));
+      escaped += static_cast<char>('0' + ((uc >> 3) & 7));
+      escaped += static_cast<char>('0' + (uc & 7));
+    } else {
+      escaped += c;
+    }
+  }
+  return escaped;
+}
+
+}  // namespace
+
+// NOTE(review): the template arguments inside the emitted text below
+// (std::map<...>, HuffmanCodec<...>, std::pair<...>) were reconstructed from
+// the values each generated table stores; confirm them against the
+// declarations expected by the code that consumes the generated output.
+
+// Writes the definition of GetOpcodeHist(): every opcode from GetAllOpcodes()
+// gets a weight in [1, 1000] derived from its share of stats_.opcode_hist.
+void StatsAnalyzer::WriteCodegenOpcodeHist(std::ostream& out) {
+  auto all_opcodes = GetAllOpcodes();
+
+  // uint64_t is used because kMarkvNoneOfTheAbove is outside of uint32_t range.
+  out << "std::map<uint64_t, uint32_t> GetOpcodeHist() {\n"
+      << " return std::map<uint64_t, uint32_t>({\n";
+
+  uint32_t total = 0;
+  for (const auto& kv : stats_.opcode_hist) {
+    total += kv.second;
+  }
+
+  for (uint32_t opcode : all_opcodes) {
+    const auto it = stats_.opcode_hist.find(opcode);
+    const uint32_t count = it == stats_.opcode_hist.end() ? 0 : it->second;
+    const double kMaxValue = 1000.0;
+    // An empty histogram leaves total at 0; skip the 0/0 division and let
+    // every opcode fall through to the minimum weight.
+    uint32_t value =
+        total == 0 ? 0 : uint32_t(kMaxValue * double(count) / double(total));
+    if (value == 0)
+      value = 1;
+    out << " { SpvOp" << GetOpcodeString(opcode)
+        << ", " << value << " },\n";
+  }
+
+  // Add kMarkvNoneOfTheAbove as a signal for unknown opcode.
+  out << " { kMarkvNoneOfTheAbove, " << 10 << " },\n";
+  out << " });\n}\n";
+}
+
+// Writes the definition of GetOpcodeAndNumOperandsHist(): raw counts of the
+// opcode/operand-count combinations making up at least 0.1% of all samples,
+// plus a kMarkvNoneOfTheAbove entry. OpTypeStruct entries are left out.
+void StatsAnalyzer::WriteCodegenOpcodeAndNumOperandsHist(std::ostream& out) {
+  out << "std::map<uint64_t, uint32_t> GetOpcodeAndNumOperandsHist() {\n"
+      << " return std::map<uint64_t, uint32_t>({\n";
+
+  uint32_t total = 0;
+  for (const auto& kv : stats_.opcode_and_num_operands_hist) {
+    total += kv.second;
+  }
+
+  for (const auto& kv : stats_.opcode_and_num_operands_hist) {
+    const uint32_t count = kv.second;
+    const double kFrequentEnoughToAnalyze = 0.001;
+    if (double(count) / double(total) < kFrequentEnoughToAnalyze) continue;
+    const uint32_t opcode_and_num_operands = kv.first;
+    const uint32_t opcode = opcode_and_num_operands & 0xFFFF;
+    const uint32_t num_operands = opcode_and_num_operands >> 16;
+
+    if (opcode == SpvOpTypeStruct)
+      continue;
+
+    out << " { CombineOpcodeAndNumOperands(SpvOp"
+        << spvOpcodeString(SpvOp(opcode))
+        << ", " << num_operands << "), " << count << " },\n";
+  }
+
+  out << " { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+  out << " });\n}\n";
+}
+
+// Writes the definition of GetOpcodeAndNumOperandsMarkovHuffmanCodecs(): one
+// Huffman codec per sufficiently frequent previous opcode, built from the
+// opcode/operand-count combinations recorded for that previous opcode.
+void StatsAnalyzer::WriteCodegenOpcodeAndNumOperandsMarkovHuffmanCodecs(
+    std::ostream& out) {
+  out << "std::map<uint32_t, std::unique_ptr<HuffmanCodec<uint64_t>>>\n"
+      << "GetOpcodeAndNumOperandsMarkovHuffmanCodecs() {\n"
+      << " std::map<uint32_t, std::unique_ptr<HuffmanCodec<uint64_t>>> "
+      << "codecs;\n";
+
+  for (const auto& kv : stats_.opcode_and_num_operands_markov_hist) {
+    const uint32_t prev_opcode = kv.first;
+    const double kFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[prev_opcode] < kFrequentEnoughToAnalyze) continue;
+
+    const auto& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << " {\n";
+    out << " std::unique_ptr<HuffmanCodec<uint64_t>> "
+        << "codec(new HuffmanCodec<uint64_t>({\n";
+
+    for (const auto& pair : hist) {
+      const uint32_t opcode_and_num_operands = pair.first;
+      const uint32_t opcode = opcode_and_num_operands & 0xFFFF;
+
+      if (opcode == SpvOpTypeStruct)
+        continue;
+
+      const uint32_t num_operands = opcode_and_num_operands >> 16;
+      const uint32_t count = pair.second;
+      const double posterior_freq = double(count) / double(total);
+
+      if (opcode_freq_[opcode] < kFrequentEnoughToAnalyze &&
+          posterior_freq < kFrequentEnoughToAnalyze) continue;
+
+      // |total| is deliberately left untouched here: adding the kept counts
+      // to it again would make posterior_freq depend on the iteration order
+      // of |hist| and would inflate the fallback weight written below.
+      out << " { CombineOpcodeAndNumOperands(SpvOp"
+          << spvOpcodeString(SpvOp(opcode))
+          << ", " << num_operands << "), " << count << " },\n";
+    }
+
+    out << " { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+
+    out << " }));\n" << std::endl;
+    out << " codecs.emplace(SpvOp" << GetOpcodeString(prev_opcode)
+        << ", std::move(codec));\n";
+    out << " }\n\n";
+  }
+
+  out << " return codecs;\n}\n";
+}
+
+// Writes the definition of GetLiteralStringHuffmanCodecs(): one Huffman codec
+// per sufficiently frequent opcode that has literal-string statistics.
+// OpName and OpMemberName are skipped.
+void StatsAnalyzer::WriteCodegenLiteralStringHuffmanCodecs(std::ostream& out) {
+  out << "std::map<SpvOp, std::unique_ptr<HuffmanCodec<std::string>>>\n"
+      << "GetLiteralStringHuffmanCodecs() {\n"
+      << " std::map<SpvOp, std::unique_ptr<HuffmanCodec<std::string>>> "
+      << "codecs;\n";
+
+  for (const auto& kv : stats_.literal_strings_hist) {
+    const uint32_t opcode = kv.first;
+
+    if (opcode == SpvOpName || opcode == SpvOpMemberName)
+      continue;
+
+    const double kOpcodeFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[opcode] < kOpcodeFrequentEnoughToAnalyze) continue;
+
+    const auto& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << " {\n";
+    out << " std::unique_ptr<HuffmanCodec<std::string>> "
+        << "codec(new HuffmanCodec<std::string>({\n";
+    for (const auto& pair : hist) {
+      const uint32_t count = pair.second;
+      const double freq = double(count) / double(total);
+      const double kStringFrequentEnoughToAnalyze = 0.001;
+      if (freq < kStringFrequentEnoughToAnalyze) continue;
+      // The string comes straight from the analyzed module, so quotes,
+      // backslashes and control characters must be escaped before it is
+      // pasted into a C++ literal.
+      out << " { std::string(\"" << EscapeForCppStringLiteral(pair.first)
+          << "\"), " << count << " },\n";
+    }
+
+    out << " { std::string(\"kMarkvNoneOfTheAbove\"), "
+        << 1 + int(total * 0.05) << " },\n";
+
+    out << " }));\n" << std::endl;
+    out << " codecs.emplace(SpvOp" << spvOpcodeString(SpvOp(opcode))
+        << ", std::move(codec));\n";
+    out << " }\n\n";
+  }
+
+  out << " return codecs;\n}\n";
+}
+
+// Writes the definition of GetNonIdWordHuffmanCodecs(): one Huffman codec per
+// (opcode, operand index) slot of a sufficiently frequent opcode.
+void StatsAnalyzer::WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out) {
+  out << "std::map<std::pair<uint32_t, uint32_t>, "
+      << "std::unique_ptr<HuffmanCodec<uint64_t>>>\n"
+      << "GetNonIdWordHuffmanCodecs() {\n"
+      << " std::map<std::pair<uint32_t, uint32_t>, "
+      << "std::unique_ptr<HuffmanCodec<uint64_t>>> codecs;\n";
+
+  for (const auto& kv : stats_.non_id_words_hist) {
+    const auto& opcode_and_index = kv.first;
+    const uint32_t opcode = opcode_and_index.first;
+    const uint32_t index = opcode_and_index.second;
+
+    const double kOpcodeFrequentEnoughToAnalyze = 0.001;
+    if (opcode_freq_[opcode] < kOpcodeFrequentEnoughToAnalyze) continue;
+
+    const auto& hist = kv.second;
+
+    uint32_t total = 0;
+    for (const auto& pair : hist) {
+      total += pair.second;
+    }
+
+    out << " {\n";
+    out << " std::unique_ptr<HuffmanCodec<uint64_t>> "
+        << "codec(new HuffmanCodec<uint64_t>({\n";
+    for (const auto& pair : hist) {
+      const uint32_t word = pair.first;
+      const uint32_t count = pair.second;
+      const double freq = double(count) / double(total);
+      const double kWordFrequentEnoughToAnalyze = 0.001;
+      if (freq < kWordFrequentEnoughToAnalyze) continue;
+      out << " { " << word << ", " << count << " },\n";
+    }
+
+    out << " { kMarkvNoneOfTheAbove, " << 1 + int(total * 0.05) << " },\n";
+
+    out << " }));\n" << std::endl;
+    out << " codecs.emplace(std::pair<uint32_t, uint32_t>(SpvOp"
+        << spvOpcodeString(SpvOp(opcode))
+        << ", " << index << "), std::move(codec));\n";
+    out << " }\n\n";
+  }
+
+  out << " return codecs;\n}\n";
+}
diff --git a/tools/stats/stats_analyzer.h b/tools/stats/stats_analyzer.h
index c1ff1871..54e2c3d8 100644
--- a/tools/stats/stats_analyzer.h
+++ b/tools/stats/stats_analyzer.h
@@ -36,6 +36,30 @@ class StatsAnalyzer {
   // level.
   void WriteOpcodeMarkov(std::ostream& out);
 
+  // Writes C++ code containing a function returning opcode histogram.
+  void WriteCodegenOpcodeHist(std::ostream& out);
+
+  // Writes C++ code containing a function returning opcode_and_num_operands
+  // histogram.
+  void WriteCodegenOpcodeAndNumOperandsHist(std::ostream& out);
+
+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for opcode_and_num_operands. Each Huffman codec is created for a specific
+  // previous opcode.
+  // TODO(atgoo@github.com) Write code which would contain pregenerated Huffman
+  // codecs, instead of code which would generate them every time.
+  void WriteCodegenOpcodeAndNumOperandsMarkovHuffmanCodecs(std::ostream& out);
+
+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for literal strings. Each Huffman codec is created for a specific opcode.
+  // I.e. OpExtension and OpExtInstImport would use different codecs.
+  void WriteCodegenLiteralStringHuffmanCodecs(std::ostream& out);
+
+  // Writes C++ code containing a function returning a map of Huffman codecs
+  // for single-word non-id operands. Each Huffman codec is created for a
+  // specific operand slot (opcode and operand number).
+  void WriteCodegenNonIdWordHuffmanCodecs(std::ostream& out);
+
  private:
   const libspirv::SpirvStats& stats_;
 