mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-04-03 13:42:13 +00:00

A dialect can opt in to versioning through the `BytecodeDialectInterface`. A few hooks are exposed to the dialect to allow it to manage a version encoded into the bytecode file. The version is loaded lazily, which makes the version information available while parsing the input IR and gives each dialect for which a version is present an opportunity to perform IR upgrades post-parsing through the `upgradeFromVersion` method. Custom Attribute and Type encodings can also be upgraded according to the dialect version using the `readAttribute` and `readType` methods. There is no restriction on what kind of information a dialect is allowed to encode to model its versioning. Currently, versioning is supported only for bytecode formats. Reviewed By: rriddle, mehdi_amini Differential Revision: https://reviews.llvm.org/D143647
876 lines
32 KiB
C++
876 lines
32 KiB
C++
//===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Bytecode/BytecodeWriter.h"
|
|
#include "../Encoding.h"
|
|
#include "IRNumbering.h"
|
|
#include "mlir/Bytecode/BytecodeImplementation.h"
|
|
#include "mlir/IR/OpImplementation.h"
|
|
#include "llvm/ADT/CachedHashString.h"
|
|
#include "llvm/ADT/MapVector.h"
|
|
#include "llvm/ADT/SmallString.h"
|
|
|
|
#define DEBUG_TYPE "mlir-bytecode-writer"
|
|
|
|
using namespace mlir;
|
|
using namespace mlir::bytecode::detail;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BytecodeWriterConfig
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Private state backing a `BytecodeWriterConfig`.
struct BytecodeWriterConfig::Impl {
  Impl(StringRef producer) : producer{producer} {}

  /// Identifies the producer of the bytecode. Stored as a `StringRef`, so the
  /// underlying character data is referenced rather than copied.
  StringRef producer;

  /// Resource printers that are not associated with any dialect.
  SmallVector<std::unique_ptr<AsmResourcePrinter>> externalResourcePrinters;
};
|
|
|
|
/// Build a configuration that records `producer` as the bytecode producer and
/// starts out with no external resource printers.
BytecodeWriterConfig::BytecodeWriterConfig(StringRef producer)
    : impl(std::make_unique<Impl>(producer)) {}
|
|
/// Build a configuration for `producer` (delegating to the single-argument
/// constructor) and then attach a fallback resource printer for `map`.
BytecodeWriterConfig::BytecodeWriterConfig(FallbackAsmResourceMap &map,
                                           StringRef producer)
    : BytecodeWriterConfig(producer) {
  attachFallbackResourcePrinter(map);
}
|
|
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably it lives here because `Impl` is only a complete
// type in this translation unit — confirm against the header.
BytecodeWriterConfig::~BytecodeWriterConfig() = default;
|
|
|
|
void BytecodeWriterConfig::attachResourcePrinter(
|
|
std::unique_ptr<AsmResourcePrinter> printer) {
|
|
impl->externalResourcePrinters.emplace_back(std::move(printer));
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// EncodingEmitter
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
/// This class functions as the underlying encoding emitter for the bytecode
|
|
/// writer. This class is a bit different compared to other types of encoders;
|
|
/// it does not use a single buffer, but instead may contain several buffers
|
|
/// (some owned by the writer, and some not) that get concatted during the final
|
|
/// emission.
|
|
class EncodingEmitter {
|
|
public:
|
|
EncodingEmitter() = default;
|
|
EncodingEmitter(const EncodingEmitter &) = delete;
|
|
EncodingEmitter &operator=(const EncodingEmitter &) = delete;
|
|
|
|
/// Write the current contents to the provided stream.
|
|
void writeTo(raw_ostream &os) const;
|
|
|
|
/// Return the current size of the encoded buffer.
|
|
size_t size() const { return prevResultSize + currentResult.size(); }
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Emission
|
|
//===--------------------------------------------------------------------===//
|
|
|
|
/// Backpatch a byte in the result buffer at the given offset.
|
|
void patchByte(uint64_t offset, uint8_t value) {
|
|
assert(offset < size() && offset >= prevResultSize &&
|
|
"cannot patch previously emitted data");
|
|
currentResult[offset - prevResultSize] = value;
|
|
}
|
|
|
|
/// Emit the provided blob of data, which is owned by the caller and is
|
|
/// guaranteed to not die before the end of the bytecode process.
|
|
void emitOwnedBlob(ArrayRef<uint8_t> data) {
|
|
// Push the current buffer before adding the provided data.
|
|
appendResult(std::move(currentResult));
|
|
appendOwnedResult(data);
|
|
}
|
|
|
|
/// Emit the provided blob of data that has the given alignment, which is
|
|
/// owned by the caller and is guaranteed to not die before the end of the
|
|
/// bytecode process. The alignment value is also encoded, making it available
|
|
/// on load.
|
|
void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) {
|
|
emitVarInt(alignment);
|
|
emitVarInt(data.size());
|
|
|
|
alignTo(alignment);
|
|
emitOwnedBlob(data);
|
|
}
|
|
void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) {
|
|
ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
|
|
data.size());
|
|
emitOwnedBlobAndAlignment(castedData, alignment);
|
|
}
|
|
|
|
/// Align the emitter to the given alignment.
|
|
void alignTo(unsigned alignment) {
|
|
if (alignment < 2)
|
|
return;
|
|
assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
|
|
|
|
// Check to see if we need to emit any padding bytes to meet the desired
|
|
// alignment.
|
|
size_t curOffset = size();
|
|
size_t paddingSize = llvm::alignTo(curOffset, alignment) - curOffset;
|
|
while (paddingSize--)
|
|
emitByte(bytecode::kAlignmentByte);
|
|
|
|
// Keep track of the maximum required alignment.
|
|
requiredAlignment = std::max(requiredAlignment, alignment);
|
|
}
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Integer Emission
|
|
|
|
/// Emit a single byte.
|
|
template <typename T>
|
|
void emitByte(T byte) {
|
|
currentResult.push_back(static_cast<uint8_t>(byte));
|
|
}
|
|
|
|
/// Emit a range of bytes.
|
|
void emitBytes(ArrayRef<uint8_t> bytes) {
|
|
llvm::append_range(currentResult, bytes);
|
|
}
|
|
|
|
/// Emit a variable length integer. The first encoded byte contains a prefix
|
|
/// in the low bits indicating the encoded length of the value. This length
|
|
/// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
|
|
/// indicate the number of _additional_ bytes (not including the prefix byte).
|
|
/// All remaining bits in the first byte, along with all of the bits in
|
|
/// additional bytes, provide the value of the integer encoded in
|
|
/// little-endian order.
|
|
void emitVarInt(uint64_t value) {
|
|
// In the most common case, the value can be represented in a single byte.
|
|
// Given how hot this case is, explicitly handle that here.
|
|
if ((value >> 7) == 0)
|
|
return emitByte((value << 1) | 0x1);
|
|
emitMultiByteVarInt(value);
|
|
}
|
|
|
|
/// Emit a signed variable length integer. Signed varints are encoded using
|
|
/// a varint with zigzag encoding, meaning that we use the low bit of the
|
|
/// value to indicate the sign of the value. This allows for more efficient
|
|
/// encoding of negative values by limiting the number of active bits
|
|
void emitSignedVarInt(uint64_t value) {
|
|
emitVarInt((value << 1) ^ (uint64_t)((int64_t)value >> 63));
|
|
}
|
|
|
|
/// Emit a variable length integer whose low bit is used to encode the
|
|
/// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
|
|
void emitVarIntWithFlag(uint64_t value, bool flag) {
|
|
emitVarInt((value << 1) | (flag ? 1 : 0));
|
|
}
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// String Emission
|
|
|
|
/// Emit the given string as a nul terminated string.
|
|
void emitNulTerminatedString(StringRef str) {
|
|
emitString(str);
|
|
emitByte(0);
|
|
}
|
|
|
|
/// Emit the given string without a nul terminator.
|
|
void emitString(StringRef str) {
|
|
emitBytes({reinterpret_cast<const uint8_t *>(str.data()), str.size()});
|
|
}
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Section Emission
|
|
|
|
/// Emit a nested section of the given code, whose contents are encoded in the
|
|
/// provided emitter.
|
|
void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
|
|
// Emit the section code and length. The high bit of the code is used to
|
|
// indicate whether the section alignment is present, so save an offset to
|
|
// it.
|
|
uint64_t codeOffset = currentResult.size();
|
|
emitByte(code);
|
|
emitVarInt(emitter.size());
|
|
|
|
// Integrate the alignment of the section into this emitter if necessary.
|
|
unsigned emitterAlign = emitter.requiredAlignment;
|
|
if (emitterAlign > 1) {
|
|
if (size() & (emitterAlign - 1)) {
|
|
emitVarInt(emitterAlign);
|
|
alignTo(emitterAlign);
|
|
|
|
// Indicate that we needed to align the section, the high bit of the
|
|
// code field is used for this.
|
|
currentResult[codeOffset] |= 0b10000000;
|
|
} else {
|
|
// Otherwise, if we happen to be at a compatible offset, we just
|
|
// remember that we need this alignment.
|
|
requiredAlignment = std::max(requiredAlignment, emitterAlign);
|
|
}
|
|
}
|
|
|
|
// Push our current buffer and then merge the provided section body into
|
|
// ours.
|
|
appendResult(std::move(currentResult));
|
|
for (std::vector<uint8_t> &result : emitter.prevResultStorage)
|
|
prevResultStorage.push_back(std::move(result));
|
|
llvm::append_range(prevResultList, emitter.prevResultList);
|
|
prevResultSize += emitter.prevResultSize;
|
|
appendResult(std::move(emitter.currentResult));
|
|
}
|
|
|
|
private:
|
|
/// Emit the given value using a variable width encoding. This method is a
|
|
/// fallback when the number of bytes needed to encode the value is greater
|
|
/// than 1. We mark it noinline here so that the single byte hot path isn't
|
|
/// pessimized.
|
|
LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value);
|
|
|
|
/// Append a new result buffer to the current contents.
|
|
void appendResult(std::vector<uint8_t> &&result) {
|
|
if (result.empty())
|
|
return;
|
|
prevResultStorage.emplace_back(std::move(result));
|
|
appendOwnedResult(prevResultStorage.back());
|
|
}
|
|
void appendOwnedResult(ArrayRef<uint8_t> result) {
|
|
if (result.empty())
|
|
return;
|
|
prevResultSize += result.size();
|
|
prevResultList.emplace_back(result);
|
|
}
|
|
|
|
/// The result of the emitter currently being built. We refrain from building
|
|
/// a single buffer to simplify emitting sections, large data, and more. The
|
|
/// result is thus represented using multiple distinct buffers, some of which
|
|
/// we own (via prevResultStorage), and some of which are just pointers into
|
|
/// externally owned buffers.
|
|
std::vector<uint8_t> currentResult;
|
|
std::vector<ArrayRef<uint8_t>> prevResultList;
|
|
std::vector<std::vector<uint8_t>> prevResultStorage;
|
|
|
|
/// An up-to-date total size of all of the buffers within `prevResultList`.
|
|
/// This enables O(1) size checks of the current encoding.
|
|
size_t prevResultSize = 0;
|
|
|
|
/// The highest required alignment for the start of this section.
|
|
unsigned requiredAlignment = 1;
|
|
};
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// StringSectionBuilder
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
/// This class is used to simplify the process of emitting the string section.
|
|
class StringSectionBuilder {
|
|
public:
|
|
/// Add the given string to the string section, and return the index of the
|
|
/// string within the section.
|
|
size_t insert(StringRef str) {
|
|
auto it = strings.insert({llvm::CachedHashStringRef(str), strings.size()});
|
|
return it.first->second;
|
|
}
|
|
|
|
/// Write the current set of strings to the given emitter.
|
|
void write(EncodingEmitter &emitter) {
|
|
emitter.emitVarInt(strings.size());
|
|
|
|
// Emit the sizes in reverse order, so that we don't need to backpatch an
|
|
// offset to the string data or have a separate section.
|
|
for (const auto &it : llvm::reverse(strings))
|
|
emitter.emitVarInt(it.first.size() + 1);
|
|
// Emit the string data itself.
|
|
for (const auto &it : strings)
|
|
emitter.emitNulTerminatedString(it.first.val());
|
|
}
|
|
|
|
private:
|
|
/// A set of strings referenced within the bytecode. The value of the map is
|
|
/// unused.
|
|
llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
|
|
};
|
|
} // namespace
|
|
|
|
class DialectWriter : public DialectBytecodeWriter {
|
|
public:
|
|
DialectWriter(EncodingEmitter &emitter, IRNumberingState &numberingState,
|
|
StringSectionBuilder &stringSection)
|
|
: emitter(emitter), numberingState(numberingState),
|
|
stringSection(stringSection) {}
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// IR
|
|
//===--------------------------------------------------------------------===//
|
|
|
|
void writeAttribute(Attribute attr) override {
|
|
emitter.emitVarInt(numberingState.getNumber(attr));
|
|
}
|
|
void writeType(Type type) override {
|
|
emitter.emitVarInt(numberingState.getNumber(type));
|
|
}
|
|
|
|
void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
|
|
emitter.emitVarInt(numberingState.getNumber(resource));
|
|
}
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Primitives
|
|
//===--------------------------------------------------------------------===//
|
|
|
|
void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); }
|
|
|
|
void writeSignedVarInt(int64_t value) override {
|
|
emitter.emitSignedVarInt(value);
|
|
}
|
|
|
|
void writeAPIntWithKnownWidth(const APInt &value) override {
|
|
size_t bitWidth = value.getBitWidth();
|
|
|
|
// If the value is a single byte, just emit it directly without going
|
|
// through a varint.
|
|
if (bitWidth <= 8)
|
|
return emitter.emitByte(value.getLimitedValue());
|
|
|
|
// If the value fits within a single varint, emit it directly.
|
|
if (bitWidth <= 64)
|
|
return emitter.emitSignedVarInt(value.getLimitedValue());
|
|
|
|
// Otherwise, we need to encode a variable number of active words. We use
|
|
// active words instead of the number of total words under the observation
|
|
// that smaller values will be more common.
|
|
unsigned numActiveWords = value.getActiveWords();
|
|
emitter.emitVarInt(numActiveWords);
|
|
|
|
const uint64_t *rawValueData = value.getRawData();
|
|
for (unsigned i = 0; i < numActiveWords; ++i)
|
|
emitter.emitSignedVarInt(rawValueData[i]);
|
|
}
|
|
|
|
void writeAPFloatWithKnownSemantics(const APFloat &value) override {
|
|
writeAPIntWithKnownWidth(value.bitcastToAPInt());
|
|
}
|
|
|
|
void writeOwnedString(StringRef str) override {
|
|
emitter.emitVarInt(stringSection.insert(str));
|
|
}
|
|
|
|
void writeOwnedBlob(ArrayRef<char> blob) override {
|
|
emitter.emitVarInt(blob.size());
|
|
emitter.emitOwnedBlob(ArrayRef<uint8_t>(
|
|
reinterpret_cast<const uint8_t *>(blob.data()), blob.size()));
|
|
}
|
|
|
|
private:
|
|
EncodingEmitter &emitter;
|
|
IRNumberingState &numberingState;
|
|
StringSectionBuilder &stringSection;
|
|
};
|
|
|
|
/// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
|
|
/// to go through an intermediate buffer when interacting with code that wants a
|
|
/// raw_ostream.
|
|
class RawEmitterOstream : public raw_ostream {
|
|
public:
|
|
explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
|
|
SetUnbuffered();
|
|
}
|
|
|
|
private:
|
|
void write_impl(const char *ptr, size_t size) override {
|
|
emitter.emitBytes({reinterpret_cast<const uint8_t *>(ptr), size});
|
|
}
|
|
uint64_t current_pos() const override { return emitter.size(); }
|
|
|
|
/// The section being emitted to.
|
|
EncodingEmitter &emitter;
|
|
};
|
|
} // namespace
|
|
|
|
/// Write all previously completed buffers, in order, followed by the buffer
/// that is currently being built, to `os`.
void EncodingEmitter::writeTo(raw_ostream &os) const {
  auto writeBuffer = [&os](ArrayRef<uint8_t> buffer) {
    os.write(reinterpret_cast<const char *>(buffer.data()), buffer.size());
  };

  for (ArrayRef<uint8_t> completed : prevResultList)
    writeBuffer(completed);
  writeBuffer(currentResult);
}
|
|
|
|
void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
|
|
// Compute the number of bytes needed to encode the value. Each byte can hold
|
|
// up to 7-bits of data. We only check up to the number of bits we can encode
|
|
// in the first byte (8).
|
|
uint64_t it = value >> 7;
|
|
for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
|
|
if (LLVM_LIKELY(it >>= 7) == 0) {
|
|
uint64_t encodedValue = (value << 1) | 0x1;
|
|
encodedValue <<= (numBytes - 1);
|
|
emitBytes({reinterpret_cast<uint8_t *>(&encodedValue), numBytes});
|
|
return;
|
|
}
|
|
}
|
|
|
|
// If the value is too large to encode in a single byte, emit a special all
|
|
// zero marker byte and splat the value directly.
|
|
emitByte(0);
|
|
emitBytes({reinterpret_cast<uint8_t *>(&value), sizeof(value)});
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Bytecode Writer
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
namespace {
|
|
class BytecodeWriter {
|
|
public:
|
|
BytecodeWriter(Operation *op) : numberingState(op) {}
|
|
|
|
/// Write the bytecode for the given root operation.
|
|
void write(Operation *rootOp, raw_ostream &os,
|
|
const BytecodeWriterConfig::Impl &config);
|
|
|
|
private:
|
|
//===--------------------------------------------------------------------===//
|
|
// Dialects
|
|
|
|
void writeDialectSection(EncodingEmitter &emitter);
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Attributes and Types
|
|
|
|
void writeAttrTypeSection(EncodingEmitter &emitter);
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Operations
|
|
|
|
void writeBlock(EncodingEmitter &emitter, Block *block);
|
|
void writeOp(EncodingEmitter &emitter, Operation *op);
|
|
void writeRegion(EncodingEmitter &emitter, Region *region);
|
|
void writeIRSection(EncodingEmitter &emitter, Operation *op);
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Resources
|
|
|
|
void writeResourceSection(Operation *op, EncodingEmitter &emitter,
|
|
const BytecodeWriterConfig::Impl &config);
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Strings
|
|
|
|
void writeStringSection(EncodingEmitter &emitter);
|
|
|
|
//===--------------------------------------------------------------------===//
|
|
// Fields
|
|
|
|
/// The builder used for the string section.
|
|
StringSectionBuilder stringSection;
|
|
|
|
/// The IR numbering state generated for the root operation.
|
|
IRNumberingState numberingState;
|
|
};
|
|
} // namespace
|
|
|
|
/// Encode `rootOp` into an in-memory emitter, section by section, and then
/// write the finished bytecode to `os`.
void BytecodeWriter::write(Operation *rootOp, raw_ostream &os,
                           const BytecodeWriterConfig::Impl &config) {
  EncodingEmitter emitter;

  // File header: the magic bytes that identify a bytecode file, then the
  // bytecode version, then the nul terminated producer string.
  emitter.emitString("ML\xefR");
  emitter.emitVarInt(bytecode::kVersion);
  emitter.emitNulTerminatedString(config.producer);

  // Sections, in file order: dialects, attributes/types, IR, resources.
  writeDialectSection(emitter);
  writeAttrTypeSection(emitter);
  writeIRSection(emitter, rootOp);
  writeResourceSection(rootOp, emitter, config);

  // The string section is written after the sections above, which insert
  // strings into it while they are being emitted.
  writeStringSection(emitter);

  // Finally, hand the generated bytecode to the output stream.
  emitter.writeTo(os);
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Dialects
|
|
|
|
/// Write the given entries in contiguous groups with the same parent dialect.
|
|
/// Each dialect sub-group is encoded with the parent dialect and number of
|
|
/// elements, followed by the encoding for the entries. The given callback is
|
|
/// invoked to encode each individual entry.
|
|
template <typename EntriesT, typename EntryCallbackT>
|
|
static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
|
|
EntryCallbackT &&callback) {
|
|
for (auto it = entries.begin(), e = entries.end(); it != e;) {
|
|
auto groupStart = it++;
|
|
|
|
// Find the end of the group that shares the same parent dialect.
|
|
DialectNumbering *currentDialect = groupStart->dialect;
|
|
it = std::find_if(it, e, [&](const auto &entry) {
|
|
return entry.dialect != currentDialect;
|
|
});
|
|
|
|
// Emit the dialect and number of elements.
|
|
emitter.emitVarInt(currentDialect->number);
|
|
emitter.emitVarInt(std::distance(groupStart, it));
|
|
|
|
// Emit the entries within the group.
|
|
for (auto &entry : llvm::make_range(groupStart, it))
|
|
callback(entry);
|
|
}
|
|
}
|
|
|
|
/// Emit the dialect section: the referenced dialects (each optionally
/// followed by a nested version section) and then the referenced operation
/// names grouped by dialect.
void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
  EncodingEmitter dialectEmitter;

  // The referenced dialects, preceded by their count.
  auto dialects = numberingState.getDialects();
  dialectEmitter.emitVarInt(llvm::size(dialects));
  for (DialectNumbering &dialect : dialects) {
    // Intern the dialect name; its string index identifies the dialect.
    const size_t nameID = stringSection.insert(dialect.name);

    // Give the dialect's bytecode interface, if there is one, the chance to
    // write a version into a scratch emitter.
    EncodingEmitter versionEmitter;
    if (dialect.interface) {
      DialectWriter versionWriter(versionEmitter, numberingState,
                                  stringSection);
      dialect.interface->writeVersion(versionWriter);
    }

    // An empty scratch emitter means that no version is available. That fact
    // is encoded as a flag alongside the dialect ID, so the version section is
    // only written when there is something in it.
    const bool hasVersion = versionEmitter.size() != 0;
    dialectEmitter.emitVarIntWithFlag(nameID, hasVersion);
    if (hasVersion)
      dialectEmitter.emitSection(bytecode::Section::kDialectVersions,
                                 std::move(versionEmitter));
  }

  // The referenced operation names, grouped by dialect. Names are stored
  // without their dialect prefix.
  auto emitOpName = [&](OpNameNumbering &name) {
    const size_t opNameID = stringSection.insert(name.name.stripDialect());
    dialectEmitter.emitVarInt(opNameID);
  };
  writeDialectGrouping(dialectEmitter, numberingState.getOpNames(), emitOpName);

  emitter.emitSection(bytecode::Section::kDialect, std::move(dialectEmitter));
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Attributes and Types
|
|
|
|
/// Emit the attribute/type sections: an offset section that records, for each
/// entry, its encoded size and whether it uses a custom encoding, and a data
/// section that holds the encoded attributes and types themselves.
void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
  EncodingEmitter attrTypeEmitter;
  EncodingEmitter offsetEmitter;
  offsetEmitter.emitVarInt(llvm::size(numberingState.getAttributes()));
  offsetEmitter.emitVarInt(llvm::size(numberingState.getTypes()));

  // Functor that emits one attribute or type entry. `prevOffset` tracks the
  // end of the previously emitted entry so that sizes can be recorded.
  uint64_t prevOffset = 0;
  auto emitAttrOrType = [&](auto &entry) {
    auto entryValue = entry.getValue();
    using ValueT = std::decay_t<decltype(entryValue)>;

    // Preferred path: let the bytecode interface of the owning dialect emit a
    // custom encoding.
    bool hasCustomEncoding = false;
    if (const BytecodeDialectInterface *interface = entry.dialect->interface) {
      // The writer handed to the dialect for its custom encoding.
      DialectWriter dialectWriter(attrTypeEmitter, numberingState,
                                  stringSection);

      if constexpr (std::is_same_v<ValueT, Type>) {
        // TODO: We don't currently support custom encoded mutable types.
        if (!entryValue.template hasTrait<TypeTrait::IsMutable>())
          hasCustomEncoding =
              succeeded(interface->writeType(entryValue, dialectWriter));
      } else {
        // TODO: We don't currently support custom encoded mutable attributes.
        if (!entryValue.template hasTrait<AttributeTrait::IsMutable>())
          hasCustomEncoding =
              succeeded(interface->writeAttribute(entryValue, dialectWriter));
      }
    }

    // Fallback path: print the entry in its textual form, nul terminated.
    if (!hasCustomEncoding) {
      RawEmitterOstream(attrTypeEmitter) << entryValue;
      attrTypeEmitter.emitByte(0);
    }

    // Record how many bytes this entry occupies, with the custom-encoding
    // flag in the low bit.
    const uint64_t curOffset = attrTypeEmitter.size();
    offsetEmitter.emitVarIntWithFlag(curOffset - prevOffset, hasCustomEncoding);
    prevOffset = curOffset;
  };

  // Attributes first, then types, each grouped by dialect.
  writeDialectGrouping(offsetEmitter, numberingState.getAttributes(),
                       emitAttrOrType);
  writeDialectGrouping(offsetEmitter, numberingState.getTypes(),
                       emitAttrOrType);

  // Emit both sections to the stream, offsets before data.
  emitter.emitSection(bytecode::Section::kAttrTypeOffset,
                      std::move(offsetEmitter));
  emitter.emitSection(bytecode::Section::kAttrType, std::move(attrTypeEmitter));
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Operations
|
|
|
|
/// Emit `block`: its operation count (with a has-arguments flag), its
/// arguments if it has any, and then each of its operations.
void BytecodeWriter::writeBlock(EncodingEmitter &emitter, Block *block) {
  ArrayRef<BlockArgument> blockArgs = block->getArguments();
  const bool hasArgs = !blockArgs.empty();

  // The number of operations in the block; the low bit of the encoded value
  // tells whether the block has arguments.
  const unsigned numOps = numberingState.getOperationCount(block);
  emitter.emitVarIntWithFlag(numOps, hasArgs);

  // The block arguments: a count, then type and location for each one.
  if (hasArgs) {
    emitter.emitVarInt(blockArgs.size());
    for (BlockArgument blockArg : blockArgs) {
      emitter.emitVarInt(numberingState.getNumber(blockArg.getType()));
      emitter.emitVarInt(numberingState.getNumber(blockArg.getLoc()));
    }
  }

  // The operations contained in the block, in order.
  for (Operation &nestedOp : *block)
    writeOp(emitter, &nestedOp);
}
|
|
|
|
/// Emit `op`: its name, a mask byte describing which optional components
/// follow, its location, and then whichever of attributes, results, operands,
/// successors and regions it actually has.
void BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
  emitter.emitVarInt(numberingState.getNumber(op->getName()));

  // Emit a mask for the operation components. We need to fill this in later
  // (when we actually know what needs to be emitted), so emit a placeholder for
  // now.
  uint64_t maskOffset = emitter.size();
  uint8_t opEncodingMask = 0;
  emitter.emitByte(0);

  // Emit the location for this operation.
  emitter.emitVarInt(numberingState.getNumber(op->getLoc()));

  // Emit the attributes of this operation.
  DictionaryAttr attrs = op->getAttrDictionary();
  if (!attrs.empty()) {
    opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
    emitter.emitVarInt(numberingState.getNumber(attrs));
  }

  // Emit the result types of the operation.
  if (unsigned numResults = op->getNumResults()) {
    opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
    emitter.emitVarInt(numResults);
    for (Type type : op->getResultTypes())
      emitter.emitVarInt(numberingState.getNumber(type));
  }

  // Emit the operands of the operation.
  if (unsigned numOperands = op->getNumOperands()) {
    opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
    emitter.emitVarInt(numOperands);
    for (Value operand : op->getOperands())
      emitter.emitVarInt(numberingState.getNumber(operand));
  }

  // Emit the successors of the operation.
  if (unsigned numSuccessors = op->getNumSuccessors()) {
    opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors;
    emitter.emitVarInt(numSuccessors);
    for (Block *successor : op->getSuccessors())
      emitter.emitVarInt(numberingState.getNumber(successor));
  }

  // Check for regions.
  unsigned numRegions = op->getNumRegions();
  if (numRegions)
    opEncodingMask |= bytecode::OpEncodingMask::kHasInlineRegions;

  // Update the mask for the operation.
  emitter.patchByte(maskOffset, opEncodingMask);

  // With the mask emitted, we can now emit the regions of the operation. We do
  // this after mask emission to avoid offset complications that may arise by
  // emitting the regions first (e.g. if the regions are huge, backpatching the
  // op encoding mask is more annoying).
  if (numRegions) {
    // The low bit of the region count records whether the operation is
    // isolated from above.
    bool isIsolatedFromAbove = op->hasTrait<OpTrait::IsIsolatedFromAbove>();
    emitter.emitVarIntWithFlag(numRegions, isIsolatedFromAbove);

    for (Region &region : op->getRegions())
      writeRegion(emitter, &region);
  }
}
|
|
|
|
/// Emit `region`: the number of blocks and values it contains followed by its
/// blocks. An empty region is encoded as just a block count of zero.
void BytecodeWriter::writeRegion(EncodingEmitter &emitter, Region *region) {
  // Nothing but the (zero) block count is needed for an empty region.
  if (region->empty()) {
    emitter.emitVarInt(/*numBlocks*/ 0);
    return;
  }

  // The number of blocks and values within the region.
  auto [numBlocks, numValues] = numberingState.getBlockValueCount(region);
  emitter.emitVarInt(numBlocks);
  emitter.emitVarInt(numValues);

  // The blocks of the region, in order.
  for (Block &nestedBlock : *region)
    writeBlock(emitter, &nestedBlock);
}
|
|
|
|
/// Emit the IR section, which holds the encoding of the root operation `op`.
void BytecodeWriter::writeIRSection(EncodingEmitter &emitter, Operation *op) {
  EncodingEmitter irEmitter;

  // The section is laid out like a block without arguments that contains a
  // single operation. For blocks the low bit of the operation count says
  // whether there are arguments; here that is always false.
  irEmitter.emitVarIntWithFlag(/*numOps*/ 1, /*hasArgs*/ false);

  // The root operation itself.
  writeOp(irEmitter, op);

  emitter.emitSection(bytecode::Section::kIR, std::move(irEmitter));
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Resources
|
|
|
|
namespace {
|
|
/// This class represents a resource builder implementation for the MLIR
|
|
/// bytecode format.
|
|
class ResourceBuilder : public AsmResourceBuilder {
|
|
public:
|
|
using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
|
|
|
|
ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
|
|
PostProcessFn postProcessFn)
|
|
: emitter(emitter), stringSection(stringSection),
|
|
postProcessFn(postProcessFn) {}
|
|
~ResourceBuilder() override = default;
|
|
|
|
void buildBlob(StringRef key, ArrayRef<char> data,
|
|
uint32_t dataAlignment) final {
|
|
emitter.emitOwnedBlobAndAlignment(data, dataAlignment);
|
|
postProcessFn(key, AsmResourceEntryKind::Blob);
|
|
}
|
|
void buildBool(StringRef key, bool data) final {
|
|
emitter.emitByte(data);
|
|
postProcessFn(key, AsmResourceEntryKind::Bool);
|
|
}
|
|
void buildString(StringRef key, StringRef data) final {
|
|
emitter.emitVarInt(stringSection.insert(data));
|
|
postProcessFn(key, AsmResourceEntryKind::String);
|
|
}
|
|
|
|
private:
|
|
EncodingEmitter &emitter;
|
|
StringSectionBuilder &stringSection;
|
|
PostProcessFn postProcessFn;
|
|
};
|
|
} // namespace
|
|
|
|
/// Emit the resource sections: a data section holding the encoded resources
/// and an offset section describing, per resource group, the key, size and
/// kind of every entry. External (config supplied) printers are processed
/// first, followed by the dialects that have an asm interface.
void BytecodeWriter::writeResourceSection(
    Operation *op, EncodingEmitter &emitter,
    const BytecodeWriterConfig::Impl &config) {
  EncodingEmitter resourceEmitter;
  EncodingEmitter resourceOffsetEmitter;
  uint64_t prevOffset = 0;
  SmallVector<std::tuple<StringRef, AsmResourceEntryKind, uint64_t>>
      curResourceEntries;

  // Functor that records an entry for the resource `key` of the given `kind`
  // in the current group. The entry size is the number of bytes emitted since
  // the previous entry was recorded.
  auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
    const uint64_t curOffset = resourceEmitter.size();
    curResourceEntries.emplace_back(key, kind, curOffset - prevOffset);
    prevOffset = curOffset;
  };

  // Functor that emits the entries recorded for the current group under the
  // group identifier `key`.
  auto emitResourceGroup = [&](uint64_t key) {
    resourceOffsetEmitter.emitVarInt(key);
    resourceOffsetEmitter.emitVarInt(curResourceEntries.size());
    for (const auto &[entryKey, entryKind, entrySize] : curResourceEntries) {
      resourceOffsetEmitter.emitVarInt(stringSection.insert(entryKey));
      resourceOffsetEmitter.emitVarInt(entrySize);
      resourceOffsetEmitter.emitByte(entryKind);
    }
  };

  // The builder through which printers and dialects emit their resources.
  ResourceBuilder entryBuilder(resourceEmitter, stringSection,
                               appendResourceOffset);

  // External resources: a group count followed by one group per printer,
  // identified by the string index of the printer name.
  resourceOffsetEmitter.emitVarInt(config.externalResourcePrinters.size());
  for (const auto &printer : config.externalResourcePrinters) {
    curResourceEntries.clear();
    printer->buildResources(op, entryBuilder);
    emitResourceGroup(stringSection.insert(printer->getName()));
  }

  // Dialect resources: one group per dialect that has an asm interface and
  // ends up with at least one entry, identified by the dialect number.
  for (DialectNumbering &dialect : numberingState.getDialects()) {
    if (!dialect.asmInterface)
      continue;
    curResourceEntries.clear();
    dialect.asmInterface->buildResources(op, dialect.resources, entryBuilder);

    // Declaration resources are not emitted by the interface, so add entries
    // for them here. They carry no data; the "blob" kind is used purely as a
    // placeholder.
    for (const auto &resource : dialect.resourceMap)
      if (resource.second->isDeclaration)
        appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);

    // Emit the group unless the dialect turned out to have no entries.
    if (!curResourceEntries.empty())
      emitResourceGroup(dialect.number);
  }

  // If we didn't emit any resource groups, elide the resource sections.
  // NOTE(review): the external printer count is emitted unconditionally above,
  // so the offset emitter is never empty here and this early return cannot
  // trigger as written — confirm the intended behaviour before changing it.
  if (resourceOffsetEmitter.size() == 0)
    return;

  emitter.emitSection(bytecode::Section::kResourceOffset,
                      std::move(resourceOffsetEmitter));
  emitter.emitSection(bytecode::Section::kResource, std::move(resourceEmitter));
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Strings
|
|
|
|
/// Emit the string section, containing every string collected by the string
/// section builder.
void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
  EncodingEmitter sectionBody;
  stringSection.write(sectionBody);
  emitter.emitSection(bytecode::Section::kString, std::move(sectionBody));
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Entry Points
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// Public entry point: encode `op` as bytecode according to `config` and
/// write the result to `os`.
void mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
                               const BytecodeWriterConfig &config) {
  const BytecodeWriterConfig::Impl &configImpl = config.getImpl();
  BytecodeWriter writer(op);
  writer.write(op, os, configImpl);
}
|