[HIP] Support compressing device binary (#67162)

Add option -f[no-]offload-compress to clang to enable/disable
compression of device binary for HIP. By default it is disabled.

Add option -compress to clang-offload-bundler to enable compression of
offload bundle. By default it is disabled.

When enabled, zstd or zlib is used for compression when available.

When disabled, it is NFC compared to previous behavior. The same offload
bundle format is used as before.

Clang-offload-bundler automatically detects whether the input file to be
unbundled is compressed and the compression method and decompress if
necessary.
This commit is contained in:
Yaxun (Sam) Liu 2023-10-04 09:32:56 -04:00 committed by GitHub
parent ff48e83f18
commit a1e81d2ead
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 656 additions and 60 deletions

View File

@ -309,3 +309,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
* Their offload kind are the same
* Their target triple are the same
* Their GPUArch are the same
Compression and Decompression
=============================
``clang-offload-bundler`` provides features to compress and decompress the full
bundle, leveraging inherent redundancies within the bundle entries. Use the
`-compress` command-line option to enable this compression capability.
The compressed offload bundle begins with a header followed by the compressed binary data:
- **Magic Number (4 bytes)**:
This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
- **Version Number (16-bit unsigned int)**:
This denotes the version of the compressed offload bundle format. The current version is `1`.
- **Compression Method (16-bit unsigned int)**:
This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
- **Uncompressed Binary Size (32-bit unsigned int)**:
This is the size (in bytes) of the binary data before it was compressed.
- **Hash (64-bit unsigned int)**:
This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes.
- **Compressed Data**:
The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.

View File

@ -19,6 +19,7 @@
#include "llvm/Support/Error.h"
#include "llvm/TargetParser/Triple.h"
#include <llvm/Support/MemoryBuffer.h>
#include <string>
#include <vector>
@ -26,11 +27,15 @@ namespace clang {
class OffloadBundlerConfig {
public:
OffloadBundlerConfig();
bool AllowNoHost = false;
bool AllowMissingBundles = false;
bool CheckInputArchive = false;
bool PrintExternalCommands = false;
bool HipOpenmpCompatible = false;
bool Compress = false;
bool Verbose = false;
unsigned BundleAlignment = 1;
unsigned HostInputIndex = ~0u;
@ -84,6 +89,38 @@ struct OffloadTargetInfo {
std::string str() const;
};
// CompressedOffloadBundle represents the format for the compressed offload
// bundles.
//
// The format is as follows:
// - Magic Number (4 bytes) - A constant "CCOB".
// - Version (2 bytes)
// - Compression Method (2 bytes) - Uses the values from
// llvm::compression::Format.
// - Uncompressed Size (4 bytes).
// - Truncated MD5 Hash (8 bytes).
// - Compressed Data (variable length).
class CompressedOffloadBundle {
private:
static inline const size_t MagicSize = 4;
static inline const size_t VersionFieldSize = sizeof(uint16_t);
static inline const size_t MethodFieldSize = sizeof(uint16_t);
static inline const size_t SizeFieldSize = sizeof(uint32_t);
static inline const size_t HashFieldSize = 8;
static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
MethodFieldSize + SizeFieldSize +
HashFieldSize;
static inline const llvm::StringRef MagicNumber = "CCOB";
static inline const uint16_t Version = 1;
public:
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
};
} // namespace clang
#endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H

View File

@ -1183,6 +1183,10 @@ def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
HelpText<"Enable sanitizer for supported offloading devices">;
def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
def offload_compress : Flag<["--"], "offload-compress">,
HelpText<"Compress offload device binaries (HIP only)">;
def no_offload_compress : Flag<["--"], "no-offload-compress">;
}
// CUDA options

View File

@ -21,24 +21,29 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Host.h"
@ -48,6 +53,7 @@
#include <cstddef>
#include <cstdint>
#include <forward_list>
#include <llvm/Support/Process.h>
#include <memory>
#include <set>
#include <string>
@ -58,6 +64,10 @@ using namespace llvm;
using namespace llvm::object;
using namespace clang;
static llvm::TimerGroup
ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
"Timer group for clang offload bundler");
/// Magic string that marks the existence of offloading data.
#define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
@ -224,20 +234,22 @@ public:
/// Write the header of the bundled file to \a OS based on the information
/// gathered from \a Inputs.
virtual Error WriteHeader(raw_fd_ostream &OS,
virtual Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
/// Write the marker that initiates a bundle for the triple \a TargetTriple to
/// \a OS.
virtual Error WriteBundleStart(raw_fd_ostream &OS,
StringRef TargetTriple) = 0;
virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
/// Write the marker that closes a bundle for the triple \a TargetTriple to \a
/// OS.
virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
/// Write the bundle from \a Input into \a OS.
virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
/// Finalize output file.
virtual Error finalizeOutputFile() { return Error::success(); }
/// List bundle IDs in \a Input.
virtual Error listBundleIDs(MemoryBuffer &Input) {
@ -311,7 +323,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
}
/// Write 8-byte integers to a buffer in little-endian format.
static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
llvm::support::endian::write(OS, Val, llvm::support::little);
}
@ -359,8 +371,7 @@ public:
return Error::success();
// Check if no magic was found.
StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
return Error::success();
// Read number of bundles.
@ -435,7 +446,7 @@ public:
return Error::success();
}
Error WriteHeader(raw_fd_ostream &OS,
Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
// Compute size of the header.
@ -472,19 +483,27 @@ public:
return Error::success();
}
Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
CurWriteBundleTarget = TargetTriple.str();
return Error::success();
}
Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
return Error::success();
}
Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
auto BI = BundlesInfo[CurWriteBundleTarget];
OS.seek(BI.Offset);
// Pad with 0 to reach specified offset.
size_t CurrentPos = OS.tell();
size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
for (size_t I = 0; I < PaddingSize; ++I)
OS.write('\0');
assert(OS.tell() == BI.Offset);
OS.write(Input.getBufferStart(), Input.getBufferSize());
return Error::success();
}
};
@ -541,7 +560,7 @@ class ObjectFileHandler final : public FileHandler {
return NameOrErr.takeError();
// If it does not start with the reserved suffix, just skip this section.
if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR))
if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
return std::nullopt;
// Return the triple that is right after the reserved prefix.
@ -607,7 +626,7 @@ public:
return Error::success();
}
Error WriteHeader(raw_fd_ostream &OS,
Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
assert(BundlerConfig.HostInputIndex != ~0u &&
"Host input index not defined.");
@ -617,12 +636,16 @@ public:
return Error::success();
}
Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
++NumberOfProcessedInputs;
return Error::success();
}
Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
return Error::success();
}
Error finalizeOutputFile() final {
assert(NumberOfProcessedInputs <= NumberOfInputs &&
"Processing more inputs that actually exist!");
assert(BundlerConfig.HostInputIndex != ~0u &&
@ -640,10 +663,6 @@ public:
assert(BundlerConfig.ObjcopyPath != "" &&
"llvm-objcopy path not specified");
// We write to the output file directly. So, we close it and use the name
// to pass down to llvm-objcopy.
OS.close();
// Temporary files that need to be removed.
TempFileHandlerRAII TempFiles;
@ -684,7 +703,7 @@ public:
return Error::success();
}
Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
return Error::success();
}
@ -781,22 +800,22 @@ protected:
return Error::success();
}
Error WriteHeader(raw_fd_ostream &OS,
Error WriteHeader(raw_ostream &OS,
ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
return Error::success();
}
Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
OS << BundleStartString << TargetTriple << "\n";
return Error::success();
}
Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
OS << BundleEndString << TargetTriple << "\n";
return Error::success();
}
Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
OS << Input.getBuffer();
return Error::success();
}
@ -881,6 +900,181 @@ CreateFileHandler(MemoryBuffer &FirstInput,
"'" + FilesType + "': invalid file type specified");
}
OffloadBundlerConfig::OffloadBundlerConfig() {
auto IgnoreEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
return;
auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
if (VerboseEnvVarOpt.has_value())
Verbose = VerboseEnvVarOpt.value() == "1";
auto CompressEnvVarOpt =
llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
if (CompressEnvVarOpt.has_value())
Compress = CompressEnvVarOpt.value() == "1";
}
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
bool Verbose) {
llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
HashTimer.startTimer();
llvm::MD5 Hash;
llvm::MD5::MD5Result Result;
Hash.update(Input.getBuffer());
Hash.final(Result);
uint64_t TruncatedHash = Result.low();
if (Verbose)
HashTimer.stopTimer();
SmallVector<uint8_t, 0> CompressedBuffer;
auto BufferUint8 = llvm::ArrayRef<uint8_t>(
reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
Input.getBuffer().size());
llvm::compression::Format CompressionFormat;
if (llvm::compression::zstd::isAvailable())
CompressionFormat = llvm::compression::Format::Zstd;
else if (llvm::compression::zlib::isAvailable())
CompressionFormat = llvm::compression::Format::Zlib;
else
return createStringError(llvm::inconvertibleErrorCode(),
"Compression not supported");
llvm::Timer CompressTimer("Compression Timer", "Compression time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
CompressTimer.startTimer();
llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
if (Verbose)
CompressTimer.stopTimer();
uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
uint32_t UncompressedSize = Input.getBuffer().size();
SmallVector<char, 0> FinalBuffer;
llvm::raw_svector_ostream OS(FinalBuffer);
OS << MagicNumber;
OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
OS.write(reinterpret_cast<const char *>(&CompressionMethod),
sizeof(CompressionMethod));
OS.write(reinterpret_cast<const char *>(&UncompressedSize),
sizeof(UncompressedSize));
OS.write(reinterpret_cast<const char *>(&TruncatedHash),
sizeof(TruncatedHash));
OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
CompressedBuffer.size());
if (Verbose) {
auto MethodUsed =
CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
llvm::errs() << "Compressed bundle format version: " << Version << "\n"
<< "Compression method used: " << MethodUsed << "\n"
<< "Binary size before compression: " << UncompressedSize
<< " bytes\n"
<< "Binary size after compression: " << CompressedBuffer.size()
<< " bytes\n"
<< "Truncated MD5 hash: "
<< llvm::format_hex(TruncatedHash, 16) << "\n";
}
return llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
bool Verbose) {
StringRef Blob = Input.getBuffer();
if (Blob.size() < HeaderSize) {
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}
if (llvm::identify_magic(Blob) !=
llvm::file_magic::offload_bundle_compressed) {
if (Verbose)
llvm::errs() << "Uncompressed bundle.\n";
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}
uint16_t ThisVersion = *reinterpret_cast<const uint16_t *>(
Input.getBuffer().data() + MagicNumber.size());
uint16_t CompressionMethod = *reinterpret_cast<const uint16_t *>(
Blob.data() + MagicSize + VersionFieldSize);
uint32_t UncompressedSize = *reinterpret_cast<const uint32_t *>(
Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize);
uint64_t StoredHash = *reinterpret_cast<const uint64_t *>(
Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
SizeFieldSize);
llvm::compression::Format CompressionFormat;
if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zlib))
CompressionFormat = llvm::compression::Format::Zlib;
else if (CompressionMethod ==
static_cast<uint16_t>(llvm::compression::Format::Zstd))
CompressionFormat = llvm::compression::Format::Zstd;
else
return createStringError(inconvertibleErrorCode(),
"Unknown compressing method");
llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
ClangOffloadBundlerTimerGroup);
if (Verbose)
DecompressTimer.startTimer();
SmallVector<uint8_t, 0> DecompressedData;
StringRef CompressedData = Blob.substr(HeaderSize);
if (llvm::Error DecompressionError = llvm::compression::decompress(
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))
return createStringError(inconvertibleErrorCode(),
"Could not decompress embedded file contents: " +
llvm::toString(std::move(DecompressionError)));
if (Verbose) {
DecompressTimer.stopTimer();
// Recalculate MD5 hash
llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
"Hash recalculation time",
ClangOffloadBundlerTimerGroup);
HashRecalcTimer.startTimer();
llvm::MD5 Hash;
llvm::MD5::MD5Result Result;
Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
DecompressedData.size()));
Hash.final(Result);
uint64_t RecalculatedHash = Result.low();
HashRecalcTimer.stopTimer();
bool HashMatch = (StoredHash == RecalculatedHash);
llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
<< "Decompression method: "
<< (CompressionFormat == llvm::compression::Format::Zlib
? "zlib"
: "zstd")
<< "\n"
<< "Size before decompression: " << CompressedData.size()
<< " bytes\n"
<< "Size after decompression: " << UncompressedSize
<< " bytes\n"
<< "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
<< "Recalculated hash: "
<< llvm::format_hex(RecalculatedHash, 16) << "\n"
<< "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
}
return llvm::MemoryBuffer::getMemBufferCopy(
llvm::toStringRef(DecompressedData));
}
// List bundle IDs. Return true if an error was found.
Error OffloadBundler::ListBundleIDsInFile(
StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
@ -890,28 +1084,35 @@ Error OffloadBundler::ListBundleIDsInFile(
if (std::error_code EC = CodeOrErr.getError())
return createFileError(InputFileName, EC);
MemoryBuffer &Input = **CodeOrErr;
// Decompress the input if necessary.
Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
if (!DecompressedBufferOrErr)
return createStringError(
inconvertibleErrorCode(),
"Failed to decompress input: " +
llvm::toString(DecompressedBufferOrErr.takeError()));
MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
// Select the right files handler.
Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
CreateFileHandler(Input, BundlerConfig);
CreateFileHandler(DecompressedInput, BundlerConfig);
if (!FileHandlerOrErr)
return FileHandlerOrErr.takeError();
std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
assert(FH);
return FH->listBundleIDs(Input);
return FH->listBundleIDs(DecompressedInput);
}
/// Bundle the files. Return true if an error was found.
Error OffloadBundler::BundleFiles() {
std::error_code EC;
// Create output file.
raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
sys::fs::OF_None);
if (EC)
return createFileError(BundlerConfig.OutputFileNames.front(), EC);
// Create a buffer to hold the content before compressing.
SmallVector<char, 0> Buffer;
llvm::raw_svector_ostream BufferStream(Buffer);
// Open input files.
SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
@ -938,22 +1139,46 @@ Error OffloadBundler::BundleFiles() {
assert(FH);
// Write header.
if (Error Err = FH->WriteHeader(OutputFile, InputBuffers))
if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
return Err;
// Write all bundles along with the start/end markers. If an error was found
// writing the end of the bundle component, abort the bundle writing.
auto Input = InputBuffers.begin();
for (auto &Triple : BundlerConfig.TargetNames) {
if (Error Err = FH->WriteBundleStart(OutputFile, Triple))
if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
return Err;
if (Error Err = FH->WriteBundle(OutputFile, **Input))
if (Error Err = FH->WriteBundle(BufferStream, **Input))
return Err;
if (Error Err = FH->WriteBundleEnd(OutputFile, Triple))
if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
return Err;
++Input;
}
return Error::success();
raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
sys::fs::OF_None);
if (EC)
return createFileError(BundlerConfig.OutputFileNames.front(), EC);
SmallVector<char, 0> CompressedBuffer;
if (BundlerConfig.Compress) {
std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
llvm::MemoryBuffer::getMemBufferCopy(
llvm::StringRef(Buffer.data(), Buffer.size()));
auto CompressionResult =
CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
if (auto Error = CompressionResult.takeError())
return Error;
auto CompressedMemBuffer = std::move(CompressionResult.get());
CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
CompressedMemBuffer->getBufferEnd());
} else
CompressedBuffer = Buffer;
OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
return FH->finalizeOutputFile();
}
// Unbundle the files. Return true if an error was found.
@ -964,7 +1189,16 @@ Error OffloadBundler::UnbundleFiles() {
if (std::error_code EC = CodeOrErr.getError())
return createFileError(BundlerConfig.InputFileNames.front(), EC);
MemoryBuffer &Input = **CodeOrErr;
// Decompress the input if necessary.
Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
if (!DecompressedBufferOrErr)
return createStringError(
inconvertibleErrorCode(),
"Failed to decompress input: " +
llvm::toString(DecompressedBufferOrErr.takeError()));
MemoryBuffer &Input = **DecompressedBufferOrErr;
// Select the right files handler.
Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
@ -1169,11 +1403,23 @@ Error OffloadBundler::UnbundleArchive() {
if (!CodeObjectBufferRefOrErr)
return CodeObjectBufferRefOrErr.takeError();
auto CodeObjectBuffer =
auto TempCodeObjectBuffer =
MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
// Decompress the buffer if necessary.
Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
BundlerConfig.Verbose);
if (!DecompressedBufferOrErr)
return createStringError(
inconvertibleErrorCode(),
"Failed to decompress code object: " +
llvm::toString(DecompressedBufferOrErr.takeError()));
MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
CreateFileHandler(CodeObjectBuffer, BundlerConfig);
if (!FileHandlerOrErr)
return FileHandlerOrErr.takeError();
@ -1181,11 +1427,11 @@ Error OffloadBundler::UnbundleArchive() {
assert(FileHandler &&
"FileHandle creation failed for file in the archive!");
if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer))
if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
return ReadErr;
Expected<std::optional<StringRef>> CurBundleIDOrErr =
FileHandler->ReadBundleStart(*CodeObjectBuffer);
FileHandler->ReadBundleStart(CodeObjectBuffer);
if (!CurBundleIDOrErr)
return CurBundleIDOrErr.takeError();
@ -1206,7 +1452,7 @@ Error OffloadBundler::UnbundleArchive() {
BundlerConfig)) {
std::string BundleData;
raw_string_ostream DataStream(BundleData);
if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer))
if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
return Err;
for (auto &CompatibleTarget : CompatibleTargets) {
@ -1244,11 +1490,11 @@ Error OffloadBundler::UnbundleArchive() {
}
}
if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer))
if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
return Err;
Expected<std::optional<StringRef>> NextTripleOrErr =
FileHandler->ReadBundleStart(*CodeObjectBuffer);
FileHandler->ReadBundleStart(CodeObjectBuffer);
if (!NextTripleOrErr)
return NextTripleOrErr.takeError();

View File

@ -8470,6 +8470,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
}
CmdArgs.push_back(TCArgs.MakeArgString(UB));
}
if (TCArgs.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
CmdArgs.push_back("-compress");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
@ -8553,6 +8558,8 @@ void OffloadBundler::ConstructJobMultipleOutputs(
}
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
if (TCArgs.hasArg(options::OPT_v))
CmdArgs.push_back("-verbose");
// All the inputs are encoded as commands.
C.addCommand(std::make_unique<Command>(

View File

@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
Args.MakeArgString(std::string("-output=").append(Output));
BundlerArgs.push_back(BundlerOutputArg);
if (Args.hasFlag(options::OPT_offload_compress,
options::OPT_no_offload_compress, false))
BundlerArgs.push_back("-compress");
if (Args.hasArg(options::OPT_v))
BundlerArgs.push_back("-verbose");
const char *Bundler = Args.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
C.addCommand(std::make_unique<Command>(

View File

@ -0,0 +1,75 @@
// REQUIRES: zlib
// REQUIRES: x86-registered-target
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
//
// Generate the host binary to be bundled.
//
// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
//
// Generate an empty file to help with the checks of empty files.
//
// RUN: touch %t.empty
//
// Generate device binaries to be bundled.
//
// RUN: echo 'Content of device file 1' > %t.tgt1
// RUN: echo 'Content of device file 2' > %t.tgt2
//
// Check compression/decompression of offload bundle.
//
// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
// RUN: FileCheck -check-prefix=COMPRESS %s
// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
// RUN: FileCheck -check-prefix=DECOMPRESS %s
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compression method used:
// DECOMPRESS: Decompression method:
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//
//
// Check -bundle-align option.
//
// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
// RUN: diff %t.bc %t.res.bc
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// Check unbundling archive.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
// Some code so that we can create a binary out of this file.
int A = 0;
void test_func(void) {
++A;
}

View File

@ -0,0 +1,72 @@
// REQUIRES: zstd
// REQUIRES: x86-registered-target
// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
//
// Generate the host binary to be bundled.
//
// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
//
// Generate an empty file to help with the checks of empty files.
//
// RUN: touch %t.empty
//
// Generate device binaries to be bundled.
//
// RUN: echo 'Content of device file 1' > %t.tgt1
// RUN: echo 'Content of device file 2' > %t.tgt2
//
// Check compression/decompression of offload bundle.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
// RUN: FileCheck -check-prefix=COMPRESS %s
// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
// RUN: FileCheck -check-prefix=DECOMPRESS %s
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// COMPRESS: Compression method used
// DECOMPRESS: Decompression method
// NOHOST-NOT: host-
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
//
//
// Check -bundle-align option.
//
// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
// RUN: diff %t.bc %t.res.bc
// RUN: diff %t.tgt1 %t.res.tgt1
// RUN: diff %t.tgt2 %t.res.tgt2
//
// Check unbundling archive.
//
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
// RUN: -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
// Some code so that we can create a binary out of this file.
int A = 0;
void test_func(void) {
++A;
}

View File

@ -0,0 +1,47 @@
// REQUIRES: zlib
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
// Test compress bundled bitcode.
// RUN: rm -rf %T/a.bc
// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
// RUN: -o %T/a.bc \
// RUN: 2>&1 | FileCheck %s
// CHECK: clang-offload-bundler{{.*}} -type=bc
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// CHECK-SAME: -compress -verbose
// CHECK: Compressed bundle format
// Test uncompress of bundled bitcode.
// RUN: %clang --hip-link -v --target=x86_64-linux-gnu \
// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpulib \
// RUN: %T/a.bc --offload-device-only \
// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
// UNBUNDLE: clang-offload-bundler{{.*}} -type=bc
// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// UNBUNDLE-SAME: -unbundle
// UNBUNDLE-SAME: -verbose
// UNBUNDLE: Compressed bundle format
// Test compress bundled code objects.
// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress \
// RUN: 2>&1 | FileCheck -check-prefix=CO %s
// CO: clang-offload-bundler{{.*}} -type=o
// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
// CO-SAME: -compress -verbose
// CO: Compressed bundle format

View File

@ -0,0 +1,47 @@
// REQUIRES: zstd
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
// Test compress bundled bitcode.
// RUN: rm -rf %T/a.bc
// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress --offload-device-only --gpu-bundle-output \
// RUN: -o %T/a.bc \
// RUN: 2>&1 | FileCheck %s
// CHECK: clang-offload-bundler{{.*}} -type=bc
// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// CHECK-SAME: -compress -verbose
// CHECK: Compressed bundle format
// Test uncompress of bundled bitcode.
// RUN: %clang --hip-link -v --target=x86_64-linux-gnu \
// RUN: --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -fgpu-rdc -nogpulib \
// RUN: %T/a.bc --offload-device-only \
// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
// UNBUNDLE: clang-offload-bundler{{.*}} -type=bc
// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
// UNBUNDLE-SAME: -unbundle
// UNBUNDLE-SAME: -verbose
// UNBUNDLE: Compressed bundle format
// Test compress bundled code objects.
// RUN: %clang -c -v --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
// RUN: -nogpuinc -nogpulib \
// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
// RUN: --offload-compress \
// RUN: 2>&1 | FileCheck -check-prefix=CO %s
// CO: clang-offload-bundler{{.*}} -type=o
// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
// CO-SAME: -compress -verbose
// CO: Compressed bundle format

View File

@ -1,4 +1,5 @@
set(LLVM_LINK_COMPONENTS
BinaryFormat
Object
Support
TargetParser

View File

@ -136,6 +136,11 @@ int main(int argc, const char **argv) {
cl::desc("Treat hip and hipv4 offload kinds as "
"compatible with openmp kind, and vice versa.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
cl::opt<bool> Compress("compress",
cl::desc("Compress output file when bundling.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
cl::init(false), cl::cat(ClangOffloadBundlerCategory));
// Process commandline options and report errors
sys::PrintStackTraceOnErrorSignal(argv[0]);
@ -163,6 +168,11 @@ int main(int argc, const char **argv) {
BundlerConfig.BundleAlignment = BundleAlignment;
BundlerConfig.FilesType = FilesType;
BundlerConfig.ObjcopyPath = "";
// Do not override the default value Compress and Verbose in BundlerConfig.
if (Compress.getNumOccurrences() > 0)
BundlerConfig.Compress = Compress;
if (Verbose.getNumOccurrences() > 0)
BundlerConfig.Verbose = Verbose;
BundlerConfig.TargetNames = TargetNames;
BundlerConfig.InputFileNames = InputFileNames;

View File

@ -42,19 +42,21 @@ struct file_magic {
macho_universal_binary, ///< Mach-O universal binary
macho_file_set, ///< Mach-O file set binary
minidump, ///< Windows minidump file
coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
coff_object, ///< COFF object file
coff_import_library, ///< COFF import library
pecoff_executable, ///< PECOFF executable file
windows_resource, ///< Windows compiled resource file (.res)
xcoff_object_32, ///< 32-bit XCOFF object file
xcoff_object_64, ///< 64-bit XCOFF object file
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
tapi_file, ///< Text-based Dynamic Library Stub file
cuda_fatbinary, ///< CUDA Fatbinary object file
offload_binary, ///< LLVM offload object file
dxcontainer_object, ///< DirectX container file
coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
coff_object, ///< COFF object file
coff_import_library, ///< COFF import library
pecoff_executable, ///< PECOFF executable file
windows_resource, ///< Windows compiled resource file (.res)
xcoff_object_32, ///< 32-bit XCOFF object file
xcoff_object_64, ///< 64-bit XCOFF object file
wasm_object, ///< WebAssembly Object file
pdb, ///< Windows PDB debug info file
tapi_file, ///< Text-based Dynamic Library Stub file
cuda_fatbinary, ///< CUDA Fatbinary object file
offload_binary, ///< LLVM offload object file
dxcontainer_object, ///< DirectX container file
offload_bundle, ///< Clang offload bundle file
offload_bundle_compressed, ///< Compressed clang offload bundle file
};
bool is_object() const { return V != unknown; }

View File

@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
if (startswith(Magic, "BC\xC0\xDE"))
return file_magic::bitcode;
break;
case 'C':
if (startswith(Magic, "CCOB"))
return file_magic::offload_bundle_compressed;
break;
case '!':
if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
return file_magic::archive;
@ -251,6 +255,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
case '_': {
const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
return file_magic::offload_bundle;
break;
}
default:
break;
}

View File

@ -87,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case file_magic::cuda_fatbinary:
case file_magic::coff_cl_gl_object:
case file_magic::dxcontainer_object:
case file_magic::offload_bundle:
case file_magic::offload_bundle_compressed:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
case file_magic::offload_binary:

View File

@ -158,6 +158,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
case file_magic::cuda_fatbinary:
case file_magic::offload_binary:
case file_magic::dxcontainer_object:
case file_magic::offload_bundle:
case file_magic::offload_bundle_compressed:
return errorCodeToError(object_error::invalid_file_type);
case file_magic::tapi_file:
return errorCodeToError(object_error::invalid_file_type);