Thunks: Add a new code generator based on libclang

This commit is contained in:
Tony Wasserka 2021-12-10 11:24:56 +01:00
parent c65be9f55d
commit c05e1c9797
5 changed files with 340 additions and 0 deletions

View File

@ -442,6 +442,8 @@ if (BUILD_TESTS)
endif()
if (BUILD_THUNKS)
add_subdirectory(ThunkLibs/Generator)
include(ExternalProject)
ExternalProject_Add(host-libs
@ -452,8 +454,10 @@ if (BUILD_THUNKS)
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
"-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
"-DVULKAN_XML=${CMAKE_SOURCE_DIR}/External/Vulkan-Docs/xml/vk.xml"
"-DGENERATOR_EXE=$<TARGET_FILE:thunkgen>"
INSTALL_COMMAND ""
BUILD_ALWAYS ON
DEPENDS thunkgen
)
install(
@ -476,8 +480,10 @@ if (BUILD_THUNKS)
"-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
"-DSTRUCT_VERIFIER=${CMAKE_SOURCE_DIR}/Scripts/StructPackVerifier.py"
"-DVULKAN_XML=${CMAKE_SOURCE_DIR}/External/Vulkan-Docs/xml/vk.xml"
"-DGENERATOR_EXE=$<TARGET_FILE:thunkgen>"
INSTALL_COMMAND ""
BUILD_ALWAYS ON
DEPENDS thunkgen
)
install(

View File

@ -0,0 +1,11 @@
# Build script for the libclang-based thunk generator.
# Clang provides the tooling/AST libraries; OpenSSL's Crypto component provides SHA256.
find_package(Clang REQUIRED CONFIG)
find_package(OpenSSL REQUIRED COMPONENTS Crypto)
# Generator logic lives in a library so other targets can link against it
add_library(thunkgenlib gen.cpp)
# Consumers of thunkgenlib get this directory on their include path
target_include_directories(thunkgenlib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
# Clang headers are marked SYSTEM and propagated to consumers (PUBLIC)
target_include_directories(thunkgenlib SYSTEM PUBLIC ${CLANG_INCLUDE_DIRS})
target_link_libraries(thunkgenlib PUBLIC clangTooling)
# OpenSSL is only needed by the library's own sources
target_link_libraries(thunkgenlib PRIVATE OpenSSL::Crypto)
# Command-line front end
add_executable(thunkgen main.cpp)
target_link_libraries(thunkgen PRIVATE thunkgenlib)

230
ThunkLibs/Generator/gen.cpp Normal file
View File

@ -0,0 +1,230 @@
#include "clang/AST/RecursiveASTVisitor.h"
#include <fstream>
#include <iostream>
#include <iomanip>
#include <string_view>
#include <openssl/sha.h>
#include "interface.h"
/**
 * Parameter list shared by the function descriptions below (used as their base class).
 */
struct FunctionParams {
// Types of the function's parameters, in declaration order
std::vector<clang::QualType> param_types;
};
/**
* Guest<->Host transition point.
*
* These are normally used to translate the public API of the guest to host
* function calls (ThunkedAPIFunction), but a thunk library may also define
* internal thunks that don't correspond to any function in the implemented
* API.
*/
struct ThunkedFunction : FunctionParams {
// Name of the function as returned by the clang declaration
std::string function_name;
// Return type of the function
clang::QualType return_type;
// Declaration of the function that was passed as the fex_gen_config template argument
clang::FunctionDecl* decl;
};
/**
* Function that is part of the API of the thunked library.
*
* For each of these, there is:
* - A publicly visible guest entrypoint (usually auto-generated but may be manually defined)
* - A pointer to the native host library function loaded through dlsym (or a user-provided function specified via host_loader)
* - A ThunkedFunction with the same function_name (possibly suffixed with _internal)
*
* NOTE: Instances are aggregate-initialized (base class first, then the members
* in declaration order), so the member order must not be changed.
*/
struct ThunkedAPIFunction : FunctionParams {
// Name of the API function; used for the public alias emitted by the generator
std::string function_name;
// Return type of the API function
clang::QualType return_type;
};
// Results of the AST traversal: filled by ASTVisitor, consumed by
// FrontendAction::EndSourceFileAction, and cleared whenever a new FrontendAction
// is constructed.
static std::vector<ThunkedFunction> thunks;
static std::vector<ThunkedAPIFunction> thunked_api;
/**
 * AST visitor that collects the functions to generate thunks for.
 *
 * Every specialization of "fex_gen_config" found in the translation unit is
 * inspected; the function named by its template argument is appended to the
 * global "thunks" and "thunked_api" lists.
 *
 * Invalid input is reported through the clang diagnostics engine instead of
 * assertions, so malformed annotations are also rejected in builds that
 * define NDEBUG.
 */
class ASTVisitor : public clang::RecursiveASTVisitor<ASTVisitor> {
    clang::ASTContext& context;

    // Errors are thrown as (source location, custom diagnostic ID) pairs and
    // turned into a clang diagnostic by the function-try-block handler below.
    using ClangDiagnosticAsException = std::pair<clang::SourceLocation, unsigned>;

    // Registers "message" as a custom error diagnostic and returns the pair to throw.
    template<std::size_t N>
    [[nodiscard]] ClangDiagnosticAsException Error(clang::SourceLocation loc, const char (&message)[N]) {
        auto id = context.getDiagnostics().getCustomDiagID(clang::DiagnosticsEngine::Error, message);
        return std::pair(loc, id);
    }

public:
    ASTVisitor(clang::ASTContext& context_) : context(context_) {
    }

    /**
     * Matches "template<> struct fex_gen_config<LibraryFunc> { ... }"
     *
     * Returns true to continue the traversal. Returns false (which stops the
     * traversal) after reporting a diagnostic for an unsupported annotation.
     */
    bool VisitClassTemplateSpecializationDecl(clang::ClassTemplateSpecializationDecl* decl) try {
        if (decl->getName() != "fex_gen_config") {
            return true;
        }

        // NOTE(review): the message talks about partial specialization while the
        // condition tests for an explicit instantiation definition - confirm intent.
        if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) {
            throw Error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n");
        }

        // getAsDecl() is only valid for declaration arguments, so check the kind first
        const auto& template_args = decl->getTemplateArgs();
        if (template_args.size() != 1 || template_args[0].getKind() != clang::TemplateArgument::Declaration) {
            throw Error(decl->getBeginLoc(), "fex_gen_config must be specialized with exactly one function\n");
        }

        auto* emitted_function = llvm::dyn_cast<clang::FunctionDecl>(template_args[0].getAsDecl());
        if (!emitted_function) {
            throw Error(decl->getBeginLoc(), "Argument is not a function\n");
        }

        const auto return_type = emitted_function->getReturnType();
        if (return_type->isFunctionPointerType()) {
            throw Error(decl->getBeginLoc(),
                        "Function pointer return types require explicit annotation\n");
        }

        // TODO: Use the types as written in the signature instead?
        ThunkedFunction data;
        data.function_name = emitted_function->getName().str();
        data.return_type = return_type;
        data.decl = emitted_function;

        for (auto* param : emitted_function->parameters()) {
            if (param->getType()->isFunctionPointerType()) {
                throw Error(param->getBeginLoc(), "Function pointer parameters are not supported\n");
            }
            data.param_types.push_back(param->getType());
        }

        // The API entry copies the parameter list (base subobject), name and return type
        thunked_api.push_back(ThunkedAPIFunction { static_cast<const FunctionParams&>(data), data.function_name, data.return_type });
        thunks.push_back(std::move(data));

        return true;
    } catch (const ClangDiagnosticAsException& exception) {
        context.getDiagnostics().Report(exception.first, exception.second);
        return false;
    }
};
/**
 * AST consumer that runs ASTVisitor over the whole translation unit once
 * parsing has finished.
 */
class ASTConsumer : public clang::ASTConsumer {
public:
    void HandleTranslationUnit(clang::ASTContext& context) override {
        ASTVisitor visitor { context };
        auto* translation_unit = context.getTranslationUnitDecl();
        visitor.TraverseDecl(translation_unit);
    }
};
/**
 * Initializes the action for the library "libname_" with the given output
 * files and discards everything collected by a previously run action.
 */
FrontendAction::FrontendAction(const std::string& libname_, const OutputFilenames& output_filenames_)
    : libname(libname_)
    , output_filenames(output_filenames_) {
    // The collections are file-level globals, so start from a clean slate
    thunked_api.clear();
    thunks.clear();
}
void FrontendAction::EndSourceFileAction() {
static auto format_decl = [](clang::QualType type, const std::string_view& name) {
if (type->isFunctionPointerType()) {
auto signature = type.getAsString();
const char needle[] = { '(', '*', ')' };
auto it = std::search(signature.begin(), signature.end(), std::begin(needle), std::end(needle));
if (it == signature.end()) {
// It's *probably* a typedef, so this should be safe after all
return signature + " " + std::string(name);
} else {
signature.insert(it + 2, name.begin(), name.end());
return signature;
}
} else {
return type.getAsString() + " " + std::string(name);
}
};
auto get_sha256 = [this](const std::string& function_name) {
std::string sha256_message = libname + ":" + function_name;
std::vector<unsigned char> sha256(SHA256_DIGEST_LENGTH);
SHA256(reinterpret_cast<const unsigned char*>(sha256_message.data()),
sha256_message.size(),
sha256.data());
return sha256;
};
if (!output_filenames.thunks.empty()) {
std::ofstream file(output_filenames.thunks);
file << "extern \"C\" {\n";
for (auto& thunk : thunks) {
const auto& function_name = thunk.function_name;
auto sha256 = get_sha256(function_name);
file << "MAKE_THUNK(" << libname << ", " << function_name << ", \"";
bool first = true;
for (auto c : sha256) {
file << (first ? "" : ", ") << "0x" << std::hex << std::setw(2) << std::setfill('0') << +c;
first = false;
}
file << "\")\n";
}
file << "}\n";
}
if (!output_filenames.function_packs_public.empty()) {
std::ofstream file(output_filenames.function_packs_public);
file << "extern \"C\" {\n";
for (auto& data : thunked_api) {
const auto& function_name = data.function_name;
file << "__attribute__((alias(\"fexfn_pack_" << function_name << "\"))) auto " << function_name << "(";
for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
auto& type = data.param_types[idx];
file << (idx == 0 ? "" : ", ") << format_decl(type, "a_" + std::to_string(idx));
}
file << ") -> " << data.return_type.getAsString() << ";\n";
}
file << "}\n";
}
if (!output_filenames.function_packs.empty()) {
std::ofstream file(output_filenames.function_packs);
file << "extern \"C\" {\n";
for (auto& data : thunks) {
const auto& function_name = data.function_name;
bool is_void = data.return_type->isVoidType();
file << "static auto fexfn_pack_" << function_name << "(";
for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
auto& type = data.param_types[idx];
file << (idx == 0 ? "" : ", ") << format_decl(type, "a_" + std::to_string(idx));
}
// Using trailing return type as it makes handling function pointer returns much easier
file << ") -> " << data.return_type.getAsString() << " {\n";
file << " struct {\n";
for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
auto& type = data.param_types[idx];
file << " " << format_decl(type.getUnqualifiedType(), "a_" + std::to_string(idx)) << ";\n";
}
if (!is_void) {
file << " " << format_decl(data.return_type, "rv") << ";\n";
} else if (data.param_types.size() == 0) {
// Avoid "empty struct has size 0 in C, size 1 in C++" warning
file << " char force_nonempty;\n";
}
file << " } args;\n";
for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
file << " args.a_" << idx << " = a_" << idx << ";\n";
}
file << " fexthunks_" << libname << "_" << function_name << "(&args);\n";
if (!is_void) {
file << " return args.rv;\n";
}
file << "}\n";
}
file << "}\n";
}
}
/**
 * Creates the consumer that performs the AST traversal. The compiler
 * instance and the input filename are not used.
 */
std::unique_ptr<clang::ASTConsumer> FrontendAction::CreateASTConsumer(clang::CompilerInstance&, clang::StringRef) {
    auto consumer = std::make_unique<ASTConsumer>();
    return consumer;
}

View File

@ -0,0 +1,24 @@
#include <clang/Frontend/FrontendAction.h>
#include <optional>
#include <string>
/**
 * Paths of the files the generator should write.
 * An empty string disables the corresponding output.
 */
struct OutputFilenames {
// Guest
// Receives one MAKE_THUNK(...) line per thunk
std::string thunks;
// Receives the definitions of the fexfn_pack_* functions
std::string function_packs;
// Receives the public declarations that alias the fexfn_pack_* functions
std::string function_packs_public;
};
/**
 * Clang frontend action that scans a translation unit for thunk annotations
 * and writes the requested generator outputs when the source file has been
 * processed.
 */
class FrontendAction : public clang::ASTFrontendAction {
public:
    /**
     * @param libname Name of the thunked library
     * @param output_filenames Files to generate; both arguments are copied
     */
    FrontendAction(const std::string& libname, const OutputFilenames&);

    // Writes the output files
    void EndSourceFileAction() override;

    // Creates the consumer that traverses the AST
    std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance&, clang::StringRef /*file*/) override;

private:
    // Stored by value: reference members would dangle if the action were
    // constructed from temporaries or outlived the caller's objects
    const std::string libname;
    const OutputFilenames output_filenames;
};

View File

@ -0,0 +1,69 @@
#include "clang/Tooling/Tooling.h"
#include "clang/Tooling/CompilationDatabase.h"
#include <iostream>
#include <string>
#include "interface.h"
using namespace clang::tooling;
/**
 * Prints the command-line synopsis to stderr.
 *
 * @param program_name Name the program was invoked with (usually argv[0])
 */
void print_usage(const char* program_name) {
    std::cerr << "Usage: " << program_name;
    std::cerr << " <filename> <libname> <gen_target> <output_filename> -- <clang_flags>\n";
}
/**
 * Factory handing out FrontendAction instances to the clang tooling driver.
 * It owns the library name and output filenames that each created action
 * is constructed from.
 */
class ThunkGenFrontendActionFactory : public clang::tooling::FrontendActionFactory {
public:
    ThunkGenFrontendActionFactory(std::string_view library_name, OutputFilenames filenames)
        : libname(library_name)
        , output_filenames(std::move(filenames)) {
    }

    // Called by the tooling driver to obtain an action
    std::unique_ptr<clang::FrontendAction> create() override {
        return std::make_unique<FrontendAction>(libname, output_filenames);
    }

private:
    std::string libname;
    OutputFilenames output_filenames;
};
int main(int argc, char* argv[]) {
if (argc < 6) {
print_usage(argv[0]);
return EXIT_FAILURE;
}
// Parse compile flags after "--" (this updates argc to the index of the "--" separator)
std::string error;
auto compile_db = FixedCompilationDatabase::loadFromCommandLine(argc, argv, error);
if (!compile_db) {
print_usage(argv[0]);
std::cerr << "\nError: " << error << "\n";
return EXIT_FAILURE;
}
char** const last_internal_arg = argv + argc;
char** arg = argv + 1;
const auto filename = *arg++;
const std::string libname = *arg++;
// Iterate over generator targets (remaining arguments up to "--" separator)
OutputFilenames output_filenames;
while (arg < last_internal_arg) {
auto target = std::string { *arg++ };
auto out_filename = *arg++;
if (target == "-thunks") {
output_filenames.thunks = out_filename;
} else if (target == "-function_packs") {
output_filenames.function_packs = out_filename;
} else if (target == "-function_packs_public") {
output_filenames.function_packs_public = out_filename;
} else {
std::cerr << "Unrecognized generator target \"" << target << "\"\n";
return EXIT_FAILURE;
}
}
ClangTool Tool(*compile_db, { filename });
return Tool.run(std::make_unique<ThunkGenFrontendActionFactory>(std::move(libname), std::move(output_filenames)).get());
}