From b3521af5ae2314c835dc8ff511bd858a8a92ce0c Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Tue, 19 Sep 2017 22:33:09 +0000 Subject: [PATCH] Introduce the llvm-cfi-verify tool. Summary: Introduces the llvm-cfi-verify tool to llvm. Includes the design document (docs/CFIVerify.rst). Current implementation of the tool is simply a disassembler that identifies and prints the indirect control flow instructions. Reviewers: vlad.tsyrklevich Reviewed By: vlad.tsyrklevich Patch by Mitch Phillips Subscribers: llvm-commits, kcc, pcc, mgorny Differential Revision: https://reviews.llvm.org/D37937 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313688 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CFIVerify.rst | 88 ++++++++ tools/LLVMBuild.txt | 1 + tools/llvm-cfi-verify/CMakeLists.txt | 14 ++ tools/llvm-cfi-verify/LLVMBuild.txt | 22 ++ tools/llvm-cfi-verify/llvm-cfi-verify.cpp | 241 ++++++++++++++++++++++ 5 files changed, 366 insertions(+) create mode 100644 docs/CFIVerify.rst create mode 100644 tools/llvm-cfi-verify/CMakeLists.txt create mode 100644 tools/llvm-cfi-verify/LLVMBuild.txt create mode 100644 tools/llvm-cfi-verify/llvm-cfi-verify.cpp diff --git a/docs/CFIVerify.rst b/docs/CFIVerify.rst new file mode 100644 index 00000000000..3f5ebf3d52c --- /dev/null +++ b/docs/CFIVerify.rst @@ -0,0 +1,88 @@ +============================================== +Control Flow Verification Tool Design Document +============================================== + +Objective +========= + +This document provides an overview of an external tool to verify the protection +mechanisms implemented by Clang's *Control Flow Integrity* (CFI) schemes +(``-fsanitize=cfi``). This tool, provided a binary or DSO, should infer whether +indirect control flow operations are protected by CFI, and should output these +results in a human-readable form. 
+ +This tool should also be added as part of Clang's continuous integration testing +framework, where modifications to the compiler ensure that CFI protection +schemes are still present in the final binary. + +Location +======== + +This tool will be present as a part of the LLVM toolchain, and will reside in +the "/llvm/tools/llvm-cfi-verify" directory, relative to the LLVM trunk. It will +be tested in two methods: + +- Unit tests to validate code sections, present in "/llvm/unittests/llvm-cfi- + verify". +- Integration tests, present in "/llvm/tools/clang/test/LLVMCFIVerify". These + integration tests are part of clang as part of a continuous integration + framework, ensuring updates to the compiler that reduce CFI coverage on + indirect control flow instructions are identified. + +Background +========== + +This tool will continuously validate that CFI directives are properly +implemented around all indirect control flows by analysing the output machine +code. The analysis of machine code is important as it ensures that any bugs +present in linker or compiler do not subvert CFI protections in the final +shipped binary. + +Unprotected indirect control flow instructions will be flagged for manual +review. These unexpected control flows may simply have not been accounted for in +the compiler implementation of CFI (e.g. indirect jumps to facilitate switch +statements may not be fully protected). + +It may be possible in the future to extend this tool to flag unnecessary CFI +directives (e.g. CFI directives around a static call to a non-polymorphic base +type). This type of directive has no security implications, but may present +performance impacts. + +Design Ideas +============ + +This tool will disassemble binaries and DSO's from their machine code format and +analyse the disassembled machine code. The tool will inspect virtual calls and +indirect function calls. 
This tool will also inspect indirect jumps, as inlined +functions and jump tables should also be subject to CFI protections. Non-virtual +calls (``-fsanitize=cfi-nvcall``) and cast checks (``-fsanitize=cfi-*cast*``) +are not implemented due to a lack of information provided by the bytecode. + +The tool would operate by searching for indirect control flow instructions in +the disassembly. A control flow graph would be generated from a small buffer of +the instructions surrounding the 'target' control flow instruction. If the +target instruction is branched-to, the fallthrough of the branch should be the +CFI trap (on x86, this is a ``ud2`` instruction). If the target instruction is +the fallthrough (i.e. immediately succeeds) of a conditional jump, the +conditional jump target should be the CFI trap. If an indirect control flow +instruction does not conform to one of these formats, the target will be noted +as being CFI-unprotected. + +Note that in the second case outlined above (where the target instruction is the +fallthrough of a conditional jump), if the target represents a vcall that takes +arguments, these arguments may be pushed to the stack after the branch but +before the target instruction. In these cases, a secondary 'spill graph' is +constructed, to ensure the register argument used by the indirect jump/call is +not spilled from the stack at any point in the interim period. If there are no +spills that affect the target register, the target is marked as CFI-protected. + +Other Design Notes +~~~~~~~~~~~~~~~~~~ + +Only machine code sections that are marked as executable will be subject to this +analysis. Non-executable sections do not require analysis as any execution +present in these sections has already violated the control flow integrity. + +Suitable extensions may be made at a later date to include analysis for indirect +control flow operations across DSO boundaries. 
Currently, these CFI features are +only experimental with an unstable ABI, making them unsuitable for analysis. diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt index 09b412205e9..63caea64cf3 100644 --- a/tools/LLVMBuild.txt +++ b/tools/LLVMBuild.txt @@ -25,6 +25,7 @@ subdirectories = llvm-as llvm-bcanalyzer llvm-cat + llvm-cfi-verify llvm-cov llvm-cvtres llvm-diff diff --git a/tools/llvm-cfi-verify/CMakeLists.txt b/tools/llvm-cfi-verify/CMakeLists.txt new file mode 100644 index 00000000000..578ce70ef92 --- /dev/null +++ b/tools/llvm-cfi-verify/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmPrinters + AllTargetsAsmParsers + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos + MC + MCParser + Support + ) + +add_llvm_tool(llvm-cfi-verify + llvm-cfi-verify.cpp + ) diff --git a/tools/llvm-cfi-verify/LLVMBuild.txt b/tools/llvm-cfi-verify/LLVMBuild.txt new file mode 100644 index 00000000000..717ee55ee81 --- /dev/null +++ b/tools/llvm-cfi-verify/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-cfi-verify/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-cfi-verify +parent = Tools +required_libraries = MC MCDisassembler MCParser Support all-targets diff --git a/tools/llvm-cfi-verify/llvm-cfi-verify.cpp b/tools/llvm-cfi-verify/llvm-cfi-verify.cpp new file mode 100644 index 00000000000..1f056d048cf --- /dev/null +++ b/tools/llvm-cfi-verify/llvm-cfi-verify.cpp @@ -0,0 +1,241 @@ +//===-- llvm-cfi-verify.cpp - CFI Verification tool for LLVM --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool verifies Control Flow Integrity (CFI) instrumentation by static +// binary analysis. See the design document in /docs/CFIVerify.rst for more +// information. +// +// This tool is currently incomplete. It currently only does disassembly for +// object files, and searches through the code for indirect control flow +// instructions, printing them once found. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace llvm; +using namespace llvm::object; + +cl::opt ArgDumpSymbols("sym", cl::desc("Dump the symbol table.")); +cl::opt InputFilename(cl::Positional, cl::desc(""), + cl::Required); + +static void printSymbols(const ObjectFile *Object) { + for (const SymbolRef &Symbol : Object->symbols()) { + outs() << "Symbol [" << format_hex_no_prefix(Symbol.getValue(), 2) + << "] = "; + + auto SymbolName = Symbol.getName(); + if (SymbolName) + outs() << *SymbolName; + else + outs() << "UNKNOWN"; + + if (Symbol.getFlags() & SymbolRef::SF_Hidden) + outs() << " .hidden"; + + outs() << " (Section = "; + + auto SymbolSection = Symbol.getSection(); + if (SymbolSection) { + StringRef SymbolSectionName; + if ((*SymbolSection)->getName(SymbolSectionName)) + outs() << "UNKNOWN)"; + else + outs() << SymbolSectionName << ")"; + } else { + outs() << "N/A)"; + } + + outs() << "\n"; + } +} + +int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv); + + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllDisassemblers(); + + Expected> BinaryOrErr = createBinary(InputFilename); + if 
(!BinaryOrErr) { + errs() << "Failed to open file.\n"; + return EXIT_FAILURE; + } + + Binary &Binary = *BinaryOrErr.get().getBinary(); + ObjectFile *Object = dyn_cast(&Binary); + if (!Object) { + errs() << "Disassembling of non-objects not currently supported.\n"; + return EXIT_FAILURE; + } + + Triple TheTriple = Object->makeTriple(); + std::string TripleName = TheTriple.getTriple(); + std::string ArchName = ""; + std::string ErrorString; + + const Target *TheTarget = + TargetRegistry::lookupTarget(ArchName, TheTriple, ErrorString); + + if (!TheTarget) { + errs() << "Couldn't find target \"" << TheTriple.getTriple() + << "\", failed with error: " << ErrorString << ".\n"; + return EXIT_FAILURE; + } + + SubtargetFeatures Features = Object->getFeatures(); + + std::unique_ptr RegisterInfo( + TheTarget->createMCRegInfo(TripleName)); + if (!RegisterInfo) { + errs() << "Failed to initialise RegisterInfo.\n"; + return EXIT_FAILURE; + } + + std::unique_ptr AsmInfo( + TheTarget->createMCAsmInfo(*RegisterInfo, TripleName)); + if (!AsmInfo) { + errs() << "Failed to initialise AsmInfo.\n"; + return EXIT_FAILURE; + } + + std::string MCPU = ""; + std::unique_ptr SubtargetInfo( + TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); + if (!SubtargetInfo) { + errs() << "Failed to initialise SubtargetInfo.\n"; + return EXIT_FAILURE; + } + + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + if (!MII) { + errs() << "Failed to initialise MII.\n"; + return EXIT_FAILURE; + } + + MCObjectFileInfo MOFI; + MCContext Context(AsmInfo.get(), RegisterInfo.get(), &MOFI); + + std::unique_ptr Disassembler( + TheTarget->createMCDisassembler(*SubtargetInfo, Context)); + + if (!Disassembler) { + errs() << "No disassembler available for target."; + return EXIT_FAILURE; + } + + std::unique_ptr MIA( + TheTarget->createMCInstrAnalysis(MII.get())); + + std::unique_ptr Printer( + TheTarget->createMCInstPrinter(TheTriple, AsmInfo->getAssemblerDialect(), + *AsmInfo, *MII, 
*RegisterInfo)); + + if (ArgDumpSymbols) + printSymbols(Object); + + for (const SectionRef &Section : Object->sections()) { + outs() << "Section [" << format_hex_no_prefix(Section.getAddress(), 2) + << "] = "; + StringRef SectionName; + + if (Section.getName(SectionName)) + outs() << "UNKNOWN.\n"; + else + outs() << SectionName << "\n"; + + StringRef SectionContents; + if (Section.getContents(SectionContents)) { + errs() << "Failed to retrieve section contents.\n"; + return EXIT_FAILURE; + } + + MCInst Instruction; + size_t InstructionSize; + + ArrayRef SectionBytes((const uint8_t *)SectionContents.data(), + Section.getSize()); + + for (size_t Byte = 0; Byte < Section.getSize();) { + bool BadInstruction = false; + + // Disassemble the instruction. + if (Disassembler->getInstruction( + Instruction, InstructionSize, SectionBytes.drop_front(Byte), 0, + nulls(), outs()) != MCDisassembler::Success) { + BadInstruction = true; + } + + Byte += InstructionSize; + + if (BadInstruction) + continue; + + // Skip instructions that do not affect the control flow. + const auto &InstrDesc = MII->get(Instruction.getOpcode()); + if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) + continue; + + // Skip instructions that do not operate on register operands. + bool UsesRegisterOperand = false; + for (const auto &Operand : Instruction) { + if (Operand.isReg()) + UsesRegisterOperand = true; + } + + if (!UsesRegisterOperand) + continue; + + // Print the instruction address. + outs() << " " + << format_hex(Section.getAddress() + Byte - InstructionSize, 2) + << ": "; + + // Print the instruction bytes. + for (size_t i = 0; i < InstructionSize; ++i) { + outs() << format_hex_no_prefix(SectionBytes[Byte - InstructionSize + i], + 2) + << " "; + } + + // Print the instruction. + outs() << " | " << MII->getName(Instruction.getOpcode()) << " "; + Instruction.dump_pretty(outs(), Printer.get()); + + outs() << "\n"; + } + } + + return EXIT_SUCCESS; +}