diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h index 63ed9df88e2..dda7111ca21 100644 --- a/include/llvm-c/Disassembler.h +++ b/include/llvm-c/Disassembler.h @@ -47,6 +47,49 @@ typedef int (*LLVMOpInfoCallback)(void *DisInfo, int TagType, void *TagBuf); +/** + * The initial support in LLVM MC for the most general form of a relocatable + * expression is "AddSymbol - SubtractSymbol + Offset". For some Darwin targets + * this full form is encoded in the relocation information so that AddSymbol and + * SubtractSymbol can be link edited independent of each other. Many other + * platforms only allow a relocatable expression of the form AddSymbol + Offset + * to be encoded. + * + * The LLVMOpInfoCallback() for the TagType value of 1 uses the struct + * LLVMOpInfo1. The value of the relocatable expression for the operand, + * including any PC adjustment, is passed in to the call back in the Value + * field. The symbolic information about the operand is returned using all + * the fields of the structure with the Offset of the relocatable expression + * returned in the Value field. It is possible that some symbols in the + * relocatable expression were assembly temporary symbols, for example + * "Ldata - LpicBase + constant", and only the Values of the symbols without + * symbol names are present in the relocation information. The VariantKind + * type is one of the Target specific #defines below and is used to print + * operands like "_foo@GOT", ":lower16:_foo", etc. + */ +struct LLVMOpInfoSymbol1 { + uint64_t Present; /* 1 if this symbol is present */ + char *Name; /* symbol name if not NULL */ + uint64_t Value; /* symbol value if name is NULL */ +}; +struct LLVMOpInfo1 { + struct LLVMOpInfoSymbol1 AddSymbol; + struct LLVMOpInfoSymbol1 SubtractSymbol; + uint64_t Value; + uint64_t VariantKind; +}; + +/** + * The operand VariantKinds for symbolic disassembly. + */ +#define LLVMDisassembler_VariantKind_None 0 /* all targets */ + +/** + * The ARM target VariantKinds. + */ +#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */ +#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */ + /** * The type for the symbol lookup function. This may be called by the * disassembler for such things like adding a comment for a PC plus a constant diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h index 0fdb51b11bf..ce8759a882e 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler.h @@ -10,12 +10,14 @@ #define MCDISASSEMBLER_H #include "llvm/Support/DataTypes.h" +#include "llvm-c/Disassembler.h" namespace llvm { class MCInst; class MemoryObject; class raw_ostream; +class MCContext; struct EDInstInfo; @@ -24,7 +26,7 @@ struct EDInstInfo; class MCDisassembler { public: /// Constructor - Performs initial setup for the disassembler. - MCDisassembler() {} + MCDisassembler() : GetOpInfo(0), DisInfo(0), Ctx(0) {} virtual ~MCDisassembler(); @@ -53,6 +55,30 @@ public: /// each MCInst opcode this disassembler returns. /// NULL if there is no info for this target. virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for above call back. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + +public: + void setupForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, + MCContext *ctx) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + } + LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; } + void *getDisInfoBlock() const { return DisInfo; } + MCContext *getMCContext() const { return Ctx; } }; } // namespace llvm diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 6ccade99aa0..95c7b845b49 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -77,8 +77,9 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, assert(Ctx && "Unable to create MCContext!"); // Set up disassembler. - const MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); + MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); assert(DisAsm && "Unable to create disassembler!"); + DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h index f84495182d1..6208b1c8a06 100644 --- a/lib/MC/MCDisassembler/Disassembler.h +++ b/lib/MC/MCDisassembler/Disassembler.h @@ -69,7 +69,7 @@ private: public: LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType, - LLVMOpInfoCallback getOpInfo, + LLVMOpInfoCallback getOpInfo, LLVMSymbolLookupCallback symbolLookUp, const Target *theTarget, const MCAsmInfo *mAI, llvm::TargetMachine *tM, const TargetAsmInfo *tai, diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 74f8b53b48c..579e1a49b12 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -422,6 +422,10 @@ bool ARMDisassembler::getInstruction(MCInst &MI, if (!Builder) return false; + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; @@ -504,6 +508,10 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, Builder->SetSession(const_cast(&SO)); + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index 06a77736caa..d5675d26fcb 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -17,6 +17,7 @@ #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "ARMMCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1066,7 +1067,8 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - MI.addOperand(MCOperand::CreateImm(Imm16)); + if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Imm16)); ++OpIdx; } else { // We have a reg/imm form. @@ -3628,3 +3630,80 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); } + +/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic +/// operand in place of the immediate Value in the MCInst. The immediate +/// Value has had any PC adjustment made by the caller. If the getOpInfo() +/// function was set as part of the setupBuilderForSymbolicDisassembly() call +/// then that function is called to get any symbolic information at the +/// builder's Address for this instrution. If that returns non-zero then the +/// symbolic information is returns is used to create an MCExpr and that is +/// added as an operand to the MCInst. This function returns true if it adds +/// an operand to the MCInst and false otherwise. +bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value, + uint64_t InstSize, + MCInst &MI) { + if (!GetOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) + return false; + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else + Expr = Off; + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert("bad SymbolicOp.VariantKind"); + + return true; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 56dd85a7d99..f720e14d23a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -22,12 +22,17 @@ #define ARMDISASSEMBLERCORE_H #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm-c/Disassembler.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { +class MCContext; class ARMUtils { public: @@ -202,7 +207,7 @@ private: public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) { + SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) { Err = 0; } @@ -261,6 +266,44 @@ private: assert(SP); return slice(SP->ITState, 7, 4); } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for above call back. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + // The address of the instruction being disassembled. + uint64_t Address; + +public: + void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, MCContext *ctx, + uint64_t address) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + Address = address; + } + + uint64_t getBuilderAddress() const { return Address; } + + /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic + /// operand in place of the immediate Value in the MCInst. The immediate + /// Value has had any PC adjustment made by the caller. If the getOpInfo() + /// function was set as part of the setupBuilderForSymbolicDisassembly() call + /// then that function is called to get any symbolic information at the + /// builder's Address for this instrution. If that returns non-zero then the + /// symbolic information is returns is used to create an MCExpr and that is + /// added as an operand to the MCInst. This function returns true if it adds + /// an operand to the MCInst and false otherwise. + bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI); + }; } // namespace llvm diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 618526816f7..61c4a13bcfd 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -1570,9 +1570,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 || Opcode == ARM::t2LEApcrel) MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { + else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { + if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) + MI.addOperand(MCOperand::CreateImm(getImm16(insn))); + } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1756,7 +1757,9 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, Offset = decodeImm32_BLX(insn); break; } - MI.addOperand(MCOperand::CreateImm(Offset)); + + if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Offset)); // This is an increment as some predicate operands may have been added first. NumOpsAdded += 1;