[WebAssembly] Enforce assembler emits to streamer in order.

Summary:
The assembler processes directives and instructions in whatever order
they are in the file, then directly emits them to the streamer. This
could cause badly written (or generated) .s files to produce
incorrect binaries.

It now has state that tracks what it has most recently seen, to
enforce they are emitted in a given order that always produces
correct wasm binaries.

Also added a new test that compares obj2yaml output from llc (the
backend) to that going via .s and the assembler to ensure both paths
generate the same binaries.

The features this test covers could be extended.

Passes all wasm Lit tests.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=39557

Reviewers: sbc100, dschuff, aheejin

Subscribers: jgravelle-google, sunfish, llvm-commits

Differential Revision: https://reviews.llvm.org/D55149

llvm-svn: 348185
This commit is contained in:
Wouter van Oortmerssen 2018-12-03 20:30:28 +00:00
parent 736a301e6b
commit 88d6faf35d
2 changed files with 139 additions and 0 deletions

View File

@ -136,6 +136,24 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
// Much like WebAssemblyAsmPrinter in the backend, we have to own these.
std::vector<std::unique_ptr<wasm::WasmSignature>> Signatures;
// Order of labels, directives and instructions in a .s file have no
// syntactical enforcement. This class is a callback from the actual parser,
// and yet we have to be feeding data to the streamer in a very particular
// order to ensure a correct binary encoding that matches the regular backend
// (the streamer does not enforce this). This "state machine" enum helps
// guarantee that correct order.
enum ParserState {
FileStart,
Label,
FunctionStart,
FunctionLocals,
Instructions,
} CurrentState = FileStart;
// We track this to see if a .functype following a label is the same,
// as this is how we recognize the start of a function.
MCSymbol *LastLabel = nullptr;
public:
WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
@ -334,6 +352,11 @@ public:
return false;
}
void onLabelParsed(MCSymbol *Symbol) override {
LastLabel = Symbol;
CurrentState = Label;
}
// This function processes wasm-specific directives streamed to
// WebAssemblyTargetStreamer, all others go to the generic parser
// (see WasmAsmParser).
@ -370,10 +393,19 @@ public:
TOut.emitGlobalType(WasmSym);
return Expect(AsmToken::EndOfStatement, "EOL");
} else if (DirectiveID.getString() == ".functype") {
// This code has to send things to the streamer similar to
// WebAssemblyAsmPrinter::EmitFunctionBodyStart.
// TODO: would be good to factor this into a common function, but the
// assembler and backend really don't share any common code, and this code
// parses the locals seperately.
auto SymName = ExpectIdent();
if (SymName.empty()) return true;
auto WasmSym = cast<MCSymbolWasm>(
TOut.getStreamer().getContext().getOrCreateSymbol(SymName));
if (CurrentState == Label && WasmSym == LastLabel) {
// This .functype indicates a start of a function.
CurrentState = FunctionStart;
}
auto Signature = make_unique<wasm::WasmSignature>();
if (Expect(AsmToken::LParen, "(")) return true;
if (ParseRegTypeList(Signature->Params)) return true;
@ -386,11 +418,16 @@ public:
addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
TOut.emitFunctionType(WasmSym);
// TODO: backend also calls TOut.emitIndIdx, but that is not implemented.
return Expect(AsmToken::EndOfStatement, "EOL");
} else if (DirectiveID.getString() == ".local") {
if (CurrentState != FunctionStart)
return Error(".local directive should follow the start of a function",
Lexer.getTok());
SmallVector<wasm::ValType, 4> Locals;
if (ParseRegTypeList(Locals)) return true;
TOut.emitLocal(Locals);
CurrentState = FunctionLocals;
return Expect(AsmToken::EndOfStatement, "EOL");
}
return true; // We didn't process this directive.
@ -405,6 +442,16 @@ public:
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
case Match_Success: {
if (CurrentState == FunctionStart) {
// This is the first instruction in a function, but we haven't seen
// a .local directive yet. The streamer requires locals to be encoded
// as a prelude to the instructions, so emit an empty list of locals
// here.
auto &TOut = reinterpret_cast<WebAssemblyTargetStreamer &>(
*Out.getTargetStreamer());
TOut.emitLocal(SmallVector<wasm::ValType, 0>());
}
CurrentState = Instructions;
Out.EmitInstruction(Inst, getSTI());
return false;
}

View File

@ -0,0 +1,92 @@
; RUN: llc -filetype=asm -asm-verbose=false %s -o %t.s
; RUN: FileCheck -check-prefix=ASM -input-file %t.s %s
; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=asm %t.s -o - | FileCheck -check-prefix=ASM %s
; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s
; RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj %t.s -o - | obj2yaml | FileCheck %s
; This specifically tests that we can generate a binary from the assembler
; that produces the same binary as the backend would.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare void @bar()
define void @foo(i32 %n) {
entry:
call void @bar()
ret void
}
; Checking assembly is not the point of this test, but if something breaks
; it is easier to spot it here than in the yaml output.
; ASM: .text
; ASM: .file "assembler-binary.ll"
; ASM: .globl foo
; ASM: foo:
; ASM-NEXT: .functype foo (i32) -> ()
; ASM-NEXT: call bar@FUNCTION
; ASM-NEXT: end_function
; ASM-NEXT: .Lfunc_end0:
; ASM-NEXT: .size foo, .Lfunc_end0-foo
; ASM: .functype bar () -> ()
; CHECK: --- !WASM
; CHECK-NEXT: FileHeader:
; CHECK-NEXT: Version: 0x00000001
; CHECK-NEXT: Sections:
; CHECK-NEXT: - Type: TYPE
; CHECK-NEXT: Signatures:
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: ReturnType: NORESULT
; CHECK-NEXT: ParamTypes:
; CHECK-NEXT: - I32
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: ReturnType: NORESULT
; CHECK-NEXT: ParamTypes: []
; CHECK-NEXT: - Type: IMPORT
; CHECK-NEXT: Imports:
; CHECK-NEXT: - Module: env
; CHECK-NEXT: Field: __linear_memory
; CHECK-NEXT: Kind: MEMORY
; CHECK-NEXT: Memory:
; CHECK-NEXT: Initial: 0x00000000
; CHECK-NEXT: - Module: env
; CHECK-NEXT: Field: __indirect_function_table
; CHECK-NEXT: Kind: TABLE
; CHECK-NEXT: Table:
; CHECK-NEXT: ElemType: ANYFUNC
; CHECK-NEXT: Limits:
; CHECK-NEXT: Initial: 0x00000000
; CHECK-NEXT: - Module: env
; CHECK-NEXT: Field: bar
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: SigIndex: 1
; CHECK-NEXT: - Type: FUNCTION
; CHECK-NEXT: FunctionTypes: [ 0 ]
; CHECK-NEXT: - Type: CODE
; CHECK-NEXT: Relocations:
; CHECK-NEXT: - Type: R_WEBASSEMBLY_FUNCTION_INDEX_LEB
; CHECK-NEXT: Index: 1
; CHECK-NEXT: Offset: 0x00000004
; CHECK-NEXT: Functions:
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: Locals: []
; CHECK-NEXT: Body: 1080808080000B
; CHECK-NEXT: - Type: CUSTOM
; CHECK-NEXT: Name: linking
; CHECK-NEXT: Version: 1
; CHECK-NEXT: SymbolTable:
; CHECK-NEXT: - Index: 0
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: Name: foo
; CHECK-NEXT: Flags: [ ]
; CHECK-NEXT: Function: 1
; CHECK-NEXT: - Index: 1
; CHECK-NEXT: Kind: FUNCTION
; CHECK-NEXT: Name: bar
; CHECK-NEXT: Flags: [ UNDEFINED ]
; CHECK-NEXT: Function: 0
; CHECK-NEXT: ...