diff --git a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index f3d8ae45bd1..510b1b058d0 100644 --- a/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -63,6 +63,8 @@ static const uint64_t kDynamicShadowSentinel = std::numeric_limits::max(); static const unsigned kPointerTagShift = 56; +static const unsigned kShadowBaseAlignment = 32; + static cl::opt ClMemoryAccessCallbackPrefix( "hwasan-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, @@ -132,6 +134,18 @@ static cl::opt cl::desc("Access dynamic shadow through an ifunc global on " "platforms that support this"), cl::Hidden, cl::init(false)); + +static cl::opt ClWithTls( + "hwasan-with-tls", + cl::desc("Access dynamic shadow through an thread-local pointer on " + "platforms that support this"), + cl::Hidden, cl::init(true)); + +static cl::opt + ClRecordStackHistory("hwasan-record-stack-history", + cl::desc("Record stack frames with tagged allocations " + "in a thread-local ring buffer"), + cl::Hidden, cl::init(true)); namespace { /// An instrumentation pass implementing detection of addressability bugs @@ -155,7 +169,7 @@ public: void initializeCallbacks(Module &M); - void maybeInsertDynamicShadowAtFunctionEntry(Function &F); + Value *getDynamicShadowNonTls(IRBuilder<> &IRB); void untagPointerOperand(Instruction *I, Value *Addr); Value *memToShadow(Value *Shadow, Type *Ty, IRBuilder<> &IRB); @@ -172,13 +186,16 @@ public: Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag); Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong); bool instrumentStack(SmallVectorImpl &Allocas, - SmallVectorImpl &RetVec); + SmallVectorImpl &RetVec, Value *StackTag); Value *getNextTagWithCall(IRBuilder<> &IRB); Value *getStackBaseTag(IRBuilder<> &IRB); Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI, unsigned AllocaNo); 
Value *getUARTag(IRBuilder<> &IRB, Value *StackTag); + Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty); + Value *emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord); + private: LLVMContext *C; Triple TargetTriple; @@ -188,10 +205,14 @@ private: /// If InGlobal is true, then /// extern char __hwasan_shadow[]; /// shadow = (mem >> Scale) + &__hwasan_shadow + /// If InTls is true, then + /// extern char *__hwasan_tls; + /// shadow = (mem >> Scale) + align_up(__hwasan_tls, kShadowBaseAlignment) struct ShadowMapping { int Scale; uint64_t Offset; bool InGlobal; + bool InTls; void init(Triple &TargetTriple); unsigned getAllocaAlignment() const { return 1U << Scale; } @@ -216,6 +237,7 @@ private: Constant *ShadowGlobal; Value *LocalDynamicShadow = nullptr; + GlobalValue *ThreadPtrGlobal = nullptr; }; } // end anonymous namespace @@ -263,6 +285,12 @@ bool HWAddressSanitizer::doInitialization(Module &M) { /*InitArgs=*/{}); appendToGlobalCtors(M, HwasanCtorFunction, 0); } + if (!TargetTriple.isAndroid()) + appendToCompilerUsed( + M, ThreadPtrGlobal = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalLinkage, nullptr, + "__hwasan_tls", nullptr, GlobalVariable::InitialExecTLSModel)); + return true; } @@ -297,12 +325,11 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) { ArrayType::get(IRB.getInt8Ty(), 0)); } -void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { +Value *HWAddressSanitizer::getDynamicShadowNonTls(IRBuilder<> &IRB) { // Generate code only when dynamic addressing is needed. if (Mapping.Offset != kDynamicShadowSentinel) - return; + return nullptr; - IRBuilder<> IRB(&F.front().front()); if (Mapping.InGlobal) { // An empty inline asm with input reg == output reg. // An opaque pointer-to-int cast, basically. 
@@ -310,11 +337,12 @@ void HWAddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) { FunctionType::get(IntptrTy, {ShadowGlobal->getType()}, false), StringRef(""), StringRef("=r,0"), /*hasSideEffects=*/false); - LocalDynamicShadow = IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow"); + return IRB.CreateCall(Asm, {ShadowGlobal}, ".hwasan.shadow"); } else { - Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( - kHwasanShadowMemoryDynamicAddress, IntptrTy); - LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress); + Value *GlobalDynamicAddress = + IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal( + kHwasanShadowMemoryDynamicAddress, IntptrTy); + return IRB.CreateLoad(GlobalDynamicAddress); } } @@ -563,7 +591,7 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) { Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) { if (ClGenerateTagsWithCalls) - return nullptr; + return getNextTagWithCall(IRB); // FIXME: use addressofreturnaddress (but implement it in aarch64 backend // first). 
Module *M = IRB.GetInsertBlock()->getParent()->getParent(); @@ -631,15 +659,88 @@ Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) { return UntaggedPtrLong; } +Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) { + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) { + Function *ThreadPointerFunc = + Intrinsic::getDeclaration(M, Intrinsic::thread_pointer); + Value *SlotPtr = IRB.CreatePointerCast( + IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), 0x40), + Ty->getPointerTo(0)); + return SlotPtr; + } + if (ThreadPtrGlobal) + return ThreadPtrGlobal; + + + return nullptr; +} + +Value *HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, + bool WithFrameRecord) { + if (!Mapping.InTls) + return getDynamicShadowNonTls(IRB); + + Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy); + assert(SlotPtr); + + Value *ThreadLong = IRB.CreateLoad(SlotPtr); + // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI. + Value *ThreadLongMaybeUntagged = + TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong); + + if (WithFrameRecord) { + // Prepare ring buffer data. + Function *F = IRB.GetInsertBlock()->getParent(); + auto PC = IRB.CreatePtrToInt(F, IntptrTy); + auto GetStackPointerFn = + Intrinsic::getDeclaration(F->getParent(), Intrinsic::frameaddress); + Value *SP = IRB.CreatePtrToInt( + IRB.CreateCall(GetStackPointerFn, + {Constant::getNullValue(IRB.getInt32Ty())}), + IntptrTy); + // Mix SP and PC. TODO: also add the tag to the mix. + // Assumptions: + // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero) + // SP is 0xsssssssssssSSSS0 (4 lower bits are zero) + // We only really need ~20 lower non-zero bits (SSSS), so we mix like this: + // 0xSSSSPPPPPPPPPPPP + SP = IRB.CreateShl(SP, 44); + + // Store data to ring buffer. 
+ Value *RecordPtr = + IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0)); + IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr); + + // Update the ring buffer. Top byte of ThreadLong defines the size of the + // buffer in pages, it must be a power of two, and the start of the buffer + // must be aligned by twice that much. Therefore wrap around of the ring + // buffer is simply Addr &= ~((ThreadLong >> 56) << 12). + // The use of AShr instead of LShr is due to + // https://bugs.llvm.org/show_bug.cgi?id=39030 + // Runtime library makes sure not to use the highest bit. + Value *WrapMask = IRB.CreateXor( + IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true), + ConstantInt::get(IntptrTy, (uint64_t)-1)); + Value *ThreadLongNew = IRB.CreateAnd( + IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask); + IRB.CreateStore(ThreadLongNew, SlotPtr); + } + + // Get shadow base address by aligning ThreadLongMaybeUntagged up. + // Note: this is not correct if the pointer is already aligned. + // Runtime library will make sure this never happens. + Value *ShadowBase = IRB.CreateAdd( + IRB.CreateOr( + ThreadLongMaybeUntagged, + ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)), + ConstantInt::get(IntptrTy, 1), "hwasan.shadow"); + return ShadowBase; +} + bool HWAddressSanitizer::instrumentStack( SmallVectorImpl &Allocas, - SmallVectorImpl &RetVec) { - Function *F = Allocas[0]->getParent()->getParent(); - Instruction *InsertPt = &*F->getEntryBlock().begin(); - IRBuilder<> IRB(InsertPt); - - Value *StackTag = getStackBaseTag(IRB); - + SmallVectorImpl &RetVec, Value *StackTag) { // Ideally, we want to calculate tagged stack base pointer, and rewrite all // alloca addresses using that. Unfortunately, offsets are not known yet // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a @@ -647,7 +748,7 @@ bool HWAddressSanitizer::instrumentStack( // This generates one extra instruction per alloca use. 
for (unsigned N = 0; N < Allocas.size(); ++N) { auto *AI = Allocas[N]; - IRB.SetInsertPoint(AI->getNextNode()); + IRBuilder<> IRB(AI->getNextNode()); // Replace uses of the alloca with tagged address. Value *Tag = getAllocaTag(IRB, StackTag, AI, N); @@ -702,12 +803,6 @@ bool HWAddressSanitizer::runOnFunction(Function &F) { LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n"); - initializeCallbacks(*F.getParent()); - - assert(!LocalDynamicShadow); - maybeInsertDynamicShadowAtFunctionEntry(F); - - bool Changed = false; SmallVector ToInstrument; SmallVector AllocasToInstrument; SmallVector RetVec; @@ -740,8 +835,25 @@ bool HWAddressSanitizer::runOnFunction(Function &F) { } } - if (!AllocasToInstrument.empty()) - Changed |= instrumentStack(AllocasToInstrument, RetVec); + if (AllocasToInstrument.empty() && ToInstrument.empty()) + return false; + + initializeCallbacks(*F.getParent()); + + assert(!LocalDynamicShadow); + + Instruction *InsertPt = &*F.getEntryBlock().begin(); + IRBuilder<> EntryIRB(InsertPt); + LocalDynamicShadow = emitPrologue(EntryIRB, + /*WithFrameRecord*/ ClRecordStackHistory && + !AllocasToInstrument.empty()); + + bool Changed = false; + if (!AllocasToInstrument.empty()) { + Value *StackTag = + ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB); + Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag); + } for (auto Inst : ToInstrument) Changed |= instrumentMemAccess(Inst); @@ -752,26 +864,26 @@ bool HWAddressSanitizer::runOnFunction(Function &F) { } void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple) { - const bool IsAndroid = TargetTriple.isAndroid(); - const bool IsAndroidWithIfuncSupport = - IsAndroid && !TargetTriple.isAndroidVersionLT(21); - Scale = kDefaultShadowScale; - const bool WithIfunc = ClWithIfunc.getNumOccurrences() > 0 - ? 
ClWithIfunc - : IsAndroidWithIfuncSupport; - if (ClMappingOffset.getNumOccurrences() > 0) { InGlobal = false; + InTls = false; Offset = ClMappingOffset; } else if (ClEnableKhwasan || ClInstrumentWithCalls) { InGlobal = false; + InTls = false; Offset = 0; - } else if (WithIfunc) { + } else if (ClWithIfunc) { InGlobal = true; + InTls = false; + Offset = kDynamicShadowSentinel; + } else if (ClWithTls) { + InGlobal = false; + InTls = true; Offset = kDynamicShadowSentinel; } else { InGlobal = false; + InTls = false; Offset = kDynamicShadowSentinel; } } diff --git a/test/Instrumentation/HWAddressSanitizer/alloca.ll b/test/Instrumentation/HWAddressSanitizer/alloca.ll index 011e8e4ad63..f13274171f0 100644 --- a/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -1,8 +1,8 @@ ; Test alloca instrumentation. ; -; RUN: opt < %s -hwasan -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,NO-UAR-TAGS +; RUN: opt < %s -hwasan -hwasan-with-ifunc=1 -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,NO-UAR-TAGS ; RUN: opt < %s -hwasan -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,ZERO-BASED-SHADOW,NO-UAR-TAGS -; RUN: opt < %s -hwasan -hwasan-uar-retag-to-zero=0 -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,UAR-TAGS +; RUN: opt < %s -hwasan -hwasan-with-ifunc=1 -hwasan-uar-retag-to-zero=0 -S | FileCheck %s --check-prefixes=CHECK,DYNAMIC-SHADOW,UAR-TAGS target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-android" diff --git a/test/Instrumentation/HWAddressSanitizer/basic.ll b/test/Instrumentation/HWAddressSanitizer/basic.ll index 77d3c8cc4a6..e8010992945 100644 --- a/test/Instrumentation/HWAddressSanitizer/basic.ll +++ b/test/Instrumentation/HWAddressSanitizer/basic.ll @@ -1,7 +1,7 @@ ; Test basic address sanitizer instrumentation. 
; -; RUN: opt < %s -hwasan -hwasan-recover=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,DYNAMIC-SHADOW -; RUN: opt < %s -hwasan -hwasan-recover=1 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,DYNAMIC-SHADOW +; RUN: opt < %s -hwasan -hwasan-recover=0 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,DYNAMIC-SHADOW +; RUN: opt < %s -hwasan -hwasan-recover=1 -hwasan-with-ifunc=1 -hwasan-with-tls=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,DYNAMIC-SHADOW ; RUN: opt < %s -hwasan -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,ABORT,ZERO-BASED-SHADOW ; RUN: opt < %s -hwasan -hwasan-recover=1 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=CHECK,RECOVER,ZERO-BASED-SHADOW @@ -342,7 +342,6 @@ entry: define i8 @test_load_addrspace(i8 addrspace(256)* %a) sanitize_hwaddress { ; CHECK-LABEL: @test_load_addrspace( ; CHECK-NEXT: entry: -; DYNAMIC-SHADOW: %.hwasan.shadow = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) ; CHECK-NEXT: %[[B:[^ ]*]] = load i8, i8 addrspace(256)* %a ; CHECK-NEXT: ret i8 %[[B]] diff --git a/test/Instrumentation/HWAddressSanitizer/prologue.ll b/test/Instrumentation/HWAddressSanitizer/prologue.ll new file mode 100644 index 00000000000..6b02b9863cd --- /dev/null +++ b/test/Instrumentation/HWAddressSanitizer/prologue.ll @@ -0,0 +1,88 @@ +; Test function prologue instrumentation (-hwasan-with-ifunc, -hwasan-with-tls, -hwasan-record-stack-history). 
+; +; RUN: opt -hwasan -S < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-TLS,CHECK-HISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=1 -hwasan-record-stack-history=1 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-TLS,CHECK-HISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=1 -hwasan-record-stack-history=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-TLS,CHECK-NOHISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=0 -hwasan-with-tls=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-GLOBAL,CHECK-NOHISTORY +; RUN: opt -hwasan -S -hwasan-with-ifunc=1 -hwasan-with-tls=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC,CHECK-NOHISTORY + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-android22" + +; CHECK-IFUNC: @__hwasan_shadow = external global [0 x i8] +; CHECK-NOIFUNC: @__hwasan_shadow_memory_dynamic_address = external global i64 + +define i32 @test_load(i32* %a) sanitize_hwaddress { +; First instrumentation in the function must be to load the dynamic shadow +; address into a local variable. 
+; CHECK-LABEL: @test_load +; CHECK: entry: + +; CHECK-IFUNC: %[[A:[^ ]*]] = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) +; CHECK-IFUNC: add i64 %{{.*}}, %[[A]] + +; CHECK-GLOBAL: load i64, i64* @__hwasan_shadow_memory_dynamic_address + +; CHECK-TLS: %[[A:[^ ]*]] = call i8* @llvm.thread.pointer() +; CHECK-TLS: %[[B:[^ ]*]] = getelementptr i8, i8* %[[A]], i32 64 +; CHECK-TLS: %[[C:[^ ]*]] = bitcast i8* %[[B]] to i64* +; CHECK-TLS: %[[D:[^ ]*]] = load i64, i64* %[[C]] +; CHECK-TLS: %[[E:[^ ]*]] = or i64 %[[D]], 4294967295 +; CHECK-TLS: = add i64 %[[E]], 1 + +; "store i64" is only used to update stack history (this input IR intentionally does not use any i64) +; W/o any allocas, the history is not updated, even if it is enabled explicitly with -hwasan-record-stack-history=1 +; CHECK-NOT: store i64 + +; CHECK: ret i32 + +entry: + %x = load i32, i32* %a, align 4 + ret i32 %x +} + +declare void @use(i32* %p) + +define void @test_alloca() sanitize_hwaddress { +; First instrumentation in the function must be to load the dynamic shadow +; address into a local variable. 
+; CHECK-LABEL: @test_alloca +; CHECK: entry: + +; CHECK-IFUNC: %[[A:[^ ]*]] = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) +; CHECK-IFUNC: add i64 %{{.*}}, %[[A]] + +; CHECK-GLOBAL: load i64, i64* @__hwasan_shadow_memory_dynamic_address + +; CHECK-TLS: %[[A:[^ ]*]] = call i8* @llvm.thread.pointer() +; CHECK-TLS: %[[B:[^ ]*]] = getelementptr i8, i8* %[[A]], i32 64 +; CHECK-TLS: %[[C:[^ ]*]] = bitcast i8* %[[B]] to i64* +; CHECK-TLS: %[[D:[^ ]*]] = load i64, i64* %[[C]] + +; CHECK-NOHISTORY-NOT: store i64 + +; CHECK-HISTORY: %[[PTR:[^ ]*]] = inttoptr i64 %[[D]] to i64* +; CHECK-HISTORY: store i64 %{{.*}}, i64* %[[PTR]] +; CHECK-HISTORY: %[[D1:[^ ]*]] = ashr i64 %[[D]], 56 +; CHECK-HISTORY: %[[D2:[^ ]*]] = shl nuw nsw i64 %[[D1]], 12 +; CHECK-HISTORY: %[[D3:[^ ]*]] = xor i64 %[[D2]], -1 +; CHECK-HISTORY: %[[D4:[^ ]*]] = add i64 %[[D]], 8 +; CHECK-HISTORY: %[[D5:[^ ]*]] = and i64 %[[D4]], %[[D3]] +; CHECK-HISTORY: store i64 %[[D5]], i64* %[[C]] + +; CHECK-TLS: %[[E:[^ ]*]] = or i64 %[[D]], 4294967295 +; CHECK-TLS: = add i64 %[[E]], 1 + +; CHECK-NOHISTORY-NOT: store i64 + + +entry: + %x = alloca i32, align 4 + call void @use(i32* %x) + ret void +} diff --git a/test/Instrumentation/HWAddressSanitizer/with-ifunc.ll b/test/Instrumentation/HWAddressSanitizer/with-ifunc.ll deleted file mode 100644 index 2f4abb49b29..00000000000 --- a/test/Instrumentation/HWAddressSanitizer/with-ifunc.ll +++ /dev/null @@ -1,30 +0,0 @@ -; Test -hwasan-with-ifunc flag. 
-; -; RUN: opt -hwasan -S < %s | \ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC -; RUN: opt -hwasan -S -hwasan-with-ifunc=0 < %s | \ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -hwasan -S -hwasan-with-ifunc=1 < %s | \ -; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-android22" - -; CHECK-IFUNC: @__hwasan_shadow = external global [0 x i8] -; CHECK-NOIFUNC: @__hwasan_shadow_memory_dynamic_address = external global i64 - -define i32 @test_load(i32* %a) sanitize_hwaddress { -; First instrumentation in the function must be to load the dynamic shadow -; address into a local variable. -; CHECK-LABEL: @test_load -; CHECK: entry: - -; CHECK-IFUNC: %[[A:[^ ]*]] = call i64 asm "", "=r,0"([0 x i8]* @__hwasan_shadow) -; CHECK-IFUNC: add i64 %{{.*}}, %[[A]] - -; CHECK-NOIFUNC: load i64, i64* @__hwasan_shadow_memory_dynamic_address - -entry: - %x = load i32, i32* %a, align 4 - ret i32 %x -}