From fefc8af84ae9d193cee62cb0666e55837643e27b Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 9 Jul 2015 20:13:25 +0000 Subject: [PATCH] [ImplicitNullChecks] Be smarter in picking the memory op. Summary: Before this change ImplicitNullChecks would only pick loads of the form: ``` test Reg, Reg jz elsewhere fallthrough: movl 32(Reg), Reg2 ``` but not (say) ``` test Reg, Reg jz elsewhere fallthrough: inc Reg3 movl 32(Reg), Reg2 ``` This change teaches ImplicitNullChecks to look through "unrelated" instructions like `inc Reg3` when searching for a load instruction to convert to a trapping load. Reviewers: atrick, JosephTremoulet, reames Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11044 llvm-svn: 241850 --- lib/CodeGen/ImplicitNullChecks.cpp | 83 +++++++++++++++++-- .../X86/implicit-null-check-negative.ll | 42 ++++++++++ test/CodeGen/X86/implicit-null-check.ll | 44 +++++++++- 3 files changed, 159 insertions(+), 10 deletions(-) diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 15bd7571a82..5faeae4527a 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -25,10 +25,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -177,6 +179,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // callq throw_NullPointerException // // LblNotNull: + // Inst0 + // Inst1 + // ... // Def = Load (%RAX + ) // ... // @@ -187,6 +192,8 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: + // Inst0 + // Inst1 // ... // // LblNull: @@ -194,16 +201,76 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // unsigned PointerReg = MBP.LHS.getReg(); - MachineInstr *MemOp = &*NotNullSucc->begin(); - unsigned BaseReg, Offset; - if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI)) - if (MemOp->mayLoad() && !MemOp->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MemOp->getDesc().getNumDefs() == 1) { - NullCheckList.emplace_back(MemOp, MBP.ConditionDef, &MBB, NotNullSucc, - NullSucc); - return true; + + // As we scan NotNullSucc for a suitable load instruction, we keep track of + // the registers defined and used by the instructions we scan past. This bit + // of information lets us decide if it is legal to hoist the load instruction + // we find (if we do find such an instruction) to before NotNullSucc. + DenseSet RegDefs, RegUses; + + // Returns true if it is safe to reorder MI to before NotNullSucc. + auto IsSafeToHoist = [&](MachineInstr *MI) { + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. + for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } } + return true; + }; + + for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; + ++MII) { + MachineInstr *MI = &*MII; + unsigned BaseReg, Offset; + if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && + Offset < PageSize && MI->getDesc().getNumDefs() == 1 && + IsSafeToHoist(MI)) { + NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc); + return true; + } + + // MI did not match our criteria for conversion to a trapping load. Check + // if we can continue looking. + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + return false; + + for (auto *MMO : MI->memoperands()) + // Right now we don't want to worry about LLVM's memory model. + if (!MMO->isUnordered()) + return false; + + // It _may_ be okay to reorder a later load instruction across MI. Make a + // note of its operands so that we can make the legality check if we find a + // suitable load instruction: + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); + } + } + return false; } diff --git a/test/CodeGen/X86/implicit-null-check-negative.ll b/test/CodeGen/X86/implicit-null-check-negative.ll index 8fbed9f7bee..c8d425c3889 100644 --- a/test/CodeGen/X86/implicit-null-check-negative.ll +++ b/test/CodeGen/X86/implicit-null-check-negative.ll @@ -51,4 +51,46 @@ define i32 @imp_null_check_load_no_md(i32* %x) { ret i32 %t } +define i32 @imp_null_check_no_hoist_over_acquire_load(i32* %x, i32* %y) { +; We cannot hoist %t1 over %t0 since %t0 is an acquire load + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load atomic i32, i32* %y acquire, align 4 + %t1 = load i32, i32* %x + %p = add i32 %t0, %t1 + ret i32 %p +} + +define i32 @imp_null_check_add_result(i32* %x, i32* %y) { +; This will codegen to: +; +; movl (%rsi), %eax +; addl (%rdi), %eax +; +; The load instruction we wish to hoist is the addl, but there is a +; write-after-write hazard preventing that from happening. We could +; get fancy here and exploit the commutativity of addition, but right +; now -implicit-null-checks isn't that smart. +; + + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load i32, i32* %y + %t1 = load i32, i32* %x + %p = add i32 %t0, %t1 + ret i32 %p +} + !0 = !{} diff --git a/test/CodeGen/X86/implicit-null-check.ll b/test/CodeGen/X86/implicit-null-check.ll index 1d1b36bbd5d..fd7a902eefc 100644 --- a/test/CodeGen/X86/implicit-null-check.ll +++ b/test/CodeGen/X86/implicit-null-check.ll @@ -76,6 +76,31 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) { ret i32 %p1 } +define i32 @imp_null_check_hoist_over_unrelated_load(i32* %x, i32* %y, i32* %z) { +; CHECK-LABEL: _imp_null_check_hoist_over_unrelated_load: +; CHECK: Ltmp7: +; CHECK: movl (%rdi), %eax +; CHECK: movl (%rsi), %ecx +; CHECK: movl %ecx, (%rdx) +; CHECK: retq +; CHECK: Ltmp6: +; CHECK: movl $42, %eax +; CHECK: retq + + entry: + %c = icmp eq i32* %x, null + br i1 %c, label %is_null, label %not_null, !make.implicit !0 + + is_null: + ret i32 42 + + not_null: + %t0 = load i32, i32* %y + %t1 = load i32, i32* %x + store i32 %t0, i32* %z + ret i32 %t1 +} + !0 = !{} ; CHECK-LABEL: __LLVM_FaultMaps: @@ -88,7 +113,7 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) { ; CHECK-NEXT: .short 0 ; # functions: -; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 4 ; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_add_result @@ -116,6 +141,19 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) { ; Fault[0].HandlerOffset: ; CHECK-NEXT: .long Ltmp2-_imp_null_check_gep_load +; FunctionAddr: +; CHECK-NEXT: .quad _imp_null_check_hoist_over_unrelated_load +; NumFaultingPCs +; CHECK-NEXT: .long 1 +; Reserved: +; CHECK-NEXT: .long 0 +; Fault[0].Type: +; CHECK-NEXT: .long 1 +; Fault[0].FaultOffset: +; CHECK-NEXT: .long Ltmp7-_imp_null_check_hoist_over_unrelated_load +; Fault[0].HandlerOffset: +; CHECK-NEXT: .long Ltmp6-_imp_null_check_hoist_over_unrelated_load + ; FunctionAddr: ; CHECK-NEXT: .quad _imp_null_check_load ; NumFaultingPCs @@ -131,10 +169,12 @@ define i32 @imp_null_check_add_result(i32* %x, i32 %p) { ; OBJDUMP: FaultMap table: ; OBJDUMP-NEXT: Version: 0x1 -; OBJDUMP-NEXT: NumFunctions: 3 +; OBJDUMP-NEXT: NumFunctions: 4 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 5 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7 ; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 +; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 7 +; OBJDUMP-NEXT: FunctionAddress: 0x000000, NumFaultingPCs: 1 ; OBJDUMP-NEXT: Fault kind: FaultingLoad, faulting PC offset: 0, handling PC offset: 3