diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index afa43c134f9..3d41bf587dc 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1772,6 +1772,27 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) { return false; } +/// getTruncatedShiftCount - return the immediate shift count of a machine +/// instruction, masked to the bits the processor actually uses. +inline static unsigned getTruncatedShiftCount(MachineInstr *MI, + unsigned ShiftAmtOperandIdx) { + // The shift count is six bits with the REX.W prefix and five bits without. + unsigned ShiftCountMask = (MI->getDesc().TSFlags & X86II::REX_W) ? 63 : 31; + unsigned Imm = MI->getOperand(ShiftAmtOperandIdx).getImm(); + return Imm & ShiftCountMask; +} + +/// isTruncatedShiftCountForLEA - check whether the given truncated shift count +/// can be represented by a LEA instruction. +inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) { + // Left shift instructions can be transformed into load-effective-address + // instructions if we can encode them appropriately. + // A LEA instruction utilizes a SIB byte to encode its scale factor. + // The SIB.scale field is two bits wide which means that we can encode any + // shift amount less than 4. + return ShAmt < 4 && ShAmt > 0; +} + /// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when /// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting /// to a 32-bit superregister and then truncating back down to a 16-bit @@ -1891,6 +1912,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const { MachineInstr *MI = MBBI; + + // The following opcodes also set the condition code register(s). Only + // convert them to equivalent lea if the condition code register defs + // are dead! + if (hasLiveCondCodeDef(MI)) + return 0; + MachineFunction &MF = *MI->getParent()->getParent(); // All instructions input are two-addr instructions. 
Get the known operands. const MachineOperand &Dest = MI->getOperand(0); @@ -1935,10 +1963,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::SHL64ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); - // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses - // the flags produced by a shift yet, so this is safe. - unsigned ShAmt = MI->getOperand(2).getImm(); - if (ShAmt == 0 || ShAmt >= 4) return 0; + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; // LEA can't handle RSP. if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && @@ -1953,10 +1979,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::SHL32ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); - // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses - // the flags produced by a shift yet, so this is safe. - unsigned ShAmt = MI->getOperand(2).getImm(); - if (ShAmt == 0 || ShAmt >= 4) return 0; + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; // LEA can't handle ESP. if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && @@ -1972,10 +1996,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::SHL16ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); - // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses - // the flags produced by a shift yet, so this is safe. - unsigned ShAmt = MI->getOperand(2).getImm(); - if (ShAmt == 0 || ShAmt >= 4) return 0; + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; if (DisableLEA16) return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; @@ -1985,11 +2007,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, break; } default: { - // The following opcodes also sets the condition code register(s). Only - // convert them to equivalent lea if the condition code register def's - // are dead! - if (hasLiveCondCodeDef(MI)) - return 0; switch (MIOpc) { default: return 0; @@ -3171,6 +3188,25 @@ inline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg, inline static bool isDefConvertible(MachineInstr *MI) { switch (MI->getOpcode()) { default: return false; + + // The shift instructions only modify ZF if their shift count is non-zero. + // N.B.: The processor truncates the shift count depending on the encoding. + case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri:case X86::SAR64ri: + case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri:case X86::SHR64ri: + return getTruncatedShiftCount(MI, 2) != 0; + + // Some left shift instructions can be turned into LEA instructions but only + // if their flags aren't used. Avoid transforming such instructions. 
+ case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri:case X86::SHL64ri:{ + unsigned ShAmt = getTruncatedShiftCount(MI, 2); + if (isTruncatedShiftCountForLEA(ShAmt)) return false; + return ShAmt != 0; + } + + case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8: + case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8: + return getTruncatedShiftCount(MI, 3) != 0; + case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr: diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll index a65e6881540..cb9fa2eac7d 100644 --- a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll +++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll @@ -27,6 +27,5 @@ if.end: ; preds = %if.then, %entry ; CHECK: fn1: ; CHECK: shrq $32, [[REG:%.*]] -; CHECK: testq [[REG]], [[REG]] ; CHECK: je } diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll index 1855fe2fb89..5f5ba21800b 100644 --- a/test/CodeGen/X86/cmp.ll +++ b/test/CodeGen/X86/cmp.ll @@ -96,7 +96,6 @@ entry: ; CHECK: test7: ; CHECK-NOT: movabsq ; CHECK: shrq $32, %rdi -; CHECK: testq %rdi, %rdi ; CHECK: sete %lnot = icmp ult i64 %res, 4294967296 %lnot.ext = zext i1 %lnot to i32 @@ -119,7 +118,6 @@ entry: ; CHECK: test9: ; CHECK-NOT: movabsq ; CHECK: shrq $33, %rdi -; CHECK: testq %rdi, %rdi ; CHECK: sete %lnot = icmp ult i64 %res, 8589934592 %lnot.ext = zext i1 %lnot to i32 @@ -131,7 +129,6 @@ entry: ; CHECK: test10: ; CHECK-NOT: movabsq ; CHECK: shrq $32, %rdi -; CHECK: testq %rdi, %rdi ; CHECK: setne %lnot = icmp uge i64 %res, 4294967296 %lnot.ext = zext i1 %lnot to i32 diff --git a/test/CodeGen/X86/peep-test-4.ll b/test/CodeGen/X86/peep-test-4.ll index b670c317807..a1eea003ffa 100644 --- a/test/CodeGen/X86/peep-test-4.ll +++ b/test/CodeGen/X86/peep-test-4.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt 
| FileCheck %s declare void @foo(i32) +declare void @foo64(i64) ; CHECK: neg: ; CHECK: negl %edi @@ -55,6 +56,24 @@ return: ret void } +; CHECK: shri: +; CHECK: shrl $3, %edi +; CHECK-NEXT: je +; CHECK: jmp foo +; CHECK: ret +define void @shri(i32 %x) nounwind { + %ashr = lshr i32 %x, 3 + %cmp = icmp eq i32 %ashr, 0 + br i1 %cmp, label %return, label %bb + +bb: + tail call void @foo(i32 %ashr) + br label %return + +return: + ret void +} + ; CHECK: shl: ; CHECK: addl %edi, %edi ; CHECK-NEXT: je @@ -73,6 +92,24 @@ return: ret void } +; CHECK: shli: +; CHECK: shll $4, %edi +; CHECK-NEXT: je +; CHECK: jmp foo +; CHECK: ret +define void @shli(i32 %x) nounwind { + %shl = shl i32 %x, 4 + %cmp = icmp eq i32 %shl, 0 + br i1 %cmp, label %return, label %bb + +bb: + tail call void @foo(i32 %shl) + br label %return + +return: + ret void +} + ; CHECK: adc: ; CHECK: movabsq $-9223372036854775808, %rax ; CHECK-NEXT: addq %rdi, %rax