diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index 9b0bdd8505e..bcdc5b728f0 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -46,6 +46,7 @@ namespace { bool runOnMachineFunction(MachineFunction &F); private: + bool convertToLoadAndTest(MachineInstr *MI); bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, SmallVectorImpl<MachineInstr *> &CCUsers); bool optimizeCompareZero(MachineInstr *Compare, @@ -83,9 +84,34 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) { MI->getOperand(0).getSubReg() == SubReg) return true; + switch (MI->getOpcode()) { + case SystemZ::LR: + case SystemZ::LGR: + case SystemZ::LGFR: + case SystemZ::LTR: + case SystemZ::LTGR: + case SystemZ::LTGFR: + if (MI->getOperand(1).getReg() == Reg && + MI->getOperand(1).getSubReg() == SubReg) + return true; + } + return false; } +// If MI is a load instruction, try to convert it into a LOAD AND TEST. +// Return true on success. +bool SystemZElimCompare::convertToLoadAndTest(MachineInstr *MI) { + unsigned Opcode = TII->getLoadAndTest(MI->getOpcode()); + if (!Opcode) + return false; + + MI->setDesc(TII->get(Opcode)); + MachineInstrBuilder(*MI->getParent()->getParent(), MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); + return true; +} + // The CC users in CCUsers are testing the result of a comparison of some // value X against zero and we know that any CC value produced by MI // would also reflect the value of X. 
Try to adjust CCUsers so that @@ -184,17 +210,21 @@ optimizeCompareZero(MachineInstr *Compare, unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); MachineBasicBlock *MBB = Compare->getParent(); MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->begin(); + bool SeenUseOfCC = false; while (MBBI != MBBE) { --MBBI; MachineInstr *MI = MBBI; if (resultTests(MI, SrcReg, SrcSubReg) && - adjustCCMasksForInstr(MI, Compare, CCUsers)) { + ((!SeenUseOfCC && convertToLoadAndTest(MI)) || + adjustCCMasksForInstr(MI, Compare, CCUsers))) { EliminatedComparisons += 1; return true; } if (MI->modifiesRegister(SrcReg, TRI) || MI->modifiesRegister(SystemZ::CC, TRI)) return false; + if (MI->readsRegister(SystemZ::CC, TRI)) + SeenUseOfCC = true; } return false; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 9913db7b0e4..5dd8d98d27d 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -753,6 +753,19 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode, return 0; } +unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { + switch (Opcode) { + case SystemZ::L: return SystemZ::LT; + case SystemZ::LY: return SystemZ::LT; + case SystemZ::LG: return SystemZ::LTG; + case SystemZ::LGF: return SystemZ::LTGF; + case SystemZ::LR: return SystemZ::LTR; + case SystemZ::LGFR: return SystemZ::LTGFR; + case SystemZ::LGR: return SystemZ::LTGR; + default: return 0; + } +} + // Return true if Mask matches the regexp 0*1+0*, given that zero masks // have already been filtered out. Store the first set bit in LSB and // the number of set bits in Length if so. diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 763a3956fc1..1392745672f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -193,6 +193,10 @@ public: // exists. 
unsigned getOpcodeForOffset(unsigned Opcode, int64_t Offset) const; + // If Opcode is a load instruction that has a LOAD AND TEST form, + // return the opcode for the testing form, otherwise return 0. + unsigned getLoadAndTest(unsigned Opcode) const; + // Return true if ROTATE AND ... SELECTED BITS can be used to select bits // Mask of the R2 operand, given that only the low BitSize bits of Mask are // significant. Set Start and End to the I3 and I4 operands if so. diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll index 5218d41c6ad..b94f482f8b3 100644 --- a/test/CodeGen/SystemZ/int-cmp-44.ll +++ b/test/CodeGen/SystemZ/int-cmp-44.ll @@ -574,3 +574,226 @@ store: exit: ret void } + +; Test that L gets converted to LT where useful. +define i32 @f29(i64 %base, i64 %index, i32 *%dest) { +; CHECK-LABEL: f29: +; CHECK: lt %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %res = load i32 *%ptr + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %res, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Test that LY gets converted to LT where useful. +define i32 @f30(i64 %base, i64 %index, i32 *%dest) { +; CHECK-LABEL: f30: +; CHECK: lt %r2, 100000({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jle .L{{.*}} +; CHECK: br %r14 +entry: + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 100000 + %ptr = inttoptr i64 %add2 to i32 * + %res = load i32 *%ptr + %cmp = icmp sle i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %res, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; Test that LG gets converted to LTG where useful. 
+define i64 @f31(i64 %base, i64 %index, i64 *%dest) { +; CHECK-LABEL: f31: +; CHECK: ltg %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jhe .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i64 * + %res = load i64 *%ptr + %cmp = icmp sge i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %res, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test that LGF gets converted to LTGF where useful. +define i64 @f32(i64 %base, i64 %index, i64 *%dest) { +; CHECK-LABEL: f32: +; CHECK: ltgf %r2, 0({{%r2,%r3|%r3,%r2}}) +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %add = add i64 %base, %index + %ptr = inttoptr i64 %add to i32 * + %val = load i32 *%ptr + %res = sext i32 %val to i64 + %cmp = icmp sgt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %res, i64 *%dest + br label %exit + +exit: + ret i64 %res +} + +; Test that LR gets converted to LTR where useful. +define i32 @f33(i32 %dummy, i32 %val, i32 *%dest) { +; CHECK-LABEL: f33: +; CHECK: ltr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r2}"(i32 %val) + %cmp = icmp slt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret i32 %val +} + +; Test that LGR gets converted to LTGR where useful. +define i64 @f34(i64 %dummy, i64 %val, i64 *%dest) { +; CHECK-LABEL: f34: +; CHECK: ltgr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r2}"(i64 %val) + %cmp = icmp sgt i64 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %val, i64 *%dest + br label %exit + +exit: + ret i64 %val +} + +; Test that LGFR gets converted to LTGFR where useful. 
+define i64 @f35(i64 %dummy, i32 %val, i64 *%dest) { +; CHECK-LABEL: f35: +; CHECK: ltgfr %r2, %r3 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r2 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jh .L{{.*}} +; CHECK: br %r14 +entry: + %ext = sext i32 %val to i64 + call void asm sideeffect "blah $0", "{r2}"(i64 %ext) + %cmp = icmp sgt i64 %ext, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %ext, i64 *%dest + br label %exit + +exit: + ret i64 %ext +} + +; Test a case where it is the source rather than destination of LR that +; we need. +define i32 @f36(i32 %val, i32 %dummy, i32 *%dest) { +; CHECK-LABEL: f36: +; CHECK: ltr %r3, %r2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r3}"(i32 %val) + %cmp = icmp slt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret i32 %val +} + +; Test a case where it is the source rather than destination of LGR that +; we need. +define i64 @f37(i64 %val, i64 %dummy, i64 *%dest) { +; CHECK-LABEL: f37: +; CHECK: ltgr %r3, %r2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + call void asm sideeffect "blah $0", "{r3}"(i64 %val) + %cmp = icmp slt i64 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 %val, i64 *%dest + br label %exit + +exit: + ret i64 %val +} + +; Test a case where it is the source rather than destination of LGFR that +; we need. 
+define i32 @f38(i32 %val, i64 %dummy, i32 *%dest) { +; CHECK-LABEL: f38: +; CHECK: ltgfr %r3, %r2 +; CHECK-NEXT: #APP +; CHECK-NEXT: blah %r3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jl .L{{.*}} +; CHECK: br %r14 +entry: + %ext = sext i32 %val to i64 + call void asm sideeffect "blah $0", "{r3}"(i64 %ext) + %cmp = icmp slt i32 %val, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 %val, i32 *%dest + br label %exit + +exit: + ret i32 %val +}