diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 87710022c90..d7cd5256e01 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1997,17 +1997,10 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) .addReg(OldVal).addReg(BitShift).addImm(0); - unsigned FusedOpcode = TII->getCompareAndBranch(CompareOpcode); - if (FusedOpcode) - BuildMI(MBB, DL, TII->get(FusedOpcode)) - .addReg(RotatedOldVal).addReg(Src2) - .addImm(KeepOldMask).addMBB(UpdateMBB); - else { - BuildMI(MBB, DL, TII->get(CompareOpcode)) - .addReg(RotatedOldVal).addReg(Src2); - BuildMI(MBB, DL, TII->get(SystemZ::BRC)) - .addImm(KeepOldMask).addMBB(UpdateMBB); - } + BuildMI(MBB, DL, TII->get(CompareOpcode)) + .addReg(RotatedOldVal).addReg(Src2); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) + .addImm(KeepOldMask).addMBB(UpdateMBB); MBB->addSuccessor(UpdateMBB); MBB->addSuccessor(UseAltMBB); @@ -2109,7 +2102,8 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, // ^^ Replace the upper 32-BitSize bits of the // comparison value with those that we loaded, // so that we can use a full word comparison. - // CRJNE %Dest, %RetryCmpVal, DoneMBB + // CR %Dest, %RetryCmpVal + // JNE DoneMBB // # Fall through to SetMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) @@ -2125,8 +2119,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, .addReg(OldVal).addReg(BitShift).addImm(BitSize); BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal) .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); - BuildMI(MBB, DL, TII->get(SystemZ::CRJ)) - .addReg(Dest).addReg(RetryCmpVal) + BuildMI(MBB, DL, TII->get(SystemZ::CR)) + .addReg(Dest).addReg(RetryCmpVal); + BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(MaskNE).addMBB(DoneMBB); MBB->addSuccessor(DoneMBB); MBB->addSuccessor(SetMBB); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 12211fe2c24..dfb5c0983cc 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -282,109 +282,6 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, return Count; } -bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const { - assert(MI->isCompare() && "Caller should check that this is a compare"); - - // Ignore comparisons involving memory for now. - if (MI->getNumExplicitOperands() != 2) - return false; - - SrcReg = MI->getOperand(0).getReg(); - if (MI->getOperand(1).isReg()) { - SrcReg2 = MI->getOperand(1).getReg(); - Value = 0; - Mask = ~0; - return true; - } else if (MI->getOperand(1).isImm()) { - SrcReg2 = 0; - Value = MI->getOperand(1).getImm(); - Mask = ~0; - return true; - } - return false; -} - -// Return true if CC is live after MBBI. We can't rely on kill information -// because of the way InsertBranch is used. -static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, - const TargetRegisterInfo *TRI) { - if (MBBI->killsRegister(SystemZ::CC, TRI)) - return false; - - MachineBasicBlock *MBB = MBBI->getParent(); - MachineBasicBlock::iterator MBBE = MBB->end(); - for (++MBBI; MBBI != MBBE; ++MBBI) - if (MBBI->readsRegister(SystemZ::CC, TRI)) - return true; - - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; - - return false; -} - -bool -SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, - const MachineRegisterInfo *MRI) const { - MachineBasicBlock *MBB = Compare->getParent(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - // Try to fold a comparison into a following branch, if it is only used once. - if (unsigned FusedOpcode = getCompareAndBranch(Compare->getOpcode(), - Compare)) { - MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end(); - for (++MBBI; MBBI != MBBE; ++MBBI) { - if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) { - // Read the branch mask and target. - MachineOperand CCMask(MBBI->getOperand(0)); - MachineOperand Target(MBBI->getOperand(1)); - - // Clear out all current operands. - int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); - assert(CCUse >= 0 && "BRC must use CC"); - MBBI->RemoveOperand(CCUse); - MBBI->RemoveOperand(1); - MBBI->RemoveOperand(0); - - // Rebuild MBBI as a fused compare and branch. - MBBI->setDesc(get(FusedOpcode)); - MachineInstrBuilder(*MBB->getParent(), MBBI) - .addOperand(Compare->getOperand(0)) - .addOperand(Compare->getOperand(1)) - .addOperand(CCMask) - .addOperand(Target); - - // Clear any intervening kills of SrcReg and SrcReg2. - MBBI = Compare; - for (++MBBI; MBBI != MBBE; ++MBBI) { - MBBI->clearRegisterKills(SrcReg, TRI); - if (SrcReg2) - MBBI->clearRegisterKills(SrcReg2, TRI); - } - Compare->removeFromParent(); - return true; - } - - // Stop if we find another reference to CC before a branch. - if (MBBI->readsRegister(SystemZ::CC, TRI) || - MBBI->modifiesRegister(SystemZ::CC, TRI)) - break; - - // Stop if we find another assignment to the registers before the branch. - if (MBBI->modifiesRegister(SrcReg, TRI) || - (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) - break; - } - } - return false; -} - // If Opcode is a move that has a conditional variant, return that variant, // otherwise return 0. static unsigned getConditionalMove(unsigned Opcode) { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 7fc0ca9a7a6..7617d03ddc7 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -104,14 +104,6 @@ public: MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const LLVM_OVERRIDE; - virtual bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const LLVM_OVERRIDE; - virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, - const MachineRegisterInfo *MRI) const - LLVM_OVERRIDE; virtual bool isPredicable(MachineInstr *MI) const LLVM_OVERRIDE; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index c9d5205673f..2d2605886b5 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -7,16 +7,26 @@ // //===----------------------------------------------------------------------===// // -// This pass makes sure that all branches are in range. There are several ways -// in which this could be done. One aggressive approach is to assume that all -// branches are in range and successively replace those that turn out not -// to be in range with a longer form (branch relaxation). A simple -// implementation is to continually walk through the function relaxing -// branches until no more changes are needed and a fixed point is reached. -// However, in the pathological worst case, this implementation is -// quadratic in the number of blocks; relaxing branch N can make branch N-1 -// go out of range, which in turn can make branch N-2 go out of range, -// and so on. +// This pass does two things: +// (1) fuse compares and branches into COMPARE AND BRANCH instructions +// (2) make sure that all branches are in range. +// +// We do (1) here rather than earlier because the fused form prevents +// predication. +// +// Doing it so late makes it more likely that a register will be reused +// between the compare and the branch, but it isn't clear whether preventing +// that would be a win or not. +// +// There are several ways in which (2) could be done. One aggressive +// approach is to assume that all branches are in range and successively +// replace those that turn out not to be in range with a longer form +// (branch relaxation). A simple implementation is to continually walk +// through the function relaxing branches until no more changes are +// needed and a fixed point is reached. However, in the pathological +// worst case, this implementation is quadratic in the number of blocks; +// relaxing branch N can make branch N-1 go out of range, which in turn +// can make branch N-2 go out of range, and so on. // // An alternative approach is to assume that all branches must be // converted to their long forms, then reinstate the short forms of @@ -146,6 +156,7 @@ namespace { void skipTerminator(BlockPosition &Position, TerminatorInfo &Terminator, bool AssumeRelaxed); TerminatorInfo describeTerminator(MachineInstr *MI); + bool fuseCompareAndBranch(MachineInstr *Compare); uint64_t initMBBInfo(); bool mustRelaxBranch(const TerminatorInfo &Terminator, uint64_t Address); bool mustRelaxABranch(); @@ -243,6 +254,90 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr *MI) { return Terminator; } +// Return true if CC is live after MBBI. +static bool isCCLiveAfter(MachineBasicBlock::iterator MBBI, + const TargetRegisterInfo *TRI) { + if (MBBI->killsRegister(SystemZ::CC, TRI)) + return false; + + MachineBasicBlock *MBB = MBBI->getParent(); + MachineBasicBlock::iterator MBBE = MBB->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) { + if (MBBI->readsRegister(SystemZ::CC, TRI)) + return true; + if (MBBI->definesRegister(SystemZ::CC, TRI)) + return false; + } + + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) + if ((*SI)->isLiveIn(SystemZ::CC)) + return true; + + return false; +} + +// Try to fuse compare instruction Compare into a later branch. Return +// true on success and if Compare is therefore redundant. +bool SystemZLongBranch::fuseCompareAndBranch(MachineInstr *Compare) { + if (MF->getTarget().getOptLevel() == CodeGenOpt::None) + return false; + + unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), + Compare); + if (!FusedOpcode) + return false; + + unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg2 = (Compare->getOperand(1).isReg() ? + Compare->getOperand(1).getReg() : 0); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + MachineBasicBlock *MBB = Compare->getParent(); + MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB->end(); + for (++MBBI; MBBI != MBBE; ++MBBI) { + if (MBBI->getOpcode() == SystemZ::BRC && !isCCLiveAfter(MBBI, TRI)) { + // Read the branch mask and target. + MachineOperand CCMask(MBBI->getOperand(0)); + MachineOperand Target(MBBI->getOperand(1)); + + // Clear out all current operands. + int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, false, TRI); + assert(CCUse >= 0 && "BRC must use CC"); + MBBI->RemoveOperand(CCUse); + MBBI->RemoveOperand(1); + MBBI->RemoveOperand(0); + + // Rebuild MBBI as a fused compare and branch. + MBBI->setDesc(TII->get(FusedOpcode)); + MachineInstrBuilder(*MBB->getParent(), MBBI) + .addOperand(Compare->getOperand(0)) + .addOperand(Compare->getOperand(1)) + .addOperand(CCMask) + .addOperand(Target); + + // Clear any intervening kills of SrcReg and SrcReg2. + MBBI = Compare; + for (++MBBI; MBBI != MBBE; ++MBBI) { + MBBI->clearRegisterKills(SrcReg, TRI); + if (SrcReg2) + MBBI->clearRegisterKills(SrcReg2, TRI); + } + return true; + } + + // Stop if we find another reference to CC before a branch. + if (MBBI->readsRegister(SystemZ::CC, TRI) || + MBBI->modifiesRegister(SystemZ::CC, TRI)) + return false; + + // Stop if we find another assignment to the registers before the branch. + if (MBBI->modifiesRegister(SrcReg, TRI) || + (SrcReg2 && MBBI->modifiesRegister(SrcReg2, TRI))) + return false; + } + return false; +} + // Fill MBBs and Terminators, setting the addresses on the assumption // that no branches need relaxation. Return the size of the function under // this assumption. @@ -268,8 +363,12 @@ uint64_t SystemZLongBranch::initMBBInfo() { MachineBasicBlock::iterator MI = MBB->begin(); MachineBasicBlock::iterator End = MBB->end(); while (MI != End && !MI->isTerminator()) { - Block.Size += TII->getInstSizeInBytes(MI); + MachineInstr *Current = MI; ++MI; + if (Current->isCompare() && fuseCompareAndBranch(Current)) + Current->removeFromParent(); + else + Block.Size += TII->getInstSizeInBytes(Current); } skipNonTerminators(Position, Block); diff --git a/test/CodeGen/SystemZ/branch-08.ll b/test/CodeGen/SystemZ/branch-08.ll new file mode 100644 index 00000000000..ee3f5b5f533 --- /dev/null +++ b/test/CodeGen/SystemZ/branch-08.ll @@ -0,0 +1,45 @@ +; Test SystemZInstrInfo::AnalyzeBranch and SystemZInstrInfo::InsertBranch. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @foo() noreturn + +; Check a case where a separate branch is needed and where the original +; order should be reversed. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: clr %r2, %r3 +; CHECK: jnhe .L[[LABEL:.*]] +; CHECK: br %r14 +; CHECK: .L[[LABEL]]: +; CHECK: brasl %r14, foo@PLT +entry: + %cmp = icmp ult i32 %a, %b + br i1 %cmp, label %callit, label %return + +callit: + call void @foo() + unreachable + +return: + ret i32 1 +} + +; Same again with a fused compare and branch. +define i32 @f2(i32 %a) { +; CHECK-LABEL: f2: +; CHECK: cijnlh %r2, 0, .L[[LABEL:.*]] +; CHECK: br %r14 +; CHECK: .L[[LABEL]]: +; CHECK: brasl %r14, foo@PLT +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %callit, label %return + +callit: + call void @foo() + unreachable + +return: + ret i32 1 +}