diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 2e90f7472ed..80982bca8ce 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -131,13 +131,18 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, unsigned SrcReg = DefMI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) continue; - if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + if (DefMI->getOperand(0).getSubReg()) continue; - if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + unsigned SrcSubReg = DefMI->getOperand(1).getSubReg(); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (SrcSubReg) + RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + SrcSubReg); + if (!MRI->constrainRegClass(SrcReg, RC)) continue; DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); - MO.setReg(SrcReg); + MO.substVirtReg(SrcReg, SrcSubReg, *TRI); MRI->clearKillFlags(SrcReg); DefMI->eraseFromParent(); ++NumCoalesces; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index b9a6b479c35..b9457602919 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1349,6 +1349,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, unsigned LastCopiedReg = 0; SlotIndex LastCopyIdx; unsigned RegB = 0; + unsigned SubRegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; unsigned DstIdx = TiedPairs[tpi].second; @@ -1359,6 +1360,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Grab RegB from the instruction because it may have changed if the // instruction was commuted. RegB = MI->getOperand(SrcIdx).getReg(); + SubRegB = MI->getOperand(SrcIdx).getSubReg(); if (RegA == RegB) { // The register is tied to multiple destinations (or else we would @@ -1383,8 +1385,25 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, #endif // Emit a copy. - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA); + // If this operand is folding a truncation, the truncation now moves to the + // copy so that the register classes remain valid for the operands. + MIB.addReg(RegB, 0, SubRegB); + const TargetRegisterClass *RC = MRI->getRegClass(RegB); + if (SubRegB) { + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + assert(TRI->getMatchingSuperRegClass(MRI->getRegClass(RegB), + MRI->getRegClass(RegA), SubRegB) && + "tied subregister must be a truncation"); + // The superreg class will not be used to constrain the subreg class. + RC = 0; + } + else { + assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) + && "tied subregister must be a truncation"); + } + } // Update DistanceMap. MachineBasicBlock::iterator PrevMI = MI; @@ -1404,7 +1423,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } } - DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); MachineOperand &MO = MI->getOperand(SrcIdx); assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && @@ -1417,9 +1436,9 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Make sure regA is a legal regclass for the SrcIdx operand. if (TargetRegisterInfo::isVirtualRegister(RegA) && TargetRegisterInfo::isVirtualRegister(RegB)) - MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); - + MRI->constrainRegClass(RegA, RC); MO.setReg(RegA); + MO.setSubReg(0); // Propagate SrcRegMap. SrcRegMap[RegA] = RegB; @@ -1431,12 +1450,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // Replace other (un-tied) uses of regB with LastCopiedReg. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + MO.isUse()) { if (MO.isKill()) { MO.setIsKill(false); RemovedKillFlag = true; } MO.setReg(LastCopiedReg); + MO.setSubReg(0); } } } diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 215b86267a4..d7c684a730d 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -41,8 +41,8 @@ declare void @bar(i64) nounwind define void @test3(i64 %a, i64 %b, i1 %p) nounwind { ; CHECK-LABEL: test3: -; CHECK: cmovnel %edi, %esi -; CHECK-NEXT: movl %esi, %edi +; CHECK: cmov{{n?}}el %[[R1:e..]], %[[R2:e..]] +; CHECK-NEXT: movl %[[R2]], %[[R2]] %c = trunc i64 %a to i32 %d = trunc i64 %b to i32 diff --git a/test/CodeGen/X86/cse-add-with-overflow.ll b/test/CodeGen/X86/cse-add-with-overflow.ll new file mode 100644 index 00000000000..ee4fbad4506 --- /dev/null +++ b/test/CodeGen/X86/cse-add-with-overflow.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s +; rdar:15661073 simple example of redundant adds +; +; MachineCSE should coalesce trivial subregister copies. +; +; The extra movl+addl should be removed during MachineCSE. +; CHECK-LABEL: redundantadd +; CHECK: cmpq +; CHECK: movq +; CHECK-NOT: movl +; CHECK: addl +; CHECK-NOT: addl +; CHECK: ret + +define i64 @redundantadd(i64* %a0, i64* %a1) { +entry: + %tmp8 = load i64* %a0, align 8 + %tmp12 = load i64* %a1, align 8 + %tmp13 = icmp ult i64 %tmp12, -281474976710656 + br i1 %tmp13, label %exit1, label %body + +exit1: + unreachable + +body: + %tmp14 = trunc i64 %tmp8 to i32 + %tmp15 = trunc i64 %tmp12 to i32 + %tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15) + %tmp17 = extractvalue { i32, i1 } %tmp16, 1 + br i1 %tmp17, label %exit2, label %return + +exit2: + unreachable + +return: + %tmp18 = add i64 %tmp12, %tmp8 + %tmp19 = and i64 %tmp18, 4294967295 + %tmp20 = or i64 %tmp19, -281474976710656 + ret i64 %tmp20 +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)