diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index 85729956824..dfd666f96db 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -19,6 +19,7 @@ #include "llvm/Function.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -130,6 +131,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { MII != MBB->end(); ++MII) if (MII->killsRegister(DstReg, &TRI)) { MII->addRegisterKilled(SuperReg, &TRI, /*AddIfNotFound=*/true); + DOUT << "\nsubreg: killed here: " << *MII; break; } } else { @@ -231,7 +233,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); assert(SubIdx != 0 && "Invalid index for insert_subreg"); unsigned DstSubReg = TRI.getSubReg(DstReg, SubIdx); - + assert(DstSubReg && "invalid subregister index for register"); assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && "Insert superreg source must be in a physical register"); assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && @@ -240,24 +242,45 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { DOUT << "subreg: CONVERTING: " << *MI; if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. - DOUT << "subreg: eliminated!"; + // No need to insert an identity copy instruction. If the SrcReg was + // , we need to make sure it is alive by inserting an IMPLICIT_DEF + if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { + BuildMI(*MBB, MI, MI->getDebugLoc(), + TII.get(TargetInstrInfo::IMPLICIT_DEF), DstReg) + .addReg(InsReg, RegState::ImplicitKill); + } else { + DOUT << "subreg: eliminated!\n"; + MBB->erase(MI); + return true; + } } else { // Insert sub-register copy const TargetRegisterClass *TRC0= TRI.getPhysicalRegisterRegClass(DstSubReg); const TargetRegisterClass *TRC1= TRI.getPhysicalRegisterRegClass(InsReg); TII.copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1); + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) + if (MI->getOperand(0).isDead()) { TransferDeadFlag(MI, DstSubReg, TRI); - if (MI->getOperand(1).isKill()) + // Also add a SrcReg of the super register. + CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true)); + } else if (MI->getOperand(1).isUndef()) { + // If SrcReg was marked we must make sure it is alive after this + // replacement. Add a SrcReg operand. + CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true)); + } + + // Make sure the inserted register gets killed + if (MI->getOperand(2).isKill()) TransferKillFlag(MI, InsReg, TRI); + } #ifndef NDEBUG - MachineBasicBlock::iterator dMI = MI; - DOUT << "subreg: " << *(--dMI); + MachineBasicBlock::iterator dMI = MI; + DOUT << "subreg: " << *(--dMI); #endif - } DOUT << "\n"; MBB->erase(MI); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 006a10a0ce6..c29a6e339e0 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1041,6 +1041,8 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, if (MO.isKill()) // The register is already marked kill. return true; + // This operand can no longer be undef since Reg is live-in. + MO.setIsUndef(false); if (isPhysReg && isRegTiedToDefOperand(i)) // Two-address uses of physregs must not be marked kill. return true; diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll new file mode 100644 index 00000000000..bdf5131ec32 --- /dev/null +++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as < %s | llc -march=arm -mattr=+neon +; PR4657 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind { +entry: + %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %f_addr = alloca i32 ; [#uses=2] + %retval = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %0 = alloca <4 x i32> ; <<4 x i32>*> [#uses=2] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store <4 x i32> %v, <4 x i32>* %v_addr + store i32 %f, i32* %f_addr + %1 = load <4 x i32>* %v_addr, align 16 ; <<4 x i32>> [#uses=1] + %2 = load i32* %f_addr, align 4 ; [#uses=1] + %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; <<4 x i32>> [#uses=1] + %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer ; <<4 x i32>> [#uses=1] + %5 = mul <4 x i32> %1, %4 ; <<4 x i32>> [#uses=1] + store <4 x i32> %5, <4 x i32>* %0, align 16 + %6 = load <4 x i32>* %0, align 16 ; <<4 x i32>> [#uses=1] + store <4 x i32> %6, <4 x i32>* %retval, align 16 + br label %return + +return: ; preds = %entry + %retval1 = load <4 x i32>* %retval ; <<4 x i32>> [#uses=1] + ret <4 x i32> %retval1 +}