diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 2840e0e57da..915c3817e51 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -74,11 +74,14 @@ namespace {
   private:
     struct MemOpQueueEntry {
       int Offset;
+      unsigned Reg;
+      bool isKill;
       unsigned Position;
       MachineBasicBlock::iterator MBBI;
       bool Merged;
-      MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
-        : Offset(o), Position(p), MBBI(i), Merged(false) {}
+      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+                      MachineBasicBlock::iterator i)
+        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
     };
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
@@ -264,39 +267,53 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
 
 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
 // success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
-               MemOpQueue &memOps,
-               unsigned memOpsBegin,
-               unsigned memOpsEnd,
-               unsigned insertAfter,
-               int Offset,
-               unsigned Base,
-               bool BaseKill,
-               int Opcode,
-               ARMCC::CondCodes Pred,
-               unsigned PredReg,
-               unsigned Scratch,
-               DebugLoc dl,
-               SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+                                     MemOpQueue &memOps,
+                                     unsigned memOpsBegin, unsigned memOpsEnd,
+                                     unsigned insertAfter, int Offset,
+                                     unsigned Base, bool BaseKill,
+                                     int Opcode,
+                                     ARMCC::CondCodes Pred, unsigned PredReg,
+                                     unsigned Scratch,
+                                     DebugLoc dl,
+                                     SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
   // First calculate which of the registers should be killed by the merged
   // instruction.
-  SmallVector<std::pair<unsigned, bool>, 8> Regs;
   const unsigned insertPos = memOps[insertAfter].Position;
+
+  SmallSet<unsigned, 4> UnavailRegs;
+  SmallSet<unsigned, 4> KilledRegs;
+  DenseMap<unsigned, unsigned> Killer;
+  for (unsigned i = 0; i < memOpsBegin; ++i) {
+    if (memOps[i].Position < insertPos && memOps[i].isKill) {
+      unsigned Reg = memOps[i].Reg;
+      if (memOps[i].Merged)
+        UnavailRegs.insert(Reg);
+      else {
+        KilledRegs.insert(Reg);
+        Killer[Reg] = i;
+      }
+    }
+  }
+  for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+    if (memOps[i].Position < insertPos && memOps[i].isKill) {
+      unsigned Reg = memOps[i].Reg;
+      KilledRegs.insert(Reg);
+      Killer[Reg] = i;
+    }
+  }
+
+  SmallVector<std::pair<unsigned, bool>, 8> Regs;
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
-    const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
-    unsigned Reg = MO.getReg();
-    bool isKill = MO.isKill();
+    unsigned Reg = memOps[i].Reg;
+    if (UnavailRegs.count(Reg))
+      // Register is killed before and it's not easy / possible to update the
+      // kill marker on already merged instructions. Abort.
+      return;
 
     // If we are inserting the merged operation after an unmerged operation that
     // uses the same register, make sure to transfer any kill flag.
-    for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
-      if (memOps[j].Position < insertPos) {
-        const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
-        if (MOJ.getReg() == Reg && MOJ.isKill())
-          isKill = true;
-      }
-
+    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
     Regs.push_back(std::make_pair(Reg, isKill));
   }
 
@@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB,
   Merges.push_back(prior(Loc));
   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
     // Remove kill flags from any unmerged memops that come before insertPos.
-    if (Regs[i-memOpsBegin].second)
-      for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
-        if (memOps[j].Position < insertPos) {
-          MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
-          if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
-            MOJ.setIsKill(false);
-        }
+    if (Regs[i-memOpsBegin].second) {
+      unsigned Reg = Regs[i-memOpsBegin].first;
+      if (KilledRegs.count(Reg)) {
+        unsigned j = Killer[Reg];
+        memOps[j].MBBI->getOperand(0).setIsKill(false);
+      }
+    }
     MBB.erase(memOps[i].MBBI);
     memOps[i].Merged = true;
   }
@@ -910,6 +927,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
     if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
       return false;
 
+    MachineBasicBlock::iterator NewBBI = MBBI;
     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
     bool EvenDeadKill = isLd ?
@@ -954,6 +972,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                   getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
         ++NumSTRD2STM;
       }
+      NewBBI = llvm::prior(MBBI);
     } else {
       // Split into two instructions.
       assert((!isT2 || !OffReg) &&
@@ -974,6 +993,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                       OddReg, OddDeadKill, false,
                       BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                       Pred, PredReg, TII, isT2);
+        NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                       EvenReg, EvenDeadKill, false,
                       BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -990,6 +1010,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                       EvenReg, EvenDeadKill, EvenUndef,
                       BaseReg, false, BaseUndef, OffReg, false, OffUndef,
                       Pred, PredReg, TII, isT2);
+        NewBBI = llvm::prior(MBBI);
         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                       OddReg, OddDeadKill, OddUndef,
                       BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -1001,8 +1022,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
         ++NumSTRD2STR;
     }
 
-    MBBI = prior(MBBI);
     MBB.erase(MI);
+    MBBI = NewBBI;
+    return true;
   }
   return false;
 }
@@ -1035,6 +1057,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
     if (isMemOp) {
       int Opcode = MBBI->getOpcode();
       unsigned Size = getLSMultipleTransferSize(MBBI);
+      const MachineOperand &MO = MBBI->getOperand(0);
+      unsigned Reg = MO.getReg();
+      bool isKill = MO.isDef() ? false : MO.isKill();
       unsigned Base = MBBI->getOperand(1).getReg();
       unsigned PredReg = 0;
       ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
@@ -1056,7 +1081,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         CurrSize = Size;
         CurrPred = Pred;
         CurrPredReg = PredReg;
-        MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
         NumMemOps++;
         Advance = true;
       } else {
@@ -1069,14 +1094,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
           // No need to match PredReg.
           // Continue adding to the queue.
          if (Offset > MemOps.back().Offset) {
-            MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
+            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+                                             Position, MBBI));
             NumMemOps++;
             Advance = true;
           } else {
             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                  I != E; ++I) {
               if (Offset < I->Offset) {
-                MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
+                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+                                                 Position, MBBI));
                 NumMemOps++;
                 Advance = true;
                 break;
diff --git a/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
new file mode 100644
index 00000000000..816a6d4f4b9
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
@@ -0,0 +1,148 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -O3 -mcpu=arm1136jf-s
+; PR7421
+
+%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
+%struct.FILE = type { i8* }
+%struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*] }
+%struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 }
+
+@.str2708 = external constant [14 x i8], align 4 ; <[14 x i8]*> [#uses=1]
+
+define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind {
+entry:
+  %xcenter = alloca i32, align 4 ; [#uses=2]
+  %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; [#uses=1]
+  %1 = icmp eq i32 %0, 4 ; [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb: ; preds = %bb445, %entry
+  %2 = load %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2]
+  %3 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 3 ; [#uses=1]
+  store i32 undef, i32* %3, align 4
+  %4 = load i32* undef, align 4 ; [#uses=3]
+  %5 = icmp eq i32 undef, 1 ; [#uses=1]
+  br i1 %5, label %bb10, label %bb445
+
+bb10: ; preds = %bb
+  br i1 undef, label %bb11, label %bb445
+
+bb11: ; preds = %bb10
+  %6 = load %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3]
+  %7 = load %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1]
+  %8 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 13 ; [#uses=1]
+  %9 = load i32* %8, align 4 ; [#uses=3]
+  %10 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 15 ; [#uses=1]
+  %11 = load i32* %10, align 4 ; [#uses=1]
+  br i1 false, label %bb12, label %bb13
+
+bb12: ; preds = %bb11
+  unreachable
+
+bb13: ; preds = %bb11
+  %iftmp.40.0.neg = sdiv i32 0, -2 ; [#uses=2]
+  %12 = sub nsw i32 0, %9 ; [#uses=1]
+  %13 = sitofp i32 %12 to double ; [#uses=1]
+  %14 = fdiv double %13, 0.000000e+00 ; [#uses=1]
+  %15 = fptosi double %14 to i32 ; [#uses=1]
+  %iftmp.41.0.in = add i32 0, %15 ; [#uses=1]
+  %iftmp.41.0.neg = sdiv i32 %iftmp.41.0.in, -2 ; [#uses=3]
+  br i1 undef, label %bb43.loopexit, label %bb21
+
+bb21: ; preds = %bb13
+  %16 = fptosi double undef to i32 ; [#uses=1]
+  %17 = fsub double undef, 0.000000e+00 ; [#uses=1]
+  %not.460 = fcmp oge double %17, 5.000000e-01 ; [#uses=1]
+  %18 = zext i1 %not.460 to i32 ; [#uses=1]
+  %iftmp.42.0 = add i32 %16, %iftmp.41.0.neg ; [#uses=1]
+  %19 = add i32 %iftmp.42.0, %18 ; [#uses=1]
+  store i32 %19, i32* undef, align 4
+  %20 = sub nsw i32 0, %9 ; [#uses=1]
+  %21 = sitofp i32 %20 to double ; [#uses=1]
+  %22 = fdiv double %21, 0.000000e+00 ; [#uses=2]
+  %23 = fptosi double %22 to i32 ; [#uses=1]
+  %24 = fsub double %22, undef ; [#uses=1]
+  %not.461 = fcmp oge double %24, 5.000000e-01 ; [#uses=1]
+  %25 = zext i1 %not.461 to i32 ; [#uses=1]
+  %iftmp.43.0 = add i32 %23, %iftmp.41.0.neg ; [#uses=1]
+  %26 = add i32 %iftmp.43.0, %25 ; [#uses=1]
+  %27 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 10 ; [#uses=1]
+  store i32 %26, i32* %27, align 4
+  %28 = fptosi double undef to i32 ; [#uses=1]
+  %iftmp.45.0 = add i32 %28, %iftmp.40.0.neg ; [#uses=1]
+  %29 = add i32 %iftmp.45.0, 0 ; [#uses=1]
+  store i32 %29, i32* undef, align 4
+  br label %bb43.loopexit
+
+bb36: ; preds = %bb43.loopexit, %bb36
+  %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1]
+  %30 = load i32* undef, align 4 ; [#uses=1]
+  %31 = sub nsw i32 %30, %9 ; [#uses=1]
+  %32 = sitofp i32 %31 to double ; [#uses=1]
+  %33 = fdiv double %32, 0.000000e+00 ; [#uses=1]
+  %34 = fptosi double %33 to i32 ; [#uses=1]
+  %iftmp.46.0 = add i32 %34, %iftmp.41.0.neg ; [#uses=1]
+  %35 = add i32 %iftmp.46.0, 0 ; [#uses=1]
+  store i32 %35, i32* undef, align 4
+  %36 = sub nsw i32 0, %11 ; [#uses=1]
+  %37 = sitofp i32 %36 to double ; [#uses=1]
+  %38 = fmul double %37, 0.000000e+00 ; [#uses=1]
+  %39 = fptosi double %38 to i32 ; [#uses=1]
+  %iftmp.47.0 = add i32 %39, %iftmp.40.0.neg ; [#uses=1]
+  %40 = add i32 %iftmp.47.0, 0 ; [#uses=1]
+  store i32 %40, i32* undef, align 4
+  %41 = getelementptr inbounds %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
+  %42 = load %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2]
+  %43 = icmp eq %struct.termbox* %42, null ; [#uses=1]
+  br i1 %43, label %bb52.loopexit, label %bb36
+
+bb43.loopexit: ; preds = %bb21, %bb13
+  br i1 undef, label %bb52.loopexit, label %bb36
+
+bb52.loopexit: ; preds = %bb43.loopexit, %bb36
+  %44 = icmp eq i32 %4, 0 ; [#uses=1]
+  br i1 %44, label %bb.nph485, label %bb54
+
+bb54: ; preds = %bb52.loopexit
+  switch i32 %4, label %bb62 [
+    i32 2, label %bb56
+    i32 3, label %bb57
+  ]
+
+bb56: ; preds = %bb54
+  br label %bb62
+
+bb57: ; preds = %bb54
+  br label %bb62
+
+bb62: ; preds = %bb57, %bb56, %bb54
+  unreachable
+
+bb.nph485: ; preds = %bb52.loopexit
+  br label %bb248
+
+bb248: ; preds = %bb322, %bb.nph485
+  %45 = icmp eq i32 undef, %4 ; [#uses=1]
+  br i1 %45, label %bb322, label %bb249
+
+bb249: ; preds = %bb248
+  %46 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
+  %47 = load %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1]
+  %48 = getelementptr inbounds %struct.tilebox* %47, i32 0, i32 11 ; [#uses=1]
+  store i32 undef, i32* %48, align 4
+  unreachable
+
+bb322: ; preds = %bb248
+  br i1 undef, label %bb248, label %bb445
+
+bb445: ; preds = %bb322, %bb10, %bb
+  %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; [#uses=1]
+  %50 = icmp eq i32 %49, 4 ; [#uses=1]
+  br i1 %50, label %bb, label %return
+
+return: ; preds = %bb445, %entry
+  ret void
+}
+
+declare i32 @fscanf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind