diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 1a7f06cbe3a..46e63b2d3bd 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2162,7 +2162,7 @@ MachineInstr*
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                     MachineInstr *MI, unsigned i,
                                     const SmallVectorImpl<MachineOperand> &MOs,
-                                    unsigned Align) const {
+                                    unsigned Size, unsigned Align) const {
   const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
   bool isTwoAddrFold = false;
   unsigned NumOps = MI->getDesc().getNumOperands();
@@ -2202,13 +2202,28 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
       OpcodeTablePtr->find((unsigned*)MI->getOpcode());
     if (I != OpcodeTablePtr->end()) {
+      unsigned Opcode = I->second.first;
       unsigned MinAlign = I->second.second;
       if (Align < MinAlign)
         return NULL;
+      if (Size) {
+        unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+        if (Size < RCSize) {
+          // Check if it's safe to fold the load. If the size of the object is
+          // narrower than the load width, then it's not.
+          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
+            return NULL;
+          // If this is a 64-bit load, but the spill slot is 32, then we can do
+          // a 32-bit load which is implicitly zero-extended. This likely is due
+          // to liveintervalanalysis remat'ing a load from stack slot.
+          Opcode = X86::MOV32rm;
+        }
+      }
+
       if (isTwoAddrFold)
-        NewMI = FuseTwoAddrInst(MF, I->second.first, MOs, MI, *this);
+        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
       else
-        NewMI = FuseInst(MF, I->second.first, i, MOs, MI, *this);
+        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);
       return NewMI;
     }
   }
@@ -2228,16 +2243,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   if (NoFusing) return NULL;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned Size = MFI->getObjectSize(FrameIndex);
   unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     unsigned NewOpc = 0;
+    unsigned RCSize = 0;
     switch (MI->getOpcode()) {
     default: return NULL;
-    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
-    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
-    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
-    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
+    case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
+    case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
+    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
     }
+    // Check if it's safe to fold the load. If the size of the object is
+    // narrower than the load width, then it's not.
+    if (Size < RCSize)
+      return NULL;
     // Change to CMPXXri r, 0 first.
     MI->setDesc(get(NewOpc));
     MI->getOperand(1).ChangeToImmediate(0);
@@ -2246,7 +2267,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
 
   SmallVector<MachineOperand,4> MOs;
   MOs.push_back(MachineOperand::CreateFI(FrameIndex));
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
 }
 
 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -2318,7 +2339,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
       MOs.push_back(LoadMI->getOperand(i));
   }
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
 }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index fd498444888..aff3603fd97 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -603,7 +603,7 @@ private:
                                       MachineInstr* MI,
                                       unsigned OpNum,
                                       const SmallVectorImpl<MachineOperand> &MOs,
-                                      unsigned Alignment) const;
+                                      unsigned Size, unsigned Alignment) const;
 };
 
 } // End llvm namespace
diff --git a/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
new file mode 100644
index 00000000000..df529497010
--- /dev/null
+++ b/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim | FileCheck %s
+
+; It's not legal to fold a load from a 32-bit stack slot into a 64-bit
+; instruction. If done, the instruction does a 64-bit load and that's not
+; safe. This can happen when a subreg_to_reg 0 has been coalesced. One
+; exception is when the instruction that folds the load is a move; then we
+; can simply turn it into a 32-bit load from the stack slot.
+; rdar://7170444
+
+%struct.ComplexType = type { i32 }
+
+define i32 @t(i32 %clientPort, i32 %pluginID, i32 %requestID, i32 %objectID, i64 %serverIdentifier, i64 %argumentsData, i32 %argumentsLength) ssp {
+entry:
+; CHECK: _t:
+; CHECK: movl 16(%rbp),
+; CHECK: movl 16(%rbp),
+  %0 = zext i32 %argumentsLength to i64           ; <i64> [#uses=1]
+  %1 = zext i32 %clientPort to i64                ; <i64> [#uses=1]
+  %2 = inttoptr i64 %1 to %struct.ComplexType*    ; <%struct.ComplexType*> [#uses=1]
+  %3 = invoke i8* @pluginInstance(i8* undef, i32 %pluginID)
+          to label %invcont unwind label %lpad    ; <i8*> [#uses=1]
+
+invcont:                                          ; preds = %entry
+  %4 = add i32 %requestID, %pluginID              ; <i32> [#uses=0]
+  %5 = invoke zeroext i8 @invoke(i8* %3, i32 %objectID, i8* undef, i64 %argumentsData, i32 %argumentsLength, i64* undef, i32* undef)
+          to label %invcont1 unwind label %lpad   ; <i8> [#uses=0]
+
+invcont1:                                         ; preds = %invcont
+  %6 = getelementptr inbounds %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
+  %7 = load i32* %6, align 4                      ; <i32> [#uses=1]
+  invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
+          to label %invcont2 unwind label %lpad
+
+invcont2:                                         ; preds = %invcont1
+  ret i32 0
+
+lpad:                                             ; preds = %invcont1, %invcont, %entry
+  %8 = call i32 @vm_deallocate(i32 undef, i64 0, i64 %0) ; <i32> [#uses=0]
+  unreachable
+}
+
+declare i32 @vm_deallocate(i32, i64, i64)
+
+declare i8* @pluginInstance(i8*, i32)
+
+declare zeroext i8 @invoke(i8*, i32, i8*, i64, i32, i64*, i32*)
+
+declare void @booleanAndDataReply(i32, i32, i32, i32, i64, i32)