From b6bac51351d2a1a9db76381b92c40ec24cc59e8e Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Wed, 25 Feb 2004 06:13:04 +0000
Subject: [PATCH] * Make the previous patch more efficient by not allocating a
 temporary MachineInstr to do analysis.

*** FOLD getelementptr instructions into loads and stores when possible,
making use of some of the crazy X86 addressing modes.

For example, the following C++ program fragment:

  struct complex {
    double re, im;
    complex(double r, double i) : re(r), im(i) {}
  };
  inline complex operator+(const complex& a, const complex& b) {
    return complex(a.re+b.re, a.im+b.im);
  }
  complex addone(const complex& arg) {
    return arg + complex(1,0);
  }

Used to be compiled to:

_Z6addoneRK7complex:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
***     mov %EDX, %ECX
        fld QWORD PTR [%EDX]
        fld1
        faddp %ST(1)
***     add %ECX, 8
        fld QWORD PTR [%ECX]
        fldz
        faddp %ST(1)
***     mov %ECX, %EAX
        fxch %ST(1)
        fstp QWORD PTR [%ECX]
***     add %EAX, 8
        fstp QWORD PTR [%EAX]
        ret

Now it is compiled to:

_Z6addoneRK7complex:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        fld QWORD PTR [%ECX]
        fld1
        faddp %ST(1)
        fld QWORD PTR [%ECX + 8]
        fldz
        faddp %ST(1)
        fxch %ST(1)
        fstp QWORD PTR [%EAX]
        fstp QWORD PTR [%EAX + 8]
        ret

Other programs should see similar improvements, across the board.  Note that
in addition to reducing instruction count, this also reduces register
pressure a lot, always a good thing on X86.  :)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11819 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/InstSelectSimple.cpp | 248 +++++++++++++++++++++-------
 lib/Target/X86/X86ISelSimple.cpp    | 248 +++++++++++++++++++++-------
 2 files changed, 376 insertions(+), 120 deletions(-)

diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index f0e0a5c69fe..d6e3a75186f 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -222,6 +222,20 @@ namespace {
     ///
     void promote32(unsigned targetReg, const ValueRecord &VR);
 
+    // getGEPIndex - This is used to fold GEP instructions into X86 addressing
+    // expressions.
+    void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                     std::vector<Value*> &GEPOps,
+                     std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                     unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
+    /// isGEPFoldable - Return true if the specified GEP can be completely
+    /// folded into the addressing mode of a load/store or lea instruction.
+    bool isGEPFoldable(MachineBasicBlock *MBB,
+                       Value *Src, User::op_iterator IdxBegin,
+                       User::op_iterator IdxEnd, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
     /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
     /// constant expression GEP support.
     ///
@@ -1884,14 +1898,32 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
 /// need to worry about the memory layout of the target machine.
 ///
 void ISel::visitLoadInst(LoadInst &I) {
-  unsigned SrcAddrReg = getReg(I.getOperand(0));
   unsigned DestReg = getReg(I);
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(0);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
+
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
 
   unsigned Class = getClassB(I.getType());
-
   if (Class == cLong) {
-    addDirectMem(BuildMI(BB, X86::MOVrm32, 4, DestReg), SrcAddrReg);
-    addRegOffset(BuildMI(BB, X86::MOVrm32, 4, DestReg+1), SrcAddrReg, 4);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg),
+                   BaseReg, Scale, IndexReg, Disp);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg+1),
+                   BaseReg, Scale, IndexReg, Disp+4);
     return;
   }
 
@@ -1900,37 +1932,61 @@ void ISel::visitLoadInst(LoadInst &I) {
   };
   unsigned Opcode = Opcodes[Class];
   if (I.getType() == Type::DoubleTy) Opcode = X86::FLDr64;
-  addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+  addFullAddress(BuildMI(BB, Opcode, 4, DestReg),
+                 BaseReg, Scale, IndexReg, Disp);
 }
 
 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
 /// instruction.
 ///
 void ISel::visitStoreInst(StoreInst &I) {
-  unsigned AddressReg = getReg(I.getOperand(1));
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(1);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
+
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
+
   const Type *ValTy = I.getOperand(0)->getType();
   unsigned Class = getClassB(ValTy);
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
     uint64_t Val = CI->getRawValue();
     if (Class == cLong) {
-      addDirectMem(BuildMI(BB, X86::MOVmi32, 5), AddressReg).addZImm(Val & ~0U);
-      addRegOffset(BuildMI(BB, X86::MOVmi32, 5), AddressReg,4).addZImm(Val>>32);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val & ~0U);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addZImm(Val>>32);
     } else {
       static const unsigned Opcodes[] = {
         X86::MOVmi8, X86::MOVmi16, X86::MOVmi32
       };
       unsigned Opcode = Opcodes[Class];
-      addDirectMem(BuildMI(BB, Opcode, 5), AddressReg).addZImm(Val);
+      addFullAddress(BuildMI(BB, Opcode, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val);
     }
   } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
-    addDirectMem(BuildMI(BB, X86::MOVmi8, 5),
-                 AddressReg).addZImm(CB->getValue());
+    addFullAddress(BuildMI(BB, X86::MOVmi8, 5),
+                   BaseReg, Scale, IndexReg, Disp).addZImm(CB->getValue());
   } else {
     if (Class == cLong) {
       unsigned ValReg = getReg(I.getOperand(0));
-      addDirectMem(BuildMI(BB, X86::MOVmr32, 5), AddressReg).addReg(ValReg);
-      addRegOffset(BuildMI(BB, X86::MOVmr32, 5), AddressReg,4).addReg(ValReg+1);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addReg(ValReg+1);
    } else {
       unsigned ValReg = getReg(I.getOperand(0));
       static const unsigned Opcodes[] = {
@@ -1938,7 +1994,8 @@ void ISel::visitStoreInst(StoreInst &I) {
       };
       unsigned Opcode = Opcodes[Class];
       if (ValTy == Type::DoubleTy) Opcode = X86::FSTr64;
-      addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
+      addFullAddress(BuildMI(BB, Opcode, 1+4),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
     }
   }
 }
@@ -2138,7 +2195,8 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
     }
 
     // Spill the integer to memory and reload it from there...
-    int FrameIdx = F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
+    int FrameIdx =
+      F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
 
     if (SrcClass == cLong) {
       addFrameReference(BMI(BB, IP, X86::MOVmr32, 5), FrameIdx).addReg(SrcReg);
@@ -2160,15 +2218,18 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
     // Emit a test instruction to see if the dynamic input value was signed.
     BMI(BB, IP, X86::TESTrr32, 2).addReg(SrcReg+1).addReg(SrcReg+1);
 
-    // If the sign bit is set, get a pointer to an offset, otherwise get a pointer to a zero.
+    // If the sign bit is set, get a pointer to an offset, otherwise get a
+    // pointer to a zero.
     MachineConstantPool *CP = F->getConstantPool();
     unsigned Zero = makeAnotherReg(Type::IntTy);
+    Constant *Null = Constant::getNullValue(Type::UIntTy);
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Zero),
-                 CP->getConstantPoolIndex(Constant::getNullValue(Type::UIntTy)));
+                             CP->getConstantPoolIndex(Null));
     unsigned Offset = makeAnotherReg(Type::IntTy);
+    Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
+
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Offset),
-                             CP->getConstantPoolIndex(ConstantUInt::get(Type::UIntTy,
-                                                                        0x5f800000)));
+                             CP->getConstantPoolIndex(OffsetCst));
     unsigned Addr = makeAnotherReg(Type::IntTy);
     BMI(BB, IP, X86::CMOVSrr32, 2, Addr).addReg(Zero).addReg(Offset);
@@ -2303,6 +2364,26 @@ void ISel::visitVAArgInst(VAArgInst &I) {
 
 
 void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+  // If this GEP instruction will be folded into all of its users, we don't need
+  // to explicitly calculate it!
+  unsigned A, B, C, D;
+  if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), A,B,C,D)) {
+    // Check all of the users of the instruction to see if they are loads and
+    // stores.
+    bool AllWillFold = true;
+    for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
+      if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
+        if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
+            cast<Instruction>(*UI)->getOperand(0) == &I) {
+          AllWillFold = false;
+          break;
+        }
+
+    // If the instruction is foldable, and will be folded into all users, don't
+    // emit it!
+    if (AllWillFold) return;
+  }
+
   unsigned outputReg = getReg(I);
   emitGEPOperation(BB, BB->end(), I.getOperand(0),
                    I.op_begin()+1, I.op_end(), outputReg);
@@ -2319,15 +2400,18 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
 ///
 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
 ///
-static void getGEPIndex(std::vector<Value*> &GEPOps,
-                        std::vector<const Type*> &GEPTypes,
-                        MachineInstr *Ops, const TargetData &TD){
+void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                       std::vector<Value*> &GEPOps,
+                       std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+  const TargetData &TD = TM.getTargetData();
+
   // Clear out the state we are working with...
-  Ops->getOperand(0).setReg(0);            // No base register
-  Ops->getOperand(1).setImmedValue(1);     // Unit scale
-  Ops->getOperand(2).setReg(0);            // No index register
-  Ops->getOperand(3).setImmedValue(0);     // No displacement
-
+  BaseReg = 0;    // No base register
+  Scale = 1;      // Unit scale
+  IndexReg = 0;   // No index register
+  Disp = 0;       // No displacement
+
   // While there are GEP indexes that can be folded into the current address,
   // keep processing them.
   while (!GEPTypes.empty()) {
@@ -2340,14 +2424,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       // structure is in memory.  Since the structure index must be constant, we
       // can get its value and use it to find the right byte offset from the
       // StructLayout class's list of structure member offsets.
-      unsigned idxValue = CUI->getValue();
-      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
-      if (FieldOff) {
-        if (Ops->getOperand(2).getReg())
-          return;  // Already has an index, can't add offset.
-        Ops->getOperand(3).setImmedValue(FieldOff+
-                                         Ops->getOperand(3).getImmedValue());
-      }
+      Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
       GEPOps.pop_back();        // Consume a GEP operand
       GEPTypes.pop_back();
     } else {
@@ -2362,10 +2439,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
 
       // If idx is a constant, fold it into the offset.
       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
-        unsigned elementSize = TD.getTypeSize(SqTy->getElementType());
-        unsigned Offset = elementSize*CSI->getValue();
-        Ops->getOperand(3).setImmedValue(Offset+
-                                         Ops->getOperand(3).getImmedValue());
+        Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
       } else {
         // If we can't handle it, return.
         return;
       }
@@ -2375,15 +2449,24 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       GEPTypes.pop_back();
     }
   }
+
+  // GEPTypes is empty, which means we have a single operand left.  See if we
+  // can set it as the base register.
+  //
+  // FIXME: When addressing modes are more powerful/correct, we could load
+  // global addresses directly as 32-bit immediates.
+  assert(BaseReg == 0);
+  BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
+  GEPOps.pop_back();        // Consume the last GEP operand
 }
 
-void ISel::emitGEPOperation(MachineBasicBlock *MBB,
-                            MachineBasicBlock::iterator IP,
-                            Value *Src, User::op_iterator IdxBegin,
-                            User::op_iterator IdxEnd, unsigned TargetReg) {
-  const TargetData &TD = TM.getTargetData();
-
+/// isGEPFoldable - Return true if the specified GEP can be completely
+/// folded into the addressing mode of a load/store or lea instruction.
+bool ISel::isGEPFoldable(MachineBasicBlock *MBB,
+                         Value *Src, User::op_iterator IdxBegin,
+                         User::op_iterator IdxEnd, unsigned &BaseReg,
+                         unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
   if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
     Src = CPR->getValue();
 
@@ -2396,27 +2479,53 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
 
-  // DummyMI - A dummy instruction to pass into getGEPIndex.  The opcode doesn't
-  // matter, we just need 4 MachineOperands.
-  MachineInstr *DummyMI =
-    BuildMI(X86::PHI, 4).addReg(0).addZImm(1).addReg(0).addSImm(0);
+  MachineBasicBlock::iterator IP;
+  if (MBB) IP = MBB->end();
+  getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
+
+  // We can fold it away iff the getGEPIndex call eliminated all operands.
+  return GEPOps.empty();
+}
+
+void ISel::emitGEPOperation(MachineBasicBlock *MBB,
+                            MachineBasicBlock::iterator IP,
+                            Value *Src, User::op_iterator IdxBegin,
+                            User::op_iterator IdxEnd, unsigned TargetReg) {
+  const TargetData &TD = TM.getTargetData();
+  if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
+    Src = CPR->getValue();
+
+  std::vector<Value*> GEPOps;
+  GEPOps.resize(IdxEnd-IdxBegin+1);
+  GEPOps[0] = Src;
+  std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
+
+  std::vector<const Type*> GEPTypes;
+  GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
+                  gep_type_end(Src->getType(), IdxBegin, IdxEnd));
 
   // Keep emitting instructions until we consume the entire GEP instruction.
   while (!GEPOps.empty()) {
     unsigned OldSize = GEPOps.size();
-    getGEPIndex(GEPOps, GEPTypes, DummyMI, TD);
+    unsigned BaseReg, Scale, IndexReg, Disp;
+    getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
 
     if (GEPOps.size() != OldSize) {
       // getGEPIndex consumed some of the input.  Build an LEA instruction here.
-      assert(DummyMI->getOperand(0).getReg() == 0 &&
-             DummyMI->getOperand(1).getImmedValue() == 1 &&
-             DummyMI->getOperand(2).getReg() == 0 &&
-             "Unhandled GEP fold!");
-      if (unsigned Offset = DummyMI->getOperand(3).getImmedValue()) {
-        unsigned Reg = makeAnotherReg(Type::UIntTy);
-        addRegOffset(BMI(MBB, IP, X86::LEAr32, 5, TargetReg), Reg, Offset);
-        TargetReg = Reg;
+      unsigned NextTarget = 0;
+      if (!GEPOps.empty()) {
+        assert(BaseReg == 0 &&
+           "getGEPIndex should have left the base register open for chaining!");
+        NextTarget = BaseReg = makeAnotherReg(Type::UIntTy);
       }
+
+      if (IndexReg == 0 && Disp == 0)
+        BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
+      else
+        addFullAddress(BMI(MBB, IP, X86::LEAr32, 5, TargetReg),
+                       BaseReg, Scale, IndexReg, Disp);
+      --IP;
+      TargetReg = NextTarget;
     } else if (GEPTypes.empty()) {
       // The getGEPIndex operation didn't want to build an LEA.  Check to see if
       // all operands are consumed but the base pointer.  If so, just load it
@@ -2428,6 +2537,27 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
         BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
       }
       break;                // we are now done
+
+    } else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
+      // It's a struct access.  CUI is the index into the structure,
+      // which names the field. This index must have unsigned type.
+      const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
+      GEPOps.pop_back();        // Consume a GEP operand
+      GEPTypes.pop_back();
+
+      // Use the TargetData structure to pick out what the layout of the
+      // structure is in memory.  Since the structure index must be constant, we
+      // can get its value and use it to find the right byte offset from the
+      // StructLayout class's list of structure member offsets.
+      unsigned idxValue = CUI->getValue();
+      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
+      if (FieldOff) {
+        unsigned Reg = makeAnotherReg(Type::UIntTy);
+        // Emit an ADD to add FieldOff to the basePtr.
+        BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
+        --IP;            // Insert the next instruction before this one.
+        TargetReg = Reg; // Codegen the rest of the GEP into this
+      }
     } else {
       // It's an array or pointer access: [ArraySize x ElementType].
       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
@@ -2496,8 +2626,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       }
     }
   }
-
-  delete DummyMI;
 }
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index f0e0a5c69fe..d6e3a75186f 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -222,6 +222,20 @@ namespace {
     ///
     void promote32(unsigned targetReg, const ValueRecord &VR);
 
+    // getGEPIndex - This is used to fold GEP instructions into X86 addressing
+    // expressions.
+    void getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                     std::vector<Value*> &GEPOps,
+                     std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                     unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
+    /// isGEPFoldable - Return true if the specified GEP can be completely
+    /// folded into the addressing mode of a load/store or lea instruction.
+    bool isGEPFoldable(MachineBasicBlock *MBB,
+                       Value *Src, User::op_iterator IdxBegin,
+                       User::op_iterator IdxEnd, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp);
+
     /// emitGEPOperation - Common code shared between visitGetElementPtrInst and
     /// constant expression GEP support.
     ///
@@ -1884,14 +1898,32 @@ void ISel::emitShiftOperation(MachineBasicBlock *MBB,
 /// need to worry about the memory layout of the target machine.
 ///
 void ISel::visitLoadInst(LoadInst &I) {
-  unsigned SrcAddrReg = getReg(I.getOperand(0));
   unsigned DestReg = getReg(I);
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(0);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
+
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
 
   unsigned Class = getClassB(I.getType());
-
   if (Class == cLong) {
-    addDirectMem(BuildMI(BB, X86::MOVrm32, 4, DestReg), SrcAddrReg);
-    addRegOffset(BuildMI(BB, X86::MOVrm32, 4, DestReg+1), SrcAddrReg, 4);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg),
+                   BaseReg, Scale, IndexReg, Disp);
+    addFullAddress(BuildMI(BB, X86::MOVrm32, 4, DestReg+1),
+                   BaseReg, Scale, IndexReg, Disp+4);
     return;
   }
 
@@ -1900,37 +1932,61 @@ void ISel::visitLoadInst(LoadInst &I) {
   };
   unsigned Opcode = Opcodes[Class];
   if (I.getType() == Type::DoubleTy) Opcode = X86::FLDr64;
-  addDirectMem(BuildMI(BB, Opcode, 4, DestReg), SrcAddrReg);
+  addFullAddress(BuildMI(BB, Opcode, 4, DestReg),
+                 BaseReg, Scale, IndexReg, Disp);
 }
 
 /// visitStoreInst - Implement LLVM store instructions in terms of the x86 'mov'
 /// instruction.
 ///
 void ISel::visitStoreInst(StoreInst &I) {
-  unsigned AddressReg = getReg(I.getOperand(1));
+  unsigned BaseReg = 0, Scale = 1, IndexReg = 0, Disp = 0;
+  Value *Addr = I.getOperand(1);
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) {
+    if (isGEPFoldable(BB, GEP->getOperand(0), GEP->op_begin()+1, GEP->op_end(),
+                      BaseReg, Scale, IndexReg, Disp))
+      Addr = 0;   // Address is consumed!
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr)
+      if (isGEPFoldable(BB, CE->getOperand(0), CE->op_begin()+1, CE->op_end(),
+                        BaseReg, Scale, IndexReg, Disp))
+        Addr = 0;
+  }
+
+  if (Addr) {
+    // If it's not foldable, reset addr mode.
+    BaseReg = getReg(Addr);
+    Scale = 1; IndexReg = 0; Disp = 0;
+  }
+
   const Type *ValTy = I.getOperand(0)->getType();
   unsigned Class = getClassB(ValTy);
 
   if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(0))) {
     uint64_t Val = CI->getRawValue();
     if (Class == cLong) {
-      addDirectMem(BuildMI(BB, X86::MOVmi32, 5), AddressReg).addZImm(Val & ~0U);
-      addRegOffset(BuildMI(BB, X86::MOVmi32, 5), AddressReg,4).addZImm(Val>>32);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val & ~0U);
+      addFullAddress(BuildMI(BB, X86::MOVmi32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addZImm(Val>>32);
     } else {
       static const unsigned Opcodes[] = {
         X86::MOVmi8, X86::MOVmi16, X86::MOVmi32
       };
       unsigned Opcode = Opcodes[Class];
-      addDirectMem(BuildMI(BB, Opcode, 5), AddressReg).addZImm(Val);
+      addFullAddress(BuildMI(BB, Opcode, 5),
+                     BaseReg, Scale, IndexReg, Disp).addZImm(Val);
     }
   } else if (ConstantBool *CB = dyn_cast<ConstantBool>(I.getOperand(0))) {
-    addDirectMem(BuildMI(BB, X86::MOVmi8, 5),
-                 AddressReg).addZImm(CB->getValue());
+    addFullAddress(BuildMI(BB, X86::MOVmi8, 5),
+                   BaseReg, Scale, IndexReg, Disp).addZImm(CB->getValue());
   } else {
     if (Class == cLong) {
       unsigned ValReg = getReg(I.getOperand(0));
-      addDirectMem(BuildMI(BB, X86::MOVmr32, 5), AddressReg).addReg(ValReg);
-      addRegOffset(BuildMI(BB, X86::MOVmr32, 5), AddressReg,4).addReg(ValReg+1);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
+      addFullAddress(BuildMI(BB, X86::MOVmr32, 5),
+                     BaseReg, Scale, IndexReg, Disp+4).addReg(ValReg+1);
     } else {
       unsigned ValReg = getReg(I.getOperand(0));
       static const unsigned Opcodes[] = {
@@ -1938,7 +1994,8 @@ void ISel::visitStoreInst(StoreInst &I) {
       };
       unsigned Opcode = Opcodes[Class];
       if (ValTy == Type::DoubleTy) Opcode = X86::FSTr64;
-      addDirectMem(BuildMI(BB, Opcode, 1+4), AddressReg).addReg(ValReg);
+      addFullAddress(BuildMI(BB, Opcode, 1+4),
+                     BaseReg, Scale, IndexReg, Disp).addReg(ValReg);
     }
   }
 }
@@ -2138,7 +2195,8 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
     }
 
     // Spill the integer to memory and reload it from there...
-    int FrameIdx = F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
+    int FrameIdx =
+      F->getFrameInfo()->CreateStackObject(SrcTy, TM.getTargetData());
 
     if (SrcClass == cLong) {
      addFrameReference(BMI(BB, IP, X86::MOVmr32, 5), FrameIdx).addReg(SrcReg);
@@ -2160,15 +2218,18 @@ void ISel::emitCastOperation(MachineBasicBlock *BB,
     // Emit a test instruction to see if the dynamic input value was signed.
     BMI(BB, IP, X86::TESTrr32, 2).addReg(SrcReg+1).addReg(SrcReg+1);
 
-    // If the sign bit is set, get a pointer to an offset, otherwise get a pointer to a zero.
+    // If the sign bit is set, get a pointer to an offset, otherwise get a
+    // pointer to a zero.
     MachineConstantPool *CP = F->getConstantPool();
     unsigned Zero = makeAnotherReg(Type::IntTy);
+    Constant *Null = Constant::getNullValue(Type::UIntTy);
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Zero),
-                 CP->getConstantPoolIndex(Constant::getNullValue(Type::UIntTy)));
+                             CP->getConstantPoolIndex(Null));
     unsigned Offset = makeAnotherReg(Type::IntTy);
+    Constant *OffsetCst = ConstantUInt::get(Type::UIntTy, 0x5f800000);
+
     addConstantPoolReference(BMI(BB, IP, X86::LEAr32, 5, Offset),
-                             CP->getConstantPoolIndex(ConstantUInt::get(Type::UIntTy,
-                                                                        0x5f800000)));
+                             CP->getConstantPoolIndex(OffsetCst));
     unsigned Addr = makeAnotherReg(Type::IntTy);
     BMI(BB, IP, X86::CMOVSrr32, 2, Addr).addReg(Zero).addReg(Offset);
@@ -2303,6 +2364,26 @@ void ISel::visitVAArgInst(VAArgInst &I) {
 
 
 void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
+  // If this GEP instruction will be folded into all of its users, we don't need
+  // to explicitly calculate it!
+  unsigned A, B, C, D;
+  if (isGEPFoldable(0, I.getOperand(0), I.op_begin()+1, I.op_end(), A,B,C,D)) {
+    // Check all of the users of the instruction to see if they are loads and
+    // stores.
+    bool AllWillFold = true;
+    for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E; ++UI)
+      if (cast<Instruction>(*UI)->getOpcode() != Instruction::Load)
+        if (cast<Instruction>(*UI)->getOpcode() != Instruction::Store ||
+            cast<Instruction>(*UI)->getOperand(0) == &I) {
+          AllWillFold = false;
+          break;
+        }
+
+    // If the instruction is foldable, and will be folded into all users, don't
+    // emit it!
+    if (AllWillFold) return;
+  }
+
   unsigned outputReg = getReg(I);
   emitGEPOperation(BB, BB->end(), I.getOperand(0),
                    I.op_begin()+1, I.op_end(), outputReg);
@@ -2319,15 +2400,18 @@ void ISel::visitGetElementPtrInst(GetElementPtrInst &I) {
 ///
 /// Note that there is one fewer entry in GEPTypes than there is in GEPOps.
 ///
-static void getGEPIndex(std::vector<Value*> &GEPOps,
-                        std::vector<const Type*> &GEPTypes,
-                        MachineInstr *Ops, const TargetData &TD){
+void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
+                       std::vector<Value*> &GEPOps,
+                       std::vector<const Type*> &GEPTypes, unsigned &BaseReg,
+                       unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
+  const TargetData &TD = TM.getTargetData();
+
   // Clear out the state we are working with...
-  Ops->getOperand(0).setReg(0);            // No base register
-  Ops->getOperand(1).setImmedValue(1);     // Unit scale
-  Ops->getOperand(2).setReg(0);            // No index register
-  Ops->getOperand(3).setImmedValue(0);     // No displacement
-
+  BaseReg = 0;    // No base register
+  Scale = 1;      // Unit scale
+  IndexReg = 0;   // No index register
+  Disp = 0;       // No displacement
+
   // While there are GEP indexes that can be folded into the current address,
   // keep processing them.
   while (!GEPTypes.empty()) {
@@ -2340,14 +2424,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       // structure is in memory.  Since the structure index must be constant, we
       // can get its value and use it to find the right byte offset from the
       // StructLayout class's list of structure member offsets.
-      unsigned idxValue = CUI->getValue();
-      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
-      if (FieldOff) {
-        if (Ops->getOperand(2).getReg())
-          return;  // Already has an index, can't add offset.
-        Ops->getOperand(3).setImmedValue(FieldOff+
-                                         Ops->getOperand(3).getImmedValue());
-      }
+      Disp += TD.getStructLayout(StTy)->MemberOffsets[CUI->getValue()];
       GEPOps.pop_back();        // Consume a GEP operand
       GEPTypes.pop_back();
     } else {
@@ -2362,10 +2439,7 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
 
      // If idx is a constant, fold it into the offset.
       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
-        unsigned elementSize = TD.getTypeSize(SqTy->getElementType());
-        unsigned Offset = elementSize*CSI->getValue();
-        Ops->getOperand(3).setImmedValue(Offset+
-                                         Ops->getOperand(3).getImmedValue());
+        Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
       } else {
         // If we can't handle it, return.
         return;
       }
@@ -2375,15 +2449,24 @@ static void getGEPIndex(std::vector<Value*> &GEPOps,
       GEPTypes.pop_back();
     }
   }
+
+  // GEPTypes is empty, which means we have a single operand left.  See if we
+  // can set it as the base register.
+  //
+  // FIXME: When addressing modes are more powerful/correct, we could load
+  // global addresses directly as 32-bit immediates.
+  assert(BaseReg == 0);
+  BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
+  GEPOps.pop_back();        // Consume the last GEP operand
 }
 
-void ISel::emitGEPOperation(MachineBasicBlock *MBB,
-                            MachineBasicBlock::iterator IP,
-                            Value *Src, User::op_iterator IdxBegin,
-                            User::op_iterator IdxEnd, unsigned TargetReg) {
-  const TargetData &TD = TM.getTargetData();
-
+/// isGEPFoldable - Return true if the specified GEP can be completely
+/// folded into the addressing mode of a load/store or lea instruction.
+bool ISel::isGEPFoldable(MachineBasicBlock *MBB,
+                         Value *Src, User::op_iterator IdxBegin,
+                         User::op_iterator IdxEnd, unsigned &BaseReg,
+                         unsigned &Scale, unsigned &IndexReg, unsigned &Disp) {
   if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
     Src = CPR->getValue();
 
@@ -2396,27 +2479,53 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
   GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
                   gep_type_end(Src->getType(), IdxBegin, IdxEnd));
 
-  // DummyMI - A dummy instruction to pass into getGEPIndex.  The opcode doesn't
-  // matter, we just need 4 MachineOperands.
-  MachineInstr *DummyMI =
-    BuildMI(X86::PHI, 4).addReg(0).addZImm(1).addReg(0).addSImm(0);
+  MachineBasicBlock::iterator IP;
+  if (MBB) IP = MBB->end();
+  getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
+
+  // We can fold it away iff the getGEPIndex call eliminated all operands.
+  return GEPOps.empty();
+}
+
+void ISel::emitGEPOperation(MachineBasicBlock *MBB,
+                            MachineBasicBlock::iterator IP,
+                            Value *Src, User::op_iterator IdxBegin,
+                            User::op_iterator IdxEnd, unsigned TargetReg) {
+  const TargetData &TD = TM.getTargetData();
+  if (ConstantPointerRef *CPR = dyn_cast<ConstantPointerRef>(Src))
+    Src = CPR->getValue();
+
+  std::vector<Value*> GEPOps;
+  GEPOps.resize(IdxEnd-IdxBegin+1);
+  GEPOps[0] = Src;
+  std::copy(IdxBegin, IdxEnd, GEPOps.begin()+1);
+
+  std::vector<const Type*> GEPTypes;
+  GEPTypes.assign(gep_type_begin(Src->getType(), IdxBegin, IdxEnd),
+                  gep_type_end(Src->getType(), IdxBegin, IdxEnd));
 
   // Keep emitting instructions until we consume the entire GEP instruction.
   while (!GEPOps.empty()) {
     unsigned OldSize = GEPOps.size();
-    getGEPIndex(GEPOps, GEPTypes, DummyMI, TD);
+    unsigned BaseReg, Scale, IndexReg, Disp;
+    getGEPIndex(MBB, IP, GEPOps, GEPTypes, BaseReg, Scale, IndexReg, Disp);
 
     if (GEPOps.size() != OldSize) {
       // getGEPIndex consumed some of the input.  Build an LEA instruction here.
-      assert(DummyMI->getOperand(0).getReg() == 0 &&
-             DummyMI->getOperand(1).getImmedValue() == 1 &&
-             DummyMI->getOperand(2).getReg() == 0 &&
-             "Unhandled GEP fold!");
-      if (unsigned Offset = DummyMI->getOperand(3).getImmedValue()) {
-        unsigned Reg = makeAnotherReg(Type::UIntTy);
-        addRegOffset(BMI(MBB, IP, X86::LEAr32, 5, TargetReg), Reg, Offset);
-        TargetReg = Reg;
+      unsigned NextTarget = 0;
+      if (!GEPOps.empty()) {
+        assert(BaseReg == 0 &&
+           "getGEPIndex should have left the base register open for chaining!");
+        NextTarget = BaseReg = makeAnotherReg(Type::UIntTy);
       }
+
+      if (IndexReg == 0 && Disp == 0)
+        BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
+      else
+        addFullAddress(BMI(MBB, IP, X86::LEAr32, 5, TargetReg),
+                       BaseReg, Scale, IndexReg, Disp);
+      --IP;
+      TargetReg = NextTarget;
     } else if (GEPTypes.empty()) {
       // The getGEPIndex operation didn't want to build an LEA.  Check to see if
       // all operands are consumed but the base pointer.  If so, just load it
@@ -2428,6 +2537,27 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
         BMI(MBB, IP, X86::MOVrr32, 1, TargetReg).addReg(BaseReg);
       }
       break;                // we are now done
+
+    } else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
+      // It's a struct access.  CUI is the index into the structure,
+      // which names the field. This index must have unsigned type.
+      const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
+      GEPOps.pop_back();        // Consume a GEP operand
+      GEPTypes.pop_back();
+
+      // Use the TargetData structure to pick out what the layout of the
+      // structure is in memory.  Since the structure index must be constant, we
+      // can get its value and use it to find the right byte offset from the
+      // StructLayout class's list of structure member offsets.
+      unsigned idxValue = CUI->getValue();
+      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
+      if (FieldOff) {
+        unsigned Reg = makeAnotherReg(Type::UIntTy);
+        // Emit an ADD to add FieldOff to the basePtr.
+        BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
+        --IP;            // Insert the next instruction before this one.
+        TargetReg = Reg; // Codegen the rest of the GEP into this
+      }
     } else {
      // It's an array or pointer access: [ArraySize x ElementType].
       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
@@ -2496,8 +2626,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       }
     }
   }
-
-  delete DummyMI;
 }
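
A minimal sketch of the folding idea, for readers skimming the patch: a GEP
whose struct and array indices are all compile-time constants collapses into
the displacement of a single x86 memory operand of the form
[BaseReg + Scale*IndexReg + Disp].  The names below (AddrMode, Index,
foldIndices) are invented for illustration and are not the 2004 LLVM API;
only the constant-index cases this patch handles are modeled, mirroring what
isGEPFoldable/getGEPIndex do above.

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  struct AddrMode {          // x86 memory operand: [Base + Scale*Index + Disp]
    unsigned BaseReg  = 0;   // 0 = no base register chosen yet
    unsigned Scale    = 1;   // unit scale until an index register is needed
    unsigned IndexReg = 0;   // 0 = no index register
    int32_t  Disp     = 0;   // accumulated byte displacement
  };

  // One GEP step: either a struct field (constant byte offset) or an array
  // subscript (element size times the index).
  struct Index {
    bool    IsField;         // true: struct field, false: array subscript
    bool    IsConst;         // for array steps: subscript is a constant?
    int32_t FieldOffset;     // byte offset of the struct field
    int32_t ElemSize;        // sizeof(element) for array steps
    int32_t ConstIdx;        // the constant subscript, when IsConst
  };

  // Fold every GEP step into one addressing mode, the way getGEPIndex()
  // accumulates Disp.  Returning true corresponds to isGEPFoldable(): the
  // whole GEP fits in the memory operand of a single load/store.
  static bool foldIndices(const std::vector<Index> &Steps, unsigned PtrReg,
                          AddrMode &AM) {
    AM = AddrMode{};                         // no base/index, unit scale
    for (const Index &S : Steps) {
      if (S.IsField)
        AM.Disp += S.FieldOffset;            // struct field: constant offset
      else if (S.IsConst)
        AM.Disp += S.ElemSize * S.ConstIdx;  // constant subscript: same
      else
        return false;                        // variable subscript: no fold here
    }
    AM.BaseReg = PtrReg;                     // pointer operand becomes the base
    return true;
  }

  int main() {
    // &arg->im, with im at byte offset 8, folds to [reg + 8]: exactly the
    // "fld QWORD PTR [%ECX + 8]" shown in the commit message.
    std::vector<Index> Gep = {{true, true, 8, 0, 0}};
    AddrMode AM;
    if (foldIndices(Gep, /*PtrReg=*/1, AM))
      std::printf("[reg%u + %d]\n", AM.BaseReg, AM.Disp);   // prints [reg1 + 8]
  }

When a fold fails partway, the patch instead chains the already-folded prefix
through an LEA (or a plain MOV when only a base register remains) and keeps
going; the sketch leaves that chaining out.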