Teach the instruction selector how to transform 'array' GEP computations into X86

scaled indexes.  This allows us to compile GEP's like this:

int* %test([10 x { int, { int } }]* %X, int %Idx) {
        %Idx = cast int %Idx to long
        %X = getelementptr [10 x { int, { int } }]* %X, long 0, long %Idx, ubyte 1, ubyte 0
        ret int* %X
}

Into a single address computation:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        lea %EAX, DWORD PTR [%EAX + 8*%ECX + 4]
        ret

Before it generated:
test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        shl %ECX, 3
        add %EAX, %ECX
        lea %EAX, DWORD PTR [%EAX + 4]
        ret

This is useful for things like int/float/double arrays, as the indexing can be folded into
the loads&stores, reducing register pressure and decreasing the pressure on the decode unit.
With these changes, I expect our performance on 256.bzip2 and gzip to improve a lot.  On
bzip2 for example, we go from this:

10665 asm-printer           - Number of machine instrs printed
   40 ra-local              - Number of loads/stores folded into instructions
 1708 ra-local              - Number of loads added
 1532 ra-local              - Number of stores added
 1354 twoaddressinstruction - Number of instructions added
 1354 twoaddressinstruction - Number of two-address instructions
 2794 x86-peephole          - Number of peephole optimization performed

to this:
9873 asm-printer           - Number of machine instrs printed
  41 ra-local              - Number of loads/stores folded into instructions
1710 ra-local              - Number of loads added
1521 ra-local              - Number of stores added
 789 twoaddressinstruction - Number of instructions added
 789 twoaddressinstruction - Number of two-address instructions
2142 x86-peephole          - Number of peephole optimization performed

... and these types of instructions are often in tight loops.

Linear scan is also helped, but not as much.  It goes from:

8787 asm-printer           - Number of machine instrs printed
2389 liveintervals         - Number of identity moves eliminated after coalescing
2288 liveintervals         - Number of interval joins performed
3522 liveintervals         - Number of intervals after coalescing
5810 liveintervals         - Number of original intervals
 700 spiller               - Number of loads added
 487 spiller               - Number of stores added
 303 spiller               - Number of register spills
1354 twoaddressinstruction - Number of instructions added
1354 twoaddressinstruction - Number of two-address instructions
 363 x86-peephole          - Number of peephole optimization performed

to:

7982 asm-printer           - Number of machine instrs printed
1759 liveintervals         - Number of identity moves eliminated after coalescing
1658 liveintervals         - Number of interval joins performed
3282 liveintervals         - Number of intervals after coalescing
4940 liveintervals         - Number of original intervals
 635 spiller               - Number of loads added
 452 spiller               - Number of stores added
 288 spiller               - Number of register spills
 789 twoaddressinstruction - Number of instructions added
 789 twoaddressinstruction - Number of two-address instructions
 258 x86-peephole          - Number of peephole optimization performed

Though I'm not complaining about the drop in the number of intervals.  :)


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11820 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2004-02-25 07:00:55 +00:00
parent b6bac51351
commit 5f2c7b1975
2 changed files with 46 additions and 48 deletions

View File

@ -2438,11 +2438,30 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
// If idx is a constant, fold it into the offset.
unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
Disp += TypeSize*CSI->getValue();
} else {
// If we can't handle it, return.
return;
// If the index reg is already taken, we can't handle this index.
if (IndexReg) return;
// If this is a size that we can handle, then add the index as
switch (TypeSize) {
case 1: case 2: case 4: case 8:
// These are all acceptable scales on X86.
Scale = TypeSize;
break;
default:
// Otherwise, we can't handle this scale
return;
}
if (CastInst *CI = dyn_cast<CastInst>(idx))
if (CI->getOperand(0)->getType() == Type::IntTy ||
CI->getOperand(0)->getType() == Type::UIntTy)
idx = CI->getOperand(0);
IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
}
GEPOps.pop_back(); // Consume a GEP operand
@ -2456,7 +2475,7 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
// FIXME: When addressing modes are more powerful/correct, we could load
// global addresses directly as 32-bit immediates.
assert(BaseReg == 0);
BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
GEPOps.pop_back(); // Consume the last GEP operand
}
@ -2538,26 +2557,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
}
break; // we are now done
} else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
// It's a struct access. CUI is the index into the structure,
// which names the field. This index must have unsigned type.
const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
GEPOps.pop_back(); // Consume a GEP operand
GEPTypes.pop_back();
// Use the TargetData structure to pick out what the layout of the
// structure is in memory. Since the structure index must be constant, we
// can get its value and use it to find the right byte offset from the
// StructLayout class's list of structure member offsets.
unsigned idxValue = CUI->getValue();
unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
if (FieldOff) {
unsigned Reg = makeAnotherReg(Type::UIntTy);
// Emit an ADD to add FieldOff to the basePtr.
BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
--IP; // Insert the next instruction before this one.
TargetReg = Reg; // Codegen the rest of the GEP into this
}
} else {
// It's an array or pointer access: [ArraySize x ElementType].
const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());

View File

@ -2438,11 +2438,30 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
// If idx is a constant, fold it into the offset.
unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
Disp += TypeSize*CSI->getValue();
} else {
// If we can't handle it, return.
return;
// If the index reg is already taken, we can't handle this index.
if (IndexReg) return;
// If this is a size that we can handle, then add the index as
switch (TypeSize) {
case 1: case 2: case 4: case 8:
// These are all acceptable scales on X86.
Scale = TypeSize;
break;
default:
// Otherwise, we can't handle this scale
return;
}
if (CastInst *CI = dyn_cast<CastInst>(idx))
if (CI->getOperand(0)->getType() == Type::IntTy ||
CI->getOperand(0)->getType() == Type::UIntTy)
idx = CI->getOperand(0);
IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
}
GEPOps.pop_back(); // Consume a GEP operand
@ -2456,7 +2475,7 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
// FIXME: When addressing modes are more powerful/correct, we could load
// global addresses directly as 32-bit immediates.
assert(BaseReg == 0);
BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
GEPOps.pop_back(); // Consume the last GEP operand
}
@ -2538,26 +2557,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
}
break; // we are now done
} else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
// It's a struct access. CUI is the index into the structure,
// which names the field. This index must have unsigned type.
const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
GEPOps.pop_back(); // Consume a GEP operand
GEPTypes.pop_back();
// Use the TargetData structure to pick out what the layout of the
// structure is in memory. Since the structure index must be constant, we
// can get its value and use it to find the right byte offset from the
// StructLayout class's list of structure member offsets.
unsigned idxValue = CUI->getValue();
unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
if (FieldOff) {
unsigned Reg = makeAnotherReg(Type::UIntTy);
// Emit an ADD to add FieldOff to the basePtr.
BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
--IP; // Insert the next instruction before this one.
TargetReg = Reg; // Codegen the rest of the GEP into this
}
} else {
// It's an array or pointer access: [ArraySize x ElementType].
const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());