Teach the instruction selector how to transform 'array' GEP computations into X86
scaled indexes. This allows us to compile GEPs like this:

int* %test([10 x { int, { int } }]* %X, int %Idx) {
        %Idx = cast int %Idx to long
        %X = getelementptr [10 x { int, { int } }]* %X, long 0, long %Idx, ubyte 1, ubyte 0
        ret int* %X
}

Into a single address computation:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        lea %EAX, DWORD PTR [%EAX + 8*%ECX + 4]
        ret

Before, it generated:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        shl %ECX, 3
        add %EAX, %ECX
        lea %EAX, DWORD PTR [%EAX + 4]
        ret

This is useful for things like int/float/double arrays, as the indexing can be
folded into the loads & stores, reducing register pressure and decreasing the
pressure on the decode unit. With these changes, I expect our performance on
256.bzip2 and gzip to improve a lot. On bzip2, for example, we go from this:

10665 asm-printer           - Number of machine instrs printed
   40 ra-local              - Number of loads/stores folded into instructions
 1708 ra-local              - Number of loads added
 1532 ra-local              - Number of stores added
 1354 twoaddressinstruction - Number of instructions added
 1354 twoaddressinstruction - Number of two-address instructions
 2794 x86-peephole          - Number of peephole optimization performed

to this:

 9873 asm-printer           - Number of machine instrs printed
   41 ra-local              - Number of loads/stores folded into instructions
 1710 ra-local              - Number of loads added
 1521 ra-local              - Number of stores added
  789 twoaddressinstruction - Number of instructions added
  789 twoaddressinstruction - Number of two-address instructions
 2142 x86-peephole          - Number of peephole optimization performed

... and these types of instructions are often in tight loops.

Linear scan is also helped, but not as much. It goes from:

 8787 asm-printer           - Number of machine instrs printed
 2389 liveintervals         - Number of identity moves eliminated after coalescing
 2288 liveintervals         - Number of interval joins performed
 3522 liveintervals         - Number of intervals after coalescing
 5810 liveintervals         - Number of original intervals
  700 spiller               - Number of loads added
  487 spiller               - Number of stores added
  303 spiller               - Number of register spills
 1354 twoaddressinstruction - Number of instructions added
 1354 twoaddressinstruction - Number of two-address instructions
  363 x86-peephole          - Number of peephole optimization performed

to:

 7982 asm-printer           - Number of machine instrs printed
 1759 liveintervals         - Number of identity moves eliminated after coalescing
 1658 liveintervals         - Number of interval joins performed
 3282 liveintervals         - Number of intervals after coalescing
 4940 liveintervals         - Number of original intervals
  635 spiller               - Number of loads added
  452 spiller               - Number of stores added
  288 spiller               - Number of register spills
  789 twoaddressinstruction - Number of instructions added
  789 twoaddressinstruction - Number of two-address instructions
  258 x86-peephole          - Number of peephole optimization performed

Though I'm not complaining about the drop in the number of intervals. :)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11820 91177308-0d34-0410-b5e6-96231b3b80d8
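As a source-level illustration (a hypothetical C example, not part of the commit), the pattern this helps is any indexed access into an array of small fixed-size elements: the element-size multiply folds into the address itself instead of costing a separate shl/add. With 8-byte elements and a field at offset 4, each access below matches the single lea shown above:

/* Hypothetical example: each A[i].value access becomes one load through a
   [A + 8*i + 4] scaled-index address on X86, with no separate shl/add. */
struct Pair { int key; int value; };

int sum_values(struct Pair *A, int n) {
  int sum = 0;
  for (int i = 0; i != n; ++i)
    sum += A[i].value;   /* one load with a scaled-index address */
  return sum;
}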
parent b6bac51351
commit 5f2c7b1975
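For orientation while reading the diff: getGEPIndex decomposes a GEP into the four components of an x86 memory operand, [BaseReg + Scale*IndexReg + Disp]. A minimal sketch of that decomposition as a struct (illustrative only; the real code threads these through as separate reference parameters, not a struct):

// Illustrative only -- not the actual ISel declarations.
struct X86AddressMode {
  unsigned BaseReg;   // register holding the base pointer (0 = unset)
  unsigned Scale;     // element size; must be 1, 2, 4, or 8 on X86
  unsigned IndexReg;  // register holding the array index (0 = unset)
  int      Disp;      // constant byte displacement
};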
@@ -2438,11 +2438,30 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
       assert(idx->getType() == Type::LongTy && "Bad GEP array index!");
 
       // If idx is a constant, fold it into the offset.
+      unsigned TypeSize = TD.getTypeSize(SqTy->getElementType());
       if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(idx)) {
-        Disp += TD.getTypeSize(SqTy->getElementType())*CSI->getValue();
+        Disp += TypeSize*CSI->getValue();
       } else {
-        // If we can't handle it, return.
-        return;
+        // If the index reg is already taken, we can't handle this index.
+        if (IndexReg) return;
+
+        // If this is a size that we can handle, then add the index as a scaled index.
+        switch (TypeSize) {
+        case 1: case 2: case 4: case 8:
+          // These are all acceptable scales on X86.
+          Scale = TypeSize;
+          break;
+        default:
+          // Otherwise, we can't handle this scale.
+          return;
+        }
+
+        if (CastInst *CI = dyn_cast<CastInst>(idx))
+          if (CI->getOperand(0)->getType() == Type::IntTy ||
+              CI->getOperand(0)->getType() == Type::UIntTy)
+            idx = CI->getOperand(0);
+
+        IndexReg = MBB ? getReg(idx, MBB, IP) : 1;
       }
 
       GEPOps.pop_back();        // Consume a GEP operand
@@ -2456,7 +2475,7 @@ void ISel::getGEPIndex(MachineBasicBlock *MBB, MachineBasicBlock::iterator IP,
     // FIXME: When addressing modes are more powerful/correct, we could load
     // global addresses directly as 32-bit immediates.
     assert(BaseReg == 0);
-    BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 0;
+    BaseReg = MBB ? getReg(GEPOps[0], MBB, IP) : 1;
     GEPOps.pop_back();        // Consume the last GEP operand
   }
 
@@ -2538,26 +2557,6 @@ void ISel::emitGEPOperation(MachineBasicBlock *MBB,
       }
       break;                // we are now done
 
-    } else if (const StructType *StTy = dyn_cast<StructType>(GEPTypes.back())) {
-      // It's a struct access.  CUI is the index into the structure,
-      // which names the field.  This index must have unsigned type.
-      const ConstantUInt *CUI = cast<ConstantUInt>(GEPOps.back());
-      GEPOps.pop_back();        // Consume a GEP operand
-      GEPTypes.pop_back();
-
-      // Use the TargetData structure to pick out what the layout of the
-      // structure is in memory.  Since the structure index must be constant, we
-      // can get its value and use it to find the right byte offset from the
-      // StructLayout class's list of structure member offsets.
-      unsigned idxValue = CUI->getValue();
-      unsigned FieldOff = TD.getStructLayout(StTy)->MemberOffsets[idxValue];
-      if (FieldOff) {
-        unsigned Reg = makeAnotherReg(Type::UIntTy);
-        // Emit an ADD to add FieldOff to the basePtr.
-        BMI(MBB, IP, X86::ADDri32, 2, TargetReg).addReg(Reg).addZImm(FieldOff);
-        --IP;            // Insert the next instruction before this one.
-        TargetReg = Reg; // Codegen the rest of the GEP into this
-      }
     } else {
      // It's an array or pointer access: [ArraySize x ElementType].
       const SequentialType *SqTy = cast<SequentialType>(GEPTypes.back());
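The heart of the new array-index path is the scale check: an element size of 1, 2, 4, or 8 maps directly onto the x86 SIB scale field, while any other size must keep using explicit arithmetic. A self-contained restatement of that decision follows (the helper name is made up for illustration; it is not a function in this patch):

#include <cstdint>

// Returns true and sets Scale if ElementSize can be encoded in the x86
// SIB byte; otherwise the caller must emit an explicit shift/multiply.
static bool selectX86Scale(uint64_t ElementSize, unsigned &Scale) {
  switch (ElementSize) {
  case 1: case 2: case 4: case 8:
    Scale = static_cast<unsigned>(ElementSize);  // acceptable X86 scales
    return true;
  default:
    return false;  // e.g. a 12-byte element cannot use a scaled index
  }
}

When the check succeeds, the patch also looks through an int-to-long cast so the original 32-bit value lands in IndexReg directly, which is what turns the shl/add sequence in the commit message into the single lea.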