Two changes:

1) codegen a shift of a register as a shift, not an LEA.
2) teach the RA to convert a shift to an LEA instruction if it wants something
   in three-address form.

This gives us asm diffs like:

-       leal (,%eax,4), %eax
+       shll $2, %eax

which is faster on some processors and smaller on all of them.

and, more interestingly:

-       movl 24(%esi), %eax
-       leal (,%eax,4), %edi
+       movl 24(%esi), %edi
+       shll $2, %edi

Without #2, #1 was a significant pessimization in some cases.

This implements CodeGen/X86/shift-codegen.ll


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35204 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2007-03-20 06:08:29 +00:00
parent ec13dd5cc9
commit a16b7cb1d3
2 changed files with 39 additions and 13 deletions

View File

@ -886,10 +886,9 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
else
AM.IndexReg = CurDAG->getRegister(0, VT);
if (AM.Scale > 2)
Complexity += 2;
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg
else if (AM.Scale > 1)
// Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
// a simple shift.
if (AM.Scale > 1)
Complexity++;
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA

View File

@ -132,29 +132,57 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have subtarget support, enable the 16-bit LEA generation here.
// we have better subtarget support, enable the 16-bit LEA generation here.
bool DisableLEA16 = true;
switch (MI->getOpcode()) {
default: break;
default: return 0;
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
unsigned A = MI->getOperand(0).getReg();
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
unsigned M = MI->getOperand(3).getImmedValue();
if (!Subtarget->hasSSE2() || B != C) return 0;
unsigned M = MI->getOperand(3).getImm();
if (B != C) return 0;
NewMI = BuildMI(get(X86::PSHUFDri), A).addReg(B).addImm(M);
goto Done;
break;
}
case X86::SHL32ri: {
assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
// the flags produced by a shift yet, so this is safe.
unsigned Dest = MI->getOperand(0).getReg();
unsigned Src = MI->getOperand(1).getReg();
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
NewMI = BuildMI(get(X86::LEA32r), Dest)
.addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
break;
}
case X86::SHL16ri: {
assert(MI->getNumOperands() == 3 && "Unknown shift instruction!");
if (DisableLEA16) return 0;
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
// the flags produced by a shift yet, so this is safe.
unsigned Dest = MI->getOperand(0).getReg();
unsigned Src = MI->getOperand(1).getReg();
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
NewMI = BuildMI(get(X86::LEA16r), Dest)
.addReg(0).addImm(1 << ShAmt).addReg(Src).addImm(0);
break;
}
}
// FIXME: None of these instructions are promotable to LEAs without
// additional information. In particular, LEA doesn't set the flags that
// add and inc do. :(
return 0;
if (0)
switch (MI->getOpcode()) {
case X86::INC32r:
case X86::INC64_32r:
@ -220,7 +248,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
Done:
if (NewMI) {
NewMI->copyKillDeadInfo(MI);
LV.instructionChanged(MI, NewMI); // Update live variables