Rewrite support for cast uint -> FP. In particular, we used to compile this:

double %test(uint %X) {
        %tmp.1 = cast uint %X to double         ; <double> [#uses=1]
        ret double %tmp.1
}

into:

test:
        sub %ESP, 8
        mov %EAX, DWORD PTR [%ESP + 12]
        mov %ECX, 0
        mov DWORD PTR [%ESP], %EAX
        mov DWORD PTR [%ESP + 4], %ECX
        fild QWORD PTR [%ESP]
        add %ESP, 8
        ret

... which basically zero extends to 8 bytes, then does an fild for an
8-byte signed int.

Now we generate this:


test:
        sub %ESP, 4
        mov %EAX, DWORD PTR [%ESP + 8]
        mov DWORD PTR [%ESP], %EAX
        fild DWORD PTR [%ESP]
        shr %EAX, 31
        fadd DWORD PTR [.CPItest_0 + 4*%EAX]
        add %ESP, 4
        ret

        .section .rodata
        .align  4
.CPItest_0:
        .quad   5728578726015270912

This does a 32-bit signed integer load, then adds in an offset if the sign
bit of the integer was set.

It turns out that this is substantially faster than the preceeding sequence.
Consider this testcase:

unsigned a[2]={1,2};
volatile double G;

void main() {
    int i;
    for (i=0; i<100000000; ++i )
        G += a[i&1];
}

On zion (a P4 Xeon, 3Ghz), this patch speeds up the testcase from 2.140s
to 0.94s.

On apoc, an athlon MP 2100+, this patch speeds up the testcase from 1.72s
to 1.34s.

Note that the program takes 2.5s/1.97s on zion/apoc with GCC 3.3 -O3
-fomit-frame-pointer.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@17083 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2004-10-17 08:01:28 +00:00
parent 07306de06e
commit 56a31c69c8

View File

@ -3404,17 +3404,8 @@ void X86ISel::emitCastOperation(MachineBasicBlock *BB,
PromoteType = Type::IntTy;
PromoteOpcode = X86::MOVZX32rr16;
break;
case Type::UIntTyID: {
// Make a 64 bit temporary... and zero out the top of it...
unsigned TmpReg = makeAnotherReg(Type::LongTy);
BuildMI(*BB, IP, X86::MOV32rr, 1, TmpReg).addReg(SrcReg);
BuildMI(*BB, IP, X86::MOV32ri, 1, TmpReg+1).addImm(0);
SrcTy = Type::LongTy;
SrcClass = cLong;
SrcReg = TmpReg;
break;
}
case Type::ULongTyID:
case Type::UIntTyID:
// Don't fild into the read destination.
DestReg = makeAnotherReg(Type::DoubleTy);
break;
@ -3449,10 +3440,28 @@ void X86ISel::emitCastOperation(MachineBasicBlock *BB,
{ 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);
// We need special handling for unsigned 64-bit integer sources. If the
// input number has the "sign bit" set, then we loaded it incorrectly as a
// negative 64-bit number. In this case, add an offset value.
if (SrcTy == Type::ULongTy) {
if (SrcTy == Type::UIntTy) {
// If this is a cast from uint -> double, we need to be careful about if
// the "sign" bit is set. If so, we don't want to make a negative number,
// we want to make a positive number. Emit code to add an offset if the
// sign bit is set.
// Compute whether the sign bit is set by shifting the reg right 31 bits.
unsigned IsNeg = makeAnotherReg(Type::IntTy);
BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
// Create a CP value that has the offset in one word and 0 in the other.
static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
0x4f80000000000000ULL);
unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
.addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
} else if (SrcTy == Type::ULongTy) {
// We need special handling for unsigned 64-bit integer sources. If the
// input number has the "sign bit" set, then we loaded it incorrectly as a
// negative 64-bit number. In this case, add an offset value.
// Emit a test instruction to see if the dynamic input value was signed.
BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);