Rewrite support for cast uint -> FP. In particular, we used to compile this:

double %test(uint %X) { %tmp.1 = cast uint %X to double ; <double> [#uses=1] ret double %tmp.1 } into: test: sub %ESP, 8 mov %EAX, DWORD PTR [%ESP + 12] mov %ECX, 0 mov DWORD PTR [%ESP], %EAX mov DWORD PTR [%ESP + 4], %ECX fild QWORD PTR [%ESP] add %ESP, 8 ret ... which basically zero extends to 8 bytes, then does an fild for an 8-byte signed int. Now we generate this: test: sub %ESP, 4 mov %EAX, DWORD PTR [%ESP + 8] mov DWORD PTR [%ESP], %EAX fild DWORD PTR [%ESP] shr %EAX, 31 fadd DWORD PTR [.CPItest_0 + 4*%EAX] add %ESP, 4 ret .section .rodata .align 4 .CPItest_0: .quad 5728578726015270912 This does a 32-bit signed integer load, then adds in an offset if the sign bit of the integer was set. It turns out that this is substantially faster than the preceeding sequence. Consider this testcase: unsigned a[2]={1,2}; volatile double G; void main() { int i; for (i=0; i<100000000; ++i ) G += a[i&1]; } On zion (a P4 Xeon, 3Ghz), this patch speeds up the testcase from 2.140s to 0.94s. On apoc, an athlon MP 2100+, this patch speeds up the testcase from 1.72s to 1.34s. Note that the program takes 2.5s/1.97s on zion/apoc with GCC 3.3 -O3 -fomit-frame-pointer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@17083 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-14 07:31:47 +00:00 · 2004-10-17 08:01:28 +00:00 · 2004-10-17 08:01:28 +00:00 · 56a31c69c8
commit 56a31c69c8
parent 07306de06e
1 changed files with 23 additions and 14 deletions
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@ -3404,17 +3404,8 @@ void X86ISel::emitCastOperation(MachineBasicBlock *BB,
      PromoteType = Type::IntTy;
      PromoteOpcode = X86::MOVZX32rr16;
      break;
-    case Type::UIntTyID: {
-      // Make a 64 bit temporary... and zero out the top of it...
-      unsigned TmpReg = makeAnotherReg(Type::LongTy);
-      BuildMI(*BB, IP, X86::MOV32rr, 1, TmpReg).addReg(SrcReg);
-      BuildMI(*BB, IP, X86::MOV32ri, 1, TmpReg+1).addImm(0);
-      SrcTy = Type::LongTy;
-      SrcClass = cLong;
-      SrcReg = TmpReg;
-      break;
-    }
    case Type::ULongTyID:
+    case Type::UIntTyID:
      // Don't fild into the read destination.
      DestReg = makeAnotherReg(Type::DoubleTy);
      break;
@ -3449,10 +3440,28 @@ void X86ISel::emitCastOperation(MachineBasicBlock *BB,
      { 0/*byte*/, X86::FILD16m, X86::FILD32m, 0/*FP*/, X86::FILD64m };
    addFrameReference(BuildMI(*BB, IP, Op2[SrcClass], 5, DestReg), FrameIdx);

-    // We need special handling for unsigned 64-bit integer sources.  If the
-    // input number has the "sign bit" set, then we loaded it incorrectly as a
-    // negative 64-bit number.  In this case, add an offset value.
-    if (SrcTy == Type::ULongTy) {
+    if (SrcTy == Type::UIntTy) {
+      // If this is a cast from uint -> double, we need to be careful about if
+      // the "sign" bit is set.  If so, we don't want to make a negative number,
+      // we want to make a positive number.  Emit code to add an offset if the
+      // sign bit is set.
+
+      // Compute whether the sign bit is set by shifting the reg right 31 bits.
+      unsigned IsNeg = makeAnotherReg(Type::IntTy);
+      BuildMI(BB, X86::SHR32ri, 2, IsNeg).addReg(SrcReg).addImm(31);
+
+      // Create a CP value that has the offset in one word and 0 in the other.
+      static ConstantInt *TheOffset = ConstantUInt::get(Type::ULongTy,
+                                                        0x4f80000000000000ULL);
+      unsigned CPI = F->getConstantPool()->getConstantPoolIndex(TheOffset);
+      BuildMI(BB, X86::FADD32m, 5, RealDestReg).addReg(DestReg)
+        .addConstantPoolIndex(CPI).addZImm(4).addReg(IsNeg).addSImm(0);
+
+    } else if (SrcTy == Type::ULongTy) {
+      // We need special handling for unsigned 64-bit integer sources.  If the
+      // input number has the "sign bit" set, then we loaded it incorrectly as a
+      // negative 64-bit number.  In this case, add an offset value.
+
      // Emit a test instruction to see if the dynamic input value was signed.
      BuildMI(*BB, IP, X86::TEST32rr, 2).addReg(SrcReg+1).addReg(SrcReg+1);