git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30245 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-09-11 05:35:17 +00:00
parent 0f4aa6ee20
commit f47d167c3b
2 changed files with 0 additions and 154 deletions

View File

@@ -147,32 +147,6 @@ and ISD::FMAX node types?
//===---------------------------------------------------------------------===//
The first basic block (BB) of this code:
declare bool %foo()
int %bar() {
%V = call bool %foo()
br bool %V, label %T, label %F
T:
ret int 1
F:
call bool %foo()
ret int 12
}
compiles to:
_bar:
subl $12, %esp
call L_foo$stub
xorb $1, %al
testb %al, %al
jne LBB_bar_2 # F
It would be better to emit a single "cmpb $1, %al" than an xor followed by a test.
//===---------------------------------------------------------------------===//
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
feasible.
@@ -274,33 +248,6 @@ instead of por and movdqa. Does it matter?
//===---------------------------------------------------------------------===//
Use movddup to splat a v2f64 directly from a memory source, e.g.:
#include <emmintrin.h>
void test(__m128d *r, double A) {
*r = _mm_set1_pd(A);
}
llc:
_test:
movsd 8(%esp), %xmm0
unpcklpd %xmm0, %xmm0
movl 4(%esp), %eax
movapd %xmm0, (%eax)
ret
icc:
_test:
movl 4(%esp), %eax
movddup 8(%esp), %xmm0
movapd %xmm0, (%eax)
ret
//===---------------------------------------------------------------------===//
X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
to choose between movaps, movapd, and movdqa based on types of source and
destination?
@@ -311,69 +258,6 @@ shorter.
//===---------------------------------------------------------------------===//
We are emitting bad code for this:
float %test(float* %V, int %I, int %D, float %V) {
entry:
%tmp = seteq int %D, 0
br bool %tmp, label %cond_true, label %cond_false23
cond_true:
%tmp3 = getelementptr float* %V, int %I
%tmp = load float* %tmp3
%tmp5 = setgt float %tmp, %V
%tmp6 = tail call bool %llvm.isunordered.f32( float %tmp, float %V )
%tmp7 = or bool %tmp5, %tmp6
br bool %tmp7, label %UnifiedReturnBlock, label %cond_next
cond_next:
%tmp10 = add int %I, 1
%tmp12 = getelementptr float* %V, int %tmp10
%tmp13 = load float* %tmp12
%tmp15 = setle float %tmp13, %V
%tmp16 = tail call bool %llvm.isunordered.f32( float %tmp13, float %V )
%tmp17 = or bool %tmp15, %tmp16
%retval = select bool %tmp17, float 0.000000e+00, float 1.000000e+00
ret float %retval
cond_false23:
%tmp28 = tail call float %foo( float* %V, int %I, int %D, float %V )
ret float %tmp28
UnifiedReturnBlock: ; preds = %cond_true
ret float 0.000000e+00
}
declare bool %llvm.isunordered.f32(float, float)
declare float %foo(float*, int, int, float)
It exposes a known load folding problem:
movss (%edx,%ecx,4), %xmm1
ucomiss %xmm1, %xmm0
As well as this:
LBB_test_2: # cond_next
movss LCPI1_0, %xmm2
pxor %xmm3, %xmm3
ucomiss %xmm0, %xmm1
jbe LBB_test_6 # cond_next
LBB_test_5: # cond_next
movaps %xmm2, %xmm3
LBB_test_6: # cond_next
movss %xmm3, 40(%esp)
flds 40(%esp)
addl $44, %esp
ret
Clearly it's unnecessary to clear %xmm3. It's also not clear why we are emitting
three moves (movss, movaps, movss).
//===---------------------------------------------------------------------===//
External test Nurbs exposed some problems. Look for
__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
emits:

View File

@@ -390,44 +390,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
//===---------------------------------------------------------------------===//
This function compiles to ugly code, probably because the instruction costs are off:
void %test(float* %P, <4 x float>* %P2 ) {
%xFloat0.688 = load float* %P
%loadVector37.712 = load <4 x float>* %P2
%inFloat3.713 = insertelement <4 x float> %loadVector37.712, float 0.000000e+00, uint 3
store <4 x float> %inFloat3.713, <4 x float>* %P2
ret void
}
Generates:
_test:
pxor %xmm0, %xmm0
movd %xmm0, %eax ;; EAX = 0!
movl 8(%esp), %ecx
movaps (%ecx), %xmm0
pinsrw $6, %eax, %xmm0
shrl $16, %eax ;; EAX = 0 again!
pinsrw $7, %eax, %xmm0
movaps %xmm0, (%ecx)
ret
It would be better to generate:
_test:
movl 8(%esp), %ecx
movaps (%ecx), %xmm0
xor %eax, %eax
pinsrw $6, %eax, %xmm0
pinsrw $7, %eax, %xmm0
movaps %xmm0, (%ecx)
ret
or use pxor (to make a zero vector) and shuffle (to insert it).
//===---------------------------------------------------------------------===//
Bad codegen:
char foo(int x) { return x; }