mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-12 02:47:10 +00:00
Updates.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30245 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0f4aa6ee20
commit
f47d167c3b
@ -147,32 +147,6 @@ and ISD::FMAX node types?
|
|||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
The first BB of this code:
|
|
||||||
|
|
||||||
declare bool %foo()
|
|
||||||
int %bar() {
|
|
||||||
%V = call bool %foo()
|
|
||||||
br bool %V, label %T, label %F
|
|
||||||
T:
|
|
||||||
ret int 1
|
|
||||||
F:
|
|
||||||
call bool %foo()
|
|
||||||
ret int 12
|
|
||||||
}
|
|
||||||
|
|
||||||
compiles to:
|
|
||||||
|
|
||||||
_bar:
|
|
||||||
subl $12, %esp
|
|
||||||
call L_foo$stub
|
|
||||||
xorb $1, %al
|
|
||||||
testb %al, %al
|
|
||||||
jne LBB_bar_2 # F
|
|
||||||
|
|
||||||
It would be better to emit a single "cmpb $1, %al" than an xor followed by a test.
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
|
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
|
||||||
feasible.
|
feasible.
|
||||||
|
|
||||||
@ -274,33 +248,6 @@ instead of por and movdqa. Does it matter?
|
|||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
Use movddup to splat a v2f64 directly from a memory source, e.g.:
|
|
||||||
|
|
||||||
#include <emmintrin.h>
|
|
||||||
|
|
||||||
void test(__m128d *r, double A) {
|
|
||||||
*r = _mm_set1_pd(A);
|
|
||||||
}
|
|
||||||
|
|
||||||
llc:
|
|
||||||
|
|
||||||
_test:
|
|
||||||
movsd 8(%esp), %xmm0
|
|
||||||
unpcklpd %xmm0, %xmm0
|
|
||||||
movl 4(%esp), %eax
|
|
||||||
movapd %xmm0, (%eax)
|
|
||||||
ret
|
|
||||||
|
|
||||||
icc:
|
|
||||||
|
|
||||||
_test:
|
|
||||||
movl 4(%esp), %eax
|
|
||||||
movddup 8(%esp), %xmm0
|
|
||||||
movapd %xmm0, (%eax)
|
|
||||||
ret
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
|
X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
|
||||||
to choose between movaps, movapd, and movdqa based on types of source and
|
to choose between movaps, movapd, and movdqa based on types of source and
|
||||||
destination?
|
destination?
|
||||||
@ -311,69 +258,6 @@ shorter.
|
|||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
We are emitting bad code for this:
|
|
||||||
|
|
||||||
float %test(float* %V, int %I, int %D, float %V) {
|
|
||||||
entry:
|
|
||||||
%tmp = seteq int %D, 0
|
|
||||||
br bool %tmp, label %cond_true, label %cond_false23
|
|
||||||
|
|
||||||
cond_true:
|
|
||||||
%tmp3 = getelementptr float* %V, int %I
|
|
||||||
%tmp = load float* %tmp3
|
|
||||||
%tmp5 = setgt float %tmp, %V
|
|
||||||
%tmp6 = tail call bool %llvm.isunordered.f32( float %tmp, float %V )
|
|
||||||
%tmp7 = or bool %tmp5, %tmp6
|
|
||||||
br bool %tmp7, label %UnifiedReturnBlock, label %cond_next
|
|
||||||
|
|
||||||
cond_next:
|
|
||||||
%tmp10 = add int %I, 1
|
|
||||||
%tmp12 = getelementptr float* %V, int %tmp10
|
|
||||||
%tmp13 = load float* %tmp12
|
|
||||||
%tmp15 = setle float %tmp13, %V
|
|
||||||
%tmp16 = tail call bool %llvm.isunordered.f32( float %tmp13, float %V )
|
|
||||||
%tmp17 = or bool %tmp15, %tmp16
|
|
||||||
%retval = select bool %tmp17, float 0.000000e+00, float 1.000000e+00
|
|
||||||
ret float %retval
|
|
||||||
|
|
||||||
cond_false23:
|
|
||||||
%tmp28 = tail call float %foo( float* %V, int %I, int %D, float %V )
|
|
||||||
ret float %tmp28
|
|
||||||
|
|
||||||
UnifiedReturnBlock: ; preds = %cond_true
|
|
||||||
ret float 0.000000e+00
|
|
||||||
}
|
|
||||||
|
|
||||||
declare bool %llvm.isunordered.f32(float, float)
|
|
||||||
|
|
||||||
declare float %foo(float*, int, int, float)
|
|
||||||
|
|
||||||
|
|
||||||
It exposes a known load folding problem:
|
|
||||||
|
|
||||||
movss (%edx,%ecx,4), %xmm1
|
|
||||||
ucomiss %xmm1, %xmm0
|
|
||||||
|
|
||||||
As well as this:
|
|
||||||
|
|
||||||
LBB_test_2: # cond_next
|
|
||||||
movss LCPI1_0, %xmm2
|
|
||||||
pxor %xmm3, %xmm3
|
|
||||||
ucomiss %xmm0, %xmm1
|
|
||||||
jbe LBB_test_6 # cond_next
|
|
||||||
LBB_test_5: # cond_next
|
|
||||||
movaps %xmm2, %xmm3
|
|
||||||
LBB_test_6: # cond_next
|
|
||||||
movss %xmm3, 40(%esp)
|
|
||||||
flds 40(%esp)
|
|
||||||
addl $44, %esp
|
|
||||||
ret
|
|
||||||
|
|
||||||
Clearly it's unnecessary to clear %xmm3. It's also not clear why we are emitting
|
|
||||||
three moves (movss, movaps, movss).
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
External test Nurbs exposed some problems. Look for
|
External test Nurbs exposed some problems. Look for
|
||||||
__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
|
__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
|
||||||
emits:
|
emits:
|
||||||
|
@ -390,44 +390,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
|
|||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
This input compiles to ugly code, probably because the cost model is off:
|
|
||||||
|
|
||||||
void %test(float* %P, <4 x float>* %P2 ) {
|
|
||||||
%xFloat0.688 = load float* %P
|
|
||||||
%loadVector37.712 = load <4 x float>* %P2
|
|
||||||
%inFloat3.713 = insertelement <4 x float> %loadVector37.712, float 0.000000e+00, uint 3
|
|
||||||
store <4 x float> %inFloat3.713, <4 x float>* %P2
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
Generates:
|
|
||||||
|
|
||||||
_test:
|
|
||||||
pxor %xmm0, %xmm0
|
|
||||||
movd %xmm0, %eax ;; EAX = 0!
|
|
||||||
movl 8(%esp), %ecx
|
|
||||||
movaps (%ecx), %xmm0
|
|
||||||
pinsrw $6, %eax, %xmm0
|
|
||||||
shrl $16, %eax ;; EAX = 0 again!
|
|
||||||
pinsrw $7, %eax, %xmm0
|
|
||||||
movaps %xmm0, (%ecx)
|
|
||||||
ret
|
|
||||||
|
|
||||||
It would be better to generate:
|
|
||||||
|
|
||||||
_test:
|
|
||||||
movl 8(%esp), %ecx
|
|
||||||
movaps (%ecx), %xmm0
|
|
||||||
xor %eax, %eax
|
|
||||||
pinsrw $6, %eax, %xmm0
|
|
||||||
pinsrw $7, %eax, %xmm0
|
|
||||||
movaps %xmm0, (%ecx)
|
|
||||||
ret
|
|
||||||
|
|
||||||
or use pxor (to make a zero vector) and shuffle (to insert it).
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
Bad codegen:
|
Bad codegen:
|
||||||
|
|
||||||
char foo(int x) { return x; }
|
char foo(int x) { return x; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user