mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-02 08:46:37 +00:00
Update the readme to remove duplicate information and clarify the loop
problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@29468 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8f74680c78
commit
83a6d49102
@ -198,7 +198,7 @@ on some processors (which ones?), it is more efficient to do this:
|
||||
|
||||
_test:
|
||||
movl 8(%esp), %ebx
|
||||
xor %eax, %eax
|
||||
xor %eax, %eax
|
||||
cmpl %ebx, 4(%esp)
|
||||
setl %al
|
||||
ret
|
||||
@ -340,22 +340,6 @@ Enable X86InstrInfo::convertToThreeAddress().
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Investigate whether it is better to codegen the following
|
||||
|
||||
%tmp.1 = mul int %x, 9
|
||||
to
|
||||
|
||||
movl 4(%esp), %eax
|
||||
leal (%eax,%eax,8), %eax
|
||||
|
||||
as opposed to what llc is currently generating:
|
||||
|
||||
imull $9, 4(%esp), %eax
|
||||
|
||||
Currently the load folding imull has a higher complexity than the LEA32 pattern.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl
|
||||
We should leave these as libcalls for everything over a much lower threshold,
|
||||
since libc is hand tuned for medium and large mem ops (avoiding RFO for large
|
||||
@ -671,35 +655,26 @@ We should handle __attribute__ ((__visibility__ ("hidden"))).
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Consider:
|
||||
int foo(int *a, int t) {
|
||||
int x;
|
||||
for (x=0; x<40; ++x)
|
||||
t = t + a[x] + x;
|
||||
  return t;
}
|
||||
int %foo(int* %a, int %t) {
|
||||
entry:
|
||||
br label %cond_true
|
||||
|
||||
cond_true: ; preds = %cond_true, %entry
|
||||
%x.0.0 = phi int [ 0, %entry ], [ %tmp9, %cond_true ] ; <int> [#uses=3]
|
||||
%t_addr.0.0 = phi int [ %t, %entry ], [ %tmp7, %cond_true ] ; <int> [#uses=1]
|
||||
%tmp2 = getelementptr int* %a, int %x.0.0 ; <int*> [#uses=1]
|
||||
%tmp3 = load int* %tmp2 ; <int> [#uses=1]
|
||||
%tmp5 = add int %t_addr.0.0, %x.0.0 ; <int> [#uses=1]
|
||||
%tmp7 = add int %tmp5, %tmp3 ; <int> [#uses=2]
|
||||
%tmp9 = add int %x.0.0, 1 ; <int> [#uses=2]
|
||||
%tmp = setgt int %tmp9, 39 ; <bool> [#uses=1]
|
||||
br bool %tmp, label %bb12, label %cond_true
|
||||
|
||||
bb12: ; preds = %cond_true
|
||||
ret int %tmp7
|
||||
}
|
||||
|
||||
We generate:
|
||||
LBB1_1: #cond_true
|
||||
movl %ecx, %esi
|
||||
movl (%edx,%eax,4), %edi
|
||||
movl %esi, %ecx
|
||||
addl %edi, %ecx
|
||||
addl %eax, %ecx
|
||||
incl %eax
|
||||
cmpl $40, %eax
|
||||
jne LBB1_1 #cond_true
|
||||
|
||||
GCC generates:
|
||||
|
||||
L2:
|
||||
addl (%ecx,%edx,4), %eax
|
||||
addl %edx, %eax
|
||||
addl $1, %edx
|
||||
cmpl $40, %edx
|
||||
jne L2
|
||||
|
||||
Smells like a register coalescing/reassociation issue.
|
||||
This loop is pessimized by -loop-reduce and -indvars
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user