mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-23 12:45:47 +00:00
transfer some notes from my email to somewhere useful.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@25361 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
84b26b600d
commit
1db4b4f5c4
@ -225,3 +225,29 @@ struct foo { double X, Y; };
|
||||
void xxx(struct foo F);
|
||||
void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
|
||||
|
||||
===-------------------------------------------------------------------------===
|
||||
|
||||
For this:
|
||||
|
||||
int h(int i, int j, int k) {
|
||||
return (i==0||j==0||k == 0);
|
||||
}
|
||||
|
||||
We currently emit this:
|
||||
|
||||
_h:
|
||||
cntlzw r2, r3
|
||||
cntlzw r3, r4
|
||||
cntlzw r4, r5
|
||||
srwi r2, r2, 5
|
||||
srwi r3, r3, 5
|
||||
srwi r4, r4, 5
|
||||
or r2, r3, r2
|
||||
or r3, r2, r4
|
||||
blr
|
||||
|
||||
The ctlz/shift instructions are created by the isel, so the dag combiner doesn't
|
||||
have a chance to pull the shifts through the or's (eliminating two
|
||||
instructions). SETCC nodes should be custom lowered in this case, not expanded
|
||||
by the isel.
|
||||
|
||||
|
@ -54,6 +54,10 @@ fxch -> fucomi
|
||||
fucomi jl X
|
||||
jg X
|
||||
|
||||
Ideas:
|
||||
http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
|
||||
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Improvements to the multiply -> shift/add algorithm:
|
||||
@ -121,3 +125,44 @@ Model X86 EFLAGS as a real register to avoid redundant cmp / test. e.g.
|
||||
setg %al
|
||||
testb %al, %al # unnecessary
|
||||
jne .BB7
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Count leading zeros and count trailing zeros:
|
||||
|
||||
int clz(int X) { return __builtin_clz(X); }
|
||||
int ctz(int X) { return __builtin_ctz(X); }
|
||||
|
||||
$ gcc t.c -S -o - -O3 -fomit-frame-pointer -masm=intel
|
||||
clz:
|
||||
bsr %eax, DWORD PTR [%esp+4]
|
||||
xor %eax, 31
|
||||
ret
|
||||
ctz:
|
||||
bsf %eax, DWORD PTR [%esp+4]
|
||||
ret
|
||||
|
||||
However, check that these are defined for 0 and 32. Our intrinsics are; GCC's
|
||||
aren't.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Use push/pop instructions in prolog/epilog sequences instead of stores off
|
||||
ESP (certain code size win, perf win on some [which?] processors).
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Only use inc/neg/not instructions on processors where they are faster than
|
||||
add/sub/xor. They are slower on the P4 due to only updating some processor
|
||||
flags.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Open code rint,floor,ceil,trunc:
|
||||
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
|
||||
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user