mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-11 14:16:59 +00:00
transfer some notes from my email to somewhere useful.
llvm-svn: 25361
This commit is contained in:
parent
8650cec8bb
commit
62ab456f6b
@ -225,3 +225,29 @@ struct foo { double X, Y; };
|
|||||||
void xxx(struct foo F);
|
void xxx(struct foo F);
|
||||||
void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
|
void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
|
||||||
|
|
||||||
|
===-------------------------------------------------------------------------===
|
||||||
|
|
||||||
|
For this:
|
||||||
|
|
||||||
|
int h(int i, int j, int k) {
|
||||||
|
return (i==0||j==0||k == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
We currently emit this:
|
||||||
|
|
||||||
|
_h:
|
||||||
|
cntlzw r2, r3
|
||||||
|
cntlzw r3, r4
|
||||||
|
cntlzw r4, r5
|
||||||
|
srwi r2, r2, 5
|
||||||
|
srwi r3, r3, 5
|
||||||
|
srwi r4, r4, 5
|
||||||
|
or r2, r3, r2
|
||||||
|
or r3, r2, r4
|
||||||
|
blr
|
||||||
|
|
||||||
|
The ctlz/shift instructions are created by the isel, so the dag combiner doesn't
|
||||||
|
have a chance to pull the shifts through the or's (eliminating two
|
||||||
|
instructions). SETCC nodes should be custom lowered in this case, not expanded
|
||||||
|
by the isel.
|
||||||
|
|
||||||
|
@ -54,6 +54,10 @@ fxch -> fucomi
|
|||||||
fucomi jl X
|
fucomi jl X
|
||||||
jg X
|
jg X
|
||||||
|
|
||||||
|
Ideas:
|
||||||
|
http://gcc.gnu.org/ml/gcc-patches/2004-11/msg02410.html
|
||||||
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
Improvements to the multiply -> shift/add algorithm:
|
Improvements to the multiply -> shift/add algorithm:
|
||||||
@ -121,3 +125,44 @@ Model X86 EFLAGS as a real register to avoid redundant cmp / test. e.g.
|
|||||||
setg %al
|
setg %al
|
||||||
testb %al, %al # unnecessary
|
testb %al, %al # unnecessary
|
||||||
jne .BB7
|
jne .BB7
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Count leading zeros and count trailing zeros:
|
||||||
|
|
||||||
|
int clz(int X) { return __builtin_clz(X); }
|
||||||
|
int ctz(int X) { return __builtin_ctz(X); }
|
||||||
|
|
||||||
|
$ gcc t.c -S -o - -O3 -fomit-frame-pointer -masm=intel
|
||||||
|
clz:
|
||||||
|
bsr %eax, DWORD PTR [%esp+4]
|
||||||
|
xor %eax, 31
|
||||||
|
ret
|
||||||
|
ctz:
|
||||||
|
bsf %eax, DWORD PTR [%esp+4]
|
||||||
|
ret
|
||||||
|
|
||||||
|
However, check that these are defined for 0 and 32. Our intrinsics are, GCC's
|
||||||
|
aren't.
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Use push/pop instructions in prolog/epilog sequences instead of stores off
|
||||||
|
ESP (certain code size win, perf win on some [which?] processors).
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Only use inc/neg/not instructions on processors where they are faster than
|
||||||
|
add/sub/xor. They are slower on the P4 due to only updating some processor
|
||||||
|
flags.
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Open code rint,floor,ceil,trunc:
|
||||||
|
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02006.html
|
||||||
|
http://gcc.gnu.org/ml/gcc-patches/2004-08/msg02011.html
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Combine: a = sin(x), b = cos(x) into a,b = sincos(x).
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user