mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-10 14:36:12 +00:00
Add a note about a serious "compare CSE" issue that is nontrivial to get right,
but which is responsible for us doing really bad things to 256.bzip2.

llvm-svn: 126126
This commit is contained in:
parent
83c60ae907
commit
e7dc7e1e5b
@ -1878,3 +1878,72 @@ _add32carry:
|
|||||||
ret
|
ret
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
The hot loop of 256.bzip2 contains code that looks a bit like this:
|
||||||
|
|
||||||
|
int foo(char *P, char *Q, int x, int y) {
|
||||||
|
if (P[0] != Q[0])
|
||||||
|
return P[0] < Q[0];
|
||||||
|
if (P[1] != Q[1])
|
||||||
|
return P[1] < Q[1];
|
||||||
|
if (P[2] != Q[2])
|
||||||
|
return P[2] < Q[2];
|
||||||
|
return P[3] < Q[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
In the real code, we get a lot more wrong than this.  However, even for this
reduced example we generate:
|
||||||
|
|
||||||
|
_foo: ## @foo
|
||||||
|
## BB#0: ## %entry
|
||||||
|
movb (%rsi), %al
|
||||||
|
movb (%rdi), %cl
|
||||||
|
cmpb %al, %cl
|
||||||
|
je LBB0_2
|
||||||
|
LBB0_1: ## %if.then
|
||||||
|
cmpb %al, %cl
|
||||||
|
jmp LBB0_5
|
||||||
|
LBB0_2: ## %if.end
|
||||||
|
movb 1(%rsi), %al
|
||||||
|
movb 1(%rdi), %cl
|
||||||
|
cmpb %al, %cl
|
||||||
|
jne LBB0_1
|
||||||
|
## BB#3: ## %if.end38
|
||||||
|
movb 2(%rsi), %al
|
||||||
|
movb 2(%rdi), %cl
|
||||||
|
cmpb %al, %cl
|
||||||
|
jne LBB0_1
|
||||||
|
## BB#4: ## %if.end60
|
||||||
|
movb 3(%rdi), %al
|
||||||
|
cmpb 3(%rsi), %al
|
||||||
|
LBB0_5: ## %if.end60
|
||||||
|
setl %al
|
||||||
|
movzbl %al, %eax
|
||||||
|
ret
|
||||||
|
|
||||||
|
Note that we generate jumps to LBB0_1, which does a redundant compare: every
path into LBB0_1 has just executed the same "cmpb %al, %cl", so the flags that
setl needs are already set.  The redundant compare also forces the register
values to be live, which prevents folding one of the loads into the compare.
In contrast, GCC 4.2 produces:
|
||||||
|
|
||||||
|
_foo:
|
||||||
|
movzbl (%rsi), %eax
|
||||||
|
cmpb %al, (%rdi)
|
||||||
|
jne L10
|
||||||
|
L12:
|
||||||
|
movzbl 1(%rsi), %eax
|
||||||
|
cmpb %al, 1(%rdi)
|
||||||
|
jne L10
|
||||||
|
movzbl 2(%rsi), %eax
|
||||||
|
cmpb %al, 2(%rdi)
|
||||||
|
jne L10
|
||||||
|
movzbl 3(%rdi), %eax
|
||||||
|
cmpb 3(%rsi), %al
|
||||||
|
L10:
|
||||||
|
setl %al
|
||||||
|
movzbl %al, %eax
|
||||||
|
ret
|
||||||
|
|
||||||
|
which is "perfect".
|
||||||
|
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user