Revert r304824 "Fix PR23384 (part 3 of 3)"
This seems to be interacting badly with ASan somehow, causing false reports of
heap-buffer overflows: PR33514.

> Summary:
> The patch makes instruction count the highest priority for
> LSR solution for X86 (previously registers had highest priority).
>
> Reviewers: qcolombet
>
> Differential Revision: http://reviews.llvm.org/D30562
>
> From: Evgeny Stupachenko <evstupac@gmail.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305720 91177308-0d34-0410-b5e6-96231b3b80d8
parent 403309b018
commit 08030e7683
@@ -2178,17 +2178,6 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
   return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
 }
 
-bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
-                               TargetTransformInfo::LSRCost &C2) {
-  // X86 specific here are "instruction number 1st priority".
-  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
-                  C1.NumIVMuls, C1.NumBaseAdds,
-                  C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
-         std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
-                  C2.NumIVMuls, C2.NumBaseAdds,
-                  C2.ScaleCost, C2.ImmCost, C2.SetupCost);
-}
-
 bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
   Type *ScalarTy = DataTy->getScalarType();
   int DataWidth = isa<PointerType>(ScalarTy) ?
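The override removed above ranks LSR candidate solutions lexicographically, with
instruction count as the most significant key. A minimal standalone sketch of
that comparison semantics (the Cost struct below is a stand-in for illustration,
not LLVM's actual TargetTransformInfo::LSRCost):

  #include <cassert>
  #include <tuple>

  // Stand-in cost record; the field order mirrors the priority order used
  // in the reverted X86 override.
  struct Cost {
    unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds,
             ScaleCost, ImmCost, SetupCost;
  };

  // std::tie builds tuples of references, and tuple operator< compares
  // element by element: Insns decides first, then NumRegs, and so on.
  static bool isLSRCostLess(const Cost &C1, const Cost &C2) {
    return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                    C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                    C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  int main() {
    Cost A{3, 9, 0, 0, 0, 0, 0, 0}; // fewer instructions, more registers
    Cost B{4, 2, 0, 0, 0, 0, 0, 0}; // more instructions, fewer registers
    assert(isLSRCostLess(A, B));    // instruction count decides first
  }

Per the commit message, the default comparison gives register count the highest
priority; dropping this override is what restores that behavior for X86.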
@@ -101,8 +101,6 @@ public:
   int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
   int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                     Type *Ty);
-  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
-                     TargetTransformInfo::LSRCost &C2);
   bool isLegalMaskedLoad(Type *DataType);
   bool isLegalMaskedStore(Type *DataType);
   bool isLegalMaskedGather(Type *DataType);
@@ -131,7 +131,7 @@ static cl::opt<bool> EnablePhiElim(
 
 // The flag adds instruction count to solutions cost comparision.
 static cl::opt<bool> InsnsCost(
-  "lsr-insns-cost", cl::Hidden, cl::init(true),
+  "lsr-insns-cost", cl::Hidden, cl::init(false),
   cl::desc("Add instruction count to a LSR cost model"));
 
 // Flag to choose how to narrow complex lsr solution
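With the default back to false, instruction count no longer participates in the
LSR cost comparison unless the flag is passed explicitly. The lsr-insns tests
further down pin both behaviors through their RUN lines; for a file like the
hypothetical test.ll:

  opt < test.ll -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S   # instruction-count priority
  opt < test.ll -loop-reduce -mtriple=x86_64 -S                   # default: register priority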
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats 2>&1 | \
-; RUN: grep "asm-printer" | grep 33
+; RUN: grep "asm-printer" | grep 35
 
 target datalayout = "e-p:32:32"
 define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -1,8 +1,6 @@
 ; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
-; CHECK: addl ({{%[a-z]+}},[[reg:%[a-z]+]],4)
-; CHECK-NEXT: movl
-; CHECK-NEXT: addl 4({{%[a-z]+}},[[reg:%[a-z]+]],4)
-; CHECK-NEXT: incl
+; CHECK: addl ([[reg:%[a-z]+]])
+; CHECK-NEXT: addl $4, [[reg]]
 
 ; Test for the FixupLEAs pre-emit pass.
 ; An LEA should NOT be substituted for the ADD instruction
@@ -22,7 +20,7 @@
 ; return sum;
 ;}
 
-define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %k, i32* nocapture %l, i32* nocapture %m, i32* nocapture %array2) #0 {
+define i32 @test(i32 %n, i32* nocapture %array, i32* nocapture %m, i32* nocapture %array2) #0 {
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body.lr.ph, label %for.end
@@ -37,9 +35,6 @@ for.body: ; preds = %for.body, %for.body
   %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
   %inc1 = add nsw i32 %j.09, 1
   %arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09
-  store i32 %0, i32* %m, align 4
-  store i32 %sum.010, i32* %m, align 4
-  store i32 %0, i32* %m, align 4
   %1 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %1
   store i32 %add, i32* %m, align 4
@@ -1,10 +1,16 @@
 ; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
 
 define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
+; ATOM: foo
+; ATOM: addl
+; ATOM: addl
+; ATOM: leal
+
 ; CHECK: foo
-; CHECK: incl
+; CHECK: addl
+; CHECK: addl
 ; CHECK: addl
 
 entry:
 %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1]
@@ -3,8 +3,10 @@
 ; Check no spills to the same stack slot after hoisting.
 ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
 ; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
+; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET3:-?[0-9]*]](%rsp)
 ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
 ; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
+; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET3]](%rsp)
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -4,19 +4,16 @@
 ; By starting the IV at -64 instead of 0, a cmp is eliminated,
 ; as the flags from the add can be used directly.
 
-; STATIC: movl $-64, [[EAX:%e..]]
+; STATIC: movl $-64, [[ECX:%e..]]
 
-; STATIC: movl %{{.+}}, _state+76([[EAX]])
-; STATIC: addl $16, [[EAX]]
+; STATIC: movl [[EAX:%e..]], _state+76([[ECX]])
+; STATIC: addl $16, [[ECX]]
 ; STATIC: jne
 
-; The same for PIC mode.
-
-; PIC: movl $-64, [[EAX:%e..]]
-
-; PIC: movl %{{.+}}, 76(%{{.+}},[[EAX]])
-; PIC: addl $16, [[EAX]]
+; In PIC mode the symbol can't be folded, so the change-compare-stride
+; trick applies.
 ; PIC: jne
+; PIC: cmpl $64
 
 @state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
 @S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@@ -9,17 +9,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; SSE2: # BB#0: # %entry
 ; SSE2-NEXT: movl %edx, %eax
 ; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: xorl %ecx, %ecx
 ; SSE2-NEXT: pxor %xmm1, %xmm1
 ; SSE2-NEXT: .p2align 4, 0x90
 ; SSE2-NEXT: .LBB0_1: # %vector.body
 ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2
-; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3
+; SSE2-NEXT: movdqu (%rdi), %xmm2
+; SSE2-NEXT: movdqu (%rsi), %xmm3
 ; SSE2-NEXT: pmaddwd %xmm2, %xmm3
 ; SSE2-NEXT: paddd %xmm3, %xmm1
-; SSE2-NEXT: addq $8, %rcx
-; SSE2-NEXT: cmpq %rcx, %rax
+; SSE2-NEXT: addq $16, %rsi
+; SSE2-NEXT: addq $16, %rdi
+; SSE2-NEXT: addq $-8, %rax
 ; SSE2-NEXT: jne .LBB0_1
 ; SSE2-NEXT: # BB#2: # %middle.block
 ; SSE2-NEXT: paddd %xmm0, %xmm1
@@ -34,17 +34,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2: # BB#0: # %entry
 ; AVX2-NEXT: movl %edx, %eax
 ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: xorl %ecx, %ecx
 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT: .p2align 4, 0x90
 ; AVX2-NEXT: .LBB0_1: # %vector.body
 ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
-; AVX2-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
+; AVX2-NEXT: vmovdqu (%rsi), %xmm2
+; AVX2-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
 ; AVX2-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: addq $8, %rcx
-; AVX2-NEXT: cmpq %rcx, %rax
+; AVX2-NEXT: addq $16, %rsi
+; AVX2-NEXT: addq $16, %rdi
+; AVX2-NEXT: addq $-8, %rax
 ; AVX2-NEXT: jne .LBB0_1
 ; AVX2-NEXT: # BB#2: # %middle.block
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -60,17 +60,17 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512: # BB#0: # %entry
 ; AVX512-NEXT: movl %edx, %eax
 ; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: xorl %ecx, %ecx
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: .p2align 4, 0x90
 ; AVX512-NEXT: .LBB0_1: # %vector.body
 ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vmovdqu (%rsi,%rcx,2), %xmm2
-; AVX512-NEXT: vpmaddwd (%rdi,%rcx,2), %xmm2, %xmm2
+; AVX512-NEXT: vmovdqu (%rsi), %xmm2
+; AVX512-NEXT: vpmaddwd (%rdi), %xmm2, %xmm2
 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
 ; AVX512-NEXT: vpaddd %ymm0, %ymm2, %ymm0
-; AVX512-NEXT: addq $8, %rcx
-; AVX512-NEXT: cmpq %rcx, %rax
+; AVX512-NEXT: addq $16, %rsi
+; AVX512-NEXT: addq $16, %rdi
+; AVX512-NEXT: addq $-8, %rax
 ; AVX512-NEXT: jne .LBB0_1
 ; AVX512-NEXT: # BB#2: # %middle.block
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -118,13 +118,12 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; SSE2: # BB#0: # %entry
 ; SSE2-NEXT: movl %edx, %eax
 ; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: xorl %ecx, %ecx
 ; SSE2-NEXT: pxor %xmm1, %xmm1
 ; SSE2-NEXT: .p2align 4, 0x90
 ; SSE2-NEXT: .LBB1_1: # %vector.body
 ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1
-; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2
-; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3
+; SSE2-NEXT: movdqu (%rdi), %xmm2
+; SSE2-NEXT: movdqu (%rsi), %xmm3
 ; SSE2-NEXT: movdqa %xmm3, %xmm4
 ; SSE2-NEXT: pmulhuw %xmm2, %xmm4
 ; SSE2-NEXT: pmullw %xmm2, %xmm3
@@ -133,8 +132,9 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; SSE2-NEXT: paddd %xmm2, %xmm0
 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
 ; SSE2-NEXT: paddd %xmm3, %xmm1
-; SSE2-NEXT: addq $8, %rcx
-; SSE2-NEXT: cmpq %rcx, %rax
+; SSE2-NEXT: addq $16, %rsi
+; SSE2-NEXT: addq $16, %rdi
+; SSE2-NEXT: addq $-8, %rax
 ; SSE2-NEXT: jne .LBB1_1
 ; SSE2-NEXT: # BB#2: # %middle.block
 ; SSE2-NEXT: paddd %xmm1, %xmm0
@@ -149,7 +149,6 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2: # BB#0: # %entry
 ; AVX2-NEXT: movl %edx, %eax
 ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: xorl %ecx, %ecx
 ; AVX2-NEXT: .p2align 4, 0x90
 ; AVX2-NEXT: .LBB1_1: # %vector.body
 ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
@@ -157,8 +156,9 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1
 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: addq $8, %rcx
-; AVX2-NEXT: cmpq %rcx, %rax
+; AVX2-NEXT: addq $16, %rsi
+; AVX2-NEXT: addq $16, %rdi
+; AVX2-NEXT: addq $-8, %rax
 ; AVX2-NEXT: jne .LBB1_1
 ; AVX2-NEXT: # BB#2: # %middle.block
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -174,7 +174,6 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512: # BB#0: # %entry
 ; AVX512-NEXT: movl %edx, %eax
 ; AVX512-NEXT: vpxor %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: xorl %ecx, %ecx
 ; AVX512-NEXT: .p2align 4, 0x90
 ; AVX512-NEXT: .LBB1_1: # %vector.body
 ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
@@ -182,8 +181,9 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX512-NEXT: vpmulld %ymm1, %ymm2, %ymm1
 ; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: addq $8, %rcx
-; AVX512-NEXT: cmpq %rcx, %rax
+; AVX512-NEXT: addq $16, %rsi
+; AVX512-NEXT: addq $16, %rdi
+; AVX512-NEXT: addq $-8, %rax
 ; AVX512-NEXT: jne .LBB1_1
 ; AVX512-NEXT: # BB#2: # %middle.block
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -231,7 +231,6 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; SSE2: # BB#0: # %entry
 ; SSE2-NEXT: movl %edx, %eax
 ; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: xorl %ecx, %ecx
 ; SSE2-NEXT: pxor %xmm1, %xmm1
 ; SSE2-NEXT: pxor %xmm3, %xmm3
 ; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -264,8 +263,9 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
 ; SSE2-NEXT: psrad $16, %xmm4
 ; SSE2-NEXT: paddd %xmm4, %xmm2
-; SSE2-NEXT: addq $16, %rcx
-; SSE2-NEXT: cmpq %rcx, %rax
+; SSE2-NEXT: addq $16, %rsi
+; SSE2-NEXT: addq $16, %rdi
+; SSE2-NEXT: addq $-16, %rax
 ; SSE2-NEXT: jne .LBB2_1
 ; SSE2-NEXT: # BB#2: # %middle.block
 ; SSE2-NEXT: paddd %xmm3, %xmm0
@@ -282,17 +282,17 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; AVX2: # BB#0: # %entry
 ; AVX2-NEXT: movl %edx, %eax
 ; AVX2-NEXT: vpxor %ymm0, %ymm0, %ymm0
-; AVX2-NEXT: xorl %ecx, %ecx
 ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT: .p2align 4, 0x90
 ; AVX2-NEXT: .LBB2_1: # %vector.body
 ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX2-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
-; AVX2-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
+; AVX2-NEXT: vpmovsxbw (%rdi), %ymm2
+; AVX2-NEXT: vpmovsxbw (%rsi), %ymm3
 ; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: addq $16, %rcx
-; AVX2-NEXT: cmpq %rcx, %rax
+; AVX2-NEXT: addq $16, %rsi
+; AVX2-NEXT: addq $16, %rdi
+; AVX2-NEXT: addq $-16, %rax
 ; AVX2-NEXT: jne .LBB2_1
 ; AVX2-NEXT: # BB#2: # %middle.block
 ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0
@@ -309,18 +309,18 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; AVX512: # BB#0: # %entry
 ; AVX512-NEXT: movl %edx, %eax
 ; AVX512-NEXT: vpxord %zmm0, %zmm0, %zmm0
-; AVX512-NEXT: xorl %ecx, %ecx
 ; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; AVX512-NEXT: .p2align 4, 0x90
 ; AVX512-NEXT: .LBB2_1: # %vector.body
 ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1
-; AVX512-NEXT: vpmovsxbw (%rdi,%rcx), %ymm2
-; AVX512-NEXT: vpmovsxbw (%rsi,%rcx), %ymm3
+; AVX512-NEXT: vpmovsxbw (%rdi), %ymm2
+; AVX512-NEXT: vpmovsxbw (%rsi), %ymm3
 ; AVX512-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
 ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2
 ; AVX512-NEXT: vpaddd %zmm0, %zmm2, %zmm0
-; AVX512-NEXT: addq $16, %rcx
-; AVX512-NEXT: cmpq %rcx, %rax
+; AVX512-NEXT: addq $16, %rsi
+; AVX512-NEXT: addq $16, %rdi
+; AVX512-NEXT: addq $-16, %rax
 ; AVX512-NEXT: jne .LBB2_1
 ; AVX512-NEXT: # BB#2: # %middle.block
 ; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
@@ -5,7 +5,7 @@
 
 ; CHECK-LABEL: count_up
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $8
+; CHECK: incq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_up(double* %d, i64 %n) nounwind {
@@ -38,7 +38,7 @@ return:
 
 ; CHECK-LABEL: count_down
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8
+; CHECK: addq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_down(double* %d, i64 %n) nounwind {
@@ -71,7 +71,7 @@ return:
 
 ; CHECK-LABEL: count_up_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $8
+; CHECK: incq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_up_signed(double* %d, i64 %n) nounwind {
@@ -106,7 +106,7 @@ return:
 
 ; CHECK-LABEL: count_down_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8
+; CHECK: addq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @count_down_signed(double* %d, i64 %n) nounwind {
@@ -141,7 +141,7 @@ return:
 
 ; CHECK-LABEL: another_count_up
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $8
+; CHECK: addq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_up(double* %d, i64 %n) nounwind {
@@ -174,7 +174,7 @@ return:
 
 ; CHECK-LABEL: another_count_down
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8
+; CHECK: addq $-8,
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_down(double* %d, i64 %n) nounwind {
@@ -207,7 +207,7 @@ return:
 
 ; CHECK-LABEL: another_count_up_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $8
+; CHECK: addq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_up_signed(double* %d, i64 %n) nounwind {
@@ -242,7 +242,7 @@ return:
 
 ; CHECK-LABEL: another_count_down_signed
 ; CHECK-NOT: {{and|movz|sar|shl}}
-; CHECK: addq $-8
+; CHECK: decq
 ; CHECK-NOT: {{and|movz|sar|shl}}
 ; CHECK: jne
 define void @another_count_down_signed(double* %d, i64 %n) nounwind {
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -lsr-insns-cost=false -S < %s | FileCheck %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -loop-reduce -S < %s | FileCheck %s
 ; Check LSR formula canonicalization will put loop invariant regs before
 ; induction variable of current loop, so exprs involving loop invariant regs
 ; can be promoted outside of current loop.
@@ -163,7 +163,7 @@ for.end: ; preds = %for.body, %entry
 ; X64: movzbl -3(
 ;
 ; X32: foldedidx:
-; X32: movzbl 400(
+; X32: movzbl -3(
 define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
 entry:
 br label %for.body
@@ -275,7 +275,7 @@ exit:
 ;
 ; X32: @testCmpZero
 ; X32: %for.body82.us
-; X32: cmp
+; X32: dec
 ; X32: jne
 define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
 entry:
@@ -22,16 +22,16 @@ target triple = "x86_64-apple-macosx"
 ; CHECK-LABEL: @test2
 ; CHECK-LABEL: test2.loop:
 ; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ 1, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, -1
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
 ; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
 ;
 ; CHECK-LABEL: for.end:
-; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next, 0
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
 ; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
-; CHECK: %0 = sub i32 0, %sub.us
-; CHECK: %1 = sub i32 %0, %lsr.iv.next
-; CHECK: %sext.us = mul i32 %lsr.iv.next2, %1
+; CHECK: %1 = sub i32 0, %sub.us
+; CHECK: %2 = add i32 %1, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
 ; CHECK: %f = ashr i32 %sext.us, 24
 ; CHECK: ret i32 %f
 define i32 @test2() {
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
 ; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
 
 ; OPT test checks that LSR optimize compare for static counter to compare with 0.
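The comment above names the transformation the INSN prefix checks for: with
instruction-count priority, LSR turns a compare against a static trip count
into a compare with 0 by running the induction variable down. A hand-written
sketch of that shape (illustration only, not the test's actual IR):

  ; Input-style loop, counting up to a static bound:
  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    %i.next = add nuw nsw i64 %i, 1
    %done = icmp eq i64 %i.next, 1024
    br i1 %done, label %exit, label %loop

  ; Shape preferred under -lsr-insns-cost: the IV counts down, so the exit
  ; test compares with 0 and can reuse the flags of the decrement:
  loop:
    %lsr.iv = phi i64 [ 1024, %entry ], [ %lsr.iv.next, %loop ]
    %lsr.iv.next = add i64 %lsr.iv, -1
    %done = icmp eq i64 %lsr.iv.next, 0
    br i1 %done, label %exit, label %loop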
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
-; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
+; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
 ; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
 
 ; OPT checks that LSR prefers less instructions to less registers.
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -loop-reduce -S < %s | FileCheck %s
 ; Check when we use an outerloop induction variable inside of an innerloop
 ; induction value expr, LSR can still choose to use single induction variable
|
||||
for.body2.preheader: ; preds = %for.body
|
||||
br label %for.body2
|
||||
|
||||
; Check LSR only generates two induction variables for for.body2 one for compare and
|
||||
; one to shared by multiple array accesses.
|
||||
; Check LSR only generates one induction variable for for.body2 and the induction
|
||||
; variable will be shared by multiple array accesses.
|
||||
; CHECK: for.body2:
|
||||
; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
|
||||
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
|
||||
; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ 0, %for.body2.preheader ]
|
||||
; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
|
||||
; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
|
||||
; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
|
||||
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
|
||||
; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
|
||||
; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* %maxarray, i64 [[LSR]]
|
||||
; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* [[SCEVGEP1]], i64 1
|
||||
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
|
||||
; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
|
||||
; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
|
||||
; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
|
||||
; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
|
||||
; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
|
||||
; CHECK: {{.*}} = load i8, i8* [[SCEVGEP3]], align 1
|
||||
; CHECK: [[SCEVGEP4:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSR]]
|
||||
; CHECK: store i8 {{.*}}, i8* [[SCEVGEP4]], align 1
|
||||
; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
|
||||
|
||||
for.body2: ; preds = %for.body2.preheader, %for.body2
|
||||
|