mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-05 03:36:43 +00:00
[MIScheduler] Slightly better handling of constrainLocalCopy when both source and dest are local
This fixes PR21792. Differential Revision: http://reviews.llvm.org/D6823 llvm-svn: 226433
This commit is contained in:
parent
89328f88bf
commit
83ab484af5
@ -1434,12 +1434,15 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
|
||||
// Check if either the dest or source is local. If it's live across a back
|
||||
// edge, it's not local. Note that if both vregs are live across the back
|
||||
// edge, we cannot successfully constrain the copy without cyclic scheduling.
|
||||
unsigned LocalReg = DstReg;
|
||||
unsigned GlobalReg = SrcReg;
|
||||
// If both the copy's source and dest are local live intervals, then we
|
||||
// should treat the dest as the global for the purpose of adding
|
||||
// constraints. This adds edges from source's other uses to the copy.
|
||||
unsigned LocalReg = SrcReg;
|
||||
unsigned GlobalReg = DstReg;
|
||||
LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
|
||||
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
|
||||
LocalReg = SrcReg;
|
||||
GlobalReg = DstReg;
|
||||
LocalReg = DstReg;
|
||||
GlobalReg = SrcReg;
|
||||
LocalLI = &LIS->getInterval(LocalReg);
|
||||
if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
|
||||
return;
|
||||
|
41
test/CodeGen/X86/pr21792.ll
Normal file
41
test/CodeGen/X86/pr21792.ll
Normal file
@ -0,0 +1,41 @@
|
||||
; RUN: llc -mtriple=x86_64-linux -mcpu=corei7 < %s | FileCheck %s
|
||||
; This fixes a missing case in the MI scheduler's constrainLocalCopy exposed by
|
||||
; PR21792
|
||||
|
||||
@stuff = external constant [256 x double], align 16
|
||||
|
||||
define void @func(<4 x float> %vx) {
|
||||
entry:
|
||||
%tmp2 = bitcast <4 x float> %vx to <2 x i64>
|
||||
%and.i = and <2 x i64> %tmp2, <i64 8727373547504, i64 8727373547504>
|
||||
%tmp3 = bitcast <2 x i64> %and.i to <4 x i32>
|
||||
%index.sroa.0.0.vec.extract = extractelement <4 x i32> %tmp3, i32 0
|
||||
%idx.ext = sext i32 %index.sroa.0.0.vec.extract to i64
|
||||
%add.ptr = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext
|
||||
%tmp4 = bitcast i8* %add.ptr to double*
|
||||
%index.sroa.0.4.vec.extract = extractelement <4 x i32> %tmp3, i32 1
|
||||
%idx.ext5 = sext i32 %index.sroa.0.4.vec.extract to i64
|
||||
%add.ptr6 = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext5
|
||||
%tmp5 = bitcast i8* %add.ptr6 to double*
|
||||
%index.sroa.0.8.vec.extract = extractelement <4 x i32> %tmp3, i32 2
|
||||
%idx.ext14 = sext i32 %index.sroa.0.8.vec.extract to i64
|
||||
%add.ptr15 = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext14
|
||||
%tmp6 = bitcast i8* %add.ptr15 to double*
|
||||
%index.sroa.0.12.vec.extract = extractelement <4 x i32> %tmp3, i32 3
|
||||
%idx.ext19 = sext i32 %index.sroa.0.12.vec.extract to i64
|
||||
%add.ptr20 = getelementptr inbounds i8* bitcast ([256 x double]* @stuff to i8*), i64 %idx.ext19
|
||||
%tmp7 = bitcast i8* %add.ptr20 to double*
|
||||
%add.ptr46 = getelementptr inbounds i8* bitcast (double* getelementptr inbounds ([256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext
|
||||
%tmp16 = bitcast i8* %add.ptr46 to double*
|
||||
%add.ptr51 = getelementptr inbounds i8* bitcast (double* getelementptr inbounds ([256 x double]* @stuff, i64 0, i64 1) to i8*), i64 %idx.ext5
|
||||
%tmp17 = bitcast i8* %add.ptr51 to double*
|
||||
call void @toto(double* %tmp4, double* %tmp5, double* %tmp6, double* %tmp7, double* %tmp16, double* %tmp17)
|
||||
ret void
|
||||
; CHECK-LABEL: func:
|
||||
; CHECK: pextrq $1, %xmm0,
|
||||
; CHECK-NEXT: movd %xmm0, %r[[AX:..]]
|
||||
; CHECK-NEXT: movslq %e[[AX]],
|
||||
; CHECK-NEXT: sarq $32, %r[[AX]]
|
||||
}
|
||||
|
||||
declare void @toto(double*, double*, double*, double*, double*, double*)
|
@ -841,19 +841,18 @@ define <4 x i32> @test8(<4 x i32> %a) {
|
||||
; SSE-LABEL: test8:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
|
||||
; SSE-NEXT: movdqa %xmm2, %xmm1
|
||||
; SSE-NEXT: psrad $31, %xmm1
|
||||
; SSE-NEXT: pand %xmm0, %xmm1
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm3
|
||||
; SSE-NEXT: psrad $31, %xmm3
|
||||
; SSE-NEXT: pand %xmm2, %xmm3
|
||||
; SSE-NEXT: paddd %xmm1, %xmm3
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: pmuludq %xmm2, %xmm1
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
|
||||
; SSE-NEXT: pmuludq %xmm2, %xmm4
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
|
||||
; SSE-NEXT: psrad $31, %xmm2
|
||||
; SSE-NEXT: pand %xmm0, %xmm2
|
||||
; SSE-NEXT: paddd %xmm2, %xmm3
|
||||
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
|
||||
; SSE-NEXT: pmuludq %xmm4, %xmm2
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
|
||||
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
|
||||
; SSE-NEXT: psubd %xmm3, %xmm1
|
||||
; SSE-NEXT: paddd %xmm0, %xmm1
|
||||
|
@ -76,10 +76,9 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
|
||||
; CHECK: pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
|
||||
; CHECK-NEXT: pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
|
||||
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
|
||||
; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
|
||||
; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
|
||||
; CHECK-NEXT: pmovzxdq %[[R0]], %[[R0]]
|
||||
; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
|
||||
; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
|
||||
; CHECK-NEXT: pmovzxdq %[[R1]], %[[R0]]
|
||||
; CHECK-NEXT: movd %[[R0]], (%{{.*}})
|
||||
%a = load %i16vec3* %ap, align 16
|
||||
%b = load %i16vec3* %bp, align 16
|
||||
@ -144,10 +143,9 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
|
||||
; CHECK: pmovzxbd (%{{.*}}), %[[R0:xmm[0-9]+]]
|
||||
; CHECK-NEXT: pmovzxbd (%{{.*}}), %[[R1:xmm[0-9]+]]
|
||||
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
|
||||
; CHECK-NEXT: movdqa %[[R1]], %[[R0]]
|
||||
; CHECK-NEXT: pshufb {{.*}}, %[[R0]]
|
||||
; CHECK-NEXT: pmovzxwq %[[R0]], %[[R0]]
|
||||
; CHECK-NEXT: pextrb $8, %[[R1]], 2(%{{.*}})
|
||||
; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
|
||||
; CHECK-NEXT: pmovzxwq %[[R1]], %[[R0]]
|
||||
; CHECK-NEXT: movd %[[R0]], %e[[R2:[abcd]]]x
|
||||
; CHECK-NEXT: movw %[[R2]]x, (%{{.*}})
|
||||
%a = load %i8vec3* %ap, align 16
|
||||
@ -206,10 +204,9 @@ define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pa
|
||||
; CHECK-NEXT: pinsrd $2, %e[[R0]]x, %[[X1]]
|
||||
; CHECK-NEXT: pextrd $3, %[[X0]], %e[[R0:[abcd]]]x
|
||||
; CHECK-NEXT: pinsrd $3, %e[[R0]]x, %[[X1]]
|
||||
; CHECK-NEXT: movdqa %[[X1]], %[[X2:xmm[0-9]+]]
|
||||
; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X2]]
|
||||
; CHECK-NEXT: pmovzxwq %[[X2]], %[[X3:xmm[0-9]+]]
|
||||
; CHECK-NEXT: pextrb $8, %[[X1]], 2(%{{.*}})
|
||||
; CHECK-NEXT: pshufb %[[SHUFFLE_MASK]], %[[X1]]
|
||||
; CHECK-NEXT: pmovzxwq %[[X1]], %[[X3:xmm[0-9]+]]
|
||||
; CHECK-NEXT: movd %[[X3]], %e[[R0:[abcd]]]x
|
||||
; CHECK-NEXT: movw %[[R0]]x, (%{{.*}})
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user