mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-04-03 05:32:24 +00:00
[GlobalISel][Legalizer] Use ArtifactValueFinder first for unmerge combines before trying others.
This is motivated by a pathological compile-time issue during unmerge combining. We should be able to use the AVF to do simplification. However, AMDGPU has a lot of codegen changes which I'm not sure how to evaluate. Differential Revision: https://reviews.llvm.org/D109748
This commit is contained in:
parent
129cf33604
commit
cc65e08fe7
@ -821,6 +821,12 @@ public:
|
||||
|
||||
Builder.setInstrAndDebugLoc(MI);
|
||||
|
||||
ArtifactValueFinder Finder(MRI, Builder, LI);
|
||||
if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
|
||||
markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
|
||||
// %0:_(<4 x s16>) = G_FOO
|
||||
// %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
|
||||
@ -844,15 +850,9 @@ public:
|
||||
if (ActionStep.TypeIdx == 1)
|
||||
return false;
|
||||
break;
|
||||
default: {
|
||||
ArtifactValueFinder Finder(MRI, Builder, LI);
|
||||
if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
|
||||
markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
|
||||
|
||||
@ -883,16 +883,7 @@ public:
|
||||
ConvertOp, OpTy, DestTy)) {
|
||||
// We might have a chance to combine later by trying to combine
|
||||
// unmerge(cast) first
|
||||
if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs))
|
||||
return true;
|
||||
|
||||
// Try using the value finder.
|
||||
ArtifactValueFinder Finder(MRI, Builder, LI);
|
||||
if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
|
||||
markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
|
||||
}
|
||||
|
||||
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
|
||||
|
@ -241,8 +241,8 @@ body: |
|
||||
liveins: $x0
|
||||
; CHECK-LABEL: name: test_eve_v4p0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF
|
||||
; CHECK: %idx:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[DEF]](p0), [[DEF]](p0)
|
||||
; CHECK: %idx:_(s64) = G_CONSTANT i64 1
|
||||
; CHECK: %eve:_(p0) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x p0>), %idx(s64)
|
||||
; CHECK: $x0 = COPY %eve(p0)
|
||||
; CHECK: RET_ReallyLR
|
||||
|
@ -314,9 +314,9 @@ body: |
|
||||
; CHECK-LABEL: name: store_32xs8
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
|
||||
; CHECK: %ptr:_(p0) = COPY $x0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
|
||||
; CHECK: %ptr:_(p0) = COPY $x0
|
||||
; CHECK: G_STORE [[BUILD_VECTOR]](<16 x s8>), %ptr(p0) :: (store (<16 x s8>), align 32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
|
||||
@ -338,9 +338,9 @@ body: |
|
||||
; CHECK-LABEL: name: store_16xs16
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
|
||||
; CHECK: %ptr:_(p0) = COPY $x0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16), [[DEF]](s16)
|
||||
; CHECK: %ptr:_(p0) = COPY $x0
|
||||
; CHECK: G_STORE [[BUILD_VECTOR]](<8 x s16>), %ptr(p0) :: (store (<8 x s16>), align 32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
|
||||
@ -362,9 +362,9 @@ body: |
|
||||
; CHECK-LABEL: name: store_8xs32
|
||||
; CHECK: liveins: $x0
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: %ptr:_(p0) = COPY $x0
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
|
||||
; CHECK: %ptr:_(p0) = COPY $x0
|
||||
; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), %ptr(p0) :: (store (<4 x s32>), align 32)
|
||||
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD %ptr, [[C]](s64)
|
||||
|
@ -254,9 +254,9 @@ body: |
|
||||
; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64)
|
||||
; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64)
|
||||
; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64)
|
||||
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
|
||||
; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
|
||||
; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
|
||||
; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
|
||||
; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
|
||||
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64)
|
||||
|
@ -846,19 +846,16 @@ body: |
|
||||
; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>)
|
||||
; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]]
|
||||
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
|
||||
; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
|
||||
; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
|
||||
; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
|
||||
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
|
||||
; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
|
||||
; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
|
||||
; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
|
||||
; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
|
||||
; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>)
|
||||
; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
|
||||
; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr1
|
||||
%2:_(<2 x s16>) = COPY $vgpr2
|
||||
|
@ -837,31 +837,24 @@ body: |
|
||||
; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]]
|
||||
; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
|
||||
; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>)
|
||||
; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32)
|
||||
; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
|
||||
; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
|
||||
; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
|
||||
; SI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
|
||||
; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; SI: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
|
||||
; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
|
||||
; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
|
||||
; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32)
|
||||
; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]]
|
||||
; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
|
||||
; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
|
||||
; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
|
||||
; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
|
||||
; SI: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
|
||||
; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32)
|
||||
; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]]
|
||||
; SI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
|
||||
; SI: [[AND38:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]]
|
||||
; SI: [[AND39:%[0-9]+]]:_(s32) = G_AND [[BITCAST17]], [[C1]]
|
||||
; SI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[C]](s32)
|
||||
; SI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND38]], [[SHL20]]
|
||||
; SI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
|
||||
; SI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>)
|
||||
; SI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>)
|
||||
; SI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>)
|
||||
; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
|
||||
; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
|
||||
; SI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
|
||||
; SI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
|
||||
; VI-LABEL: name: test_fshr_v3s16_v3s16
|
||||
; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
@ -990,31 +983,24 @@ body: |
|
||||
; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]]
|
||||
; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32)
|
||||
; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST10]](<2 x s16>)
|
||||
; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST14]], [[C]](s32)
|
||||
; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
|
||||
; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>)
|
||||
; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; VI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32)
|
||||
; VI: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32)
|
||||
; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]]
|
||||
; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
|
||||
; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
|
||||
; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32)
|
||||
; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]]
|
||||
; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
|
||||
; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]]
|
||||
; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32)
|
||||
; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]]
|
||||
; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]]
|
||||
; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32)
|
||||
; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]]
|
||||
; VI: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
|
||||
; VI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C1]]
|
||||
; VI: [[AND24:%[0-9]+]]:_(s32) = G_AND [[BITCAST17]], [[C1]]
|
||||
; VI: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C]](s32)
|
||||
; VI: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL20]]
|
||||
; VI: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
|
||||
; VI: $vgpr0 = COPY [[BITCAST18]](<2 x s16>)
|
||||
; VI: $vgpr1 = COPY [[BITCAST19]](<2 x s16>)
|
||||
; VI: $vgpr2 = COPY [[BITCAST20]](<2 x s16>)
|
||||
; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32)
|
||||
; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>)
|
||||
; VI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>)
|
||||
; VI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>)
|
||||
; GFX9-LABEL: name: test_fshr_v3s16_v3s16
|
||||
; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
|
||||
@ -1074,19 +1060,16 @@ body: |
|
||||
; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>)
|
||||
; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]]
|
||||
; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>)
|
||||
; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
|
||||
; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
|
||||
; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>)
|
||||
; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32)
|
||||
; GFX9: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
|
||||
; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
|
||||
; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>)
|
||||
; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32)
|
||||
; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32)
|
||||
; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32)
|
||||
; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>)
|
||||
; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>)
|
||||
; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>)
|
||||
%0:_(<2 x s16>) = COPY $vgpr0
|
||||
%1:_(<2 x s16>) = COPY $vgpr1
|
||||
%2:_(<2 x s16>) = COPY $vgpr2
|
||||
|
@ -711,7 +711,6 @@ body: |
|
||||
; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load (s32) from unknown-address + 244, addrspace 5)
|
||||
; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load (s32) from unknown-address + 248, align 8, addrspace 5)
|
||||
; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load (s32) from unknown-address + 252, addrspace 5)
|
||||
; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
|
||||
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
|
||||
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
|
||||
@ -728,6 +727,7 @@ body: |
|
||||
; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
|
||||
; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
|
||||
; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
|
||||
; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
|
||||
; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store (<4 x s32>), align 4, addrspace 1)
|
||||
; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
|
||||
; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64)
|
||||
|
@ -148,25 +148,18 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32
|
||||
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8)
|
||||
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>)
|
||||
; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
|
||||
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <3 x half> %tex
|
||||
@ -397,25 +390,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s,
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32)
|
||||
; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>)
|
||||
; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST5]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST6]](<2 x s16>)
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
|
||||
%tex = extractvalue { <3 x half>, i32 } %res, 0
|
||||
@ -634,25 +620,18 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc,
|
||||
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource")
|
||||
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
|
||||
; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
|
||||
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <3 x half> %tex
|
||||
@ -710,25 +689,18 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc,
|
||||
; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource")
|
||||
; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
|
||||
; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>)
|
||||
; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <3 x half> %tex
|
||||
@ -1212,25 +1184,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs
|
||||
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
|
||||
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
|
||||
%tex = extractvalue { <3 x half>, i32 } %res, 0
|
||||
@ -1298,25 +1263,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs
|
||||
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
|
||||
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
|
||||
%tex = extractvalue { <3 x half>, i32 } %res, 0
|
||||
@ -1384,25 +1342,18 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs
|
||||
; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
|
||||
; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>)
|
||||
; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>)
|
||||
; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
|
||||
; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
|
||||
; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
|
||||
; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
|
||||
; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
|
||||
; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]]
|
||||
; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]]
|
||||
; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
|
||||
; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
|
||||
; PACKED: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST4]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST5]](<2 x s16>)
|
||||
; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
|
||||
; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
|
||||
; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>)
|
||||
; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
|
||||
%res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
|
||||
%tex = extractvalue { <3 x half>, i32 } %res, 0
|
||||
|
@ -535,10 +535,7 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
|
||||
; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0
|
||||
; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
||||
; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3
|
||||
; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
|
||||
; GFX8-PACKED-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: load_1d_v3f16_xyz:
|
||||
@ -555,10 +552,7 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
|
||||
; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3
|
||||
; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0
|
||||
; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX10-LABEL: load_1d_v3f16_xyz:
|
||||
@ -571,15 +565,11 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) {
|
||||
; GFX10-NEXT: s_mov_b32 s5, s7
|
||||
; GFX10-NEXT: s_mov_b32 s6, s8
|
||||
; GFX10-NEXT: s_mov_b32 s7, s9
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, 0xffff
|
||||
; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16
|
||||
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
|
||||
; GFX10-NEXT: s_lshl_b32 s0, s0, 16
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0
|
||||
; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0
|
||||
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
|
||||
; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2
|
||||
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0
|
||||
; GFX10-NEXT: ; return to shader part epilog
|
||||
%v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret <3 x half> %v
|
||||
|
@ -140,9 +140,9 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_mov_b32 s4, 0xffff
|
||||
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; GFX8-NEXT: s_mov_b32 s5, s4
|
||||
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
|
||||
; GFX8-NEXT: s_and_b32 s2, s0, s4
|
||||
; GFX8-NEXT: s_mov_b32 s5, s4
|
||||
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
|
||||
; GFX8-NEXT: s_and_b32 s6, s1, s4
|
||||
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]
|
||||
|
Loading…
x
Reference in New Issue
Block a user