mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-23 13:50:11 +00:00
[AMDGPU][GFX12] Add new v_permlane16 variants (#75475)
This commit is contained in:
parent
d63f54f91f
commit
966416b9e8
@ -410,6 +410,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32, "ifiiIi", "nc", "fp8-insts")
|
||||
// GFX12+ only builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
TARGET_BUILTIN(__builtin_amdgcn_permlane16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_permlanex16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vi", "n", "gfx12-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
|
||||
|
@ -1,6 +1,54 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -S -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
// CHECK-LABEL: @test_permlane16_var(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_permlane16_var(global uint* out, uint a, uint b, uint c) {
|
||||
*out = __builtin_amdgcn_permlane16_var(a, b, c, 0, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_permlanex16_var(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
|
||||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[B:%.*]], ptr addrspace(5) [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: store i32 [[C:%.*]], ptr addrspace(5) [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlanex16.var(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i1 false, i1 false)
|
||||
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
|
||||
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
|
||||
// CHECK-NEXT: ret void
|
||||
//
|
||||
void test_permlanex16_var(global uint* out, uint a, uint b, uint c) {
|
||||
*out = __builtin_amdgcn_permlanex16_var(a, b, c, 0, 0);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_s_barrier_signal(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
|
||||
|
14
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12-param.cl
Normal file
14
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12-param.cl
Normal file
@ -0,0 +1,14 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1200 -verify -S -o - %s
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
void test_permlane16_var(global uint* out, uint a, uint b, uint c, uint d) {
|
||||
*out = __builtin_amdgcn_permlane16_var(a, b, c, d, 1); // expected-error{{argument to '__builtin_amdgcn_permlane16_var' must be a constant integer}}
|
||||
*out = __builtin_amdgcn_permlane16_var(a, b, c, 1, d); // expected-error{{argument to '__builtin_amdgcn_permlane16_var' must be a constant integer}}
|
||||
}
|
||||
|
||||
void test_permlanex16_var(global uint* out, uint a, uint b, uint c, uint d) {
|
||||
*out = __builtin_amdgcn_permlanex16_var(a, b, c, d, 1); // expected-error{{argument to '__builtin_amdgcn_permlanex16_var' must be a constant integer}}
|
||||
*out = __builtin_amdgcn_permlanex16_var(a, b, c, 1, d); // expected-error{{argument to '__builtin_amdgcn_permlanex16_var' must be a constant integer}}
|
||||
}
|
16
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
Normal file
16
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
Normal file
@ -0,0 +1,16 @@
|
||||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu hawaii -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu fiji -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1030 -verify -S -o - %s
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -verify -S -o - %s
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
void test(global uint* out, uint a, uint b, uint c) {
|
||||
*out = __builtin_amdgcn_permlane16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16_var' needs target feature gfx12-insts}}
|
||||
*out = __builtin_amdgcn_permlanex16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16_var' needs target feature gfx12-insts}}
|
||||
}
|
@ -2466,6 +2466,24 @@ def int_amdgcn_s_wait_event_export_ready :
|
||||
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX12 Intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// llvm.amdgcn.permlane16.var <old> <src0> <src1> <fi> <bound_control>
|
||||
def int_amdgcn_permlane16_var : ClangBuiltin<"__builtin_amdgcn_permlane16_var">,
|
||||
Intrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrConvergent, IntrWillReturn,
|
||||
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
|
||||
|
||||
// llvm.amdgcn.permlanex16.var <old> <src0> <src1> <fi> <bound_control>
|
||||
def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var">,
|
||||
Intrinsic<[llvm_i32_ty],
|
||||
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty, llvm_i1_ty],
|
||||
[IntrNoMem, IntrConvergent, IntrWillReturn,
|
||||
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Deep learning intrinsics.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -992,14 +992,27 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
||||
return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
|
||||
}
|
||||
case Intrinsic::amdgcn_permlane16:
|
||||
case Intrinsic::amdgcn_permlanex16: {
|
||||
case Intrinsic::amdgcn_permlane16_var:
|
||||
case Intrinsic::amdgcn_permlanex16:
|
||||
case Intrinsic::amdgcn_permlanex16_var: {
|
||||
// Discard vdst_in if it's not going to be read.
|
||||
Value *VDstIn = II.getArgOperand(0);
|
||||
if (isa<UndefValue>(VDstIn))
|
||||
break;
|
||||
|
||||
ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
|
||||
ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
|
||||
// FetchInvalid operand idx.
|
||||
unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
|
||||
IID == Intrinsic::amdgcn_permlanex16)
|
||||
? 4 /* for permlane16 and permlanex16 */
|
||||
: 3; /* for permlane16_var and permlanex16_var */
|
||||
|
||||
// BoundCtrl operand idx.
|
||||
// For permlane16 and permlanex16 it should be 5
|
||||
// For Permlane16_var and permlanex16_var it should be 4
|
||||
unsigned int BcIdx = FiIdx + 1;
|
||||
|
||||
ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
|
||||
ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
|
||||
if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
|
||||
break;
|
||||
|
||||
|
@ -4439,6 +4439,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_permlane16_var:
|
||||
case Intrinsic::amdgcn_permlanex16_var: {
|
||||
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
|
||||
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
|
||||
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
|
||||
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
|
||||
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
|
||||
case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
|
||||
case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
|
||||
|
@ -333,6 +333,8 @@ def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
|
||||
def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
|
||||
def : SourceOfDivergence<int_amdgcn_permlane16>;
|
||||
def : SourceOfDivergence<int_amdgcn_permlanex16>;
|
||||
def : SourceOfDivergence<int_amdgcn_permlane16_var>;
|
||||
def : SourceOfDivergence<int_amdgcn_permlanex16_var>;
|
||||
def : SourceOfDivergence<int_amdgcn_mov_dpp>;
|
||||
def : SourceOfDivergence<int_amdgcn_mov_dpp8>;
|
||||
def : SourceOfDivergence<int_amdgcn_update_dpp>;
|
||||
|
@ -163,7 +163,9 @@ static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
|
||||
static bool isPermlane(const MachineInstr &MI) {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
return Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
|
||||
Opcode == AMDGPU::V_PERMLANEX16_B32_e64;
|
||||
Opcode == AMDGPU::V_PERMLANEX16_B32_e64 ||
|
||||
Opcode == AMDGPU::V_PERMLANE16_VAR_B32_e64 ||
|
||||
Opcode == AMDGPU::V_PERMLANEX16_VAR_B32_e64;
|
||||
}
|
||||
|
||||
static bool isLdsDma(const MachineInstr &MI) {
|
||||
|
@ -498,7 +498,9 @@ bool isPermlane16(unsigned Opc) {
|
||||
Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
|
||||
Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
|
||||
Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
|
||||
Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12;
|
||||
Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
|
||||
Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
|
||||
Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
|
||||
}
|
||||
|
||||
bool isGenericAtomic(unsigned Opc) {
|
||||
|
@ -735,6 +735,15 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
|
||||
let HasExtDPP = 0;
|
||||
}
|
||||
|
||||
def VOP3_PERMLANE_VAR_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, untyped]>, VOP3_OPSEL> {
|
||||
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
|
||||
IntOpSelMods:$src1_modifiers, VRegSrc_32:$src1,
|
||||
VGPR_32:$vdst_in, op_sel0:$op_sel);
|
||||
let HasClamp = 0;
|
||||
let HasExtVOP3DPP = 0;
|
||||
let HasExtDPP = 0;
|
||||
}
|
||||
|
||||
def opsel_i1timm : SDNodeXForm<timm, [{
|
||||
return CurDAG->getTargetConstant(
|
||||
N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
|
||||
@ -751,6 +760,13 @@ class PermlanePat<SDPatternOperator permlane,
|
||||
SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
|
||||
>;
|
||||
|
||||
class PermlaneVarPat<SDPatternOperator permlane,
|
||||
Instruction inst> : GCNPat<
|
||||
(permlane i32:$vdst_in, i32:$src0, i32:$src1,
|
||||
timm:$fi, timm:$bc),
|
||||
(inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
|
||||
VGPR_32:$src1, VGPR_32:$vdst_in)
|
||||
>;
|
||||
|
||||
let SubtargetPredicate = isGFX10Plus in {
|
||||
let isCommutable = 1, isReMaterializable = 1 in {
|
||||
@ -781,6 +797,17 @@ let SubtargetPredicate = isGFX10Plus in {
|
||||
|
||||
} // End SubtargetPredicate = isGFX10Plus
|
||||
|
||||
let SubtargetPredicate = isGFX12Plus in {
|
||||
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
|
||||
defm V_PERMLANE16_VAR_B32 : VOP3Inst<"v_permlane16_var_b32", VOP3_PERMLANE_VAR_Profile>;
|
||||
defm V_PERMLANEX16_VAR_B32 : VOP3Inst<"v_permlanex16_var_b32", VOP3_PERMLANE_VAR_Profile>;
|
||||
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
|
||||
|
||||
def : PermlaneVarPat<int_amdgcn_permlane16_var, V_PERMLANE16_VAR_B32_e64>;
|
||||
def : PermlaneVarPat<int_amdgcn_permlanex16_var, V_PERMLANEX16_VAR_B32_e64>;
|
||||
|
||||
} // End SubtargetPredicate = isGFX12Plus
|
||||
|
||||
class DivFmasPat<ValueType vt, Instruction inst, Register CondReg> : GCNPat<
|
||||
(AMDGPUdiv_fmas (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
|
||||
(vt (VOP3Mods vt:$src1, i32:$src1_modifiers)),
|
||||
@ -915,6 +942,9 @@ defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
|
||||
defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
|
||||
defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
|
||||
|
||||
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
|
||||
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX11, GFX12
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -21,6 +21,20 @@ define amdgpu_kernel void @v_permlanex16_b32(ptr addrspace(1) %out, i32 %src0, i
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
|
||||
define amdgpu_kernel void @v_permlane16_var_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
|
||||
define amdgpu_kernel void @v_permlanex16_var_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false) #0
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: DIVERGENT: %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) #0
|
||||
define amdgpu_kernel void @update_dpp(ptr addrspace(1) %out, i32 %in1, i32 %in2) #0 {
|
||||
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) #0
|
||||
@ -98,6 +112,8 @@ bb:
|
||||
declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1
|
||||
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1
|
||||
declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1) #1
|
||||
declare i32 @llvm.amdgcn.permlanex16.var(i32, i32, i32, i1, i1) #1
|
||||
declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #1
|
||||
declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
|
||||
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #1
|
||||
|
896
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll
Normal file
896
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll
Normal file
@ -0,0 +1,896 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
|
||||
; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
|
||||
|
||||
declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
|
||||
declare i32 @llvm.amdgcn.permlanex16.var(i32, i32, i32, i1, i1)
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
||||
declare i32 @llvm.amdgcn.workitem.id.y()
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vi:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v1, v0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vi:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v1, v0
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vl:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0xc1d1
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v1, v0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vl:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v1, v0
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vvv:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v1, v0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vvv:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v1, v0
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlane16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @v_permlanex16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var with a small constant (1) as the third operand and both i1
; flags false. The expected GFX12 code for both check prefixes first moves the
; constant into v0 and then passes v0 as the last operand of
; v_permlanex16_var_b32.
define amdgpu_kernel void @v_permlanex16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vi:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v1, v0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vi:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v1, v0
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var with a larger constant (49617 = 0xc1d1) as the third operand
; and both i1 flags false. As in the small-constant case, the expected code
; moves the constant into v0 before v_permlanex16_var_b32 consumes it.
define amdgpu_kernel void @v_permlanex16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vl:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0xc1d1
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v1, v0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vl:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v1, v0
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var whose third operand is the workitem id (x) and whose first
; two operands are the kernel argument %src0; both i1 flags are false. The
; expected code passes v0 directly as the last operand of
; v_permlanex16_var_b32, with no preceding move into v0.
define amdgpu_kernel void @v_permlanex16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vvv:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v1, v0
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vvv:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v1, v0
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var on two kernel arguments with the fourth operand set to true
; and the fifth false. Both check prefixes expect op_sel:[1,0] on
; v_permlanex16_var_b32.
; NOTE(review): the fourth operand is presumably the "fi" flag, going by the
; test name; confirm against the intrinsic definition.
define amdgpu_kernel void @v_permlanex16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var on two kernel arguments with the fourth operand false and
; the fifth set to true. Both check prefixes expect op_sel:[0,1] on
; v_permlanex16_var_b32.
; NOTE(review): the fifth operand is presumably the "bc" flag, going by the
; test name; confirm against the intrinsic definition.
define amdgpu_kernel void @v_permlanex16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var on two kernel arguments with both trailing i1 operands set
; to true. Both check prefixes expect op_sel:[1,1] on v_permlanex16_var_b32.
define amdgpu_kernel void @v_permlanex16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, 0
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlane16.var where the first two operands are both the workitem id (x) and
; the third is the kernel argument %src1; both i1 flags are false. The expected
; code uses v0 as both destination and first source of v_permlane16_var_b32.
; The %src0 kernel argument is not used by the body.
define amdgpu_kernel void @v_permlane16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_tid_tid:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_tid_tid:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlane16.var whose first operand is a frozen poison value, second operand
; is the workitem id (x) and third is the kernel argument %src1; both i1 flags
; are false. The expected code matches the tid/tid variant: v0 is both the
; destination and the first source of v_permlane16_var_b32.
define amdgpu_kernel void @v_permlane16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_undef_tid:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_undef_tid:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlane16.var whose first operand is the constant 12345 (0x3039), second
; operand is the workitem id (x) and third is the kernel argument %src1; both
; i1 flags are false. The expected code moves the constant into v1, which is
; then the destination register of v_permlane16_var_b32.
define amdgpu_kernel void @v_permlane16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x3039
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v2, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v1, v0, v2
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v3, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v1, v0, v2
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlane16.var with the fourth operand true and the fifth false; the second
; operand is the workitem id (x) and the third is the kernel argument %src1.
; Both check prefixes expect op_sel:[1,0] on v_permlane16_var_b32.
; NOTE(review): the name says "i_tid", but the first operand here is a frozen
; poison value rather than the constant 12345 used by the plain i_tid test;
; confirm whether that is intentional.
define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlane16.var with the fourth operand false and the fifth true; the second
; operand is the workitem id (x) and the third is the kernel argument %src1.
; Both check prefixes expect op_sel:[0,1] on v_permlane16_var_b32.
; NOTE(review): the name says "i_tid", but the first operand here is a frozen
; poison value rather than the constant 12345 used by the plain i_tid test;
; confirm whether that is intentional.
define amdgpu_kernel void @v_permlane16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlane16.var with both trailing i1 operands true; the second operand is
; the workitem id (x) and the third is the kernel argument %src1. Both check
; prefixes expect op_sel:[1,1] on v_permlane16_var_b32.
; NOTE(review): the name says "i_tid", but the first operand here is a frozen
; poison value rather than the constant 12345 used by the plain i_tid test;
; confirm whether that is intentional.
define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var where the first two operands are both the workitem id (x)
; and the third is the kernel argument %src1; both i1 flags are false. The
; expected code uses v0 as both destination and first source of
; v_permlanex16_var_b32. The %src0 kernel argument is not used by the body.
define amdgpu_kernel void @v_permlanex16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_tid_tid:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_tid_tid:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var whose first operand is a frozen poison value, second operand
; is the workitem id (x) and third is the kernel argument %src1; both i1 flags
; are false. The expected code matches the tid/tid variant: v0 is both the
; destination and the first source of v_permlanex16_var_b32.
define amdgpu_kernel void @v_permlanex16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_undef_tid:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_undef_tid:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var whose first operand is the constant 12345 (0x3039), second
; operand is the workitem id (x) and third is the kernel argument %src1; both
; i1 flags are false. The expected code moves the constant into v1, which is
; then the destination register of v_permlanex16_var_b32.
define amdgpu_kernel void @v_permlanex16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, 0x3039
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v2, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v1, v0, v2
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v3, v1, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v1, v0, v2
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var with the fourth operand true and the fifth false; the second
; operand is the workitem id (x) and the third is the kernel argument %src1.
; Both check prefixes expect op_sel:[1,0] on v_permlanex16_var_b32.
; NOTE(review): the name says "i_tid", but the first operand here is a frozen
; poison value rather than the constant 12345 used by the plain i_tid test;
; confirm whether that is intentional.
define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var with the fourth operand false and the fifth true; the second
; operand is the workitem id (x) and the third is the kernel argument %src1.
; Both check prefixes expect op_sel:[0,1] on v_permlanex16_var_b32.
; NOTE(review): the name says "i_tid", but the first operand here is a frozen
; poison value rather than the constant 12345 used by the plain i_tid test;
; confirm whether that is intentional.
define amdgpu_kernel void @v_permlanex16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; permlanex16.var with both trailing i1 operands true; the second operand is
; the workitem id (x) and the third is the kernel argument %src1. Both check
; prefixes expect op_sel:[1,1] on v_permlanex16_var_b32.
; NOTE(review): the name says "i_tid", but the first operand here is a frozen
; poison value rather than the constant 12345 used by the plain i_tid test;
; confirm whether that is intentional.
define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi_bc:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG-NEXT: s_clause 0x1
|
||||
; GFX12-SDAG-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-SDAG-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
|
||||
; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
|
||||
; GFX12-SDAG-NEXT: s_nop 0
|
||||
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-SDAG-NEXT: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi_bc:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL-NEXT: s_clause 0x1
|
||||
; GFX12-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x30
|
||||
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
|
||||
; GFX12-GISEL-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
|
||||
; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
|
||||
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0
|
||||
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
|
||||
; GFX12-GISEL-NEXT: s_nop 0
|
||||
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
||||
; GFX12-GISEL-NEXT: s_endpgm
|
||||
%tidx = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%undef = freeze i32 poison
|
||||
%v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
15
llvm/test/CodeGen/AMDGPU/permlane16_var-op-sel.ll
Normal file
15
llvm/test/CodeGen/AMDGPU/permlane16_var-op-sel.ll
Normal file
@ -0,0 +1,15 @@
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1200 -d - | FileCheck -check-prefix=OBJ %s
|
||||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefix=ASM %s
|
||||
|
||||
; Checks that a permlane16.var call whose fourth operand is set (i1 1) is
; printed with op_sel:[1,0], both in llc's assembly output (together with its
; encoding bytes) and after emitting an object file and disassembling it with
; llvm-objdump.
;
; The RUN lines pass an explicit -mtriple rather than -march so that the
; vendor/OS parts of the triple do not come from the default target triple of
; the llc binary running the test.
declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
|
||||
|
||||
; OBJ-LABEL: <permlane_op_sel>:
|
||||
; OBJ: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
|
||||
|
||||
; ASM-LABEL: permlane_op_sel:
|
||||
; ASM: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0] ; encoding: [0x00,0x08,0x0f,0xd7,0x00,0x03,0x02,0x00]
|
||||
define amdgpu_kernel void @permlane_op_sel(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
|
||||
%v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 1, i1 0)
|
||||
store i32 %v, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
168
llvm/test/CodeGen/AMDGPU/vcmpx-permlane16var-hazard.mir
Normal file
168
llvm/test/CodeGen/AMDGPU/vcmpx-permlane16var-hazard.mir
Normal file
@ -0,0 +1,168 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_permlane16var
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e32
|
||||
# GCN: S_ADD_U32
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: V_PERMLANE16_VAR_B32_e64
|
||||
# Base case. bb.0 runs an e32 V_CMPX that redefines $exec and then branches to
# bb.1. In bb.1 only IMPLICIT_DEFs and a scalar S_ADD_U32 precede the
# V_PERMLANE16_VAR_B32. The check lines above expect the hazard recognizer to
# place a V_MOV_B32 of $vgpr1 onto itself directly between the S_ADD_U32 and
# the permlane.
---
|
||||
name: hazard_vcmpx_permlane16var
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
||||
$vgpr1 = V_PERMLANE16_VAR_B32_e64 0, killed $vgpr1, 0, killed $vgpr2, $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_permlanex16var
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e32
|
||||
# GCN: $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: V_PERMLANEX16_VAR_B32_e64
|
||||
# Same shape as the base case, but the consumer is V_PERMLANEX16_VAR_B32 and
# bb.1 contains only IMPLICIT_DEFs before it (no scalar ALU instruction). The
# check lines above expect a V_MOV_B32 of $vgpr1 onto itself immediately
# before the permlanex.
---
|
||||
name: hazard_vcmpx_permlanex16var
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = IMPLICIT_DEF
|
||||
$vgpr1 = V_PERMLANEX16_VAR_B32_e64 0, killed $vgpr1, 0, killed $vgpr2, $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_permlane16var_v_nop
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e32
|
||||
# GCN: V_NOP
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: V_PERMLANE16_VAR_B32_e64
|
||||
# A single V_NOP sits between the V_CMPX (in bb.0) and the permlane (in bb.1).
# The check lines above still expect a V_MOV_B32 of $vgpr1 to be inserted
# right after the V_NOP and right before the permlane, i.e. the V_NOP is not
# expected to make the inserted move unnecessary.
---
|
||||
name: hazard_vcmpx_permlane16var_v_nop
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = IMPLICIT_DEF
|
||||
V_NOP_e32 implicit $exec
|
||||
$vgpr1 = V_PERMLANE16_VAR_B32_e64 0, killed $vgpr1, 0, killed $vgpr2, $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_permlane16var_far
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e32
|
||||
# GCN: $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: V_PERMLANE16_VAR_B32_e64
|
||||
# Nine V_NOPs separate the V_CMPX (in bb.0) from the permlane (in bb.1). The
# check lines above still expect a V_MOV_B32 of $vgpr1 onto itself directly
# before the permlane.
# The permlane reads $vgpr1 as its first source and $vgpr2 as its second,
# matching the operand layout used by the other cases in this file; $vgpr2 is
# the register defined by the IMPLICIT_DEF in bb.1 for that purpose.
---
|
||||
name: hazard_vcmpx_permlane16var_far
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = IMPLICIT_DEF
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
$vgpr1 = V_PERMLANE16_VAR_B32_e64 0, killed $vgpr1, 0, killed $vgpr2, $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_permlane16var_no_hazard
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e32
|
||||
# GCN: V_ADD_F32
|
||||
# GCN-NEXT: V_PERMLANE16_VAR_B32_e64
|
||||
# Negative case. A V_ADD_F32 (which also produces the $vgpr2 consumed by the
# permlane) sits between the V_CMPX in bb.0 and the permlane in bb.1. The
# check lines above expect the permlane to follow the V_ADD_F32 directly,
# with no V_MOV_B32 inserted.
---
|
||||
name: hazard_vcmpx_permlane16var_no_hazard
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = IMPLICIT_DEF
|
||||
$vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
||||
$vgpr1 = V_PERMLANE16_VAR_B32_e64 0, killed $vgpr1, 0, killed $vgpr2, $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_permlane16var_undef_src
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e32
|
||||
# GCN: S_ADD_U32
|
||||
# GCN-NEXT: dead $vgpr1 = V_MOV_B32_e32 undef $vgpr1, implicit $exec
|
||||
# GCN-NEXT: V_PERMLANE16_VAR_B32_e64
|
||||
# Variant of the base case in which $vgpr1 is never defined in bb.1 and the
# permlane reads it with the undef flag (both as first source and as the
# trailing $vgpr1 operand). The check lines above expect the inserted
# V_MOV_B32 to carry matching flags: a dead def of $vgpr1 and an undef use.
---
|
||||
name: hazard_vcmpx_permlane16var_undef_src
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
||||
$vgpr1 = V_PERMLANE16_VAR_B32_e64 0, undef $vgpr1, 0, killed $vgpr2, undef $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: hazard_vcmpx_e64_permlane16var
|
||||
# GCN: V_CMPX_LE_F32_nosdst_e64
|
||||
# GCN: S_ADD_U32
|
||||
# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
|
||||
# GCN-NEXT: V_PERMLANE16_VAR_B32_e64
|
||||
# Same as the base case except that the compare in bb.0 is the e64 form of
# V_CMPX_LE_F32_nosdst. The check lines above expect the same result: a
# V_MOV_B32 of $vgpr1 onto itself between the S_ADD_U32 and the permlane.
---
|
||||
name: hazard_vcmpx_e64_permlane16var
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
V_CMPX_LE_F32_nosdst_e64 0, 0, 0, $vgpr0, 0, implicit-def $exec, implicit $mode, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = IMPLICIT_DEF
|
||||
$vgpr2 = IMPLICIT_DEF
|
||||
$sgpr0 = IMPLICIT_DEF
|
||||
$sgpr1 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
|
||||
$vgpr1 = V_PERMLANE16_VAR_B32_e64 0, killed $vgpr1, 0, killed $vgpr2, $vgpr1, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
@ -1585,6 +1585,12 @@ v_mov_b64_e64 v[10:11], v[2:3]
|
||||
v_mul_lo_i32 v0, v1, v2
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_permlane16_var_b32 v0, v0, v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_permlanex16_var_b32 v0, v0, v1
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
v_pk_add_f32 v[10:11], v[2:3], v[4:5]
|
||||
// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
|
||||
|
||||
|
@ -5152,6 +5152,57 @@ v_permlanex16_b32 v5, v1, 0.5, null op_sel:[1,0]
|
||||
v_permlanex16_b32 v255, v255, src_scc, exec_hi op_sel:[0,1]
|
||||
// GFX12: encoding: [0xff,0x10,0x5c,0xd6,0xff,0xfb,0xfd,0x01]
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2
|
||||
// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v255
|
||||
// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00]
|
||||
|
||||
v_permlane16_var_b32 v5, v255, v0
|
||||
// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0xff,0x01,0x02,0x00]
|
||||
|
||||
v_permlane16_var_b32 v255, v1, v2
|
||||
// GFX12: encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v50, op_sel:[1,1]
|
||||
// GFX12: encoding: [0x05,0x18,0x0f,0xd7,0x01,0x65,0x02,0x00]
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v50, op_sel:[0,0]
|
||||
// GFX12: encoding: [0x05,0x00,0x0f,0xd7,0x01,0x65,0x02,0x00]
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v50, op_sel:[1,0]
|
||||
// GFX12: encoding: [0x05,0x08,0x0f,0xd7,0x01,0x65,0x02,0x00]
|
||||
|
||||
v_permlane16_var_b32 v255, v255, v0, op_sel:[0,1]
|
||||
// GFX12: encoding: [0xff,0x10,0x0f,0xd7,0xff,0x01,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v5, v1, v2
|
||||
// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v5, v1, v105
|
||||
// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xd3,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v5, v1, v255
|
||||
// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v255, v1, v2
|
||||
// GFX12: encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v1, v255, v2
|
||||
// GFX12: encoding: [0x01,0x00,0x10,0xd7,0xff,0x05,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v5, v1, v100, op_sel:[1,1]
|
||||
// GFX12: encoding: [0x05,0x18,0x10,0xd7,0x01,0xc9,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v5, v1, v100, op_sel:[0,0]
|
||||
// GFX12: encoding: [0x05,0x00,0x10,0xd7,0x01,0xc9,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v5, v1, v100, op_sel:[1,0]
|
||||
// GFX12: encoding: [0x05,0x08,0x10,0xd7,0x01,0xc9,0x02,0x00]
|
||||
|
||||
v_permlanex16_var_b32 v255, v255, v100, op_sel:[0,1]
|
||||
// GFX12: encoding: [0xff,0x10,0x10,0xd7,0xff,0xc9,0x02,0x00]
|
||||
|
||||
v_qsad_pk_u16_u8 v[5:6], v[1:2], v2, ttmp[14:15]
|
||||
// GFX12: encoding: [0x05,0x00,0x3a,0xd6,0x01,0x05,0xea,0x01]
|
||||
|
||||
|
@ -6,3 +6,98 @@ v_permlane16_b32 v5, v1, s2, s3 op_sel:[0, 0, 0, 1]
|
||||
|
||||
v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0, 0, 1, 0]
|
||||
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid op_sel operand
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 clamp
|
||||
// GFX12: error: invalid operand for instruction
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 clamp
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 div:2
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 div:2
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 mul:1
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 mul:1
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 -v5, v1, v2 op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 -v5, v1, v2 op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, -v1, v2 op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, -v1, v2 op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, -v2 op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, -v2 op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 -|v5|, v1, v2 op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 -|v5|, v1, v2 op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, -v1, |v2| op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, -v1, |v2| op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, -|v2| op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, -|v2| op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 |v5|, v1, v2 op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 |v5|, v1, v2 op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, |v1|, v2 op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, |v1|, v2 op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, |v2| op_sel:[0, 1]
|
||||
// GFX12: error: not a valid operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, |v2| op_sel:[0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[-1, 0]
|
||||
// GFX12: error: invalid op_sel value
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[-1, 0]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[1, -1]
|
||||
// GFX12: error: invalid op_sel value
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[1, -1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 0, 1]
|
||||
// GFX12: error: invalid op_sel operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 0, -1]
|
||||
// GFX12: error: invalid op_sel value
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 0, -1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 1, 0]
|
||||
// GFX12: error: invalid op_sel operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 1, 0]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, -1, 0]
|
||||
// GFX12: error: invalid op_sel value
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, -1, 0]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
||||
v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 1]
|
||||
// GFX12: error: invalid op_sel operand
|
||||
// GFX12-NEXT:{{^}}v_permlane16_var_b32 v5, v1, v2 op_sel:[0, 0, 1]
|
||||
// GFX12-NEXT:{{^}} ^
|
||||
|
@ -4774,6 +4774,57 @@
|
||||
# GFX12: v_permlanex16_b32 v5, v1, s2, s3 op_sel:[0,1] ; encoding: [0x05,0x10,0x5c,0xd6,0x01,0x05,0x0c,0x00]
|
||||
0x05,0x10,0x5c,0xd6,0x01,0x05,0x0c,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00]
|
||||
0x05,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00]
|
||||
0x05,0x00,0x0f,0xd7,0x01,0xff,0x03,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v5, v255, v0 ; encoding: [0x05,0x00,0x0f,0xd7,0xff,0x01,0x02,0x00]
|
||||
0x05,0x00,0x0f,0xd7,0xff,0x01,0x02,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00]
|
||||
0xff,0x00,0x0f,0xd7,0x01,0x05,0x02,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v5, v1, v50 op_sel:[1,1] ; encoding: [0x05,0x18,0x0f,0xd7,0x01,0x65,0x02,0x00]
|
||||
0x05,0x18,0x0f,0xd7,0x01,0x65,0x02,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v5, v1, v50 ; encoding: [0x05,0x00,0x0f,0xd7,0x01,0x65,0x02,0x00]
|
||||
0x05,0x00,0x0f,0xd7,0x01,0x65,0x02,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v5, v1, v50 op_sel:[1,0] ; encoding: [0x05,0x08,0x0f,0xd7,0x01,0x65,0x02,0x00]
|
||||
0x05,0x08,0x0f,0xd7,0x01,0x65,0x02,0x00
|
||||
|
||||
# GFX12: v_permlane16_var_b32 v255, v255, v0 op_sel:[0,1] ; encoding: [0xff,0x10,0x0f,0xd7,0xff,0x01,0x02,0x00]
|
||||
0xff,0x10,0x0f,0xd7,0xff,0x01,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v5, v1, v2 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00]
|
||||
0x05,0x00,0x10,0xd7,0x01,0x05,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v5, v1, v105 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xd3,0x02,0x00]
|
||||
0x05,0x00,0x10,0xd7,0x01,0xd3,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v5, v1, v255 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00]
|
||||
0x05,0x00,0x10,0xd7,0x01,0xff,0x03,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v255, v1, v2 ; encoding: [0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00]
|
||||
0xff,0x00,0x10,0xd7,0x01,0x05,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v1, v255, v2 ; encoding: [0x01,0x00,0x10,0xd7,0xff,0x05,0x02,0x00]
|
||||
0x01,0x00,0x10,0xd7,0xff,0x05,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v5, v1, v100 op_sel:[1,1] ; encoding: [0x05,0x18,0x10,0xd7,0x01,0xc9,0x02,0x00]
|
||||
0x05,0x18,0x10,0xd7,0x01,0xc9,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v5, v1, v100 ; encoding: [0x05,0x00,0x10,0xd7,0x01,0xc9,0x02,0x00]
|
||||
0x05,0x00,0x10,0xd7,0x01,0xc9,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v5, v1, v100 op_sel:[1,0] ; encoding: [0x05,0x08,0x10,0xd7,0x01,0xc9,0x02,0x00]
|
||||
0x05,0x08,0x10,0xd7,0x01,0xc9,0x02,0x00
|
||||
|
||||
# GFX12: v_permlanex16_var_b32 v255, v255, v100 op_sel:[0,1] ; encoding: [0xff,0x10,0x10,0xd7,0xff,0xc9,0x02,0x00]
|
||||
0xff,0x10,0x10,0xd7,0xff,0xc9,0x02,0x00
|
||||
|
||||
# GFX12: v_pipeflush ; encoding: [0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00]
|
||||
0x00,0x00,0x9b,0xd5,0x00,0x00,0x00,0x00
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user