mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-12 05:56:28 +00:00
Implement AArch64 NEON instruction set AdvSIMD (table).
llvm-svn: 194648
This commit is contained in:
parent
2ab2f4eec5
commit
5a9b5605ba
@ -84,6 +84,51 @@ def int_aarch64_neon_vminv : Neon_Across_Intrinsic;
|
||||
def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic;
|
||||
def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic;
|
||||
|
||||
// Vector Table Lookup.
|
||||
def int_aarch64_neon_vtbl1 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_vtbl2 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_vtbl3 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
|
||||
LLVMMatchType<0>], [IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_vtbl4 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
|
||||
LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
|
||||
// Vector Table Extension.
|
||||
// Some elements of the destination vector may not be updated, so the original
|
||||
// value of that vector is passed as the first argument. The next 1-4
|
||||
// arguments after that are the table.
|
||||
def int_aarch64_neon_vtbx1 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_vtbx2 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
|
||||
LLVMMatchType<0>], [IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_vtbx3 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
|
||||
LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
|
||||
def int_aarch64_neon_vtbx4 :
|
||||
Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
|
||||
LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Scalar Add
|
||||
def int_aarch64_neon_vaddds :
|
||||
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
|
||||
|
@ -109,6 +109,13 @@ public:
|
||||
|
||||
SDNode* Select(SDNode*);
|
||||
private:
|
||||
/// Get the opcode for table lookup instruction
|
||||
unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
|
||||
|
||||
/// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
|
||||
/// IsExt is to indicate if the result will be extended with an argument.
|
||||
SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
|
||||
|
||||
/// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
|
||||
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
|
||||
const uint16_t *Opcode);
|
||||
@ -682,6 +689,73 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
|
||||
return VSt;
|
||||
}
|
||||
|
||||
unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
|
||||
unsigned NumOfVec) {
|
||||
assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");
|
||||
|
||||
unsigned Opc = 0;
|
||||
switch (NumOfVec) {
|
||||
default:
|
||||
break;
|
||||
case 1:
|
||||
if (IsExt)
|
||||
Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
|
||||
else
|
||||
Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
|
||||
break;
|
||||
case 2:
|
||||
if (IsExt)
|
||||
Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
|
||||
else
|
||||
Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
|
||||
break;
|
||||
case 3:
|
||||
if (IsExt)
|
||||
Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
|
||||
else
|
||||
Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
|
||||
break;
|
||||
case 4:
|
||||
if (IsExt)
|
||||
Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
|
||||
else
|
||||
Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
|
||||
break;
|
||||
}
|
||||
|
||||
return Opc;
|
||||
}
|
||||
|
||||
SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
|
||||
bool IsExt) {
|
||||
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
|
||||
SDLoc dl(N);
|
||||
|
||||
// Check the element of look up table is 64-bit or not
|
||||
unsigned Vec0Idx = IsExt ? 2 : 1;
|
||||
SDValue V0 = N->getOperand(Vec0Idx + 0);
|
||||
EVT VT = V0.getValueType();
|
||||
assert(!VT.is64BitVector() &&
|
||||
"The element of lookup table for vtbl and vtbx must be 128-bit");
|
||||
|
||||
// Check the return value type is 64-bit or not
|
||||
EVT ResVT = N->getValueType(0);
|
||||
bool is64BitRes = ResVT.is64BitVector();
|
||||
|
||||
// Create new SDValue for vector list
|
||||
SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
|
||||
N->op_begin() + Vec0Idx + NumVecs);
|
||||
SDValue TblReg = createQTuple(Regs);
|
||||
unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
|
||||
|
||||
SmallVector<SDValue, 3> Ops;
|
||||
if (IsExt)
|
||||
Ops.push_back(N->getOperand(1));
|
||||
Ops.push_back(TblReg);
|
||||
Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
|
||||
return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
|
||||
}
|
||||
|
||||
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
|
||||
// Dump information about the Node being selected
|
||||
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
|
||||
@ -900,6 +974,31 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
|
||||
};
|
||||
return SelectVST(Node, 4, true, Opcodes);
|
||||
}
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
|
||||
bool IsExt = false;
|
||||
switch (IntNo) {
|
||||
default:
|
||||
break;
|
||||
case Intrinsic::aarch64_neon_vtbx1:
|
||||
IsExt = true;
|
||||
case Intrinsic::aarch64_neon_vtbl1:
|
||||
return SelectVTBL(Node, 1, IsExt);
|
||||
case Intrinsic::aarch64_neon_vtbx2:
|
||||
IsExt = true;
|
||||
case Intrinsic::aarch64_neon_vtbl2:
|
||||
return SelectVTBL(Node, 2, IsExt);
|
||||
case Intrinsic::aarch64_neon_vtbx3:
|
||||
IsExt = true;
|
||||
case Intrinsic::aarch64_neon_vtbl3:
|
||||
return SelectVTBL(Node, 3, IsExt);
|
||||
case Intrinsic::aarch64_neon_vtbx4:
|
||||
IsExt = true;
|
||||
case Intrinsic::aarch64_neon_vtbl4:
|
||||
return SelectVTBL(Node, 4, IsExt);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::INTRINSIC_VOID:
|
||||
case ISD::INTRINSIC_W_CHAIN: {
|
||||
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
|
||||
|
@ -1019,6 +1019,25 @@ class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
|
||||
// Inherit Rd in 4-0
|
||||
}
|
||||
|
||||
// Format AdvSIMD table lookup
|
||||
class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
|
||||
dag outs, dag ins, string asmstr,
|
||||
list<dag> patterns, InstrItinClass itin>
|
||||
: A64InstRdnm<outs, ins, asmstr, patterns, itin> {
|
||||
let Inst{31} = 0b0;
|
||||
let Inst{30} = q;
|
||||
let Inst{29-24} = 0b001110;
|
||||
let Inst{23-22} = op2;
|
||||
let Inst{21} = 0b0;
|
||||
// Inherit Rm in 20-16
|
||||
let Inst{15} = 0b0;
|
||||
let Inst{14-13} = len;
|
||||
let Inst{12} = op;
|
||||
let Inst{11-10} = 0b00;
|
||||
// Inherit Rn in 9-5
|
||||
// Inherit Rd in 4-0
|
||||
}
|
||||
|
||||
// Format AdvSIMD 3 vector registers with same vector type
|
||||
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
|
||||
dag outs, dag ins, string asmstr,
|
||||
|
@ -5163,6 +5163,56 @@ def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
|
||||
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
|
||||
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
|
||||
|
||||
// Table lookup
|
||||
class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
|
||||
string asmop, string OpS, RegisterOperand OpVPR,
|
||||
RegisterOperand VecList>
|
||||
: NeonI_TBL<q, op2, len, op,
|
||||
(outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
|
||||
asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
|
||||
[],
|
||||
NoItinerary>;
|
||||
|
||||
// The vectors in look up table are always 16b
|
||||
multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
|
||||
def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
|
||||
!cast<RegisterOperand>(List # "16B_operand")>;
|
||||
|
||||
def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
|
||||
!cast<RegisterOperand>(List # "16B_operand")>;
|
||||
}
|
||||
|
||||
defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
|
||||
defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
|
||||
defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
|
||||
defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
|
||||
|
||||
// Table lookup extention
|
||||
class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
|
||||
string asmop, string OpS, RegisterOperand OpVPR,
|
||||
RegisterOperand VecList>
|
||||
: NeonI_TBL<q, op2, len, op,
|
||||
(outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
|
||||
asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
|
||||
[],
|
||||
NoItinerary> {
|
||||
let Constraints = "$src = $Rd";
|
||||
}
|
||||
|
||||
// The vectors in look up table are always 16b
|
||||
multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
|
||||
def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
|
||||
!cast<RegisterOperand>(List # "16B_operand")>;
|
||||
|
||||
def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
|
||||
!cast<RegisterOperand>(List # "16B_operand")>;
|
||||
}
|
||||
|
||||
defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
|
||||
defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
|
||||
defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
|
||||
defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
|
||||
|
||||
// The followings are for instruction class (3V Elem)
|
||||
|
||||
// Variant 1
|
||||
|
828
test/CodeGen/AArch64/neon-simd-tbl.ll
Normal file
828
test/CodeGen/AArch64/neon-simd-tbl.ll
Normal file
@ -0,0 +1,828 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8>, <8 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
|
||||
|
||||
declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8>, <16 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
|
||||
|
||||
define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl1_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl11.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl1_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl1.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl2_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
|
||||
%vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl17.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl2_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
|
||||
%vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl2.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl3_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
|
||||
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl212.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl3_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
|
||||
%vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl3.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl4_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
|
||||
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl216.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl4_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
|
||||
%vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl4.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl1q_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl1.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl2q_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
|
||||
%vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl2.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl3q_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
|
||||
%vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl3.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl4q_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
|
||||
%vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl4.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx1_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
|
||||
%0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
||||
%1 = sext <8 x i1> %0 to <8 x i8>
|
||||
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
|
||||
ret <8 x i8> %vbsl.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx2_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
|
||||
%vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx17.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx3_s8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
|
||||
%vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
|
||||
%0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
||||
%1 = sext <8 x i1> %0 to <8 x i8>
|
||||
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
|
||||
ret <8 x i8> %vbsl.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx4_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
|
||||
%vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx216.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx1_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx1.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx2_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
|
||||
%vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx2.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx3_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
|
||||
%vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx3.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx4_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
|
||||
%vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx4.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx1q_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx1.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx2q_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
|
||||
%vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx2.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx3q_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
|
||||
%vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx3.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx4q_s8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
|
||||
%vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx4.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl1_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl11.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl1_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl1.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl2_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
|
||||
%vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl17.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl2_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
|
||||
%vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl2.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl3_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
|
||||
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl212.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl3_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
|
||||
%vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl3.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl4_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
|
||||
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl216.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl4_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
|
||||
%vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl4.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl1q_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl1.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl2q_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
|
||||
%vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl2.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl3q_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
|
||||
%vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl3.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl4q_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
|
||||
%vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl4.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx1_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
|
||||
%0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
||||
%1 = sext <8 x i1> %0 to <8 x i8>
|
||||
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
|
||||
ret <8 x i8> %vbsl.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx2_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
|
||||
%vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx17.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx3_u8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
|
||||
%vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
|
||||
%0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
||||
%1 = sext <8 x i1> %0 to <8 x i8>
|
||||
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
|
||||
ret <8 x i8> %vbsl.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx4_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
|
||||
%vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx216.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx1_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx1.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx2_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
|
||||
%vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx2.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx3_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
|
||||
%vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx3.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx4_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
|
||||
%vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx4.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx1q_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx1.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx2q_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
|
||||
%vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx2.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx3q_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
|
||||
%vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx3.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx4q_u8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
|
||||
%vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx4.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl1_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl11.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl1_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl1.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl2_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
|
||||
%vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl17.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl2_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
|
||||
%vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl2.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl3_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
|
||||
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl212.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl3_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
|
||||
%vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl3.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vtbl4_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
|
||||
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl216.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
|
||||
; CHECK: test_vqtbl4_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
|
||||
%vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
|
||||
ret <8 x i8> %vtbl4.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl1q_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl1.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl2q_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
|
||||
%vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl2.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl3q_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
|
||||
%vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl3.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
|
||||
; CHECK: test_vqtbl4q_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
|
||||
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
|
||||
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
|
||||
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
|
||||
%vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
|
||||
ret <16 x i8> %vtbl4.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx1_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
|
||||
%0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
|
||||
%1 = sext <8 x i1> %0 to <8 x i8>
|
||||
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
|
||||
ret <8 x i8> %vbsl.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx2_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
|
||||
%vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx17.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx3_p8:
|
||||
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
|
||||
%vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
|
||||
%0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
|
||||
%1 = sext <8 x i1> %0 to <8 x i8>
|
||||
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
|
||||
ret <8 x i8> %vbsl.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vtbx4_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
|
||||
%vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx216.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx1_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx1.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx2_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
|
||||
%vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx2.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx3_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
|
||||
%vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx3.i
|
||||
}
|
||||
|
||||
define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
|
||||
; CHECK: test_vqtbx4_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
|
||||
%vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
|
||||
ret <8 x i8> %vtbx4.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx1q_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx1.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx2q_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
|
||||
%vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx2.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx3q_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
|
||||
%vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx3.i
|
||||
}
|
||||
|
||||
define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
|
||||
; CHECK: test_vqtbx4q_p8:
|
||||
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
|
||||
entry:
|
||||
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
|
||||
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
|
||||
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
|
||||
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
|
||||
%vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
|
||||
ret <16 x i8> %vtbx4.i
|
||||
}
|
||||
|
@ -5928,3 +5928,51 @@
|
||||
// CHECK-ERROR: error: lane number incompatible with layout
|
||||
// CHECK-ERROR: dup b1, v3.b[16]
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Table look up
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
tbl v0.8b, {v1.8b}, v2.8b
|
||||
tbl v0.8b, {v1.8b, v2.8b}, v2.8b
|
||||
tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
|
||||
tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
|
||||
tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbl v0.8b, {v1.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid number of vectors
|
||||
// CHECK-ERROR: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
tbx v0.8b, {v1.8b}, v2.8b
|
||||
tbx v0.8b, {v1.8b, v2.8b}, v2.8b
|
||||
tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
|
||||
tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
|
||||
tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbx v0.8b, {v1.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid number of vectors
|
||||
// CHECK-ERROR: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
|
||||
// CHECK-ERROR: ^
|
||||
|
56
test/MC/AArch64/neon-tbl.s
Normal file
56
test/MC/AArch64/neon-tbl.s
Normal file
@ -0,0 +1,56 @@
|
||||
// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Instructions across vector registers
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
tbl v0.8b, {v1.16b}, v2.8b
|
||||
tbl v0.8b, {v1.16b, v2.16b}, v2.8b
|
||||
tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
|
||||
tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
|
||||
tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
|
||||
|
||||
// CHECK: tbl v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x00,0x02,0x0e]
|
||||
// CHECK: tbl v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x20,0x02,0x0e]
|
||||
// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x40,0x02,0x0e]
|
||||
// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x60,0x02,0x0e]
|
||||
// CHECK: tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x63,0x02,0x0e]
|
||||
|
||||
tbl v0.16b, {v1.16b}, v2.16b
|
||||
tbl v0.16b, {v1.16b, v2.16b}, v2.16b
|
||||
tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
|
||||
tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
|
||||
tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
|
||||
|
||||
// CHECK: tbl v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x00,0x02,0x4e]
|
||||
// CHECK: tbl v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x20,0x02,0x4e]
|
||||
// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x40,0x02,0x4e]
|
||||
// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x60,0x02,0x4e]
|
||||
// CHECK: tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x63,0x02,0x4e]
|
||||
|
||||
tbx v0.8b, {v1.16b}, v2.8b
|
||||
tbx v0.8b, {v1.16b, v2.16b}, v2.8b
|
||||
tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
|
||||
tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
|
||||
tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
|
||||
|
||||
// CHECK: tbx v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x10,0x02,0x0e]
|
||||
// CHECK: tbx v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x30,0x02,0x0e]
|
||||
// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x50,0x02,0x0e]
|
||||
// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x70,0x02,0x0e]
|
||||
// CHECK: tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x73,0x02,0x0e]
|
||||
|
||||
tbx v0.16b, {v1.16b}, v2.16b
|
||||
tbx v0.16b, {v1.16b, v2.16b}, v2.16b
|
||||
tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
|
||||
tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
|
||||
tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
|
||||
|
||||
// CHECK: tbx v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x10,0x02,0x4e]
|
||||
// CHECK: tbx v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x30,0x02,0x4e]
|
||||
// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x50,0x02,0x4e]
|
||||
// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x70,0x02,0x4e]
|
||||
// CHECK: tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x73,0x02,0x4e]
|
||||
|
@ -1,4 +1,4 @@
|
||||
G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
|
||||
# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Integer Add/Sub
|
||||
@ -2387,3 +2387,42 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s |
|
||||
0x51 0x04 0x14 0x5e
|
||||
0x86 0x05 0x18 0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Table look up
|
||||
#----------------------------------------------------------------------
|
||||
0x20,0x00,0x02,0x0e
|
||||
0xf0,0x23,0x02,0x0e
|
||||
0x20,0x40,0x02,0x0e
|
||||
0xf0,0x62,0x02,0x0e
|
||||
# CHECK: tbl v0.8b, {v1.16b}, v2.8b
|
||||
# CHECK: tbl v16.8b, {v31.16b, v0.16b}, v2.8b
|
||||
# CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
|
||||
# CHECK: tbl v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
|
||||
|
||||
0x20,0x00,0x02,0x4e
|
||||
0xf0,0x23,0x02,0x4e
|
||||
0x20,0x40,0x02,0x4e
|
||||
0xe0,0x63,0x02,0x4e
|
||||
# CHECK: tbl v0.16b, {v1.16b}, v2.16b
|
||||
# CHECK: tbl v16.16b, {v31.16b, v0.16b}, v2.16b
|
||||
# CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
|
||||
# CHECK: tbl v0.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
|
||||
|
||||
0x20,0x10,0x02,0x0e
|
||||
0xf0,0x33,0x02,0x0e
|
||||
0x20,0x50,0x02,0x0e
|
||||
0xf0,0x72,0x02,0x0e
|
||||
# CHECK: tbx v0.8b, {v1.16b}, v2.8b
|
||||
# CHECK: tbx v16.8b, {v31.16b, v0.16b}, v2.8b
|
||||
# CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
|
||||
# CHECK: tbx v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
|
||||
|
||||
0x20,0x10,0x02,0x4e
|
||||
0xf0,0x33,0x02,0x4e
|
||||
0x20,0x50,0x02,0x4e
|
||||
0xf0,0x73,0x02,0x4e
|
||||
# CHECK: tbx v0.16b, {v1.16b}, v2.16b
|
||||
# CHECK: tbx v16.16b, {v31.16b, v0.16b}, v2.16b
|
||||
# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
|
||||
# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user