Implement AArch64 NEON instruction set AdvSIMD (table).

llvm-svn: 194648
This commit is contained in:
Jiangning Liu 2013-11-14 01:57:32 +00:00
parent 2ab2f4eec5
commit 5a9b5605ba
8 changed files with 1185 additions and 1 deletions

View File

@ -84,6 +84,51 @@ def int_aarch64_neon_vminv : Neon_Across_Intrinsic;
def int_aarch64_neon_vmaxnmv : Neon_Across_Intrinsic;
def int_aarch64_neon_vminnmv : Neon_Across_Intrinsic;
// Vector Table Lookup.
def int_aarch64_neon_vtbl1 :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_neon_vtbl2 :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>],
[IntrNoMem]>;
def int_aarch64_neon_vtbl3 :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_neon_vtbl4 :
Intrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;
// Vector Table Extension.
// Some elements of the destination vector may not be updated, so the original
// value of that vector is passed as the first argument. The next 1-4
// arguments after that are the table.
def int_aarch64_neon_vtbx1 :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
def int_aarch64_neon_vtbx2 :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_neon_vtbx3 :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
LLVMMatchType<1>, LLVMMatchType<0>], [IntrNoMem]>;
def int_aarch64_neon_vtbx4 :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>,
LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<0>],
[IntrNoMem]>;
// Scalar Add
def int_aarch64_neon_vaddds :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;

View File

@ -109,6 +109,13 @@ public:
SDNode* Select(SDNode*);
private:
/// Get the opcode for table lookup instruction
unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
/// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
/// IsExt is to indicate if the result will be extended with an argument.
SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
/// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, bool isUpdating,
const uint16_t *Opcode);
@ -682,6 +689,73 @@ SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
return VSt;
}
unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
unsigned NumOfVec) {
assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");
unsigned Opc = 0;
switch (NumOfVec) {
default:
break;
case 1:
if (IsExt)
Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
else
Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
break;
case 2:
if (IsExt)
Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
else
Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
break;
case 3:
if (IsExt)
Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
else
Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
break;
case 4:
if (IsExt)
Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
else
Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
break;
}
return Opc;
}
SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
bool IsExt) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
SDLoc dl(N);
// Check the element of look up table is 64-bit or not
unsigned Vec0Idx = IsExt ? 2 : 1;
SDValue V0 = N->getOperand(Vec0Idx + 0);
EVT VT = V0.getValueType();
assert(!VT.is64BitVector() &&
"The element of lookup table for vtbl and vtbx must be 128-bit");
// Check the return value type is 64-bit or not
EVT ResVT = N->getValueType(0);
bool is64BitRes = ResVT.is64BitVector();
// Create new SDValue for vector list
SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
N->op_begin() + Vec0Idx + NumVecs);
SDValue TblReg = createQTuple(Regs);
unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
SmallVector<SDValue, 3> Ops;
if (IsExt)
Ops.push_back(N->getOperand(1));
Ops.push_back(TblReg);
Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
}
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@ -900,6 +974,31 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
};
return SelectVST(Node, 4, true, Opcodes);
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
bool IsExt = false;
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_vtbx1:
IsExt = true;
case Intrinsic::aarch64_neon_vtbl1:
return SelectVTBL(Node, 1, IsExt);
case Intrinsic::aarch64_neon_vtbx2:
IsExt = true;
case Intrinsic::aarch64_neon_vtbl2:
return SelectVTBL(Node, 2, IsExt);
case Intrinsic::aarch64_neon_vtbx3:
IsExt = true;
case Intrinsic::aarch64_neon_vtbl3:
return SelectVTBL(Node, 3, IsExt);
case Intrinsic::aarch64_neon_vtbx4:
IsExt = true;
case Intrinsic::aarch64_neon_vtbl4:
return SelectVTBL(Node, 4, IsExt);
}
break;
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();

View File

@ -1019,6 +1019,25 @@ class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
// Inherit Rd in 4-0
}
// Format AdvSIMD table lookup
class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRdnm<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = q;
let Inst{29-24} = 0b001110;
let Inst{23-22} = op2;
let Inst{21} = 0b0;
// Inherit Rm in 20-16
let Inst{15} = 0b0;
let Inst{14-13} = len;
let Inst{12} = op;
let Inst{11-10} = 0b00;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
// Format AdvSIMD 3 vector registers with same vector type
class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,

View File

@ -5163,6 +5163,56 @@ def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
// Table lookup
class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
string asmop, string OpS, RegisterOperand OpVPR,
RegisterOperand VecList>
: NeonI_TBL<q, op2, len, op,
(outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
[],
NoItinerary>;
// The vectors in look up table are always 16b
multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
!cast<RegisterOperand>(List # "16B_operand")>;
def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
!cast<RegisterOperand>(List # "16B_operand")>;
}
defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
// Table lookup extention
class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
string asmop, string OpS, RegisterOperand OpVPR,
RegisterOperand VecList>
: NeonI_TBL<q, op2, len, op,
(outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
[],
NoItinerary> {
let Constraints = "$src = $Rd";
}
// The vectors in look up table are always 16b
multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
!cast<RegisterOperand>(List # "16B_operand")>;
def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
!cast<RegisterOperand>(List # "16B_operand")>;
}
defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
// The followings are for instruction class (3V Elem)
// Variant 1

View File

@ -0,0 +1,828 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
declare <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8>, <16 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8>, <16 x i8>, <8 x i8>)
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>)
define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vtbl1_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
ret <8 x i8> %vtbl11.i
}
define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) {
; CHECK: test_vqtbl1_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
ret <8 x i8> %vtbl1.i
}
define <8 x i8> @test_vtbl2_s8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl2_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
%vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
ret <8 x i8> %vtbl17.i
}
define <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl2_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
%vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl2.i
}
define <8 x i8> @test_vtbl3_s8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl3_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
ret <8 x i8> %vtbl212.i
}
define <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl3_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
%vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl3.i
}
define <8 x i8> @test_vtbl4_s8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl4_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
ret <8 x i8> %vtbl216.i
}
define <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl4_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
%vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl4.i
}
define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vqtbl1q_s8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %vtbl1.i
}
define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl2q_s8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
%vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl2.i
}
define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl3q_s8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
%vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl3.i
}
define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl4q_s8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
%vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl4.i
}
define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK: test_vtbx1_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
%0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
%1 = sext <8 x i1> %0 to <8 x i8>
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
ret <8 x i8> %vbsl.i
}
define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx2_s8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
%vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
ret <8 x i8> %vtbx17.i
}
define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx3_s8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
%vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
%0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
%1 = sext <8 x i1> %0 to <8 x i8>
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
ret <8 x i8> %vbsl.i
}
define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx4_s8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
%vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
ret <8 x i8> %vtbx216.i
}
define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
; CHECK: test_vqtbx1_s8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
ret <8 x i8> %vtbx1.i
}
define <8 x i8> @test_vqtbx2_s8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx2_s8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
%vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx2.i
}
define <8 x i8> @test_vqtbx3_s8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx3_s8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
%vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx3.i
}
define <8 x i8> @test_vqtbx4_s8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx4_s8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
%vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx4.i
}
define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK: test_vqtbx1q_s8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %vtbx1.i
}
define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx2q_s8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
%vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx2.i
}
define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx3q_s8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
%vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx3.i
}
define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx4q_s8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
%vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx4.i
}
define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vtbl1_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
ret <8 x i8> %vtbl11.i
}
define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) {
; CHECK: test_vqtbl1_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
ret <8 x i8> %vtbl1.i
}
define <8 x i8> @test_vtbl2_u8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl2_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
%vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
ret <8 x i8> %vtbl17.i
}
define <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl2_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
%vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl2.i
}
define <8 x i8> @test_vtbl3_u8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl3_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
ret <8 x i8> %vtbl212.i
}
define <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl3_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
%vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl3.i
}
define <8 x i8> @test_vtbl4_u8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl4_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
ret <8 x i8> %vtbl216.i
}
define <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl4_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
%vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl4.i
}
define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vqtbl1q_u8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %vtbl1.i
}
define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl2q_u8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
%vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl2.i
}
define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl3q_u8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
%vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl3.i
}
define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl4q_u8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
%vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl4.i
}
define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK: test_vtbx1_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
%0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
%1 = sext <8 x i1> %0 to <8 x i8>
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
ret <8 x i8> %vbsl.i
}
define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx2_u8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
%vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
ret <8 x i8> %vtbx17.i
}
define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx3_u8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
%vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
%0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
%1 = sext <8 x i1> %0 to <8 x i8>
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
ret <8 x i8> %vbsl.i
}
define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx4_u8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
%vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
ret <8 x i8> %vtbx216.i
}
define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
; CHECK: test_vqtbx1_u8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
ret <8 x i8> %vtbx1.i
}
define <8 x i8> @test_vqtbx2_u8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx2_u8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
%vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx2.i
}
define <8 x i8> @test_vqtbx3_u8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx3_u8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
%vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx3.i
}
define <8 x i8> @test_vqtbx4_u8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx4_u8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
%vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx4.i
}
define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK: test_vqtbx1q_u8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %vtbx1.i
}
define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx2q_u8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
%vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx2.i
}
define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx3q_u8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
%vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx3.i
}
define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx4q_u8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
%vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx4.i
}
define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vtbl1_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
ret <8 x i8> %vtbl11.i
}
define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) {
; CHECK: test_vqtbl1_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %a, <8 x i8> %b)
ret <8 x i8> %vtbl1.i
}
define <8 x i8> @test_vtbl2_p8([2 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl2_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %a.coerce, 1
%vtbl1.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %b)
ret <8 x i8> %vtbl17.i
}
define <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl2_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
%vtbl2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl2.i
}
define <8 x i8> @test_vtbl3_p8([3 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl3_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %a.coerce, 2
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl211.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %b)
ret <8 x i8> %vtbl212.i
}
define <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl3_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
%vtbl3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl3.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl3.i
}
define <8 x i8> @test_vtbl4_p8([4 x <8 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vtbl4_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %a.coerce, 3
%vtbl2.i = shufflevector <8 x i8> %__a.coerce.fca.0.extract.i, <8 x i8> %__a.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl215.i = shufflevector <8 x i8> %__a.coerce.fca.2.extract.i, <8 x i8> %__a.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl215.i, <8 x i8> %b)
ret <8 x i8> %vtbl216.i
}
define <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> %b) {
; CHECK: test_vqtbl4_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
%vtbl4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl4.v8i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <8 x i8> %b)
ret <8 x i8> %vtbl4.i
}
define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vqtbl1q_p8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%vtbl1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %vtbl1.i
}
define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl2q_p8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %a.coerce, 1
%vtbl2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl2.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl2.i
}
define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl3q_p8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %a.coerce, 2
%vtbl3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl3.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl3.i
}
define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) {
; CHECK: test_vqtbl4q_p8:
; CHECK: tbl {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__a.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 0
%__a.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 1
%__a.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 2
%__a.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %a.coerce, 3
%vtbl4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbl4.v16i8.v16i8(<16 x i8> %__a.coerce.fca.0.extract.i, <16 x i8> %__a.coerce.fca.1.extract.i, <16 x i8> %__a.coerce.fca.2.extract.i, <16 x i8> %__a.coerce.fca.3.extract.i, <16 x i8> %b)
ret <16 x i8> %vtbl4.i
}
define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) {
; CHECK: test_vtbx1_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbl1.i = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl1.v8i8.v16i8(<16 x i8> %vtbl1.i, <8 x i8> %c)
%0 = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
%1 = sext <8 x i1> %0 to <8 x i8>
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl11.i)
ret <8 x i8> %vbsl.i
}
define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx2_p8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <8 x i8>] %b.coerce, 1
%vtbx1.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx17.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx1.i, <8 x i8> %c)
ret <8 x i8> %vtbx17.i
}
define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx3_p8:
; CHECK: tbl {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <8 x i8>] %b.coerce, 2
%vtbl2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl211.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbl212.i = tail call <8 x i8> @llvm.aarch64.neon.vtbl2.v8i8.v16i8(<16 x i8> %vtbl2.i, <16 x i8> %vtbl211.i, <8 x i8> %c)
%0 = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
%1 = sext <8 x i1> %0 to <8 x i8>
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %1, <8 x i8> %a, <8 x i8> %vtbl212.i)
ret <8 x i8> %vbsl.i
}
define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x <8 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vtbx4_p8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <8 x i8>] %b.coerce, 3
%vtbx2.i = shufflevector <8 x i8> %__b.coerce.fca.0.extract.i, <8 x i8> %__b.coerce.fca.1.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx215.i = shufflevector <8 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %__b.coerce.fca.3.extract.i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%vtbx216.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %vtbx2.i, <16 x i8> %vtbx215.i, <8 x i8> %c)
ret <8 x i8> %vtbx216.i
}
define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) {
; CHECK: test_vqtbx1_p8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%vtbx1.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx1.v8i8.v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c)
ret <8 x i8> %vtbx1.i
}
define <8 x i8> @test_vqtbx2_p8(<8 x i8> %a, [2 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx2_p8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
%vtbx2.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx2.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx2.i
}
define <8 x i8> @test_vqtbx3_p8(<8 x i8> %a, [3 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx3_p8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
%vtbx3.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx3.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx3.i
}
define <8 x i8> @test_vqtbx4_p8(<8 x i8> %a, [4 x <16 x i8>] %b.coerce, <8 x i8> %c) {
; CHECK: test_vqtbx4_p8:
; CHECK: tbx {{v[0-9]+}}.8b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.8b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
%vtbx4.i = tail call <8 x i8> @llvm.aarch64.neon.vtbx4.v8i8.v16i8(<8 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <8 x i8> %c)
ret <8 x i8> %vtbx4.i
}
define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK: test_vqtbx1q_p8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%vtbx1.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx1.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
ret <16 x i8> %vtbx1.i
}
define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx2q_p8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [2 x <16 x i8>] %b.coerce, 1
%vtbx2.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx2.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx2.i
}
define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx3q_p8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [3 x <16 x i8>] %b.coerce, 2
%vtbx3.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx3.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx3.i
}
define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) {
; CHECK: test_vqtbx4q_p8:
; CHECK: tbx {{v[0-9]+}}.16b, {{{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b}, {{v[0-9]+}}.16b
entry:
%__b.coerce.fca.0.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 0
%__b.coerce.fca.1.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 1
%__b.coerce.fca.2.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 2
%__b.coerce.fca.3.extract.i = extractvalue [4 x <16 x i8>] %b.coerce, 3
%vtbx4.i = tail call <16 x i8> @llvm.aarch64.neon.vtbx4.v16i8.v16i8(<16 x i8> %a, <16 x i8> %__b.coerce.fca.0.extract.i, <16 x i8> %__b.coerce.fca.1.extract.i, <16 x i8> %__b.coerce.fca.2.extract.i, <16 x i8> %__b.coerce.fca.3.extract.i, <16 x i8> %c)
ret <16 x i8> %vtbx4.i
}

View File

@ -5928,3 +5928,51 @@
// CHECK-ERROR: error: lane number incompatible with layout
// CHECK-ERROR: dup b1, v3.b[16]
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Table look up
//----------------------------------------------------------------------
tbl v0.8b, {v1.8b}, v2.8b
tbl v0.8b, {v1.8b, v2.8b}, v2.8b
tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbl v0.8b, {v1.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbl v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
// CHECK-ERROR: ^
tbx v0.8b, {v1.8b}, v2.8b
tbx v0.8b, {v1.8b, v2.8b}, v2.8b
tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbx v0.8b, {v1.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: tbx v0.8b, {v1.8b, v2.8b, v3.8b, v4.8b}, v2.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid number of vectors
// CHECK-ERROR: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b, v5.16b}, v2.8b
// CHECK-ERROR: ^

View File

@ -0,0 +1,56 @@
// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//------------------------------------------------------------------------------
// Instructions across vector registers
//------------------------------------------------------------------------------
tbl v0.8b, {v1.16b}, v2.8b
tbl v0.8b, {v1.16b, v2.16b}, v2.8b
tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
// CHECK: tbl v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x00,0x02,0x0e]
// CHECK: tbl v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x20,0x02,0x0e]
// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x40,0x02,0x0e]
// CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x60,0x02,0x0e]
// CHECK: tbl v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x63,0x02,0x0e]
tbl v0.16b, {v1.16b}, v2.16b
tbl v0.16b, {v1.16b, v2.16b}, v2.16b
tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
// CHECK: tbl v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x00,0x02,0x4e]
// CHECK: tbl v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x20,0x02,0x4e]
// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x40,0x02,0x4e]
// CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x60,0x02,0x4e]
// CHECK: tbl v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x63,0x02,0x4e]
tbx v0.8b, {v1.16b}, v2.8b
tbx v0.8b, {v1.16b, v2.16b}, v2.8b
tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b
tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b
// CHECK: tbx v0.8b, {v1.16b}, v2.8b // encoding: [0x20,0x10,0x02,0x0e]
// CHECK: tbx v0.8b, {v1.16b, v2.16b}, v2.8b // encoding: [0x20,0x30,0x02,0x0e]
// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b // encoding: [0x20,0x50,0x02,0x0e]
// CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.8b // encoding: [0x20,0x70,0x02,0x0e]
// CHECK: tbx v0.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.8b // encoding: [0xe0,0x73,0x02,0x0e]
tbx v0.16b, {v1.16b}, v2.16b
tbx v0.16b, {v1.16b, v2.16b}, v2.16b
tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b
tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b
// CHECK: tbx v0.16b, {v1.16b}, v2.16b // encoding: [0x20,0x10,0x02,0x4e]
// CHECK: tbx v0.16b, {v1.16b, v2.16b}, v2.16b // encoding: [0x20,0x30,0x02,0x4e]
// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b // encoding: [0x20,0x50,0x02,0x4e]
// CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b, v4.16b}, v2.16b // encoding: [0x20,0x70,0x02,0x4e]
// CHECK: tbx v0.16b, {v30.16b, v31.16b, v0.16b, v1.16b}, v2.16b // encoding: [0xc0,0x73,0x02,0x4e]

View File

@ -1,4 +1,4 @@
G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s | FileCheck %s
#------------------------------------------------------------------------------
# Vector Integer Add/Sub
@ -2387,3 +2387,42 @@ G# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -disassemble < %s |
0x51 0x04 0x14 0x5e
0x86 0x05 0x18 0x5e
#----------------------------------------------------------------------
# Table look up
#----------------------------------------------------------------------
0x20,0x00,0x02,0x0e
0xf0,0x23,0x02,0x0e
0x20,0x40,0x02,0x0e
0xf0,0x62,0x02,0x0e
# CHECK: tbl v0.8b, {v1.16b}, v2.8b
# CHECK: tbl v16.8b, {v31.16b, v0.16b}, v2.8b
# CHECK: tbl v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
# CHECK: tbl v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
0x20,0x00,0x02,0x4e
0xf0,0x23,0x02,0x4e
0x20,0x40,0x02,0x4e
0xe0,0x63,0x02,0x4e
# CHECK: tbl v0.16b, {v1.16b}, v2.16b
# CHECK: tbl v16.16b, {v31.16b, v0.16b}, v2.16b
# CHECK: tbl v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
# CHECK: tbl v0.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b
0x20,0x10,0x02,0x0e
0xf0,0x33,0x02,0x0e
0x20,0x50,0x02,0x0e
0xf0,0x72,0x02,0x0e
# CHECK: tbx v0.8b, {v1.16b}, v2.8b
# CHECK: tbx v16.8b, {v31.16b, v0.16b}, v2.8b
# CHECK: tbx v0.8b, {v1.16b, v2.16b, v3.16b}, v2.8b
# CHECK: tbx v16.8b, {v23.16b, v24.16b, v25.16b, v26.16b}, v2.8b
0x20,0x10,0x02,0x4e
0xf0,0x33,0x02,0x4e
0x20,0x50,0x02,0x4e
0xf0,0x73,0x02,0x4e
# CHECK: tbx v0.16b, {v1.16b}, v2.16b
# CHECK: tbx v16.16b, {v31.16b, v0.16b}, v2.16b
# CHECK: tbx v0.16b, {v1.16b, v2.16b, v3.16b}, v2.16b
# CHECK: tbx v16.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, v2.16b