mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-21 23:10:54 +00:00
[ARM][MVE] Add intrinsics for immediate shifts. (reland)
This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which shift every lane of a vector left or right by a compile-time immediate. They mostly work by expanding to the IR `shl`, `lshr` and `ashr` operations, with their second operand being a vector splat of the immediate. There's a fiddly special case, though. ACLE specifies that the immediate in `vshrq_n` can take values up to and including the bit size of the vector lane. But LLVM IR thinks that shifting right by the full size of the lane is UB, and feels free to replace the `lshr` with an `undef` halfway through the optimization pipeline. Hence, to keep this legal in source code, I have to detect it at codegen time. Logical (unsigned) right shifts by the element size are handled by simply emitting the zero vector; arithmetic ones are converted into a shift of one bit less, which will always give the same output. In order to do that check, I also had to enhance the tablegen MveEmitter so that it can cope with converting a builtin function's operand into a bare integer to pass to a code-generating subfunction. Previously the only bare integers it knew how to handle were flags generated from within `arm_mve.td`. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen, MarkMurrayARM Subscribers: echristo, hokein, rdhindsa, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71065
This commit is contained in:
parent
c8b74ee264
commit
bd0f271c9e
@ -609,6 +609,33 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>;
|
||||
defm vstrwq: scatter_offset_both<T.All32, u32, 2>;
|
||||
defm vstrdq: scatter_offset_both<T.Int64, u64, 3>;
|
||||
|
||||
// Defines the two predicated variants of an immediate vector shift, `_m_n`
// and `_x_n`. Both call the IR intrinsic named by predIntrName with the
// operands ($v, $sh, <unsignedFlag entries, if any>, $pred, <last operand>).
// The last operand is the caller's $inactive vector for `_m_n` and
// (undef Vector) for `_x_n`, which takes no $inactive argument.
//   immtype      - Immediate class used for the shift count $sh.
//   predIntrName - name of the predicated IR intrinsic to call.
//   unsignedFlag - optional extra dag operands spliced in after $sh
//                  (empty by default).
multiclass PredicatedImmediateVectorShift<
|
||||
Immediate immtype, string predIntrName, list<dag> unsignedFlag = []> {
|
||||
// One-element foreach: binds predIntr to the IRInt so both defs can share it.
foreach predIntr = [IRInt<predIntrName, [Vector, Predicate]>] in {
|
||||
def _m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v,
|
||||
immtype:$sh, Predicate:$pred),
|
||||
!con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
|
||||
(predIntr $pred, $inactive))>;
|
||||
def _x_n: Intrinsic<Vector, (args Vector:$v, immtype:$sh,
|
||||
Predicate:$pred),
|
||||
!con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?),
|
||||
(predIntr $pred, (undef Vector)))>;
|
||||
}
|
||||
}
|
||||
|
||||
// Immediate shifts, instantiated for every type in T.Int.
//  - vshlq_n expands directly to an IR shl whose second operand is a splat of
//    the immediate; its shift count uses imm_0toNm1.
//  - vshrq_n uses imm_1toN and goes through the immshr helper, which is also
//    given (unsignedflag Scalar).
//  - The predicated forms come from PredicatedImmediateVectorShift and call
//    the "shl_imm_predicated" / "shr_imm_predicated" IR intrinsics; only the
//    right shift passes the unsigned flag.
let params = T.Int in {
|
||||
def vshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh),
|
||||
(shl $v, (splat (Scalar $sh)))>;
|
||||
defm vshlq: PredicatedImmediateVectorShift<imm_0toNm1, "shl_imm_predicated">;
|
||||
|
||||
// NOTE(review): pnt = PNT_NType appears to make the polymorphic name drop the
// _n suffix (the tests call vshrq / vshrq_m / vshrq_x, but vshlq_n /
// vshlq_m_n / vshlq_x_n) - confirm against the PNT_NType definition.
let pnt = PNT_NType in {
|
||||
def vshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh),
|
||||
(immshr $v, $sh, (unsignedflag Scalar))>;
|
||||
defm vshrq: PredicatedImmediateVectorShift<imm_1toN, "shr_imm_predicated",
|
||||
[(unsignedflag Scalar)]>;
|
||||
}
|
||||
}
|
||||
|
||||
// Base class for the scalar shift intrinsics.
|
||||
class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
|
||||
Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
|
||||
|
@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">;
|
||||
// Code-generation operators. Each IRBuilder<"..."> def names a builder method
// (presumably invoked on the llvm IRBuilder - confirm against the IRBuilder
// class definition, which is outside this excerpt).
def sub: IRBuilder<"CreateSub">;
|
||||
def shl: IRBuilder<"CreateShl">;
|
||||
def lshr: IRBuilder<"CreateLShr">;
|
||||
// immshr is routed to the C++ helper named "MVEImmediateShr" instead of a
// builder method. special_params marks operands 1 and 2 as bare integers of
// C++ type unsigned and bool; in the use (immshr $v, $sh, (unsignedflag
// Scalar)) those would be the shift count and the unsigned flag.
// NOTE(review): index base and exact IRBuilderIntParam semantics are defined
// elsewhere - confirm.
def immshr: CGHelperFn<"MVEImmediateShr"> {
|
||||
let special_params = [IRBuilderIntParam<1, "unsigned">,
|
||||
IRBuilderIntParam<2, "bool">];
|
||||
}
|
||||
def fadd: IRBuilder<"CreateFAdd">;
|
||||
def fmul: IRBuilder<"CreateFMul">;
|
||||
def fsub: IRBuilder<"CreateFSub">;
|
||||
@ -318,8 +322,8 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
|
||||
//
|
||||
// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
|
||||
// inclusive.
|
||||
// NOTE(review): imm_1toN and imm_0toNm1 are each defined twice below, first
// with u32 and then with sint. This excerpt is a diff hunk rendered without
// +/- markers, so the u32 pair looks like the removed text and the sint pair
// like its replacement - confirm that only one pair exists in the real file.
def imm_1toN : Immediate<u32, IB_EltBit<1>>;
|
||||
def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
|
||||
def imm_1toN : Immediate<sint, IB_EltBit<1>>;
|
||||
def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>;
|
||||
|
||||
// imm_lane has to be the index of a vector lane in the main vector type, i.e
|
||||
// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
|
||||
|
@ -6916,6 +6916,15 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Integer>
|
||||
static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
|
||||
llvm::APSInt IntVal;
|
||||
bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
|
||||
assert(IsConst && "Sema should have checked this was a constant");
|
||||
(void)IsConst;
|
||||
return IntVal.getExtValue();
|
||||
}
|
||||
|
||||
static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
|
||||
llvm::Type *T, bool Unsigned) {
|
||||
// Helper function called by Tablegen-constructed ARM MVE builtin codegen,
|
||||
@ -6923,6 +6932,27 @@ static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
|
||||
return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);
|
||||
}
|
||||
|
||||
/// Emit an MVE vector shift-right of \p V by the immediate \p Shift.
///
/// \p Unsigned selects a logical shift (LShr); otherwise the shift is
/// arithmetic (AShr). MVE allows a shift count equal to the lane width, which
/// would be undefined behavior as a plain IR shift, so that case is rewritten
/// into well-defined IR here.
static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,
                                    uint32_t Shift, bool Unsigned) {
  auto *VecTy = V->getType();
  const unsigned LaneBits =
      VecTy->getVectorElementType()->getPrimitiveSizeInBits();
  const bool IsFullLaneShift = Shift == LaneBits;

  if (Unsigned) {
    // Logically shifting out every bit leaves zero in each lane.
    if (IsFullLaneShift)
      return llvm::Constant::getNullValue(VecTy);
    return Builder.CreateLShr(V, Shift);
  }

  // An arithmetic shift by the full lane width produces the same result as a
  // shift by one bit fewer, so emit the latter, which is legal IR.
  const uint32_t SignedShift = IsFullLaneShift ? Shift - 1 : Shift;
  return Builder.CreateAShr(V, SignedShift);
}
|
||||
|
||||
static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {
|
||||
// MVE-specific helper function for a vector splat, which infers the element
|
||||
// count of the output vector by knowing that MVE vectors are all 128 bits
|
||||
|
722
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
Normal file
722
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
Normal file
@ -0,0 +1,722 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
||||
// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
|
||||
|
||||
#include <arm_mve.h>
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Left shift of s8 lanes by immediate 5; the POLYMORPHIC build calls the overloaded vshlq_n.
int8x16_t test_vshlq_n_s8(int8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 5);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_s8(a, 5);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Left shift of s16 lanes by immediate 5; the POLYMORPHIC build calls the overloaded vshlq_n.
int16x8_t test_vshlq_n_s16(int16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 5);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_s16(a, 5);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 18, i32 18, i32 18, i32 18>
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Left shift of s32 lanes by immediate 18; the POLYMORPHIC build calls the overloaded vshlq_n.
int32x4_t test_vshlq_n_s32(int32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 18);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_s32(a, 18);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_s8_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Boundary case: left shift of s8 lanes by 0, the lower limit of the immediate range.
int8x16_t test_vshlq_n_s8_trivial(int8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 0);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_s8(a, 0);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_s16_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Boundary case: left shift of s16 lanes by 0, the lower limit of the immediate range.
int16x8_t test_vshlq_n_s16_trivial(int16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 0);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_s16(a, 0);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_s32_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Boundary case: left shift of s32 lanes by 0, the lower limit of the immediate range.
int32x4_t test_vshlq_n_s32_trivial(int32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 0);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_s32(a, 0);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Left shift of u8 lanes by immediate 3; the POLYMORPHIC build calls the overloaded vshlq_n.
uint8x16_t test_vshlq_n_u8(uint8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 3);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_u8(a, 3);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Left shift of u16 lanes by immediate 11; the POLYMORPHIC build calls the overloaded vshlq_n.
uint16x8_t test_vshlq_n_u16(uint16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 11);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_u16(a, 11);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Left shift of u32 lanes by immediate 7; the POLYMORPHIC build calls the overloaded vshlq_n.
uint32x4_t test_vshlq_n_u32(uint32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 7);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_u32(a, 7);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_u8_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Boundary case: left shift of u8 lanes by 0, the lower limit of the immediate range.
uint8x16_t test_vshlq_n_u8_trivial(uint8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 0);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_u8(a, 0);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_u16_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Boundary case: left shift of u16 lanes by 0, the lower limit of the immediate range.
uint16x8_t test_vshlq_n_u16_trivial(uint16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 0);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_u16(a, 0);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_n_u32_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Boundary case: left shift of u32 lanes by 0, the lower limit of the immediate range.
uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_n(a, 0);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_n_u32(a, 0);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Right shift of signed 8-bit lanes by 4 (ashr per the CHECK lines); the POLYMORPHIC build calls vshrq.
int8x16_t test_vshrq_n_s8(int8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 4);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_s8(a, 4);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Right shift of signed 16-bit lanes by 10 (ashr per the CHECK lines); the POLYMORPHIC build calls vshrq.
int16x8_t test_vshrq_n_s16(int16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 10);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_s16(a, 10);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 19, i32 19, i32 19, i32 19>
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Right shift of signed 32-bit lanes by 19 (ashr per the CHECK lines); the POLYMORPHIC build calls vshrq.
int32x4_t test_vshrq_n_s32(int32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 19);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_s32(a, 19);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_s8_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Boundary case: signed shift by the full 8-bit lane width; the CHECK lines expect ashr by 7 instead.
int8x16_t test_vshrq_n_s8_trivial(int8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 8);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_s8(a, 8);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_s16_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Boundary case: signed shift by the full 16-bit lane width; the CHECK lines expect ashr by 15 instead.
int16x8_t test_vshrq_n_s16_trivial(int16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 16);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_s16(a, 16);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_s32_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Boundary case: signed shift by the full 32-bit lane width; the CHECK lines expect ashr by 31 instead.
int32x4_t test_vshrq_n_s32_trivial(int32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 32);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_s32(a, 32);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP0]]
|
||||
//
|
||||
// Right shift of unsigned 8-bit lanes by 1 (lshr per the CHECK lines); the POLYMORPHIC build calls vshrq.
uint8x16_t test_vshrq_n_u8(uint8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 1);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_u8(a, 1);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP0]]
|
||||
//
|
||||
// Right shift of unsigned 16-bit lanes by 10 (lshr per the CHECK lines); the POLYMORPHIC build calls vshrq.
uint16x8_t test_vshrq_n_u16(uint16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 10);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_u16(a, 10);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 10, i32 10, i32 10, i32 10>
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
|
||||
//
|
||||
// Right shift of unsigned 32-bit lanes by 10 (lshr per the CHECK lines); the POLYMORPHIC build calls vshrq.
uint32x4_t test_vshrq_n_u32(uint32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 10);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_u32(a, 10);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_u8_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: ret <16 x i8> zeroinitializer
|
||||
//
|
||||
// Boundary case: unsigned shift by the full 8-bit lane width; the CHECK lines expect a zero vector.
uint8x16_t test_vshrq_n_u8_trivial(uint8x16_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 8);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_u8(a, 8);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_u16_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: ret <8 x i16> zeroinitializer
|
||||
//
|
||||
// Boundary case: unsigned shift by the full 16-bit lane width; the CHECK lines expect a zero vector.
uint16x8_t test_vshrq_n_u16_trivial(uint16x8_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 16);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_u16(a, 16);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_n_u32_trivial(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: ret <4 x i32> zeroinitializer
|
||||
//
|
||||
// Boundary case: unsigned shift by the full 32-bit lane width; the CHECK lines expect a zero vector.
uint32x4_t test_vshrq_n_u32_trivial(uint32x4_t a)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq(a, 32);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_n_u32(a, 32);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_m_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) left shift of s8 lanes by 6, passing both inactive and the predicate p.
int8x16_t test_vshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_m_n(inactive, a, 6, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_m_n_s8(inactive, a, 6, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_m_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) left shift of s16 lanes by 13, passing both inactive and the predicate p.
int16x8_t test_vshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_m_n(inactive, a, 13, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_m_n_s16(inactive, a, 13, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_m_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) left shift of s32 lanes by 0, the lower limit of the immediate range.
int32x4_t test_vshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_m_n(inactive, a, 0, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_m_n_s32(inactive, a, 0, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_m_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) left shift of u8 lanes by 3, passing both inactive and the predicate p.
uint8x16_t test_vshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_m_n(inactive, a, 3, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_m_n_u8(inactive, a, 3, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_m_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) left shift of u16 lanes by 1, passing both inactive and the predicate p.
uint16x8_t test_vshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_m_n(inactive, a, 1, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_m_n_u16(inactive, a, 1, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_m_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) left shift of u32 lanes by 24, passing both inactive and the predicate p.
uint32x4_t test_vshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_m_n(inactive, a, 24, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_m_n_u32(inactive, a, 24, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_m_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) right shift of s8 lanes by 2; the CHECK lines expect the unsigned-flag operand to be 0.
int8x16_t test_vshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_m(inactive, a, 2, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_m_n_s8(inactive, a, 2, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_m_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) right shift of s16 lanes by 3; the CHECK lines expect the unsigned-flag operand to be 0.
int16x8_t test_vshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_m(inactive, a, 3, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_m_n_s16(inactive, a, 3, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_m_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) right shift of s32 lanes by 13; the CHECK lines expect the unsigned-flag operand to be 0.
int32x4_t test_vshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_m(inactive, a, 13, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_m_n_s32(inactive, a, 13, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_m_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) right shift of u8 lanes by 4; the CHECK lines expect the unsigned-flag operand to be 1.
uint8x16_t test_vshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_m(inactive, a, 4, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_m_n_u8(inactive, a, 4, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_m_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) right shift of u16 lanes by 14; the CHECK lines expect the unsigned-flag operand to be 1.
uint16x8_t test_vshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_m(inactive, a, 14, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_m_n_u16(inactive, a, 14, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_m_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 21, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
// Predicated (_m) right shift of u32 lanes by 21; the CHECK lines expect the unsigned-flag operand to be 1.
uint32x4_t test_vshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_m(inactive, a, 21, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_m_n_u32(inactive, a, 21, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_x_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) left shift of s8 lanes by 1; no inactive argument, the CHECK lines expect undef there.
int8x16_t test_vshlq_x_n_s8(int8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_x_n(a, 1, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_x_n_s8(a, 1, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_x_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 15, <8 x i1> [[TMP1]], <8 x i16> undef)
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) left shift of s16 lanes by 15; no inactive argument, the CHECK lines expect undef there.
int16x8_t test_vshlq_x_n_s16(int16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_x_n(a, 15, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_x_n_s16(a, 15, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_x_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, <4 x i1> [[TMP1]], <4 x i32> undef)
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) left shift of s32 lanes by 13; no inactive argument, the CHECK lines expect undef there.
int32x4_t test_vshlq_x_n_s32(int32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_x_n(a, 13, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_x_n_s32(a, 13, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_x_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef)
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) left shift of u8 lanes by 4; no inactive argument, the CHECK lines expect undef there.
uint8x16_t test_vshlq_x_n_u8(uint8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_x_n(a, 4, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_x_n_u8(a, 4, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_x_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, <8 x i1> [[TMP1]], <8 x i16> undef)
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) left shift of u16 lanes by 10; no inactive argument, the CHECK lines expect undef there.
uint16x8_t test_vshlq_x_n_u16(uint16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_x_n(a, 10, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_x_n_u16(a, 10, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshlq_x_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 30, <4 x i1> [[TMP1]], <4 x i32> undef)
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) left shift of u32 lanes by 30; no inactive argument, the CHECK lines expect undef there.
uint32x4_t test_vshlq_x_n_u32(uint32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshlq_x_n(a, 30, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshlq_x_n_u32(a, 30, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_x_n_s8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef)
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
// Predicated (_x) right shift of s8 lanes by 4; the CHECK lines expect unsigned flag 0 and an undef inactive operand.
int8x16_t test_vshrq_x_n_s8(int8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_x(a, 4, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_x_n_s8(a, 4, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_x_n_s16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
int16x8_t test_vshrq_x_n_s16(int16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_x(a, 10, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_x_n_s16(a, 10, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_x_n_s32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 7, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
int32x4_t test_vshrq_x_n_s32(int32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_x(a, 7, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_x_n_s32(a, 7, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_x_n_u8(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
|
||||
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
|
||||
//
|
||||
uint8x16_t test_vshrq_x_n_u8(uint8x16_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_x(a, 7, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_x_n_u8(a, 7, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_x_n_u16(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 7, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
|
||||
// CHECK-NEXT: ret <8 x i16> [[TMP2]]
|
||||
//
|
||||
uint16x8_t test_vshrq_x_n_u16(uint16x8_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_x(a, 7, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_x_n_u16(a, 7, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_vshrq_x_n_u32(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
|
||||
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
|
||||
// CHECK-NEXT: ret <4 x i32> [[TMP2]]
|
||||
//
|
||||
uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p)
|
||||
{
|
||||
#ifdef POLYMORPHIC
|
||||
return vshrq_x(a, 6, p);
|
||||
#else /* POLYMORPHIC */
|
||||
return vshrq_x_n_u32(a, 6, p);
|
||||
#endif /* POLYMORPHIC */
|
||||
}
|
@ -470,6 +470,10 @@ public:
|
||||
virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0;
|
||||
virtual bool hasIntegerConstantValue() const { return false; }
|
||||
virtual uint32_t integerConstantValue() const { return 0; }
|
||||
virtual bool hasIntegerValue() const { return false; }
|
||||
virtual std::string getIntegerValue(const std::string &) {
|
||||
llvm_unreachable("non-working Result::getIntegerValue called");
|
||||
}
|
||||
virtual std::string typeName() const { return "Value *"; }
|
||||
|
||||
// Mostly, when a code-generation operation has a dependency on prior
|
||||
@ -544,8 +548,9 @@ class BuiltinArgResult : public Result {
|
||||
public:
|
||||
unsigned ArgNum;
|
||||
bool AddressType;
|
||||
BuiltinArgResult(unsigned ArgNum, bool AddressType)
|
||||
: ArgNum(ArgNum), AddressType(AddressType) {}
|
||||
bool Immediate;
|
||||
BuiltinArgResult(unsigned ArgNum, bool AddressType, bool Immediate)
|
||||
: ArgNum(ArgNum), AddressType(AddressType), Immediate(Immediate) {}
|
||||
void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override {
|
||||
OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr")
|
||||
<< "(E->getArg(" << ArgNum << "))";
|
||||
@ -559,6 +564,11 @@ public:
|
||||
return "(" + varname() + ".getPointer())";
|
||||
return Result::asValue();
|
||||
}
|
||||
bool hasIntegerValue() const override { return Immediate; }
|
||||
std::string getIntegerValue(const std::string &IntType) override {
|
||||
return "GetIntegerConstantValue<" + IntType + ">(E->getArg(" +
|
||||
utostr(ArgNum) + "), getContext())";
|
||||
}
|
||||
};
|
||||
|
||||
// Result subclass for an integer literal appearing in Tablegen. This may need
|
||||
@ -633,36 +643,42 @@ public:
|
||||
StringRef CallPrefix;
|
||||
std::vector<Ptr> Args;
|
||||
std::set<unsigned> AddressArgs;
|
||||
std::map<unsigned, std::string> IntConstantArgs;
|
||||
std::map<unsigned, std::string> IntegerArgs;
|
||||
IRBuilderResult(StringRef CallPrefix, std::vector<Ptr> Args,
|
||||
std::set<unsigned> AddressArgs,
|
||||
std::map<unsigned, std::string> IntConstantArgs)
|
||||
std::map<unsigned, std::string> IntegerArgs)
|
||||
: CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs),
|
||||
IntConstantArgs(IntConstantArgs) {}
|
||||
IntegerArgs(IntegerArgs) {}
|
||||
void genCode(raw_ostream &OS,
|
||||
CodeGenParamAllocator &ParamAlloc) const override {
|
||||
OS << CallPrefix;
|
||||
const char *Sep = "";
|
||||
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
|
||||
Ptr Arg = Args[i];
|
||||
auto it = IntConstantArgs.find(i);
|
||||
if (it != IntConstantArgs.end()) {
|
||||
assert(Arg->hasIntegerConstantValue());
|
||||
OS << Sep << "static_cast<" << it->second << ">("
|
||||
<< ParamAlloc.allocParam("unsigned",
|
||||
auto it = IntegerArgs.find(i);
|
||||
|
||||
OS << Sep;
|
||||
Sep = ", ";
|
||||
|
||||
if (it != IntegerArgs.end()) {
|
||||
if (Arg->hasIntegerConstantValue())
|
||||
OS << "static_cast<" << it->second << ">("
|
||||
<< ParamAlloc.allocParam(it->second,
|
||||
utostr(Arg->integerConstantValue()))
|
||||
<< ")";
|
||||
else if (Arg->hasIntegerValue())
|
||||
OS << ParamAlloc.allocParam(it->second,
|
||||
Arg->getIntegerValue(it->second));
|
||||
} else {
|
||||
OS << Sep << Arg->varname();
|
||||
OS << Arg->varname();
|
||||
}
|
||||
Sep = ", ";
|
||||
}
|
||||
OS << ")";
|
||||
}
|
||||
void morePrerequisites(std::vector<Ptr> &output) const override {
|
||||
for (unsigned i = 0, e = Args.size(); i < e; ++i) {
|
||||
Ptr Arg = Args[i];
|
||||
if (IntConstantArgs.find(i) != IntConstantArgs.end())
|
||||
if (IntegerArgs.find(i) != IntegerArgs.end())
|
||||
continue;
|
||||
output.push_back(Arg);
|
||||
}
|
||||
@ -981,8 +997,8 @@ public:
|
||||
const Type *Param);
|
||||
Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum,
|
||||
const Result::Scope &Scope, const Type *Param);
|
||||
Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType,
|
||||
bool Promote);
|
||||
Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote,
|
||||
bool Immediate);
|
||||
|
||||
// Constructor and top-level functions.
|
||||
|
||||
@ -1155,17 +1171,17 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope,
|
||||
Args.push_back(getCodeForDagArg(D, i, Scope, Param));
|
||||
if (Op->isSubClassOf("IRBuilderBase")) {
|
||||
std::set<unsigned> AddressArgs;
|
||||
std::map<unsigned, std::string> IntConstantArgs;
|
||||
std::map<unsigned, std::string> IntegerArgs;
|
||||
for (Record *sp : Op->getValueAsListOfDefs("special_params")) {
|
||||
unsigned Index = sp->getValueAsInt("index");
|
||||
if (sp->isSubClassOf("IRBuilderAddrParam")) {
|
||||
AddressArgs.insert(Index);
|
||||
} else if (sp->isSubClassOf("IRBuilderIntParam")) {
|
||||
IntConstantArgs[Index] = sp->getValueAsString("type");
|
||||
IntegerArgs[Index] = sp->getValueAsString("type");
|
||||
}
|
||||
}
|
||||
return std::make_shared<IRBuilderResult>(
|
||||
Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs);
|
||||
return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"),
|
||||
Args, AddressArgs, IntegerArgs);
|
||||
} else if (Op->isSubClassOf("IRIntBase")) {
|
||||
std::vector<const Type *> ParamTypes;
|
||||
for (Record *RParam : Op->getValueAsListOfDefs("params"))
|
||||
@ -1215,9 +1231,9 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum,
|
||||
}
|
||||
|
||||
Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType,
|
||||
bool Promote) {
|
||||
Result::Ptr V =
|
||||
std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType));
|
||||
bool Promote, bool Immediate) {
|
||||
Result::Ptr V = std::make_shared<BuiltinArgResult>(
|
||||
ArgNum, isa<PointerType>(ArgType), Immediate);
|
||||
|
||||
if (Promote) {
|
||||
if (const auto *ST = dyn_cast<ScalarType>(ArgType)) {
|
||||
@ -1291,17 +1307,14 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
|
||||
const Type *ArgType = ME.getType(TypeInit, Param);
|
||||
ArgTypes.push_back(ArgType);
|
||||
|
||||
// The argument will usually have a name in the arguments dag, which goes
|
||||
// into the variable-name scope that the code gen will refer to.
|
||||
StringRef ArgName = ArgsDag->getArgNameStr(i);
|
||||
if (!ArgName.empty())
|
||||
Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote);
|
||||
|
||||
// If the argument is a subclass of Immediate, record the details about
|
||||
// what values it can take, for Sema checking.
|
||||
bool Immediate = false;
|
||||
if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) {
|
||||
Record *TypeRec = TypeDI->getDef();
|
||||
if (TypeRec->isSubClassOf("Immediate")) {
|
||||
Immediate = true;
|
||||
|
||||
Record *Bounds = TypeRec->getValueAsDef("bounds");
|
||||
ImmediateArg &IA = ImmediateArgs[i];
|
||||
if (Bounds->isSubClassOf("IB_ConstRange")) {
|
||||
@ -1315,7 +1328,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
|
||||
IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
|
||||
IA.i1 = 0;
|
||||
IA.i2 = 128 / Param->sizeInBits() - 1;
|
||||
} else if (Bounds->getName() == "IB_EltBit") {
|
||||
} else if (Bounds->isSubClassOf("IB_EltBit")) {
|
||||
IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
|
||||
IA.i1 = Bounds->getValueAsInt("base");
|
||||
IA.i2 = IA.i1 + Param->sizeInBits() - 1;
|
||||
@ -1332,6 +1345,12 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The argument will usually have a name in the arguments dag, which goes
|
||||
// into the variable-name scope that the code gen will refer to.
|
||||
StringRef ArgName = ArgsDag->getArgNameStr(i);
|
||||
if (!ArgName.empty())
|
||||
Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote, Immediate);
|
||||
}
|
||||
|
||||
// Finally, go through the codegen dag and translate it into a Result object
|
||||
|
@ -913,6 +913,14 @@ defm int_arm_mve_vstr_scatter_offset: MVEPredicated<
|
||||
[], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty,
|
||||
llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>;
|
||||
|
||||
def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag
|
||||
llvm_anyvector_ty, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// MVE scalar shifts.
|
||||
class ARM_MVE_qrshift_single<list<LLVMType> value,
|
||||
list<LLVMType> saturate = []> :
|
||||
|
@ -2816,27 +2816,39 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
|
||||
let Inst{21} = 0b1;
|
||||
}
|
||||
|
||||
multiclass MVE_immediate_shift_patterns_inner<
|
||||
MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op,
|
||||
Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> {
|
||||
|
||||
def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)),
|
||||
(VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>;
|
||||
|
||||
def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm),
|
||||
!dag(pred_int, unsignedFlag, ?),
|
||||
(pred_int (VTI.Pred VCCR:$mask),
|
||||
(VTI.Vec MQPR:$inactive)))),
|
||||
(VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
|
||||
ARMVCCThen, (VTI.Pred VCCR:$mask),
|
||||
(VTI.Vec MQPR:$inactive)))>;
|
||||
}
|
||||
|
||||
multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI,
|
||||
Operand imm_operand_type> {
|
||||
defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
|
||||
ARMvshlImm, int_arm_mve_shl_imm_predicated,
|
||||
!cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>;
|
||||
defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
|
||||
ARMvshruImm, int_arm_mve_shr_imm_predicated,
|
||||
!cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>;
|
||||
defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type,
|
||||
ARMvshrsImm, int_arm_mve_shr_imm_predicated,
|
||||
!cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>;
|
||||
}
|
||||
|
||||
let Predicates = [HasMVEInt] in {
|
||||
def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
|
||||
(v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
|
||||
def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
|
||||
(v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
|
||||
def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
|
||||
(v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;
|
||||
|
||||
def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
|
||||
(v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
|
||||
def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
|
||||
(v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
|
||||
def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
|
||||
(v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;
|
||||
|
||||
def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
|
||||
(v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
|
||||
def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
|
||||
(v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
|
||||
def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
|
||||
(v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
|
||||
defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>;
|
||||
defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>;
|
||||
defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>;
|
||||
}
|
||||
|
||||
// end of mve_shift instructions
|
||||
|
398
llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
Normal file
398
llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
Normal file
@ -0,0 +1,398 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: test_vshlq_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshl.i8 q0, q0, #5
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_vshlq_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshl.i16 q0, q0, #5
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_vshlq_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshl.i32 q0, q0, #18
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.s8 q0, q0, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.s16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.s32 q0, q0, #19
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.u8 q0, q0, #1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
|
||||
ret <16 x i8> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.u16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
|
||||
ret <8 x i16> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
|
||||
; CHECK-LABEL: test_vshrq_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vshr.u32 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
|
||||
ret <4 x i32> %0
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_m_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i8 q0, q1, #6
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_m_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i16 q0, q1, #13
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_m_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i32 q0, q1, #0
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s8 q0, q1, #2
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s16 q0, q1, #3
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s32 q0, q1, #13
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u8 q0, q1, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u16 q0, q1, #14
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_m_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u32 q0, q1, #21
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i8 q0, q0, #1
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i16 q0, q0, #15
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i32 q0, q0, #13
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i8 q0, q0, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshlq_x_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshlt.i32 q0, q0, #30
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_s8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s8 q0, q0, #4
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_s16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s16 q0, q0, #10
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_s32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.s32 q0, q0, #7
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_u8:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u8 q0, q0, #7
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
|
||||
%2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef)
|
||||
ret <16 x i8> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_u16:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u16 q0, q0, #7
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
|
||||
%2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef)
|
||||
ret <8 x i16> %2
|
||||
}
|
||||
|
||||
define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
|
||||
; CHECK-LABEL: test_vshrq_x_n_u32:
|
||||
; CHECK: @ %bb.0: @ %entry
|
||||
; CHECK-NEXT: vmsr p0, r0
|
||||
; CHECK-NEXT: vpst
|
||||
; CHECK-NEXT: vshrt.u32 q0, q0, #6
|
||||
; CHECK-NEXT: bx lr
|
||||
entry:
|
||||
%0 = zext i16 %p to i32
|
||||
%1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
|
||||
%2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
|
||||
ret <4 x i32> %2
|
||||
}
|
||||
|
||||
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
|
||||
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
|
||||
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
|
||||
|
||||
declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
|
||||
declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
|
||||
declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
|
||||
|
||||
declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
|
||||
declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
|
||||
declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
|
Loading…
Reference in New Issue
Block a user