diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index def47393a47..b4a7352a046 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6987,8 +6987,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad( VectorType *VecTy = Shuffles[0]->getType(); unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); - // Skip illegal vector types. - if (VecSize != 64 && VecSize != 128) + // Skip if we do not have NEON and skip illegal vector types. + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to @@ -7073,8 +7073,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, const DataLayout &DL = SI->getModule()->getDataLayout(); unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); - // Skip illegal vector types. - if (SubVecSize != 64 && SubVecSize != 128) + // Skip if we do not have NEON and skip illegal vector types. + if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) return false; Value *Op0 = SVI->getOperand(0); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a0b482080c8..6f886220bd2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -11825,9 +11825,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; - // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't - // support i64/f64 element). - if ((VecSize != 64 && VecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types and vector types + // with i64/f64 elements (vldN doesn't support i64/f64 elements). + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to @@ -11915,9 +11915,10 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; - // Skip illegal sub vector types and vector types of i64/f64 element (vstN - // doesn't support i64/f64 element). - if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types and vector types + // with i64/f64 elements (vstN doesn't support i64/f64 elements). + if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) || + EltIs64Bits) return false; Value *Op0 = SVI->getOperand(0); diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll index ea3b8fa5573..545aeda8860 100644 --- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll +++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll @@ -1,7 +1,10 @@ -; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s +; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON +; RUN: llc -march=aarch64 -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON -; CHECK-LABEL: load_factor2: -; CHECK: ld2 { v0.8b, v1.8b }, [x0] +; NEON-LABEL: load_factor2: +; NEON: ld2 { v0.8b, v1.8b }, [x0] +; NONEON-LABEL: load_factor2: +; NONEON-NOT: ld2 define <8 x i8> @load_factor2(<16 x i8>* %ptr) { %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4 %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> @@ -10,8 +13,10 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) { ret <8 x i8> %add } -; CHECK-LABEL: load_factor3: -; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: load_factor3: +; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: load_factor3: +; NONEON-NOT: ld3 define <4 x i32> @load_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -21,8 +26,10 @@ define <4 x i32> @load_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_factor4: -; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: load_factor4: +; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: load_factor4: +; NONEON-NOT: ld4 define <4 x i32> @load_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -32,16 +39,20 @@ define <4 x i32> @load_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_factor2: -; CHECK: st2 { v0.8b, v1.8b }, [x0] +; NEON-LABEL: store_factor2: +; NEON: st2 { v0.8b, v1.8b }, [x0] +; NONEON-LABEL: store_factor2: +; NONEON-NOT: st2 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) { %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4 ret void } -; CHECK-LABEL: store_factor3: -; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: store_factor3: +; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: store_factor3: +; NONEON-NOT: st3 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -51,8 +62,10 @@ define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ret void } -; CHECK-LABEL: store_factor4: -; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: store_factor4: +; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: store_factor4: +; NONEON-NOT: st4 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -65,8 +78,10 @@ define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ; The following cases test that interleaved access of pointer vectors can be ; matched to ldN/stN instruction. -; CHECK-LABEL: load_ptrvec_factor2: -; CHECK: ld2 { v0.2d, v1.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor2: +; NEON: ld2 { v0.2d, v1.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor2: +; NONEON-NOT: ld2 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { %base = bitcast i32** %ptr to <4 x i32*>* %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4 @@ -74,8 +89,10 @@ define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { ret <2 x i32*> %strided.v0 } -; CHECK-LABEL: load_ptrvec_factor3: -; CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor3: +; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor3: +; NONEON-NOT: ld3 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <6 x i32*>* %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4 @@ -86,8 +103,10 @@ define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: load_ptrvec_factor4: -; CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor4: +; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor4: +; NONEON-NOT: ld4 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <8 x i32*>* %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4 @@ -98,8 +117,10 @@ define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: store_ptrvec_factor2: -; CHECK: st2 { v0.2d, v1.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor2: +; NEON: st2 { v0.2d, v1.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor2: +; NONEON-NOT: st2 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { %base = bitcast i32** %ptr to <4 x i32*>* %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -107,8 +128,10 @@ define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { ret void } -; CHECK-LABEL: store_ptrvec_factor3: -; CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor3: +; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor3: +; NONEON-NOT: st3 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) { %base = bitcast i32** %ptr to <6 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -118,8 +141,10 @@ define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ret void } -; CHECK-LABEL: store_ptrvec_factor4: -; CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor4: +; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor4: +; NONEON-NOT: st4 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) { %base = bitcast i32* %ptr to <8 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -132,8 +157,10 @@ define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ; Following cases check that shuffle maskes with undef indices can be matched ; into ldN/stN instruction. -; CHECK-LABEL: load_undef_mask_factor2: -; CHECK: ld2 { v0.4s, v1.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor2: +; NEON: ld2 { v0.4s, v1.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor2: +; NONEON-NOT: ld2 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { %base = bitcast i32* %ptr to <8 x i32>* %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4 @@ -143,8 +170,10 @@ define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor3: -; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor3: +; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor3: +; NONEON-NOT: ld3 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -154,8 +183,10 @@ define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor4: -; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor4: +; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor4: +; NONEON-NOT: ld4 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -165,8 +196,10 @@ define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_undef_mask_factor2: -; CHECK: st2 { v0.4s, v1.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor2: +; NEON: st2 { v0.4s, v1.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor2: +; NONEON-NOT: st2 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { %base = bitcast i32* %ptr to <8 x i32>* %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -174,8 +207,10 @@ define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { ret void } -; CHECK-LABEL: store_undef_mask_factor3: -; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor3: +; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor3: +; NONEON-NOT: st3 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -185,8 +220,10 @@ define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < ret void } -; CHECK-LABEL: store_undef_mask_factor4: -; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor4: +; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor4: +; NONEON-NOT: st4 define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll index a1edf39ddf2..c3aa2d6b4da 100644 --- a/test/CodeGen/ARM/arm-interleaved-accesses.ll +++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll @@ -1,7 +1,10 @@ -; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s +; RUN: llc -mtriple=arm-eabi -mattr=+neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=arm-eabi -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON -; CHECK-LABEL: load_factor2: -; CHECK: vld2.8 {d16, d17}, [r0] +; NEON-LABEL: load_factor2: +; NEON: vld2.8 {d16, d17}, [r0] +; NONEON-LABEL: load_factor2: +; NONEON-NOT: vld2 define <8 x i8> @load_factor2(<16 x i8>* %ptr) { %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4 %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> @@ -10,8 +13,10 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) { ret <8 x i8> %add } -; CHECK-LABEL: load_factor3: -; CHECK: vld3.32 {d16, d17, d18}, [r0] +; NEON-LABEL: load_factor3: +; NEON: vld3.32 {d16, d17, d18}, [r0] +; NONEON-LABEL: load_factor3: +; NONEON-NOT: vld3 define <2 x i32> @load_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <6 x i32>* %wide.vec = load <6 x i32>, <6 x i32>* %base, align 4 @@ -21,9 +26,11 @@ define <2 x i32> @load_factor3(i32* %ptr) { ret <2 x i32> %add } -; CHECK-LABEL: load_factor4: -; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! -; CHECK: vld4.32 {d17, d19, d21, d23}, [r0] +; NEON-LABEL: load_factor4: +; NEON: vld4.32 {d16, d18, d20, d22}, [r0]! +; NEON: vld4.32 {d17, d19, d21, d23}, [r0] +; NONEON-LABEL: load_factor4: +; NONEON-NOT: vld4 define <4 x i32> @load_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -33,17 +40,21 @@ define <4 x i32> @load_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_factor2: -; CHECK: vst2.8 {d16, d17}, [r0] +; NEON-LABEL: store_factor2: +; NEON: vst2.8 {d16, d17}, [r0] +; NONEON-LABEL: store_factor2: +; NONEON-NOT: vst2 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) { %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4 ret void } -; CHECK-LABEL: store_factor3: -; CHECK: vst3.32 {d16, d18, d20}, [r0]! -; CHECK: vst3.32 {d17, d19, d21}, [r0] +; NEON-LABEL: store_factor3: +; NEON: vst3.32 {d16, d18, d20}, [r0]! +; NEON: vst3.32 {d17, d19, d21}, [r0] +; NONEON-LABEL: store_factor3: +; NONEON-NOT: vst3.32 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -53,9 +64,11 @@ define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ret void } -; CHECK-LABEL: store_factor4: -; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]! -; CHECK: vst4.32 {d17, d19, d21, d23}, [r0] +; NEON-LABEL: store_factor4: +; NEON: vst4.32 {d16, d18, d20, d22}, [r0]! +; NEON: vst4.32 {d17, d19, d21, d23}, [r0] +; NONEON-LABEL: store_factor4: +; NONEON-NOT: vst4 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -68,8 +81,10 @@ define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ; The following cases test that interleaved access of pointer vectors can be ; matched to ldN/stN instruction. -; CHECK-LABEL: load_ptrvec_factor2: -; CHECK: vld2.32 {d16, d17}, [r0] +; NEON-LABEL: load_ptrvec_factor2: +; NEON: vld2.32 {d16, d17}, [r0] +; NONEON-LABEL: load_ptrvec_factor2: +; NONEON-NOT: vld2 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { %base = bitcast i32** %ptr to <4 x i32*>* %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4 @@ -77,8 +92,10 @@ define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { ret <2 x i32*> %strided.v0 } -; CHECK-LABEL: load_ptrvec_factor3: -; CHECK: vld3.32 {d16, d17, d18}, [r0] +; NEON-LABEL: load_ptrvec_factor3: +; NEON: vld3.32 {d16, d17, d18}, [r0] +; NONEON-LABEL: load_ptrvec_factor3: +; NONEON-NOT: vld3 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <6 x i32*>* %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4 @@ -89,8 +106,10 @@ define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: load_ptrvec_factor4: -; CHECK: vld4.32 {d16, d17, d18, d19}, [r0] +; NEON-LABEL: load_ptrvec_factor4: +; NEON: vld4.32 {d16, d17, d18, d19}, [r0] +; NONEON-LABEL: load_ptrvec_factor4: +; NONEON-NOT: vld4 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <8 x i32*>* %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4 @@ -101,8 +120,10 @@ define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: store_ptrvec_factor2: -; CHECK: vst2.32 {d16, d17}, [r0] +; NEON-LABEL: store_ptrvec_factor2: +; NEON: vst2.32 {d16, d17}, [r0] +; NONEON-LABEL: store_ptrvec_factor2: +; NONEON-NOT: vst2 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { %base = bitcast i32** %ptr to <4 x i32*>* %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -110,8 +131,10 @@ define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { ret void } -; CHECK-LABEL: store_ptrvec_factor3: -; CHECK: vst3.32 {d16, d17, d18}, [r0] +; NEON-LABEL: store_ptrvec_factor3: +; NEON: vst3.32 {d16, d17, d18}, [r0] +; NONEON-LABEL: store_ptrvec_factor3: +; NONEON-NOT: vst3 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) { %base = bitcast i32** %ptr to <6 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -121,8 +144,10 @@ define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ret void } -; CHECK-LABEL: store_ptrvec_factor4: -; CHECK: vst4.32 {d16, d17, d18, d19}, [r0] +; NEON-LABEL: store_ptrvec_factor4: +; NEON: vst4.32 {d16, d17, d18, d19}, [r0] +; NONEON-LABEL: store_ptrvec_factor4: +; NONEON-NOT: vst4 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) { %base = bitcast i32* %ptr to <8 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -135,8 +160,10 @@ define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ; Following cases check that shuffle maskes with undef indices can be matched ; into ldN/stN instruction. -; CHECK-LABEL: load_undef_mask_factor2: -; CHECK: vld2.32 {d16, d17, d18, d19}, [r0] +; NEON-LABEL: load_undef_mask_factor2: +; NEON: vld2.32 {d16, d17, d18, d19}, [r0] +; NONEON-LABEL: load_undef_mask_factor2: +; NONEON-NOT: vld2 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { %base = bitcast i32* %ptr to <8 x i32>* %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4 @@ -146,9 +173,11 @@ define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor3: -; CHECK: vld3.32 {d16, d18, d20}, [r0]! -; CHECK: vld3.32 {d17, d19, d21}, [r0] +; NEON-LABEL: load_undef_mask_factor3: +; NEON: vld3.32 {d16, d18, d20}, [r0]! +; NEON: vld3.32 {d17, d19, d21}, [r0] +; NONEON-LABEL: load_undef_mask_factor3: +; NONEON-NOT: vld3 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -158,9 +187,11 @@ define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor4: -; CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! -; CHECK: vld4.32 {d17, d19, d21, d23}, [r0] +; NEON-LABEL: load_undef_mask_factor4: +; NEON: vld4.32 {d16, d18, d20, d22}, [r0]! +; NEON: vld4.32 {d17, d19, d21, d23}, [r0] +; NONEON-LABEL: load_undef_mask_factor4: +; NONEON-NOT: vld4 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -170,8 +201,10 @@ define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_undef_mask_factor2: -; CHECK: vst2.32 {d16, d17, d18, d19}, [r0] +; NEON-LABEL: store_undef_mask_factor2: +; NEON: vst2.32 {d16, d17, d18, d19}, [r0] +; NONEON-LABEL: store_undef_mask_factor2: +; NONEON-NOT: vst2 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { %base = bitcast i32* %ptr to <8 x i32>* %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -179,9 +212,11 @@ define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { ret void } -; CHECK-LABEL: store_undef_mask_factor3: -; CHECK: vst3.32 {d16, d18, d20}, [r0]! -; CHECK: vst3.32 {d17, d19, d21}, [r0] +; NEON-LABEL: store_undef_mask_factor3: +; NEON: vst3.32 {d16, d18, d20}, [r0]! +; NEON: vst3.32 {d17, d19, d21}, [r0] +; NONEON-LABEL: store_undef_mask_factor3: +; NONEON-NOT: vst3 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -191,9 +226,11 @@ define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < ret void } -; CHECK-LABEL: store_undef_mask_factor4: -; CHECK: vst4.32 {d16, d18, d20, d22}, [r0]! -; CHECK: vst4.32 {d17, d19, d21, d23}, [r0] +; NEON-LABEL: store_undef_mask_factor4: +; NEON: vst4.32 {d16, d18, d20, d22}, [r0]! +; NEON: vst4.32 {d17, d19, d21, d23}, [r0] +; NONEON-LABEL: store_undef_mask_factor4: +; NONEON-NOT: vst4 define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -205,8 +242,10 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < ; The following test cases check that address spaces are properly handled -; CHECK-LABEL: load_address_space -; CHECK: vld3.32 +; NEON-LABEL: load_address_space +; NEON: vld3.32 +; NONEON-LABEL: load_address_space +; NONEON-NOT: vld3 define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) { %tmp = load <4 x i32>, <4 x i32> addrspace(1)* %A %interleaved = shufflevector <4 x i32> %tmp, <4 x i32> undef, <2 x i32> @@ -214,8 +253,10 @@ define void @load_address_space(<4 x i32> addrspace(1)* %A, <2 x i32>* %B) { ret void } -; CHECK-LABEL: store_address_space -; CHECK: vst2.32 +; NEON-LABEL: store_address_space +; NEON: vst2.32 +; NONEON-LABEL: store_address_space +; NONEON-NOT: vst2 define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspace(1)* %C) { %tmp0 = load <2 x i32>, <2 x i32>* %A %tmp1 = load <2 x i32>, <2 x i32>* %B