diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 305b232e6a9..42222f539a2 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -387,16 +387,18 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10, ARM::D11, ARM::D12, ARM::D13, ARM::D14, ARM::D15 }; - // VFP3 + // VFP3: D8-D15 are callee saved and should be allocated last. + // Save other low registers for use as DPR_VFP2 and DPR_8 classes. static const unsigned ARM_DPR_VFP3[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7, - ARM::D8, ARM::D9, ARM::D10, ARM::D11, - ARM::D12, ARM::D13, ARM::D14, ARM::D15, ARM::D16, ARM::D17, ARM::D18, ARM::D19, ARM::D20, ARM::D21, ARM::D22, ARM::D23, ARM::D24, ARM::D25, ARM::D26, ARM::D27, - ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; + ARM::D28, ARM::D29, ARM::D30, ARM::D31, + ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10, ARM::D11, + ARM::D12, ARM::D13, ARM::D14, ARM::D15 }; + DPRClass::iterator DPRClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); @@ -438,6 +440,29 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { let SubRegClasses = [(DPR dsub_0, dsub_1)]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // Q4-Q7 are callee saved and should be allocated last. + // Save other low registers for use as QPR_VFP2 and QPR_8 classes. + static const unsigned ARM_QPR[] = { + ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, + ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15, + ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, + ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 }; + + QPRClass::iterator + QPRClass::allocation_order_begin(const MachineFunction &MF) const { + return ARM_QPR; + } + + QPRClass::iterator + QPRClass::allocation_order_end(const MachineFunction &MF) const { + return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned)); + } + }]; } // Subset of QPR that have 32-bit SPR subregs. @@ -463,6 +488,27 @@ def QQPR : RegisterClass<"ARM", [v4i64], [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), (QPR qsub_0, qsub_1)]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // QQ2-QQ3 are callee saved and should be allocated last. + // Save other low registers for use as QPR_VFP2 and QPR_8 classes. + static const unsigned ARM_QQPR[] = { + ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7, + ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 }; + + QQPRClass::iterator + QQPRClass::allocation_order_begin(const MachineFunction &MF) const { + return ARM_QQPR; + } + + QQPRClass::iterator + QQPRClass::allocation_order_end(const MachineFunction &MF) const { + return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned)); + } + }]; } // Subset of QQPR that have 32-bit SPR subregs. @@ -483,6 +529,26 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7), (QPR qsub_0, qsub_1, qsub_2, qsub_3)]; + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // QQQQ1 is callee saved and should be allocated last. + // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes. + static const unsigned ARM_QQQQPR[] = { + ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 }; + + QQQQPRClass::iterator + QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const { + return ARM_QQQQPR; + } + + QQQQPRClass::iterator + QQQQPRClass::allocation_order_end(const MachineFunction &MF) const { + return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned)); + } + }]; } // Condition code registers. diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index ffc47ebdf19..b9d5600d2ad 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -10,9 +10,9 @@ target triple = "thumbv7-apple-darwin10" ; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial ; redef, it cannot also get %Q0. -; CHECK: vld1.64 {d0, d1}, [r{{.}}] -; CHECK-NOT: vld1.64 {d0, d1} -; CHECK: vmov.f64 d3, d0 +; CHECK: vld1.64 {d16, d17}, [r{{.}}] +; CHECK-NOT: vld1.64 {d16, d17} +; CHECK: vmov.f64 d19, d16 define i32 @test(i8* %arg) nounwind { entry: diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll index f1d6a16f3ed..9e7a8ae28b9 100644 --- a/test/CodeGen/ARM/fpconsts.ll +++ b/test/CodeGen/ARM/fpconsts.ll @@ -11,7 +11,7 @@ entry: define double @t2(double %x) nounwind readnone optsize { entry: ; CHECK: t2: -; CHECK: vmov.f64 d1, #3.000000e+00 +; CHECK: vmov.f64 d{{.*}}, #3.000000e+00 %0 = fadd double %x, 3.000000e+00 ret double %0 } @@ -19,7 +19,7 @@ entry: define double @t3(double %x) nounwind readnone optsize { entry: ; CHECK: t3: -; CHECK: vmov.f64 d1, #-1.300000e+01 +; CHECK: vmov.f64 d{{.*}}, #-1.300000e+01 %0 = fmul double %x, -1.300000e+01 ret double %0 } diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index 687e138c1b4..9f77ad1f794 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -7,7 +7,7 @@ define void @t() nounwind { entry: ; CHECK: vmov.I64 q15, #0 ; CHECK: vmov.32 d30[0], r0 -; CHECK: vmov q0, q15 +; CHECK: vmov q8, q15 %tmp = alloca %struct.int32x4_t, align 16 call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind ret void @@ -18,7 +18,7 @@ entry: define void @t2() nounwind { entry: -; CHECK: vmov d30, d0 +; CHECK: vmov d30, d16 ; CHECK: vmov.32 r0, d30[0] %asmtmp2 = tail call i32 asm sideeffect "vmov d30, $1\0Avmov.32 $0, d30[0]\0A", "=r,w,~{d30}"(<2 x i32> undef) nounwind ret void diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 2e4f10d8a63..b96762abe3d 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ b/test/CodeGen/ARM/reg_sequence.ll @@ -122,9 +122,9 @@ return1: return2: ; CHECK: %return2 ; CHECK: vadd.i32 -; CHECK: vmov q1, q3 +; CHECK: vmov q9, q11 ; CHECK-NOT: vmov -; CHECK: vst2.32 {d0, d1, d2, d3} +; CHECK: vst2.32 {d16, d17, d18, d19} %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] @@ -136,9 +136,9 @@ return2: define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: t5: ; CHECK: vldmia -; CHECK: vmov q1, q0 +; CHECK: vmov q9, q8 ; CHECK-NOT: vmov -; CHECK: vld2.16 {d0[1], d2[1]}, [r0] +; CHECK: vld2.16 {d16[1], d18[1]}, [r0] ; CHECK-NOT: vmov ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; [#uses=1] @@ -153,8 +153,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: t6: ; CHECK: vldr.64 -; CHECK: vmov d1, d0 -; CHECK-NEXT: vld2.8 {d0[1], d1[1]} +; CHECK: vmov d17, d16 +; CHECK-NEXT: vld2.8 {d16[1], d17[1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] @@ -168,10 +168,10 @@ entry: ; CHECK: t7: ; CHECK: vld2.32 ; CHECK: vst2.32 -; CHECK: vld1.32 {d0, d1}, -; CHECK: vmov q1, q0 +; CHECK: vld1.32 {d16, d17}, +; CHECK: vmov q9, q8 ; CHECK-NOT: vmov -; CHECK: vuzp.32 q0, q1 +; CHECK: vuzp.32 q8, q9 ; CHECK: vst1.32 %0 = bitcast i32* %iptr to i8* ; [#uses=2] %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] @@ -188,7 +188,7 @@ entry: ; PR7156 define arm_aapcs_vfpcc i32 @t8() nounwind { ; CHECK: t8: -; CHECK: vrsqrte.f32 q0, q0 +; CHECK: vrsqrte.f32 q8, q8 bb.nph55.bb.nph55.split_crit_edge: br label %bb3 @@ -238,10 +238,10 @@ bb14: ; preds = %bb6 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { ; CHECK: t9: ; CHECK: vldr.64 -; CHECK-NOT: vmov d{{.*}}, d0 -; CHECK: vmov.i32 d1 -; CHECK-NEXT: vstmia r0, {d0, d1} -; CHECK-NEXT: vstmia r0, {d0, d1} +; CHECK-NOT: vmov d{{.*}}, d16 +; CHECK: vmov.i32 d17 +; CHECK-NEXT: vstmia r0, {d16, d17} +; CHECK-NEXT: vstmia r0, {d16, d17} %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1] store <4 x float> %4, <4 x float>* undef, align 16 @@ -269,9 +269,9 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { define arm_aapcs_vfpcc i32 @t10() nounwind { entry: ; CHECK: t10: -; CHECK: vmov.i32 q1, #0x3F000000 -; CHECK: vmov d0, d1 -; CHECK: vmla.f32 q0, q0, d0[0] +; CHECK: vmov.i32 q9, #0x3F000000 +; CHECK: vmov d0, d17 +; CHECK: vmla.f32 q8, q8, d0[0] %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] %2 = insertelement <4 x float> %1, float undef, i32 2 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index ae1ba2f7382..dc83e25905c 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -20,6 +20,26 @@ entry: %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 @@ -44,7 +64,16 @@ bb4: ; preds = %bb193, %entry %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1] %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] - %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2] + %tmp1 = fadd <4 x float> %20, %ld3 + %tmp2 = fadd <4 x float> %tmp1, %ld4 + %tmp3 = fadd <4 x float> %tmp2, %ld5 + %tmp4 = fadd <4 x float> %tmp3, %ld6 + %tmp5 = fadd <4 x float> %tmp4, %ld7 + %tmp6 = fadd <4 x float> %tmp5, %ld8 + %tmp7 = fadd <4 x float> %tmp6, %ld9 + %tmp8 = fadd <4 x float> %tmp7, %ld10 + %tmp9 = fadd <4 x float> %tmp8, %ld11 + %21 = fadd <4 x float> %tmp9, %ld12 %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] %tmp = extractelement <4 x i1> %22, i32 0 br i1 %tmp, label %bb193, label %bb186 diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index 194093c8418..e3318cd2069 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -161,9 +161,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ; rdar://7923010 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind { ;CHECK: vcgt_zext: -;CHECK: vcgt.f32 q0 -;CHECK: vmov.i32 q1, #0x1 -;CHECK: vand q0, q0, q1 +;CHECK: vcgt.f32 q8 +;CHECK: vmov.i32 q9, #0x1 +;CHECK: vand q8, q8, q9 %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index 05e7f509095..0fd184388da 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -96,7 +96,7 @@ define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind { define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind { entry: -; CHECK: vmov.u16 r0, d0[1] +; CHECK: vmov.u16 r0, d{{.*}}[1] %arg0_uint16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] %out_uint16_t = alloca i16 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -111,7 +111,7 @@ return: ; preds = %entry define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind { entry: -; CHECK: vmov.u8 r0, d0[1] +; CHECK: vmov.u8 r0, d{{.*}}[1] %arg0_uint8x8_t = alloca <8 x i8> ; <<8 x i8>*> [#uses=1] %out_uint8_t = alloca i8 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -126,7 +126,7 @@ return: ; preds = %entry define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind { entry: -; CHECK: vmov.u16 r0, d0[1] +; CHECK: vmov.u16 r0, d{{.*}}[1] %arg0_uint16x8_t = alloca <8 x i16> ; <<8 x i16>*> [#uses=1] %out_uint16_t = alloca i16 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -141,7 +141,7 @@ return: ; preds = %entry define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind { entry: -; CHECK: vmov.u8 r0, d0[1] +; CHECK: vmov.u8 r0, d{{.*}}[1] %arg0_uint8x16_t = alloca <16 x i8> ; <<16 x i8>*> [#uses=1] %out_uint8_t = alloca i8 ; [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index 2800475b2fd..16bd832bb10 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -3,7 +3,7 @@ define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld1.8 {d0}, [r0, :64] +;CHECK: vld1.8 {d16}, [r0, :64] %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16) ret <8 x i8> %tmp1 } @@ -43,7 +43,7 @@ define <1 x i64> @vld1i64(i64* %A) nounwind { define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;CHECK: vld1Qi8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld1.8 {d0, d1}, [r0, :64] +;CHECK: vld1.8 {d16, d17}, [r0, :64] %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) ret <16 x i8> %tmp1 } @@ -51,7 +51,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind { define <8 x i16> @vld1Qi16(i16* %A) nounwind { ;CHECK: vld1Qi16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld1.16 {d0, d1}, [r0, :128] +;CHECK: vld1.16 {d16, d17}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32) ret <8 x i16> %tmp1 diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll index fe99dd50255..3fdd7b7aa88 100644 --- a/test/CodeGen/ARM/vld2.ll +++ b/test/CodeGen/ARM/vld2.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { ;CHECK: vld2i8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld2.8 {d0, d1}, [r0, :64] +;CHECK: vld2.8 {d16, d17}, [r0, :64] %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8) %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 @@ -25,7 +25,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind { define <4 x i16> @vld2i16(i16* %A) nounwind { ;CHECK: vld2i16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld2.16 {d0, d1}, [r0, :128] +;CHECK: vld2.16 {d16, d17}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32) %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 @@ -59,7 +59,7 @@ define <2 x float> @vld2f(float* %A) nounwind { define <1 x i64> @vld2i64(i64* %A) nounwind { ;CHECK: vld2i64: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vld1.64 {d0, d1}, [r0, :128] +;CHECK: vld1.64 {d16, d17}, [r0, :128] %tmp0 = bitcast i64* %A to i8* %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32) %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0 @@ -71,7 +71,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind { define <16 x i8> @vld2Qi8(i8* %A) nounwind { ;CHECK: vld2Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld2.8 {d0, d1, d2, d3}, [r0, :64] +;CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8) %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 @@ -82,7 +82,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind { define <8 x i16> @vld2Qi16(i16* %A) nounwind { ;CHECK: vld2Qi16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld2.16 {d0, d1, d2, d3}, [r0, :128] +;CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 @@ -94,7 +94,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind { define <4 x i32> @vld2Qi32(i32* %A) nounwind { ;CHECK: vld2Qi32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld2.32 {d0, d1, d2, d3}, [r0, :256] +;CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64) %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index 7c4b8e1d122..0e541bbb5ae 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind { ;CHECK: vld3i8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld3.8 {d0, d1, d2}, [r0, :64] +;CHECK: vld3.8 {d16, d17, d18}, [r0, :64] %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32) %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 @@ -58,7 +58,7 @@ define <2 x float> @vld3f(float* %A) nounwind { define <1 x i64> @vld3i64(i64* %A) nounwind { ;CHECK: vld3i64: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld1.64 {d0, d1, d2}, [r0, :64] +;CHECK: vld1.64 {d16, d17, d18}, [r0, :64] %tmp0 = bitcast i64* %A to i8* %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 @@ -70,8 +70,8 @@ define <1 x i64> @vld3i64(i64* %A) nounwind { define <16 x i8> @vld3Qi8(i8* %A) nounwind { ;CHECK: vld3Qi8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vld3.8 {d0, d2, d4}, [r0, :64]! -;CHECK: vld3.8 {d1, d3, d5}, [r0, :64] +;CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! +;CHECK: vld3.8 {d17, d19, d21}, [r0, :64] %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32) %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll index 6330d4f7950..a616a98a25a 100644 --- a/test/CodeGen/ARM/vld4.ll +++ b/test/CodeGen/ARM/vld4.ll @@ -14,7 +14,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { ;CHECK: vld4i8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.8 {d0, d1, d2, d3}, [r0, :64] +;CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64] %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8) %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 @@ -25,7 +25,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind { define <4 x i16> @vld4i16(i16* %A) nounwind { ;CHECK: vld4i16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.16 {d0, d1, d2, d3}, [r0, :128] +;CHECK: vld4.16 {d16, d17, d18, d19}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16) %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 @@ -37,7 +37,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind { define <2 x i32> @vld4i32(i32* %A) nounwind { ;CHECK: vld4i32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.32 {d0, d1, d2, d3}, [r0, :256] +;CHECK: vld4.32 {d16, d17, d18, d19}, [r0, :256] %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32) %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 @@ -60,7 +60,7 @@ define <2 x float> @vld4f(float* %A) nounwind { define <1 x i64> @vld4i64(i64* %A) nounwind { ;CHECK: vld4i64: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld1.64 {d0, d1, d2, d3}, [r0, :256] +;CHECK: vld1.64 {d16, d17, d18, d19}, [r0, :256] %tmp0 = bitcast i64* %A to i8* %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 @@ -72,8 +72,8 @@ define <1 x i64> @vld4i64(i64* %A) nounwind { define <16 x i8> @vld4Qi8(i8* %A) nounwind { ;CHECK: vld4Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vld4.8 {d0, d2, d4, d6}, [r0, :256]! -;CHECK: vld4.8 {d1, d3, d5, d7}, [r0, :256] +;CHECK: vld4.8 {d16, d18, d20, d22}, [r0, :256]! +;CHECK: vld4.8 {d17, d19, d21, d23}, [r0, :256] %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64) %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 @@ -84,8 +84,8 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind { define <8 x i16> @vld4Qi16(i16* %A) nounwind { ;CHECK: vld4Qi16: ;Check for no alignment specifier. -;CHECK: vld4.16 {d0, d2, d4, d6}, [r0]! -;CHECK: vld4.16 {d1, d3, d5, d7}, [r0] +;CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! +;CHECK: vld4.16 {d17, d19, d21, d23}, [r0] %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1) %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll index 8cd94576b0c..4d654d771f4 100644 --- a/test/CodeGen/ARM/vmov.ll +++ b/test/CodeGen/ARM/vmov.ll @@ -2,169 +2,169 @@ define <8 x i8> @v_movi8() nounwind { ;CHECK: v_movi8: -;CHECK: vmov.i8 d0, #0x8 +;CHECK: vmov.i8 d{{.*}}, #0x8 ret <8 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <4 x i16> @v_movi16a() nounwind { ;CHECK: v_movi16a: -;CHECK: vmov.i16 d0, #0x10 +;CHECK: vmov.i16 d{{.*}}, #0x10 ret <4 x i16> < i16 16, i16 16, i16 16, i16 16 > } define <4 x i16> @v_movi16b() nounwind { ;CHECK: v_movi16b: -;CHECK: vmov.i16 d0, #0x1000 +;CHECK: vmov.i16 d{{.*}}, #0x1000 ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 > } define <4 x i16> @v_mvni16a() nounwind { ;CHECK: v_mvni16a: -;CHECK: vmvn.i16 d0, #0x10 +;CHECK: vmvn.i16 d{{.*}}, #0x10 ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > } define <4 x i16> @v_mvni16b() nounwind { ;CHECK: v_mvni16b: -;CHECK: vmvn.i16 d0, #0x1000 +;CHECK: vmvn.i16 d{{.*}}, #0x1000 ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > } define <2 x i32> @v_movi32a() nounwind { ;CHECK: v_movi32a: -;CHECK: vmov.i32 d0, #0x20 +;CHECK: vmov.i32 d{{.*}}, #0x20 ret <2 x i32> < i32 32, i32 32 > } define <2 x i32> @v_movi32b() nounwind { ;CHECK: v_movi32b: -;CHECK: vmov.i32 d0, #0x2000 +;CHECK: vmov.i32 d{{.*}}, #0x2000 ret <2 x i32> < i32 8192, i32 8192 > } define <2 x i32> @v_movi32c() nounwind { ;CHECK: v_movi32c: -;CHECK: vmov.i32 d0, #0x200000 +;CHECK: vmov.i32 d{{.*}}, #0x200000 ret <2 x i32> < i32 2097152, i32 2097152 > } define <2 x i32> @v_movi32d() nounwind { ;CHECK: v_movi32d: -;CHECK: vmov.i32 d0, #0x20000000 +;CHECK: vmov.i32 d{{.*}}, #0x20000000 ret <2 x i32> < i32 536870912, i32 536870912 > } define <2 x i32> @v_movi32e() nounwind { ;CHECK: v_movi32e: -;CHECK: vmov.i32 d0, #0x20FF +;CHECK: vmov.i32 d{{.*}}, #0x20FF ret <2 x i32> < i32 8447, i32 8447 > } define <2 x i32> @v_movi32f() nounwind { ;CHECK: v_movi32f: -;CHECK: vmov.i32 d0, #0x20FFFF +;CHECK: vmov.i32 d{{.*}}, #0x20FFFF ret <2 x i32> < i32 2162687, i32 2162687 > } define <2 x i32> @v_mvni32a() nounwind { ;CHECK: v_mvni32a: -;CHECK: vmvn.i32 d0, #0x20 +;CHECK: vmvn.i32 d{{.*}}, #0x20 ret <2 x i32> < i32 4294967263, i32 4294967263 > } define <2 x i32> @v_mvni32b() nounwind { ;CHECK: v_mvni32b: -;CHECK: vmvn.i32 d0, #0x2000 +;CHECK: vmvn.i32 d{{.*}}, #0x2000 ret <2 x i32> < i32 4294959103, i32 4294959103 > } define <2 x i32> @v_mvni32c() nounwind { ;CHECK: v_mvni32c: -;CHECK: vmvn.i32 d0, #0x200000 +;CHECK: vmvn.i32 d{{.*}}, #0x200000 ret <2 x i32> < i32 4292870143, i32 4292870143 > } define <2 x i32> @v_mvni32d() nounwind { ;CHECK: v_mvni32d: -;CHECK: vmvn.i32 d0, #0x20000000 +;CHECK: vmvn.i32 d{{.*}}, #0x20000000 ret <2 x i32> < i32 3758096383, i32 3758096383 > } define <2 x i32> @v_mvni32e() nounwind { ;CHECK: v_mvni32e: -;CHECK: vmvn.i32 d0, #0x20FF +;CHECK: vmvn.i32 d{{.*}}, #0x20FF ret <2 x i32> < i32 4294958848, i32 4294958848 > } define <2 x i32> @v_mvni32f() nounwind { ;CHECK: v_mvni32f: -;CHECK: vmvn.i32 d0, #0x20FFFF +;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF ret <2 x i32> < i32 4292804608, i32 4292804608 > } define <1 x i64> @v_movi64() nounwind { ;CHECK: v_movi64: -;CHECK: vmov.i64 d0, #0xFF0000FF0000FFFF +;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF ret <1 x i64> < i64 18374687574888349695 > } define <16 x i8> @v_movQi8() nounwind { ;CHECK: v_movQi8: -;CHECK: vmov.i8 q0, #0x8 +;CHECK: vmov.i8 q{{.*}}, #0x8 ret <16 x i8> < i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8 > } define <8 x i16> @v_movQi16a() nounwind { ;CHECK: v_movQi16a: -;CHECK: vmov.i16 q0, #0x10 +;CHECK: vmov.i16 q{{.*}}, #0x10 ret <8 x i16> < i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16 > } define <8 x i16> @v_movQi16b() nounwind { ;CHECK: v_movQi16b: -;CHECK: vmov.i16 q0, #0x1000 +;CHECK: vmov.i16 q{{.*}}, #0x1000 ret <8 x i16> < i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096, i16 4096 > } define <4 x i32> @v_movQi32a() nounwind { ;CHECK: v_movQi32a: -;CHECK: vmov.i32 q0, #0x20 +;CHECK: vmov.i32 q{{.*}}, #0x20 ret <4 x i32> < i32 32, i32 32, i32 32, i32 32 > } define <4 x i32> @v_movQi32b() nounwind { ;CHECK: v_movQi32b: -;CHECK: vmov.i32 q0, #0x2000 +;CHECK: vmov.i32 q{{.*}}, #0x2000 ret <4 x i32> < i32 8192, i32 8192, i32 8192, i32 8192 > } define <4 x i32> @v_movQi32c() nounwind { ;CHECK: v_movQi32c: -;CHECK: vmov.i32 q0, #0x200000 +;CHECK: vmov.i32 q{{.*}}, #0x200000 ret <4 x i32> < i32 2097152, i32 2097152, i32 2097152, i32 2097152 > } define <4 x i32> @v_movQi32d() nounwind { ;CHECK: v_movQi32d: -;CHECK: vmov.i32 q0, #0x20000000 +;CHECK: vmov.i32 q{{.*}}, #0x20000000 ret <4 x i32> < i32 536870912, i32 536870912, i32 536870912, i32 536870912 > } define <4 x i32> @v_movQi32e() nounwind { ;CHECK: v_movQi32e: -;CHECK: vmov.i32 q0, #0x20FF +;CHECK: vmov.i32 q{{.*}}, #0x20FF ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 > } define <4 x i32> @v_movQi32f() nounwind { ;CHECK: v_movQi32f: -;CHECK: vmov.i32 q0, #0x20FFFF +;CHECK: vmov.i32 q{{.*}}, #0x20FFFF ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 > } define <2 x i64> @v_movQi64() nounwind { ;CHECK: v_movQi64: -;CHECK: vmov.i64 q0, #0xFF0000FF0000FFFF +;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 > } @@ -173,7 +173,7 @@ define <2 x i64> @v_movQi64() nounwind { define void @vdupn128(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { entry: ;CHECK: vdupn128: -;CHECK: vmov.i8 d0, #0x80 +;CHECK: vmov.i8 d{{.*}}, #0x80 %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] store <8 x i8> , <8 x i8>* %0, align 8 ret void @@ -182,7 +182,7 @@ entry: define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind { entry: ;CHECK: vdupnneg75: -;CHECK: vmov.i8 d0, #0xB5 +;CHECK: vmov.i8 d{{.*}}, #0xB5 %0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1] store <8 x i8> , <8 x i8>* %0, align 8 ret void diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll index c43b8aa9923..70f3a4cfa28 100644 --- a/test/CodeGen/ARM/vst1.ll +++ b/test/CodeGen/ARM/vst1.ll @@ -3,7 +3,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst1i8: ;Check the alignment value. Max for this instruction is 64 bits: -;CHECK: vst1.8 {d0}, [r0, :64] +;CHECK: vst1.8 {d16}, [r0, :64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16) ret void @@ -48,7 +48,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst1Qi8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst1.8 {d0, d1}, [r0, :64] +;CHECK: vst1.8 {d16, d17}, [r0, :64] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8) ret void @@ -57,7 +57,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst1Qi16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst1.16 {d0, d1}, [r0, :128] +;CHECK: vst1.16 {d16, d17}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32) diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll index 1124640f6ae..ed2498b276b 100644 --- a/test/CodeGen/ARM/vst2.ll +++ b/test/CodeGen/ARM/vst2.ll @@ -3,7 +3,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2i8: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst2.8 {d0, d1}, [r0, :64] +;CHECK: vst2.8 {d16, d17}, [r0, :64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) ret void @@ -12,7 +12,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst2i16: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst2.16 {d0, d1}, [r0, :128] +;CHECK: vst2.16 {d16, d17}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32) @@ -40,7 +40,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind { define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst2i64: ;Check the alignment value. Max for this instruction is 128 bits: -;CHECK: vst1.64 {d0, d1}, [r0, :128] +;CHECK: vst1.64 {d16, d17}, [r0, :128] %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32) @@ -50,7 +50,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst2Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst2.8 {d0, d1, d2, d3}, [r0, :64] +;CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8) ret void @@ -59,7 +59,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst2Qi16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst2.16 {d0, d1, d2, d3}, [r0, :128] +;CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16) @@ -69,7 +69,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { ;CHECK: vst2Qi32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst2.32 {d0, d1, d2, d3}, [r0, :256] +;CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <4 x i32>* %B call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64) diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll index a339192b174..6a7e91dee9b 100644 --- a/test/CodeGen/ARM/vst4.ll +++ b/test/CodeGen/ARM/vst4.ll @@ -3,7 +3,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst4i8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.8 {d0, d1, d2, d3}, [r0, :64] +;CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64] %tmp1 = load <8 x i8>* %B call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) ret void @@ -12,7 +12,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst4i16: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.16 {d0, d1, d2, d3}, [r0, :128] +;CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <4 x i16>* %B call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16) @@ -22,7 +22,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vst4i32: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.32 {d0, d1, d2, d3}, [r0, :256] +;CHECK: vst4.32 {d16, d17, d18, d19}, [r0, :256] %tmp0 = bitcast i32* %A to i8* %tmp1 = load <2 x i32>* %B call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32) @@ -41,7 +41,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind { define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { ;CHECK: vst4i64: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst1.64 {d0, d1, d2, d3}, [r0, :256] +;CHECK: vst1.64 {d16, d17, d18, d19}, [r0, :256] %tmp0 = bitcast i64* %A to i8* %tmp1 = load <1 x i64>* %B call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64) @@ -51,8 +51,8 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind { define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { ;CHECK: vst4Qi8: ;Check the alignment value. Max for this instruction is 256 bits: -;CHECK: vst4.8 {d0, d2, d4, d6}, [r0, :256]! -;CHECK: vst4.8 {d1, d3, d5, d7}, [r0, :256] +;CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]! +;CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256] %tmp1 = load <16 x i8>* %B call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64) ret void @@ -61,8 +61,8 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind { define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind { ;CHECK: vst4Qi16: ;Check for no alignment specifier. -;CHECK: vst4.16 {d0, d2, d4, d6}, [r0]! -;CHECK: vst4.16 {d1, d3, d5, d7}, [r0] +;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]! +;CHECK: vst4.16 {d17, d19, d21, d23}, [r0] %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll index 26750065af3..0b6c92ba8a0 100644 --- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll +++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll @@ -23,8 +23,8 @@ entry: %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] ; Constant pool load followed by add. ; Then clobber the loaded register, not the sum. -; CHECK: vldr.64 [[LDR:d.]] -; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]] +; CHECK: vldr.64 [[LDR:d.*]], +; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]] ; CHECK: vmov.f64 [[LDR]] %5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2] %6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2] diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll index f7ec5a3b577..d06f8a7beeb 100644 --- a/test/CodeGen/Thumb2/cortex-fp.ll +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -19,6 +19,6 @@ entry: %0 = fmul double %a, %b ; CORTEXM3: blx ___muldf3 ; CORTEXM4: blx ___muldf3 -; CORTEXA8: vmul.f64 d0, d1, d0 +; CORTEXA8: vmul.f64 d16, d17, d16 ret double %0 } diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index b949b2f3050..3fac512add4 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -56,7 +56,7 @@ define void @t2(i8* %ptr1, i8* %ptr2) nounwind { entry: ; CHECK: t2: ; CHECK: adr r{{.}}, #LCPI1_0 -; CHECK: vldmia r3, {d0, d1} +; CHECK: vldmia r3, {d16, d17} br i1 undef, label %bb1, label %bb2 bb1: diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 4f92c933380..0d73fba93e9 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -20,6 +20,26 @@ entry: %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 @@ -44,7 +64,16 @@ bb4: ; preds = %bb193, %entry %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1] %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] - %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2] + %tmp1 = fadd <4 x float> %20, %ld3 + %tmp2 = fadd <4 x float> %tmp1, %ld4 + %tmp3 = fadd <4 x float> %tmp2, %ld5 + %tmp4 = fadd <4 x float> %tmp3, %ld6 + %tmp5 = fadd <4 x float> %tmp4, %ld7 + %tmp6 = fadd <4 x float> %tmp5, %ld8 + %tmp7 = fadd <4 x float> %tmp6, %ld9 + %tmp8 = fadd <4 x float> %tmp7, %ld10 + %tmp9 = fadd <4 x float> %tmp8, %ld11 + %21 = fadd <4 x float> %tmp9, %ld12 %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] %tmp = extractelement <4 x i1> %22, i32 0 br i1 %tmp, label %bb193, label %bb186