From 3ea0d92811bd4483f4a5039de4efc4aa105de5cb Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha
Date: Fri, 29 Jan 2016 01:08:41 +0000
Subject: [PATCH] [AArch64] Fix i64 nontemporal high-half extraction.

Since we only have pair - not single - nontemporal store instructions,
we have to extract the high part into a separate register to be able
to use them.

When the initial nontemporal codegen support was added, I wrote the
extract using the nonsensical UBFX [0,32[. Use the correct LSR form
instead.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259134 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrInfo.td | 2 +-
 test/CodeGen/AArch64/nontemporal.ll    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index d02bc9ff394..4f052e81de2 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5982,7 +5982,7 @@ def : NTStore64Pat;
 def : Pat<(nontemporalstore GPR64:$Rt,
             (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
           (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
-                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
+                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                   GPR64sp:$Rn, simm7s4:$offset)>;
 } // AddedComplexity=10
 } // Predicates = [IsLE]
diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll
index db9779e0319..d8785f845c2 100644
--- a/test/CodeGen/AArch64/nontemporal.ll
+++ b/test/CodeGen/AArch64/nontemporal.ll
@@ -112,7 +112,7 @@ define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
 
 define void @test_stnp_i64(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64:
-; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
 ; CHECK-NEXT: stnp w1, w[[HI]], [x0]
 ; CHECK-NEXT: ret
   store i64 %v, i64* %p, align 1, !nontemporal !0
@@ -162,7 +162,7 @@ define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
 
 define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64_offset:
-; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
 ; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8]
 ; CHECK-NEXT: ret
   %tmp0 = getelementptr i64, i64* %p, i32 1
@@ -172,7 +172,7 @@ define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
 
 define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
 ; CHECK-LABEL: test_stnp_i64_offset_neg:
-; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32
+; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
 ; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8]
 ; CHECK-NEXT: ret
   %tmp0 = getelementptr i64, i64* %p, i32 -1