[AArch64] Add support for NEON scalar signed saturating absolute value and scalar signed saturating negate instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192733 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2013-10-15 21:18:44 +00:00
parent ab950f5f33
commit 1824bd0ef8
6 changed files with 201 additions and 1 deletions

View File

@ -3232,7 +3232,7 @@ multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
}
// Scalar Two Registers Miscellaneous
multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
string asmop> {
def ss : NeonI_Scalar2SameMisc<u, {size_high, 0b0}, opcode,
@ -3245,6 +3245,25 @@ multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
[], NoItinerary>;
}
// Scalar two-register miscellaneous instructions defined at four element
// sizes. Instantiating this multiclass with a prefix yields the records
// <prefix>bb, <prefix>hh, <prefix>ss and <prefix>dd. All four forward the
// same `u` and `opcode` arguments and share the "<asmop> $Rd, $Rn" assembly
// string; they differ only in the size argument (0b00, 0b01, 0b10, 0b11) and
// in the register class (FPR8, FPR16, FPR32, FPR64) used for both the
// destination $Rd and the source $Rn.
// The pattern list of every def is empty, so no instruction-selection pattern
// is attached by this multiclass itself.
multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>{
// FPR8 operands, size = 0b00.
def bb : NeonI_Scalar2SameMisc<u, 0b00, opcode,
(outs FPR8:$Rd), (ins FPR8:$Rn),
!strconcat(asmop, " $Rd, $Rn"),
[], NoItinerary>;
// FPR16 operands, size = 0b01.
def hh : NeonI_Scalar2SameMisc<u, 0b01, opcode,
(outs FPR16:$Rd), (ins FPR16:$Rn),
!strconcat(asmop, " $Rd, $Rn"),
[], NoItinerary>;
// FPR32 operands, size = 0b10.
def ss : NeonI_Scalar2SameMisc<u, 0b10, opcode,
(outs FPR32:$Rd), (ins FPR32:$Rn),
!strconcat(asmop, " $Rd, $Rn"),
[], NoItinerary>;
// FPR64 operands, size = 0b11.
def dd: NeonI_Scalar2SameMisc<u, 0b11, opcode,
(outs FPR64:$Rd), (ins FPR64:$Rn),
!strconcat(asmop, " $Rd, $Rn"),
[], NoItinerary>;
}
multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode,
SDPatternOperator Dopnode,
Instruction INSTS,
@ -3277,6 +3296,21 @@ class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
: Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))),
(INSTD VPR64:$Rn, 0)>;
// Selection patterns for a unary operator applied to one-element integer
// vectors. Each pattern maps `opnode` at one vector type to the instruction
// supplied for that size:
//   v1i8  -> INSTB (operand in FPR8)
//   v1i16 -> INSTH (operand in FPR16)
//   v1i32 -> INSTS (operand in FPR32)
//   v1i64 -> INSTD (operand in FPR64)
// The result type of each pattern is the same as its operand type.
multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
Instruction INSTB,
Instruction INSTH,
Instruction INSTS,
Instruction INSTD> {
// v1i8 form.
def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
(INSTB FPR8:$Rn)>;
// v1i16 form.
def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
(INSTH FPR16:$Rn)>;
// v1i32 form.
def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
(INSTS FPR32:$Rn)>;
// v1i64 form.
def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
(INSTD FPR64:$Rn)>;
}
// Scalar Integer Add
let isCommutable = 1 in {
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
@ -3495,6 +3529,16 @@ def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
CMLTddi>;
// Scalar Signed Saturating Absolute Value
// Defines SQABSbb, SQABShh, SQABSss and SQABSdd with u = 0 and
// opcode = 0b00111, then selects them for int_arm_neon_vqabs at the
// corresponding element size.
defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
SQABSbb, SQABShh, SQABSss, SQABSdd>;
// Scalar Signed Saturating Negate
// Same opcode as SQABS (0b00111) but with u = 1. Defines SQNEGbb, SQNEGhh,
// SQNEGss and SQNEGdd and selects them for int_arm_neon_vqneg.
defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
// Scalar Reduce Pairwise
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,

View File

@ -0,0 +1,49 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; llvm.arm.neon.vqabs on <1 x i8> is expected to select the B-register form of
; sqabs. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i8 @test_vqabsb_s8(i8 %a) {
; CHECK-LABEL: test_vqabsb_s8:
; CHECK: sqabs {{b[0-9]+}}, {{b[0-9]+}}
entry:
  %vqabs.i = insertelement <1 x i8> undef, i8 %a, i32 0
  %vqabs1.i = call <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8> %vqabs.i)
  %0 = extractelement <1 x i8> %vqabs1.i, i32 0
  ret i8 %0
}

declare <1 x i8> @llvm.arm.neon.vqabs.v1i8(<1 x i8>)
; llvm.arm.neon.vqabs on <1 x i16> is expected to select the H-register form
; of sqabs. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i16 @test_vqabsh_s16(i16 %a) {
; CHECK-LABEL: test_vqabsh_s16:
; CHECK: sqabs {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %vqabs.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vqabs1.i = call <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16> %vqabs.i)
  %0 = extractelement <1 x i16> %vqabs1.i, i32 0
  ret i16 %0
}

declare <1 x i16> @llvm.arm.neon.vqabs.v1i16(<1 x i16>)
; llvm.arm.neon.vqabs on <1 x i32> is expected to select the S-register form
; of sqabs. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i32 @test_vqabss_s32(i32 %a) {
; CHECK-LABEL: test_vqabss_s32:
; CHECK: sqabs {{s[0-9]+}}, {{s[0-9]+}}
entry:
  %vqabs.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vqabs1.i = call <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32> %vqabs.i)
  %0 = extractelement <1 x i32> %vqabs1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.arm.neon.vqabs.v1i32(<1 x i32>)
; llvm.arm.neon.vqabs on <1 x i64> is expected to select the D-register form
; of sqabs. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i64 @test_vqabsd_s64(i64 %a) {
; CHECK-LABEL: test_vqabsd_s64:
; CHECK: sqabs {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %vqabs.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vqabs1.i = call <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64> %vqabs.i)
  %0 = extractelement <1 x i64> %vqabs1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.arm.neon.vqabs.v1i64(<1 x i64>)

View File

@ -0,0 +1,49 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; llvm.arm.neon.vqneg on <1 x i8> is expected to select the B-register form of
; sqneg. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i8 @test_vqnegb_s8(i8 %a) {
; CHECK-LABEL: test_vqnegb_s8:
; CHECK: sqneg {{b[0-9]+}}, {{b[0-9]+}}
entry:
  %vqneg.i = insertelement <1 x i8> undef, i8 %a, i32 0
  %vqneg1.i = call <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8> %vqneg.i)
  %0 = extractelement <1 x i8> %vqneg1.i, i32 0
  ret i8 %0
}

declare <1 x i8> @llvm.arm.neon.vqneg.v1i8(<1 x i8>)
; llvm.arm.neon.vqneg on <1 x i16> is expected to select the H-register form
; of sqneg. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i16 @test_vqnegh_s16(i16 %a) {
; CHECK-LABEL: test_vqnegh_s16:
; CHECK: sqneg {{h[0-9]+}}, {{h[0-9]+}}
entry:
  %vqneg.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vqneg1.i = call <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16> %vqneg.i)
  %0 = extractelement <1 x i16> %vqneg1.i, i32 0
  ret i16 %0
}

declare <1 x i16> @llvm.arm.neon.vqneg.v1i16(<1 x i16>)
; llvm.arm.neon.vqneg on <1 x i32> is expected to select the S-register form
; of sqneg. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i32 @test_vqnegs_s32(i32 %a) {
; CHECK-LABEL: test_vqnegs_s32:
; CHECK: sqneg {{s[0-9]+}}, {{s[0-9]+}}
entry:
  %vqneg.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vqneg1.i = call <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32> %vqneg.i)
  %0 = extractelement <1 x i32> %vqneg1.i, i32 0
  ret i32 %0
}

declare <1 x i32> @llvm.arm.neon.vqneg.v1i32(<1 x i32>)
; llvm.arm.neon.vqneg on <1 x i64> is expected to select the D-register form
; of sqneg. The function name is matched as a label so the instruction check
; below cannot be satisfied by output belonging to another function.
define i64 @test_vqnegd_s64(i64 %a) {
; CHECK-LABEL: test_vqnegd_s64:
; CHECK: sqneg {{d[0-9]+}}, {{d[0-9]+}}
entry:
  %vqneg.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vqneg1.i = call <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64> %vqneg.i)
  %0 = extractelement <1 x i64> %vqneg1.i, i32 0
  ret i64 %0
}

declare <1 x i64> @llvm.arm.neon.vqneg.v1i64(<1 x i64>)

View File

@ -0,0 +1,17 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Scalar Signed Saturating Absolute Value
//----------------------------------------------------------------------
// One sqabs instruction per scalar register width (b, h, s, d). The expected
// printed form and encoding bytes for each are listed, in the same order, by
// the CHECK lines that follow the instructions.
sqabs b19, b14
sqabs h21, h15
sqabs s20, s12
sqabs d18, d12
// CHECK: sqabs b19, b14 // encoding: [0xd3,0x79,0x20,0x5e]
// CHECK: sqabs h21, h15 // encoding: [0xf5,0x79,0x60,0x5e]
// CHECK: sqabs s20, s12 // encoding: [0x94,0x79,0xa0,0x5e]
// CHECK: sqabs d18, d12 // encoding: [0x92,0x79,0xe0,0x5e]

View File

@ -0,0 +1,17 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Scalar Signed Saturating Negate
//----------------------------------------------------------------------
// One sqneg instruction per scalar register width (b, h, s, d). The expected
// printed form and encoding bytes for each are listed, in the same order, by
// the CHECK lines that follow the instructions.
sqneg b19, b14
sqneg h21, h15
sqneg s20, s12
sqneg d18, d12
// CHECK: sqneg b19, b14 // encoding: [0xd3,0x79,0x20,0x7e]
// CHECK: sqneg h21, h15 // encoding: [0xf5,0x79,0x60,0x7e]
// CHECK: sqneg s20, s12 // encoding: [0x94,0x79,0xa0,0x7e]
// CHECK: sqneg d18, d12 // encoding: [0x92,0x79,0xe0,0x7e]

View File

@ -1599,3 +1599,27 @@
#----------------------------------------------------------------------
# CHECK: cmtst d20, d21, d22
0xb4,0x8e,0xf6,0x5e
#----------------------------------------------------------------------
# Scalar Signed Saturating Absolute Value
#----------------------------------------------------------------------
# Expected sqabs output, one line per scalar register width (b, h, s, d).
# CHECK: sqabs b19, b14
# CHECK: sqabs h21, h15
# CHECK: sqabs s20, s12
# CHECK: sqabs d18, d12
# Input bytes, one instruction per line, in the same order as the expected
# output listed above.
0xd3,0x79,0x20,0x5e
0xf5,0x79,0x60,0x5e
0x94,0x79,0xa0,0x5e
0x92,0x79,0xe0,0x5e
#----------------------------------------------------------------------
# Scalar Signed Saturating Negate
#----------------------------------------------------------------------
# Expected sqneg output, one line per scalar register width (b, h, s, d).
# CHECK: sqneg b19, b14
# CHECK: sqneg h21, h15
# CHECK: sqneg s20, s12
# CHECK: sqneg d18, d12
# Input bytes, one instruction per line, in the same order as the expected
# output listed above.
0xd3,0x79,0x20,0x7e
0xf5,0x79,0x60,0x7e
0x94,0x79,0xa0,0x7e
0x92,0x79,0xe0,0x7e