[AArch64] Add support for NEON scalar extract narrow instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192970 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chad Rosier 2013-10-18 14:03:24 +00:00
parent e1bc6ddc0b
commit c439c205ba
5 changed files with 277 additions and 0 deletions

View File

@ -3292,6 +3292,22 @@ multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
[], NoItinerary>;
}
// Scalar narrowing forms of the two-register-misc instruction class.
// `defm X : ...` produces three instructions, Xbh, Xhs and Xsd. Each one reads
// a single source register and writes a destination from the next narrower
// register class. The pattern list is empty, so no instruction selection
// patterns are attached by this multiclass itself.
//   u      - forwarded as the first template argument of NeonI_Scalar2SameMisc
//   opcode - 5-bit opcode forwarded unchanged to every variant
//   asmop  - mnemonic; " $Rd, $Rn" is appended to form the assembly string
multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
  // FPR16 source -> FPR8 result (second template argument 0b00).
  def bh : NeonI_Scalar2SameMisc<u, 0b00, opcode,
                                 (outs FPR8:$Rd), (ins FPR16:$Rn),
                                 !strconcat(asmop, " $Rd, $Rn"),
                                 [], NoItinerary>;

  // FPR32 source -> FPR16 result (second template argument 0b01).
  def hs : NeonI_Scalar2SameMisc<u, 0b01, opcode,
                                 (outs FPR16:$Rd), (ins FPR32:$Rn),
                                 !strconcat(asmop, " $Rd, $Rn"),
                                 [], NoItinerary>;

  // FPR64 source -> FPR32 result (second template argument 0b10).
  def sd : NeonI_Scalar2SameMisc<u, 0b10, opcode,
                                 (outs FPR32:$Rd), (ins FPR64:$Rn),
                                 !strconcat(asmop, " $Rd, $Rn"),
                                 [], NoItinerary>;
}
multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
string asmop> {
@ -3366,6 +3382,20 @@ multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
(INSTS FPR32:$Rn)>;
}
// Selection patterns for the scalar narrowing instructions. `opnode` applied
// to a one-element vector is matched and replaced by the instruction that
// yields a one-element vector with half the element width:
//   INSTH : v1i16 (FPR16) -> v1i8
//   INSTS : v1i32 (FPR32) -> v1i16
//   INSTD : v1i64 (FPR64) -> v1i32
multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
    SDPatternOperator opnode, Instruction INSTH, Instruction INSTS,
    Instruction INSTD> {
  def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), (INSTH FPR16:$Rn)>;
  def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), (INSTS FPR32:$Rn)>;
  def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), (INSTD FPR64:$Rn)>;
}
multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
SDPatternOperator opnode,
Instruction INSTB,
@ -3645,6 +3675,24 @@ defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
USQADDbb, USQADDhh,
USQADDss, USQADDdd>;
// Scalar Signed Saturating Extract Unsigned Narrow
// Instantiates SQXTUNbh/SQXTUNhs/SQXTUNsd (u = 1, opcode = 0b10010) and selects
// them for the int_arm_neon_vqmovnsu intrinsic.
defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
                                                     SQXTUNbh, SQXTUNhs,
                                                     SQXTUNsd>;
// Scalar Signed Saturating Extract Narrow
// Instantiates SQXTNbh/SQXTNhs/SQXTNsd (u = 0, opcode = 0b10100) and selects
// them for the int_arm_neon_vqmovns intrinsic.
defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
                                                     SQXTNbh, SQXTNhs,
                                                     SQXTNsd>;
// Scalar Unsigned Saturating Extract Narrow
// Instantiates UQXTNbh/UQXTNhs/UQXTNsd (u = 1, opcode = 0b10100) and selects
// them for the int_arm_neon_vqmovnu intrinsic.
defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
                                                     UQXTNbh, UQXTNhs,
                                                     UQXTNsd>;
// Scalar Reduce Pairwise
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,

View File

@ -0,0 +1,104 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
; The v1i16 -> v1i8 vqmovnsu intrinsic call must be selected as
; "sqxtun bN, hM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i8 @test_vqmovunh_s16(i16 %a) {
; CHECK-LABEL: test_vqmovunh_s16:
; CHECK: sqxtun {{b[0-9]+}}, {{h[0-9]+}}
entry:
  %vqmovun.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vqmovun1.i = call <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16> %vqmovun.i)
  %0 = extractelement <1 x i8> %vqmovun1.i, i32 0
  ret i8 %0
}
; The v1i32 -> v1i16 vqmovnsu intrinsic call must be selected as
; "sqxtun hN, sM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i16 @test_vqmovuns_s32(i32 %a) {
; CHECK-LABEL: test_vqmovuns_s32:
; CHECK: sqxtun {{h[0-9]+}}, {{s[0-9]+}}
entry:
  %vqmovun.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vqmovun1.i = call <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32> %vqmovun.i)
  %0 = extractelement <1 x i16> %vqmovun1.i, i32 0
  ret i16 %0
}
; The v1i64 -> v1i32 vqmovnsu intrinsic call must be selected as
; "sqxtun sN, dM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i32 @test_vqmovund_s64(i64 %a) {
; CHECK-LABEL: test_vqmovund_s64:
; CHECK: sqxtun {{s[0-9]+}}, {{d[0-9]+}}
entry:
  %vqmovun.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vqmovun1.i = call <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64> %vqmovun.i)
  %0 = extractelement <1 x i32> %vqmovun1.i, i32 0
  ret i32 %0
}
; Declarations of the three vqmovnsu intrinsic overloads called by the
; sqxtun tests above (v1i16 -> v1i8, v1i32 -> v1i16, v1i64 -> v1i32).
declare <1 x i8> @llvm.arm.neon.vqmovnsu.v1i8(<1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqmovnsu.v1i16(<1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqmovnsu.v1i32(<1 x i64>)
; The v1i16 -> v1i8 vqmovns intrinsic call must be selected as
; "sqxtn bN, hM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i8 @test_vqmovnh_s16(i16 %a) {
; CHECK-LABEL: test_vqmovnh_s16:
; CHECK: sqxtn {{b[0-9]+}}, {{h[0-9]+}}
entry:
  %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16> %vqmovn.i)
  %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
  ret i8 %0
}
; The v1i32 -> v1i16 vqmovns intrinsic call must be selected as
; "sqxtn hN, sM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i16 @test_vqmovns_s32(i32 %a) {
; CHECK-LABEL: test_vqmovns_s32:
; CHECK: sqxtn {{h[0-9]+}}, {{s[0-9]+}}
entry:
  %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32> %vqmovn.i)
  %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
  ret i16 %0
}
; The v1i64 -> v1i32 vqmovns intrinsic call must be selected as
; "sqxtn sN, dM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i32 @test_vqmovnd_s64(i64 %a) {
; CHECK-LABEL: test_vqmovnd_s64:
; CHECK: sqxtn {{s[0-9]+}}, {{d[0-9]+}}
entry:
  %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64> %vqmovn.i)
  %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
  ret i32 %0
}
; Declarations of the three vqmovns intrinsic overloads called by the
; sqxtn tests above (v1i16 -> v1i8, v1i32 -> v1i16, v1i64 -> v1i32).
declare <1 x i8> @llvm.arm.neon.vqmovns.v1i8(<1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqmovns.v1i16(<1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqmovns.v1i32(<1 x i64>)
; The v1i16 -> v1i8 vqmovnu intrinsic call must be selected as
; "uqxtn bN, hM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i8 @test_vqmovnh_u16(i16 %a) {
; CHECK-LABEL: test_vqmovnh_u16:
; CHECK: uqxtn {{b[0-9]+}}, {{h[0-9]+}}
entry:
  %vqmovn.i = insertelement <1 x i16> undef, i16 %a, i32 0
  %vqmovn1.i = call <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16> %vqmovn.i)
  %0 = extractelement <1 x i8> %vqmovn1.i, i32 0
  ret i8 %0
}
; The v1i32 -> v1i16 vqmovnu intrinsic call must be selected as
; "uqxtn hN, sM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i16 @test_vqmovns_u32(i32 %a) {
; CHECK-LABEL: test_vqmovns_u32:
; CHECK: uqxtn {{h[0-9]+}}, {{s[0-9]+}}
entry:
  %vqmovn.i = insertelement <1 x i32> undef, i32 %a, i32 0
  %vqmovn1.i = call <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32> %vqmovn.i)
  %0 = extractelement <1 x i16> %vqmovn1.i, i32 0
  ret i16 %0
}
; The v1i64 -> v1i32 vqmovnu intrinsic call must be selected as
; "uqxtn sN, dM". CHECK-LABEL anchors the match to this function's label so
; the instruction check cannot be satisfied by output of another function.
define i32 @test_vqmovnd_u64(i64 %a) {
; CHECK-LABEL: test_vqmovnd_u64:
; CHECK: uqxtn {{s[0-9]+}}, {{d[0-9]+}}
entry:
  %vqmovn.i = insertelement <1 x i64> undef, i64 %a, i32 0
  %vqmovn1.i = call <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64> %vqmovn.i)
  %0 = extractelement <1 x i32> %vqmovn1.i, i32 0
  ret i32 %0
}
; Declarations of the three vqmovnu intrinsic overloads called by the
; uqxtn tests above (v1i16 -> v1i8, v1i32 -> v1i16, v1i64 -> v1i32).
declare <1 x i8> @llvm.arm.neon.vqmovnu.v1i8(<1 x i16>)
declare <1 x i16> @llvm.arm.neon.vqmovnu.v1i16(<1 x i32>)
declare <1 x i32> @llvm.arm.neon.vqmovnu.v1i32(<1 x i64>)

View File

@ -4502,3 +4502,58 @@
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqdmull d15, s22, d12
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed Saturating Extract Unsigned Narrow
//----------------------------------------------------------------------
// Negative test: source and destination registers have the same width
// (b/b, h/h, s/s), so each line must be diagnosed as an invalid operand.
sqxtun b19, b14
sqxtun h21, h15
sqxtun s20, s12
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqxtun b19, b14
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqxtun h21, h15
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqxtun s20, s12
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Signed Saturating Extract Narrow
//----------------------------------------------------------------------
// Negative test: source and destination registers have the same width
// (b/b, h/h, s/s), so each line must be diagnosed as an invalid operand.
sqxtn b18, b18
sqxtn h20, h17
sqxtn s19, s14
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqxtn b18, b18
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqxtn h20, h17
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: sqxtn s19, s14
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Unsigned Saturating Extract Narrow
//----------------------------------------------------------------------
// Negative test: source and destination registers have the same width
// (b/b, h/h, s/s), so each line must be diagnosed as an invalid operand.
uqxtn b18, b18
uqxtn h20, h17
uqxtn s19, s14
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: uqxtn b18, b18
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: uqxtn h20, h17
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: uqxtn s19, s14
// CHECK-ERROR: ^

View File

@ -0,0 +1,40 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
// Check that the assembler can handle the documented syntax for AArch64
//----------------------------------------------------------------------
// Scalar Signed Saturating Extract Unsigned Narrow
//----------------------------------------------------------------------
// One instruction per operand-width pair (h->b, s->h, d->s); the CHECK lines
// pin both the printed syntax and the four encoding bytes of each.
sqxtun b19, h14
sqxtun h21, s15
sqxtun s20, d12
// CHECK: sqxtun b19, h14 // encoding: [0xd3,0x29,0x21,0x7e]
// CHECK: sqxtun h21, s15 // encoding: [0xf5,0x29,0x61,0x7e]
// CHECK: sqxtun s20, d12 // encoding: [0x94,0x29,0xa1,0x7e]
//----------------------------------------------------------------------
// Scalar Signed Saturating Extract Narrow
//----------------------------------------------------------------------
// One instruction per operand-width pair (h->b, s->h, d->s); the CHECK lines
// pin both the printed syntax and the four encoding bytes of each.
sqxtn b18, h18
sqxtn h20, s17
sqxtn s19, d14
// CHECK: sqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x5e]
// CHECK: sqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x5e]
// CHECK: sqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x5e]
//----------------------------------------------------------------------
// Scalar Unsigned Saturating Extract Narrow
//----------------------------------------------------------------------
// One instruction per operand-width pair (h->b, s->h, d->s); the CHECK lines
// pin both the printed syntax and the four encoding bytes of each.
uqxtn b18, h18
uqxtn h20, s17
uqxtn s19, d14
// CHECK: uqxtn b18, h18 // encoding: [0x52,0x4a,0x21,0x7e]
// CHECK: uqxtn h20, s17 // encoding: [0x34,0x4a,0x61,0x7e]
// CHECK: uqxtn s19, d14 // encoding: [0xd3,0x49,0xa1,0x7e]

View File

@ -1683,3 +1683,33 @@
# CHECK: sqdmull d15, s22, s12
0xcc,0xd2,0x6c,0x5e
0xcf,0xd2,0xac,0x5e
#----------------------------------------------------------------------
# Scalar Signed Saturating Extract Unsigned Narrow
#----------------------------------------------------------------------
# The three byte sequences below are expected to disassemble, in order, to
# the three CHECK lines.
# CHECK: sqxtun b19, h14
# CHECK: sqxtun h21, s15
# CHECK: sqxtun s20, d12
0xd3,0x29,0x21,0x7e
0xf5,0x29,0x61,0x7e
0x94,0x29,0xa1,0x7e
#----------------------------------------------------------------------
# Scalar Signed Saturating Extract Narrow
#----------------------------------------------------------------------
# The three byte sequences below are expected to disassemble, in order, to
# the three CHECK lines.
# CHECK: sqxtn b18, h18
# CHECK: sqxtn h20, s17
# CHECK: sqxtn s19, d14
0x52,0x4a,0x21,0x5e
0x34,0x4a,0x61,0x5e
0xd3,0x49,0xa1,0x5e
#----------------------------------------------------------------------
# Scalar Unsigned Saturating Extract Narrow
#----------------------------------------------------------------------
# The three byte sequences below are expected to disassemble, in order, to
# the three CHECK lines.
# CHECK: uqxtn b18, h18
# CHECK: uqxtn h20, s17
# CHECK: uqxtn s19, d14
0x52,0x4a,0x21,0x7e
0x34,0x4a,0x61,0x7e
0xd3,0x49,0xa1,0x7e