mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-15 23:57:48 +00:00
ARM: add natural patterns for vaddhn and vsubhn.
These instructions aren't particularly complicated and it's well worth having patterns for some reasonably useful LLVM IR that will match them. Soon we should be able to switch Clang over to producing this natural version. llvm-svn: 189335
This commit is contained in:
parent
084b7ef3da
commit
24c6842d69
@ -3979,6 +3979,13 @@ defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
|
||||
defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
|
||||
int_arm_neon_vraddhn, 1>;
|
||||
|
||||
def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
|
||||
(VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
|
||||
def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
|
||||
(VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
|
||||
def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
|
||||
(VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
|
||||
|
||||
// Vector Multiply Operations.
|
||||
|
||||
// VMUL : Vector Multiply (integer, polynomial and floating-point)
|
||||
@ -4262,6 +4269,13 @@ defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
|
||||
defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
|
||||
int_arm_neon_vrsubhn, 0>;
|
||||
|
||||
def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
|
||||
(VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
|
||||
def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
|
||||
(VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
|
||||
def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
|
||||
(VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
|
||||
|
||||
// Vector Comparisons.
|
||||
|
||||
// VCEQ : Vector Compare Equal
|
||||
|
@ -152,6 +152,33 @@ declare <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16>, <8 x i16>) nounwind rea
|
||||
declare <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
define <8 x i8> @vaddhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: vaddhni16_natural:
|
||||
; CHECK: vaddhn.i16
|
||||
%sum = add <8 x i16> %A, %B
|
||||
%shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%trunc = trunc <8 x i16> %shift to <8 x i8>
|
||||
ret <8 x i8> %trunc
|
||||
}
|
||||
|
||||
define <4 x i16> @vaddhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: vaddhni32_natural:
|
||||
; CHECK: vaddhn.i32
|
||||
%sum = add <4 x i32> %A, %B
|
||||
%shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
|
||||
%trunc = trunc <4 x i32> %shift to <4 x i16>
|
||||
ret <4 x i16> %trunc
|
||||
}
|
||||
|
||||
define <2 x i32> @vaddhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||
; CHECK-LABEL: vaddhni64_natural:
|
||||
; CHECK: vaddhn.i64
|
||||
%sum = add <2 x i64> %A, %B
|
||||
%shift = lshr <2 x i64> %sum, <i64 32, i64 32>
|
||||
%trunc = trunc <2 x i64> %shift to <2 x i32>
|
||||
ret <2 x i32> %trunc
|
||||
}
|
||||
|
||||
define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
;CHECK-LABEL: vaddls8:
|
||||
;CHECK: vaddl.s8
|
||||
|
@ -121,6 +121,33 @@ declare <8 x i8> @llvm.arm.neon.vsubhn.v8i8(<8 x i16>, <8 x i16>) nounwind read
|
||||
declare <4 x i16> @llvm.arm.neon.vsubhn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <2 x i32> @llvm.arm.neon.vsubhn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) nounwind {
|
||||
; CHECK-LABEL: vsubhni16_natural:
|
||||
; CHECK: vsubhn.i16
|
||||
%sum = sub <8 x i16> %A, %B
|
||||
%shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
|
||||
%trunc = trunc <8 x i16> %shift to <8 x i8>
|
||||
ret <8 x i8> %trunc
|
||||
}
|
||||
|
||||
define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) nounwind {
|
||||
; CHECK-LABEL: vsubhni32_natural:
|
||||
; CHECK: vsubhn.i32
|
||||
%sum = sub <4 x i32> %A, %B
|
||||
%shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
|
||||
%trunc = trunc <4 x i32> %shift to <4 x i16>
|
||||
ret <4 x i16> %trunc
|
||||
}
|
||||
|
||||
define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) nounwind {
|
||||
; CHECK-LABEL: vsubhni64_natural:
|
||||
; CHECK: vsubhn.i64
|
||||
%sum = sub <2 x i64> %A, %B
|
||||
%shift = lshr <2 x i64> %sum, <i64 32, i64 32>
|
||||
%trunc = trunc <2 x i64> %shift to <2 x i32>
|
||||
ret <2 x i32> %trunc
|
||||
}
|
||||
|
||||
define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
|
||||
;CHECK-LABEL: vrsubhni16:
|
||||
;CHECK: vrsubhn.i16
|
||||
|
Loading…
Reference in New Issue
Block a user