mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-21 12:51:20 +00:00
[PowerPC] Implement Vector Multiply High/Divide Extended Builtins in LLVM/Clang
This patch implements the function prototypes vec_mulh and vec_dive in order to utilize the vector multiply high (vmulh[s|u][w|d]) and vector divide extended (vdive[s|u][w|d]) instructions introduced in Power10. Differential Revision: https://reviews.llvm.org/D82609
This commit is contained in:
parent
98083a3146
commit
daa2ee7b26
@ -950,6 +950,18 @@ def int_ppc_altivec_vrldmi :
|
||||
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
// Vector Divide Extended Intrinsics.
|
||||
// llvm.ppc.altivec.vdivesw: used as <4 x i32> (<4 x i32>, <4 x i32>);
// matched by the pattern on the VDIVESW instruction definition.
def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
|
||||
// llvm.ppc.altivec.vdiveuw: used as <4 x i32> (<4 x i32>, <4 x i32>);
// matched by the pattern on the VDIVEUW instruction definition.
def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
|
||||
// llvm.ppc.altivec.vdivesd: used as <2 x i64> (<2 x i64>, <2 x i64>);
// matched by the pattern on the VDIVESD instruction definition.
def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
|
||||
// llvm.ppc.altivec.vdiveud: used as <2 x i64> (<2 x i64>, <2 x i64>);
// matched by the pattern on the VDIVEUD instruction definition.
def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
|
||||
|
||||
// Vector Multiply High Intrinsics.
|
||||
// llvm.ppc.altivec.vmulhsw: used as <4 x i32> (<4 x i32>, <4 x i32>);
// an anonymous Pat selects the VMULHSW instruction for it.
def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
|
||||
// llvm.ppc.altivec.vmulhuw: used as <4 x i32> (<4 x i32>, <4 x i32>);
// an anonymous Pat selects the VMULHUW instruction for it.
def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
|
||||
// llvm.ppc.altivec.vmulhsd: used as <2 x i64> (<2 x i64>, <2 x i64>);
// an anonymous Pat selects the VMULHSD instruction for it.
def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
|
||||
// llvm.ppc.altivec.vmulhud: used as <2 x i64> (<2 x i64>, <2 x i64>);
// an anonymous Pat selects the VMULHUD instruction for it.
def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC VSX Intrinsic Definitions.
|
||||
|
||||
|
@ -1207,13 +1207,21 @@ let Predicates = [IsISA3_1] in {
|
||||
"vdivud $vD, $vA, $vB", IIC_VecGeneral,
|
||||
[(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
|
||||
// vdivesw (vector divide extended, signed word form per the mnemonic).
// Operands and result live in vrrc registers; the instruction is selected
// directly from the int_ppc_altivec_vdivesw intrinsic on v4i32 values.
// The stale pattern-less tail (`IIC_VecGeneral, []>;`) is removed so the
// record has exactly one asm string and one pattern list.
def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
                       [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA,
                                         v4i32:$vB))]>;
|
||||
// vdiveuw (vector divide extended, unsigned word form per the mnemonic).
// Operands and result live in vrrc registers; the instruction is selected
// directly from the int_ppc_altivec_vdiveuw intrinsic on v4i32 values.
// The stale pattern-less tail (`IIC_VecGeneral, []>;`) is removed so the
// record has exactly one asm string and one pattern list.
def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
                       [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA,
                                         v4i32:$vB))]>;
|
||||
// vdivesd (vector divide extended, signed doubleword form per the mnemonic).
// Operands and result live in vrrc registers; the instruction is selected
// directly from the int_ppc_altivec_vdivesd intrinsic on v2i64 values.
// The stale pattern-less tail (`IIC_VecGeneral, []>;`) is removed so the
// record has exactly one asm string and one pattern list.
def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
                       [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA,
                                         v2i64:$vB))]>;
|
||||
// vdiveud (vector divide extended, unsigned doubleword form per the mnemonic).
// Operands and result live in vrrc registers; the instruction is selected
// directly from the int_ppc_altivec_vdiveud intrinsic on v2i64 values.
// The stale pattern-less tail (`IIC_VecGeneral, []>;`) is removed so the
// record has exactly one asm string and one pattern list.
def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                       "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
                       [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA,
                                         v2i64:$vB))]>;
|
||||
// xvtlsbb: takes $XB (vsrc) as input and produces $BF (crrc).
// The pattern list is empty, so this record supplies no ISel pattern itself.
def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
|
||||
"xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
|
||||
|
||||
@ -1285,6 +1293,15 @@ let Predicates = [IsISA3_1] in {
|
||||
|
||||
//---------------------------- Anonymous Patterns ----------------------------//
|
||||
let Predicates = [IsISA3_1] in {
|
||||
// Exploit the vector multiply high instructions using intrinsics.
|
||||
// Select VMULHSW for the int_ppc_altivec_vmulhsw intrinsic on v4i32 operands.
def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VMULHSW $vA, $vB))>;
|
||||
// Select VMULHUW for the int_ppc_altivec_vmulhuw intrinsic on v4i32 operands.
def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
|
||||
(v4i32 (VMULHUW $vA, $vB))>;
|
||||
// Select VMULHSD for the int_ppc_altivec_vmulhsd intrinsic on v2i64 operands.
def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
|
||||
(v2i64 (VMULHSD $vA, $vB))>;
|
||||
// Select VMULHUD for the int_ppc_altivec_vmulhud intrinsic on v2i64 operands.
def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
|
||||
(v2i64 (VMULHUD $vA, $vB))>;
|
||||
// Select XXGENPCVBM for the int_ppc_vsx_xxgenpcvbm intrinsic (v16i8 source plus
// an immediate); the instruction result is copied into the VRRC register class.
def : Pat<(v16i8 (int_ppc_vsx_xxgenpcvbm v16i8:$VRB, imm:$IMM)),
|
||||
(v16i8 (COPY_TO_REGCLASS (XXGENPCVBM $VRB, imm:$IMM), VRRC))>;
|
||||
def : Pat<(v8i16 (int_ppc_vsx_xxgenpcvhm v8i16:$VRB, imm:$IMM)),
|
||||
|
@ -49,3 +49,49 @@ entry:
|
||||
%div = sdiv <4 x i32> %a, %b
|
||||
ret <4 x i32> %div
|
||||
}
|
||||
|
||||
; Test the vector divide extended intrinsics.
|
||||
declare <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32>, <4 x i32>)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64>, <2 x i64>)
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vdivesw on <4 x i32> operands to be
; emitted as a single vdivesw instruction followed by the return.
define <4 x i32> @test_vdivesw(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_vdivesw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vdivesw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%div = tail call <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %div
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vdiveuw on <4 x i32> operands to be
; emitted as a single vdiveuw instruction followed by the return.
define <4 x i32> @test_vdiveuw(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_vdiveuw:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vdiveuw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%div = tail call <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %div
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vdivesd on <2 x i64> operands to be
; emitted as a single vdivesd instruction followed by the return.
define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_vdivesd:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vdivesd v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%div = tail call <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64> %a, <2 x i64> %b)
|
||||
ret <2 x i64> %div
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vdiveud on <2 x i64> operands to be
; emitted as a single vdiveud instruction followed by the return.
define <2 x i64> @test_vdiveud(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_vdiveud:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vdiveud v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%div = tail call <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64> %a, <2 x i64> %b)
|
||||
ret <2 x i64> %div
|
||||
}
|
||||
|
@ -76,3 +76,49 @@ entry:
|
||||
%tr = trunc <4 x i64> %shr to <4 x i32>
|
||||
ret <4 x i32> %tr
|
||||
}
|
||||
|
||||
; Test the vector multiply high intrinsics.
|
||||
declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
|
||||
declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
|
||||
declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vmulhsw on <4 x i32> operands to be
; emitted as a single vmulhsw instruction followed by the return.
define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_vmulhsw_intrinsic:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmulhsw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %mulh
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vmulhuw on <4 x i32> operands to be
; emitted as a single vmulhuw instruction followed by the return.
define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
|
||||
; CHECK-LABEL: test_vmulhuw_intrinsic:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmulhuw v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
|
||||
ret <4 x i32> %mulh
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vmulhsd on <2 x i64> operands to be
; emitted as a single vmulhsd instruction followed by the return.
define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_vmulhsd_intrinsic:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmulhsd v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
|
||||
ret <2 x i64> %mulh
|
||||
}
|
||||
|
||||
; Expects a call to llvm.ppc.altivec.vmulhud on <2 x i64> operands to be
; emitted as a single vmulhud instruction followed by the return.
define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-LABEL: test_vmulhud_intrinsic:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmulhud v2, v2, v3
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
%mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
|
||||
ret <2 x i64> %mulh
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user