mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-19 09:57:42 +00:00
Add new intrinsics for Neon VTRN, VZIP and VUZP operations.

Modeling these as vector shuffles did not work out well. Shuffles that produce double-wide vectors accurately represent the operation but make it hard to do anything with the results. I considered splitting them up into 2 shuffles, one to write each register separately, but there doesn't seem to be a good way to reunite them for codegen.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78437 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c3c9239d76
commit
9e699cc726
@ -66,6 +66,12 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
|
||||
LLVMTruncatedElementVectorType<0>,
|
||||
LLVMTruncatedElementVectorType<0>],
|
||||
[IntrNoMem]>;
|
||||
// Intrinsic signature with two results and two operands, all of one
// overloaded integer type: the first result is llvm_anyint_ty and the second
// result and both operands are tied to it via LLVMMatchType<0>.
// Marked IntrNoMem. Used by the integer vtrn/vzip/vuzp intrinsics
// (int_arm_neon_vtrni, int_arm_neon_vzipi, int_arm_neon_vuzpi).
class Neon_2Result_Intrinsic
|
||||
: Intrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
// Floating-point counterpart of Neon_2Result_Intrinsic: two results and two
// operands, all of one overloaded floating-point type (llvm_anyfloat_ty, with
// the second result and both operands tied to it via LLVMMatchType<0>).
// Marked IntrNoMem. Used by the float vtrn/vzip/vuzp intrinsics
// (int_arm_neon_vtrnf, int_arm_neon_vzipf, int_arm_neon_vuzpf).
class Neon_2Result_Float_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty, LLVMMatchType<0>],
|
||||
[LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
||||
// Intrinsic signature with one overloaded floating-point result and two
// operands: an overloaded integer value and an i32. Marked IntrNoMem.
// NOTE(review): the i32 operand is presumably the fixed-point fraction-bit
// count for the conversion -- confirm against the instruction definitions.
class Neon_CvtFxToFP_Intrinsic
|
||||
: Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
class Neon_CvtFPToFx_Intrinsic
|
||||
@ -288,6 +294,18 @@ def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vmovls : Neon_1Arg_Long_Intrinsic;
|
||||
def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic;
|
||||
|
||||
// Vector Transpose (vtrn).
// The "i" def uses Neon_2Result_Intrinsic (overloaded integer type); the "f"
// def uses Neon_2Result_Float_Intrinsic (overloaded floating-point type).
// Each takes two operands and produces two results of that same type.
|
||||
def int_arm_neon_vtrni : Neon_2Result_Intrinsic;
|
||||
def int_arm_neon_vtrnf : Neon_2Result_Float_Intrinsic;
|
||||
|
||||
// Vector Interleave (vzip).
// The "i" def uses Neon_2Result_Intrinsic (overloaded integer type); the "f"
// def uses Neon_2Result_Float_Intrinsic (overloaded floating-point type).
// Each takes two operands and produces two results of that same type.
|
||||
def int_arm_neon_vzipi : Neon_2Result_Intrinsic;
|
||||
def int_arm_neon_vzipf : Neon_2Result_Float_Intrinsic;
|
||||
|
||||
// Vector Deinterleave (vuzp).
// The "i" def uses Neon_2Result_Intrinsic (overloaded integer type); the "f"
// def uses Neon_2Result_Float_Intrinsic (overloaded floating-point type).
// Each takes two operands and produces two results of that same type.
|
||||
def int_arm_neon_vuzpi : Neon_2Result_Intrinsic;
|
||||
def int_arm_neon_vuzpf : Neon_2Result_Float_Intrinsic;
|
||||
|
||||
let TargetPrefix = "arm" in {
|
||||
|
||||
// De-interleaving vector loads from N-element structures.
|
||||
|
Loading…
Reference in New Issue
Block a user