mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-02 13:21:43 +00:00
Replace the arm.neon.vmovls and vmovlu intrinsics with vector sign-extend and
zero-extend operations.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@111614 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2df9504fec
commit
b31a11b466
@ -60,9 +60,6 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
|
||||
// One-operand "narrow" NEON intrinsic signature.
//   Result:  any vector type (overloaded, type slot 0).
//   Operand: LLVMExtendedElementVectorType<0>, i.e. a vector type derived
//            from the result type in slot 0 (per the name, its
//            extended-element counterpart).
//   Attrs:   IntrNoMem.
class Neon_1Arg_Narrow_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
|
||||
// One-operand "long" NEON intrinsic signature.
//   Result:  any vector type (overloaded, type slot 0).
//   Operand: LLVMTruncatedElementVectorType<0>, i.e. a vector type derived
//            from the result type in slot 0 (per the name, its
//            truncated-element counterpart).
//   Attrs:   IntrNoMem.
// In this view it is referenced only by int_arm_neon_vmovls and
// int_arm_neon_vmovlu.
class Neon_1Arg_Long_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMTruncatedElementVectorType<0>], [IntrNoMem]>;
|
||||
// Two-operand NEON intrinsic signature in which both operands have exactly
// the same type as the result.
//   Result:   any vector type (overloaded, type slot 0).
//   Operands: LLVMMatchType<0>, LLVMMatchType<0> (both match slot 0).
//   Attrs:    IntrNoMem.
class Neon_2Arg_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
@ -322,8 +319,6 @@ def int_arm_neon_vmovn : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vmovls : Neon_1Arg_Long_Intrinsic;
|
||||
def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic;
|
||||
|
||||
// Vector Table Lookup.
|
||||
// The first 1-4 arguments are the table.
|
||||
|
@ -125,12 +125,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
|
||||
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
|
||||
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
|
||||
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
|
||||
setOperationAction(ISD::ZERO_EXTEND, VT.getSimpleVT(), Expand);
|
||||
if (VT.isInteger()) {
|
||||
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
|
||||
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
|
||||
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
|
||||
setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
|
||||
}
|
||||
setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
|
||||
|
||||
// Promote all bit-wise operations.
|
||||
if (VT.isInteger() && VT != PromotedBitwiseVT) {
|
||||
@ -320,6 +322,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
|
||||
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
|
||||
|
||||
setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
|
||||
|
||||
// Neon does not support some operations on v1i64 and v2i64 types.
|
||||
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
|
||||
@ -3786,7 +3790,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
|
||||
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
|
||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -888,14 +888,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
|
||||
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
|
||||
|
||||
// Long 2-register intrinsics (currently only used for VMOVL).
|
||||
class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
|
||||
InstrItinClass itin, string OpcodeStr, string Dt,
|
||||
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
|
||||
// Long 2-register operations (currently only used for VMOVL).
|
||||
class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
|
||||
InstrItinClass itin, string OpcodeStr, string Dt,
|
||||
ValueType TyQ, ValueType TyD, SDNode OpNode>
|
||||
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
|
||||
(ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
|
||||
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
|
||||
[(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
|
||||
|
||||
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
|
||||
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
|
||||
@ -1508,14 +1508,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
|
||||
|
||||
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
|
||||
// source operand element sizes of 8, 16 and 32 bits:
|
||||
multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
|
||||
string OpcodeStr, string Dt, Intrinsic IntOp> {
|
||||
def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
|
||||
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
|
||||
def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
|
||||
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
|
||||
def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
|
||||
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
|
||||
multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
|
||||
string OpcodeStr, string Dt, SDNode OpNode> {
|
||||
def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
|
||||
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
|
||||
def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
|
||||
OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
|
||||
def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
|
||||
OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
|
||||
}
|
||||
|
||||
|
||||
@ -3123,10 +3123,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
|
||||
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
|
||||
"vqmovun", "s", int_arm_neon_vqmovnsu>;
|
||||
// VMOVL : Vector Lengthening Move
|
||||
defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
|
||||
int_arm_neon_vmovls>;
|
||||
defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
|
||||
int_arm_neon_vmovlu>;
|
||||
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
|
||||
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
|
||||
|
||||
// Vector Conversions.
|
||||
|
||||
|
@ -78,6 +78,13 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
NewFn = F;
|
||||
return true;
|
||||
}
|
||||
} else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
|
||||
if (Name.compare(14, 7, "vmovls.", 7) == 0 ||
|
||||
Name.compare(14, 7, "vmovlu.", 7) == 0) {
|
||||
// Calls to these are transformed into IR without intrinsics.
|
||||
NewFn = 0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'b':
|
||||
@ -320,6 +327,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
assert(F && "CallInst has no function associated with it.");
|
||||
|
||||
if (!NewFn) {
|
||||
// Get the Function's name.
|
||||
const std::string& Name = F->getName();
|
||||
|
||||
// Upgrade ARM NEON intrinsics.
|
||||
if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
|
||||
Instruction *NewI;
|
||||
if (Name.compare(14, 7, "vmovls.", 7) == 0) {
|
||||
NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
|
||||
"upgraded." + CI->getName(), CI);
|
||||
} else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
|
||||
NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
|
||||
"upgraded." + CI->getName(), CI);
|
||||
} else {
|
||||
llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
|
||||
}
|
||||
// Replace any uses of the old CallInst.
|
||||
if (!CI->use_empty())
|
||||
CI->replaceAllUsesWith(NewI);
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
|
||||
bool isLoadH = false, isLoadL = false, isMovL = false;
|
||||
bool isMovSD = false, isShufPD = false;
|
||||
bool isUnpckhPD = false, isUnpcklPD = false;
|
||||
|
29
test/Bitcode/neon-intrinsics.ll
Normal file
29
test/Bitcode/neon-intrinsics.ll
Normal file
@ -0,0 +1,29 @@
|
||||
; RUN: llvm-dis < %s.bc | FileCheck %s
|
||||
|
||||
; vmovls should be auto-upgraded to sext
|
||||
|
||||
; CHECK: vmovls8
|
||||
; CHECK-NOT: arm.neon.vmovls.v8i16
|
||||
; CHECK: sext <8 x i8>
|
||||
|
||||
; CHECK: vmovls16
|
||||
; CHECK-NOT: arm.neon.vmovls.v4i32
|
||||
; CHECK: sext <4 x i16>
|
||||
|
||||
; CHECK: vmovls32
|
||||
; CHECK-NOT: arm.neon.vmovls.v2i64
|
||||
; CHECK: sext <2 x i32>
|
||||
|
||||
; vmovlu should be auto-upgraded to zext
|
||||
|
||||
; CHECK: vmovlu8
|
||||
; CHECK-NOT: arm.neon.vmovlu.v8i16
|
||||
; CHECK: zext <8 x i8>
|
||||
|
||||
; CHECK: vmovlu16
|
||||
; CHECK-NOT: arm.neon.vmovlu.v4i32
|
||||
; CHECK: zext <4 x i16>
|
||||
|
||||
; CHECK: vmovlu32
|
||||
; CHECK-NOT: arm.neon.vmovlu.v2i64
|
||||
; CHECK: zext <2 x i32>
|
BIN
test/Bitcode/neon-intrinsics.ll.bc
Normal file
BIN
test/Bitcode/neon-intrinsics.ll.bc
Normal file
Binary file not shown.
@ -1,7 +0,0 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+neon -O2 -o /dev/null
|
||||
|
||||
; This used to crash.
; Regression test: compiling a vector zero-extend from <4 x i16> to <4 x i32>
; for ARM with NEON enabled. The test passes if llc exits successfully; the
; generated code is discarded (-o /dev/null). The RUN line feeds this file to
; llc on stdin (< %s) so that @test1 is actually compiled.
|
||||
define <4 x i32> @test1(<4 x i16> %a) {
|
||||
%A = zext <4 x i16> %a to <4 x i32>
|
||||
ret <4 x i32> %A
|
||||
}
|
@ -192,7 +192,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
|
||||
;CHECK: vmovls8:
|
||||
;CHECK: vmovl.s8
|
||||
%tmp1 = load <8 x i8>* %A
|
||||
%tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
|
||||
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
@ -200,7 +200,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
|
||||
;CHECK: vmovls16:
|
||||
;CHECK: vmovl.s16
|
||||
%tmp1 = load <4 x i16>* %A
|
||||
%tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
|
||||
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
@ -208,7 +208,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
|
||||
;CHECK: vmovls32:
|
||||
;CHECK: vmovl.s32
|
||||
%tmp1 = load <2 x i32>* %A
|
||||
%tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
|
||||
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
|
||||
ret <2 x i64> %tmp2
|
||||
}
|
||||
|
||||
@ -216,7 +216,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
|
||||
;CHECK: vmovlu8:
|
||||
;CHECK: vmovl.u8
|
||||
%tmp1 = load <8 x i8>* %A
|
||||
%tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
|
||||
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
|
||||
ret <8 x i16> %tmp2
|
||||
}
|
||||
|
||||
@ -224,7 +224,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
|
||||
;CHECK: vmovlu16:
|
||||
;CHECK: vmovl.u16
|
||||
%tmp1 = load <4 x i16>* %A
|
||||
%tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
|
||||
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
|
||||
ret <4 x i32> %tmp2
|
||||
}
|
||||
|
||||
@ -232,18 +232,10 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
|
||||
;CHECK: vmovlu32:
|
||||
;CHECK: vmovl.u32
|
||||
%tmp1 = load <2 x i32>* %A
|
||||
%tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
|
||||
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
|
||||
ret <2 x i64> %tmp2
|
||||
}
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
|
||||
declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
|
||||
declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
|
||||
|
||||
declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
|
||||
declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
|
||||
declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
|
||||
|
||||
define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
|
||||
;CHECK: vmovni16:
|
||||
;CHECK: vmovn.i16
|
||||
|
Loading…
x
Reference in New Issue
Block a user