mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-24 04:09:45 +00:00
Improve ARM lowering for "icmp <2 x i64> eq".
The custom lowering is pretty straightforward: basically, just AND together the two halves of a <4 x i32> compare. Differential Revision: https://reviews.llvm.org/D25713 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284536 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a31af6a338
commit
ed57153864
@ -278,7 +278,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
|
||||
}
|
||||
|
||||
MVT ElemTy = VT.getVectorElementType();
|
||||
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
|
||||
if (ElemTy != MVT::f64)
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
|
||||
@ -742,8 +742,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
|
||||
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
|
||||
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
|
||||
// a destination type that is wider than the source, and nor does
|
||||
// it have a FP_TO_[SU]INT instruction with a narrower destination than
|
||||
@ -5242,10 +5240,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
|
||||
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
|
||||
(SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
|
||||
// Special-case integer 64-bit equality comparisons. They aren't legal,
|
||||
// but they can be lowered with a few vector instructions.
|
||||
unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
|
||||
EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
|
||||
SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
|
||||
SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
|
||||
SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
|
||||
DAG.getCondCode(ISD::SETEQ));
|
||||
SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
|
||||
SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
|
||||
Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
|
||||
if (SetCCOpcode == ISD::SETNE)
|
||||
Merged = DAG.getNOT(dl, Merged, CmpVT);
|
||||
Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
|
||||
return Merged;
|
||||
}
|
||||
|
||||
if (CmpVT.getVectorElementType() == MVT::i64)
|
||||
// 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
|
||||
// but it's possible that our operands are 64-bit but our result is 32-bit.
|
||||
// Bail in this case.
|
||||
// 64-bit comparisons are not legal in general.
|
||||
return SDValue();
|
||||
|
||||
if (Op1.getValueType().isFloatingPoint()) {
|
||||
|
52 lines — test/CodeGen/ARM/vicmp-64.ll (new file)
@ -0,0 +1,52 @@
|
||||
; RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s

; Check codegen for 64-bit icmp operations, which don't directly map to any
; instruction.
||||
; icmp ne <2 x i64> has no single NEON instruction. The custom lowering
; compares the 32-bit halves with vceq.i32, ANDs each half-result with its
; vrev64.32-swapped partner so a lane is all-ones only when BOTH halves of
; the i64 matched, then inverts with vmvn for the "ne" sense.
define <2 x i64> @vne(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vne:
;CHECK: vceq.i32
;CHECK-NEXT: vrev64.32
;CHECK-NEXT: vand
;CHECK-NEXT: vmvn
;CHECK-NEXT: vmov
;CHECK-NEXT: vmov
;CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = icmp ne <2 x i64> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
|
||||
|
||||
; icmp eq <2 x i64>: same vceq.i32 + vrev64.32 + vand sequence as the "ne"
; case above, but without the final vmvn since no inversion is needed.
define <2 x i64> @veq(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: veq:
;CHECK: vceq.i32
;CHECK-NEXT: vrev64.32
;CHECK-NEXT: vand
;CHECK-NEXT: vmov
;CHECK-NEXT: vmov
;CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = icmp eq <2 x i64> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
|
||||
|
||||
; FIXME: We currently generate terrible code for this.
; (Atop < Btop) | ((ATop == BTop) & (ABottom < BBottom))
; would come out to roughly 6 instructions, but we currently
; scalarize it.
; The checks below pin the current scalarized form (subs/sbcs pairs doing
; 64-bit subtract-with-borrow per element) so an eventual vectorized
; lowering will show up as a test change.
define <2 x i64> @vult(<2 x i64>* %A, <2 x i64>* %B) nounwind {
;CHECK-LABEL: vult:
;CHECK: subs
;CHECK: sbcs
;CHECK: subs
;CHECK: sbcs
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = load <2 x i64>, <2 x i64>* %B
  %tmp3 = icmp ult <2 x i64> %tmp1, %tmp2
  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
  ret <2 x i64> %tmp4
}
|
Loading…
Reference in New Issue
Block a user