mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 10:17:36 +00:00
[AArch64] Fix condition for "high-vector" DUP optimizations.
AArch64 NEON has a bunch of instructions with a "2" suffix that extract the top half of the source vectors, instead of the bottom half. We have some DAGCombines to try to take advantage of that. However, they assumed that any EXTRACT_SUBVECTOR was extracting the high half of the vector in question, even when it was actually extracting the low half. This issue has apparently existed since the AArch64 backend was merged. Fixes https://bugs.llvm.org/show_bug.cgi?id=40632. Differential Revision: https://reviews.llvm.org/D57862 llvm-svn: 353486
This commit is contained in:
parent
22c328969d
commit
b87d675297
@ -9722,12 +9722,13 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
|
||||
DAG.getConstant(NumElems, dl, MVT::i64));
|
||||
}
|
||||
|
||||
static bool isEssentiallyExtractSubvector(SDValue N) {
|
||||
if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
|
||||
return true;
|
||||
|
||||
return N.getOpcode() == ISD::BITCAST &&
|
||||
N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
|
||||
/// Returns true if \p N is an ISD::EXTRACT_SUBVECTOR (optionally wrapped in a
/// single ISD::BITCAST) whose second operand equals half the element count of
/// the vector being extracted from, i.e. the extract begins at the midpoint of
/// its source vector. Any other node, including an extract starting elsewhere,
/// yields false.
static bool isEssentiallyExtractHighSubvector(SDValue N) {
|
||||
// Look through at most one bitcast to reach the underlying node.
if (N.getOpcode() == ISD::BITCAST)
|
||||
N = N.getOperand(0);
|
||||
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
|
||||
return false;
|
||||
// Operand 1 is cast<> to ConstantSDNode unconditionally, so this relies on
// the extract's second operand always being a constant.
// NOTE(review): presumably that operand is the start element index -- confirm
// against the ISD::EXTRACT_SUBVECTOR definition.
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
|
||||
N.getOperand(0).getValueType().getVectorNumElements() / 2;
|
||||
}
|
||||
|
||||
/// Helper structure to keep track of ISD::SET_CC operands.
|
||||
@ -9894,13 +9895,13 @@ static SDValue performAddSubLongCombine(SDNode *N,
|
||||
|
||||
// It's not worth doing if at least one of the inputs isn't already an
|
||||
// extract, but we don't know which it'll be so we have to try both.
|
||||
if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
|
||||
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
|
||||
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
|
||||
if (!RHS.getNode())
|
||||
return SDValue();
|
||||
|
||||
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
|
||||
} else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
|
||||
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
|
||||
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
|
||||
if (!LHS.getNode())
|
||||
return SDValue();
|
||||
@ -9933,11 +9934,11 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
|
||||
// Either node could be a DUP, but it's not worth doing both of them (you'd
|
||||
// just as well use the non-high version) so look for a corresponding extract
|
||||
// operation on the other "wing".
|
||||
if (isEssentiallyExtractSubvector(LHS)) {
|
||||
if (isEssentiallyExtractHighSubvector(LHS)) {
|
||||
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
|
||||
if (!RHS.getNode())
|
||||
return SDValue();
|
||||
} else if (isEssentiallyExtractSubvector(RHS)) {
|
||||
} else if (isEssentiallyExtractHighSubvector(RHS)) {
|
||||
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
|
||||
if (!LHS.getNode())
|
||||
return SDValue();
|
||||
|
@ -885,6 +885,20 @@ declare double @llvm.fabs.f64(double) nounwind readnone
|
||||
define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: uabdl_from_extract_dup:
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: uabdl.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%res = tail call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
|
||||
%res1 = zext <2 x i32> %res to <2 x i64>
|
||||
ret <2 x i64> %res1
|
||||
}
|
||||
|
||||
define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: uabdl2_from_extract_dup:
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: uabdl2.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
@ -899,6 +913,20 @@ define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: sabdl_from_extract_dup:
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: sabdl.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%res = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
|
||||
%res1 = zext <2 x i32> %res to <2 x i64>
|
||||
ret <2 x i64> %res1
|
||||
}
|
||||
|
||||
define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: sabdl2_from_extract_dup:
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: sabdl2.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
|
@ -738,6 +738,22 @@ declare <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float>, <2 x float>) noun
|
||||
declare <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
define <2 x i64> @uaddl_duprhs(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: uaddl_duprhs
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: uaddl.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
|
||||
%rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
|
||||
|
||||
%res = add <2 x i64> %lhs.ext, %rhs.ext
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: uaddl2_duprhs
|
||||
; CHECK-NOT: ext.16b
|
||||
@ -754,6 +770,22 @@ define <2 x i64> @uaddl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @saddl_duplhs(i32 %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: saddl_duplhs
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: saddl.2d
|
||||
%lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
|
||||
%lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
|
||||
|
||||
%rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
|
||||
%rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
|
||||
|
||||
%res = add <2 x i64> %lhs.ext, %rhs.ext
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: saddl2_duplhs
|
||||
; CHECK-NOT: ext.16b
|
||||
@ -770,6 +802,22 @@ define <2 x i64> @saddl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @usubl_duprhs(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: usubl_duprhs
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: usubl.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%lhs.ext = zext <2 x i32> %lhs.high to <2 x i64>
|
||||
%rhs.ext = zext <2 x i32> %rhsvec to <2 x i64>
|
||||
|
||||
%res = sub <2 x i64> %lhs.ext, %rhs.ext
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: usubl2_duprhs
|
||||
; CHECK-NOT: ext.16b
|
||||
@ -786,8 +834,24 @@ define <2 x i64> @usubl2_duprhs(<4 x i32> %lhs, i32 %rhs) {
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @ssubl_duplhs(i32 %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: ssubl_duplhs:
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: ssubl.2d
|
||||
%lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
|
||||
%lhsvec = insertelement <2 x i32> %lhsvec.tmp, i32 %lhs, i32 1
|
||||
|
||||
%rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%lhs.ext = sext <2 x i32> %lhsvec to <2 x i64>
|
||||
%rhs.ext = sext <2 x i32> %rhs.high to <2 x i64>
|
||||
|
||||
%res = sub <2 x i64> %lhs.ext, %rhs.ext
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @ssubl2_duplhs(i32 %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: ssubl2_duplhs
|
||||
; CHECK-LABEL: ssubl2_duplhs:
|
||||
; CHECK-NOT: ext.16b
|
||||
; CHECK: ssubl2.2d
|
||||
%lhsvec.tmp = insertelement <2 x i32> undef, i32 %lhs, i32 0
|
||||
|
@ -1338,6 +1338,19 @@ entry:
|
||||
ret <4 x i32> %vmull2.i
|
||||
}
|
||||
|
||||
define <4 x i32> @foo6a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo6a:
|
||||
; CHECK-NEXT: smull.4s v0, v1, v2[1]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = bitcast <8 x i16> %b to <2 x i64>
|
||||
%shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0>
|
||||
%1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
|
||||
%shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
|
||||
ret <4 x i32> %vmull2.i
|
||||
}
|
||||
|
||||
define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo7:
|
||||
; CHECK-NEXT: smull2.2d v0, v1, v2[1]
|
||||
@ -1351,6 +1364,20 @@ entry:
|
||||
ret <2 x i64> %vmull2.i
|
||||
}
|
||||
|
||||
define <2 x i64> @foo7a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo7a:
|
||||
; CHECK-NEXT: smull.2d v0, v1, v2[1]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %b to <2 x i64>
|
||||
%shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0>
|
||||
%1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
|
||||
%shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
|
||||
ret <2 x i64> %vmull2.i
|
||||
}
|
||||
|
||||
|
||||
define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo8:
|
||||
; CHECK-NEXT: umull2.4s v0, v1, v2[1]
|
||||
@ -1364,6 +1391,19 @@ entry:
|
||||
ret <4 x i32> %vmull2.i
|
||||
}
|
||||
|
||||
define <4 x i32> @foo8a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo8a:
|
||||
; CHECK-NEXT: umull.4s v0, v1, v2[1]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = bitcast <8 x i16> %b to <2 x i64>
|
||||
%shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0>
|
||||
%1 = bitcast <1 x i64> %shuffle.i to <4 x i16>
|
||||
%shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind
|
||||
ret <4 x i32> %vmull2.i
|
||||
}
|
||||
|
||||
define <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo9:
|
||||
; CHECK-NEXT: umull2.2d v0, v1, v2[1]
|
||||
@ -1377,6 +1417,19 @@ entry:
|
||||
ret <2 x i64> %vmull2.i
|
||||
}
|
||||
|
||||
define <2 x i64> @foo9a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
|
||||
; CHECK-LABEL: foo9a:
|
||||
; CHECK-NEXT: umull.2d v0, v1, v2[1]
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%0 = bitcast <4 x i32> %b to <2 x i64>
|
||||
%shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0>
|
||||
%1 = bitcast <1 x i64> %shuffle.i to <2 x i32>
|
||||
%shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
|
||||
%vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind
|
||||
ret <2 x i64> %vmull2.i
|
||||
}
|
||||
|
||||
define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind {
|
||||
; CHECK-LABEL: bar0:
|
||||
; CHECK: smlal2.8h v0, v1, v2
|
||||
@ -1667,6 +1720,24 @@ entry:
|
||||
ret <2 x i64> %vmull2.i
|
||||
}
|
||||
|
||||
define <4 x i32> @vmull_low_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
|
||||
entry:
|
||||
; CHECK: vmull_low_n_s16_test
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: smull.4s
|
||||
; CHECK-NEXT: ret
|
||||
%conv = trunc i32 %d to i16
|
||||
%0 = bitcast <8 x i16> %b to <2 x i64>
|
||||
%shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0>
|
||||
%1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16>
|
||||
%vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0
|
||||
%vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1
|
||||
%vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2
|
||||
%vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3
|
||||
%vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind
|
||||
ret <4 x i32> %vmull2.i.i
|
||||
}
|
||||
|
||||
define <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp {
|
||||
entry:
|
||||
; CHECK: vmull_high_n_s16_test
|
||||
@ -1804,8 +1875,21 @@ define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x
|
||||
ret <2 x i64> %sum
|
||||
}
|
||||
|
||||
define <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: mull_from_extract_dup:
|
||||
define <2 x i64> @mull_from_extract_dup_low(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: mull_from_extract_dup_low:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: sqdmull.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
%rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
||||
%res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @mull_from_extract_dup_high(<4 x i32> %lhs, i32 %rhs) {
|
||||
; CHECK-LABEL: mull_from_extract_dup_high:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: sqdmull2.2d
|
||||
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
|
||||
@ -1817,8 +1901,21 @@ define <2 x i64> @mull_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) {
|
||||
; CHECK-LABEL: pmull_from_extract_dup:
|
||||
define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) {
|
||||
; CHECK-LABEL: pmull_from_extract_dup_low:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: pmull.8h
|
||||
%rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
|
||||
%rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
|
||||
%lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
||||
%res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
|
||||
; CHECK-LABEL: pmull_from_extract_dup_high:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: pmull2.8h
|
||||
%rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
|
||||
@ -1830,8 +1927,20 @@ define <8 x i16> @pmull_from_extract_dup(<16 x i8> %lhs, i8 %rhs) {
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK-LABEL: pmull_from_extract_duplane:
|
||||
define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK-LABEL: pmull_from_extract_duplane_low:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: pmull.8h
|
||||
|
||||
%lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
|
||||
%res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
|
||||
; CHECK-LABEL: pmull_from_extract_duplane_high:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: pmull2.8h
|
||||
|
||||
@ -1842,8 +1951,20 @@ define <8 x i16> @pmull_from_extract_duplane(<16 x i8> %lhs, <8 x i8> %rhs) {
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: sqdmull_from_extract_duplane:
|
||||
define <2 x i64> @sqdmull_from_extract_duplane_low(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: sqdmull_from_extract_duplane_low:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: sqdmull.2d
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
|
||||
|
||||
%res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmull_from_extract_duplane_high(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: sqdmull_from_extract_duplane_high:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: sqdmull2.2d
|
||||
|
||||
@ -1854,8 +1975,21 @@ define <2 x i64> @sqdmull_from_extract_duplane(<4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: sqdmlal_from_extract_duplane:
|
||||
define <2 x i64> @sqdmlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: sqdmlal_from_extract_duplane_low:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: sqdmlal.2d
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
|
||||
|
||||
%res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
|
||||
%sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
|
||||
ret <2 x i64> %sum
|
||||
}
|
||||
|
||||
define <2 x i64> @sqdmlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: sqdmlal_from_extract_duplane_high:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: sqdmlal2.2d
|
||||
|
||||
@ -1867,8 +2001,21 @@ define <2 x i64> @sqdmlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs,
|
||||
ret <2 x i64> %sum
|
||||
}
|
||||
|
||||
define <2 x i64> @umlal_from_extract_duplane(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: umlal_from_extract_duplane:
|
||||
define <2 x i64> @umlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: umlal_from_extract_duplane_low:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: umlal.2d
|
||||
|
||||
%lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>
|
||||
|
||||
%res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
|
||||
%sum = add <2 x i64> %accum, %res
|
||||
ret <2 x i64> %sum
|
||||
}
|
||||
|
||||
define <2 x i64> @umlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
|
||||
; CHECK-LABEL: umlal_from_extract_duplane_high:
|
||||
; CHECK-NOT: ext
|
||||
; CHECK: umlal2.2d
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user