[Hexagon] Move selection of HVX multiply from lowering to patterns

Also, change i32*i32 to V6_vmpyieoh + V6_vmpyiewuh_acc, which works
on V60 as well.
This commit is contained in:
Krzysztof Parzyszek 2020-10-01 11:01:07 -05:00
parent 30ff1827df
commit 3482300416
3 changed files with 27 additions and 103 deletions

View File

@ -91,6 +91,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, T, Legal);
setOperationAction(ISD::ADD, T, Legal);
setOperationAction(ISD::SUB, T, Legal);
setOperationAction(ISD::MUL, T, Legal);
setOperationAction(ISD::CTPOP, T, Legal);
setOperationAction(ISD::CTLZ, T, Legal);
if (T != ByteV) {
@ -103,7 +104,6 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::MLOAD, T, Custom);
setOperationAction(ISD::MSTORE, T, Custom);
setOperationAction(ISD::MUL, T, Custom);
setOperationAction(ISD::MULHS, T, Custom);
setOperationAction(ISD::MULHU, T, Custom);
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
@ -1444,73 +1444,6 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
{VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}
/// Custom-lower an ISD::MUL node whose result type is a single HVX vector.
///
/// The instruction sequence is selected by the vector element type:
///   - i8:  V6_vmpybv, followed by a byte shuffle that collects the low
///          halves of the widened products,
///   - i16: a single V6_vmpyih,
///   - i32: a two-instruction sequence when Subtarget.useHVXV62Ops() is
///          true, otherwise a three-instruction sequence.
/// For any other element type an empty SDValue is returned.
SDValue
HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector() && isHvxSingleTy(ResTy));
  const SDLoc &dl(Op);
  MVT ElemTy = ResTy.getVectorElementType();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);

  switch (ElemTy.SimpleTy) {
    case MVT::i8: {
      // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
      // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
      // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
      // The element type is known to be i8 in this case, so the opcode is
      // always V6_vmpybv.
      MVT ExtTy = typeExtElem(ResTy, 2);
      SDValue M = getInstr(Hexagon::V6_vmpybv, dl, ExtTy, {Vs, Vt}, DAG);

      // Discard high halves of the resulting values, collect the low halves.
      // Mask indices below VecLen refer to the first shuffle input, indices
      // from VecLen upwards to the second one.
      unsigned VecLen = ResTy.getVectorNumElements();
      SmallVector<int,256> ShuffMask;
      for (unsigned I = 0; I < VecLen; I += 2) {
        ShuffMask.push_back(I);         // Pick even element.
        ShuffMask.push_back(I+VecLen);  // Pick odd element.
      }

      // Reinterpret the product pair as i8 elements and split it into its
      // two single-vector components before shuffling.
      VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
      SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
      return DAG.getBitcast(ResTy, BS);
    }

    case MVT::i16:
      // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
      // (There is also V6_vmpyhv, which behaves in an analogous way to
      // V6_vmpybv.)
      return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG);

    case MVT::i32: {
      if (Subtarget.useHVXV62Ops()) {
        // V62 sequence: build the product as a vector pair and keep only
        // the first component returned by opSplit.
        //   T0 = V6_vmpyewuh_64 Vs, Vt
        //   T1 = V6_vmpyowh_64_acc T0, Vs, Vt
        MVT PairTy = typeJoin({ResTy, ResTy});
        SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy,
                              {Vs, Vt}, DAG);
        SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                              {T0, Vs, Vt}, DAG);
        return opSplit(T1, dl, DAG).first;
      }

      // V60 sequence for signed word multiply:
      //   T0 = V6_vmpyiowh Vs, Vt
      //   T1 = V6_vaslw T0, 16
      //   T2 = V6_vmpyiewuh_acc T1, Vs, Vt
      SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
      SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
      SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
      return getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                      {T1, Vs, Vt}, DAG);
    }

    default:
      break;
  }

  // Element type not handled here.
  return SDValue();
}
SDValue
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
@ -2100,7 +2033,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerHvxShift(Op, DAG);
case ISD::MUL: return LowerHvxMul(Op, DAG);
case ISD::MULHS:
case ISD::MULHU: return LowerHvxMulh(Op, DAG);
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);

View File

@ -316,6 +316,20 @@ let Predicates = [UseHVX] in {
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
// Selection patterns for the vector `mul` node on the HVX vector types
// HVI8, HVI16 and HVI32. All of them are guarded by the UseHVX predicate.
let Predicates = [UseHVX] in {
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
// where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
// The Hi and Lo halves of that pair are then fed to V6_vshuffeb to form
// the single i8 result vector.
// NOTE(review): V6_vshuffeb presumably keeps the even (low) byte of every
// i16 product -- confirm against the HVX instruction reference.
// NOTE(review): V6_vmpybv is written twice in the output pattern; the
// intent appears to be one multiply whose two halves are both used --
// confirm that the duplicate nodes are merged during selection.
def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
(V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
(LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
// i16 multiply maps directly onto a single V6_vmpyih.
def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
(V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
// i32 multiply takes two instructions: the result of V6_vmpyieoh is passed
// as the first operand of V6_vmpyiewuh_acc, together with both sources.
// NOTE(review): the first operand is presumably the accumulator input
// (per the `_acc` suffix) -- confirm against the instruction definition.
def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
(V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
HvxVR:$Vs, HvxVR:$Vt)>;
}
let Predicates = [UseHVX] in {
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;

View File

@ -223,16 +223,16 @@ define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
; --- mul
; CHECK-LABEL: mpyb_64:
; CHECK: v[[H00:[0-9]+]]:[[L00:[0-9]+]].h = vmpy(v0.b,v1.b)
; CHECK: vshuffe(v[[H00]].b,v[[L00]].b)
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
%p = mul <64 x i8> %v0, %v1
ret <64 x i8> %p
}
; CHECK-LABEL: mpyb_128:
; CHECK: v[[H10:[0-9]+]]:[[L10:[0-9]+]].h = vmpy(v0.b,v1.b)
; CHECK: vshuffe(v[[H10]].b,v[[L10]].b)
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
%p = mul <128 x i8> %v0, %v1
ret <128 x i8> %p
@ -252,43 +252,21 @@ define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
ret <64 x i16> %p
}
; CHECK-LABEL: mpyw_64_v60:
; CHECK-DAG: r[[T00:[0-9]+]] = #16
; CHECK-DAG: v[[T01:[0-9]+]].w = vmpyio(v0.w,v1.h)
; CHECK: v[[T02:[0-9]+]].w = vasl(v[[T01]].w,r[[T00]])
; CHECK: v[[T02]].w += vmpyie(v0.w,v1.uh)
define <16 x i32> @mpyw_64_v60(<16 x i32> %v0, <16 x i32> %v1) #0 {
; CHECK-LABEL: mpyw_64:
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
%p = mul <16 x i32> %v0, %v1
ret <16 x i32> %p
}
; CHECK-LABEL: mpyw_128_v60:
; CHECK-DAG: r[[T10:[0-9]+]] = #16
; CHECK-DAG: v[[T11:[0-9]+]].w = vmpyio(v0.w,v1.h)
; CHECK: v[[T12:[0-9]+]].w = vasl(v[[T11]].w,r[[T10]])
; CHECK: v[[T12]].w += vmpyie(v0.w,v1.uh)
define <32 x i32> @mpyw_128_v60(<32 x i32> %v0, <32 x i32> %v1) #1 {
%p = mul <32 x i32> %v0, %v1
ret <32 x i32> %p
}
; CHECK-LABEL: mpyw_64_v62:
; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
define <16 x i32> @mpyw_64_v62(<16 x i32> %v0, <16 x i32> %v1) #3 {
%p = mul <16 x i32> %v0, %v1
ret <16 x i32> %p
}
; CHECK-LABEL: mpyw_128_v62:
; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
define <32 x i32> @mpyw_128_v62(<32 x i32> %v0, <32 x i32> %v1) #4 {
; CHECK-LABEL: mpyw_128:
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
%p = mul <32 x i32> %v0, %v1
ret <32 x i32> %p
}
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
attributes #3 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
attributes #4 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" }