mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-03-07 03:47:20 +00:00
[Hexagon] Move selection of HVX multiply from lowering to patterns
Also, change i32*i32 to V6_vmpyieoh + V6_vmpyiewuh_acc, which works on V60 as well.
This commit is contained in:
parent
30ff1827df
commit
3482300416
@ -91,6 +91,7 @@ HexagonTargetLowering::initializeHVXLowering() {
|
||||
setOperationAction(ISD::XOR, T, Legal);
|
||||
setOperationAction(ISD::ADD, T, Legal);
|
||||
setOperationAction(ISD::SUB, T, Legal);
|
||||
setOperationAction(ISD::MUL, T, Legal);
|
||||
setOperationAction(ISD::CTPOP, T, Legal);
|
||||
setOperationAction(ISD::CTLZ, T, Legal);
|
||||
if (T != ByteV) {
|
||||
@ -103,7 +104,6 @@ HexagonTargetLowering::initializeHVXLowering() {
|
||||
setOperationAction(ISD::LOAD, T, Custom);
|
||||
setOperationAction(ISD::MLOAD, T, Custom);
|
||||
setOperationAction(ISD::MSTORE, T, Custom);
|
||||
setOperationAction(ISD::MUL, T, Custom);
|
||||
setOperationAction(ISD::MULHS, T, Custom);
|
||||
setOperationAction(ISD::MULHU, T, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, T, Custom);
|
||||
@ -1444,73 +1444,6 @@ HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
|
||||
{VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
|
||||
}
|
||||
|
||||
/// Lower an ISD::MUL whose result is a single HVX vector.
///
/// The expansion is chosen by the element type of the result:
///  - i8:  a widening multiply (V6_vmpybv) followed by a byte shuffle that
///         keeps only the low byte of each 16-bit product;
///  - i16: a single V6_vmpyih;
///  - i32: a two-instruction sequence when the subtarget reports HVX v62
///         operations, otherwise a three-instruction sequence.
///
/// \param Op   The MUL node; operands 0 and 1 are the two multiplicands.
/// \param DAG  The selection DAG in which the replacement nodes are created.
/// \returns The lowered value, or an empty SDValue for any other element type.
SDValue
HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
  MVT ResTy = ty(Op);
  assert(ResTy.isVector() && isHvxSingleTy(ResTy));
  const SDLoc &dl(Op);

  MVT ElemTy = ResTy.getVectorElementType();
  SDValue Vs = Op.getOperand(0);
  SDValue Vt = Op.getOperand(1);

  switch (ElemTy.SimpleTy) {
    case MVT::i8: {
      // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
      // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
      // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
      MVT ExtTy = typeExtElem(ResTy, 2);
      // The element type is known to be i8 in this case, so the opcode is
      // always V6_vmpybv (no need to select between it and V6_vmpyhv).
      SDValue M = getInstr(Hexagon::V6_vmpybv, dl, ExtTy, {Vs, Vt}, DAG);

      // Discard high halves of the resulting values, collect the low halves.
      // Mask indices below VecLen refer to the first shuffle input, indices
      // from VecLen upwards refer to the second one.
      unsigned VecLen = ResTy.getVectorNumElements();
      SmallVector<int,256> ShuffMask;
      for (unsigned I = 0; I < VecLen; I += 2) {
        ShuffMask.push_back(I);         // Pick even element.
        ShuffMask.push_back(I+VecLen);  // Pick odd element.
      }
      VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
      SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
      return DAG.getBitcast(ResTy, BS);
    }
    case MVT::i16:
      // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
      // (There is also V6_vmpyhv, which behaves in an analogous way to
      // V6_vmpybv.)
      return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG);
    case MVT::i32: {
      if (Subtarget.useHVXV62Ops()) {
        // Build the products in a vector pair and keep only the first half
        // of that pair.
        // NOTE(review): presumably the first half holds the low 32 bits of
        // each product - confirm against the HVX reference.
        MVT PairTy = typeJoin({ResTy, ResTy});
        SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy,
                              {Vs, Vt}, DAG);
        SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy,
                              {T0, Vs, Vt}, DAG);
        return opSplit(T1, dl, DAG).first;
      }

      // Use the following sequence for signed word multiply:
      // T0 = V6_vmpyiowh Vs, Vt
      // T1 = V6_vaslw T0, 16
      // T2 = V6_vmpyiewuh_acc T1, Vs, Vt
      SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
      SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
      SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
      SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
                            {T1, Vs, Vt}, DAG);
      return T2;
    }
    default:
      break;
  }
  // Any other element type: return an empty value.
  return SDValue();
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
|
||||
MVT ResTy = ty(Op);
|
||||
@ -2100,7 +2033,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SRA:
|
||||
case ISD::SHL:
|
||||
case ISD::SRL: return LowerHvxShift(Op, DAG);
|
||||
case ISD::MUL: return LowerHvxMul(Op, DAG);
|
||||
case ISD::MULHS:
|
||||
case ISD::MULHU: return LowerHvxMulh(Op, DAG);
|
||||
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
|
||||
|
@ -316,6 +316,20 @@ let Predicates = [UseHVX] in {
|
||||
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
|
||||
}
|
||||
|
||||
// Selection patterns for element-wise integer multiply (mul) on single HVX
// vectors. They only apply when the UseHVX predicate holds. One pattern is
// provided per element width (i8, i16, i32).
let Predicates = [UseHVX] in {
|
||||
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
|
||||
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
|
||||
// where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
|
||||
// i8: take both halves of the widened product and recombine them into one
// byte vector with V6_vshuffeb.
// NOTE(review): presumably V6_vshuffeb keeps the low (even) byte of every
// 16-bit product and interleaves Hi/Lo back into element order - confirm
// against the HVX reference.
// NOTE(review): V6_vmpybv is written twice with identical operands; the
// expectation appears to be that only one multiply is emitted - verify.
def: Pat<(mul HVI8:$Vs, HVI8:$Vt),
|
||||
(V6_vshuffeb (HiVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)),
|
||||
(LoVec (V6_vmpybv HvxVR:$Vs, HvxVR:$Vt)))>;
|
||||
// i16: a single V6_vmpyih implements the multiply directly.
def: Pat<(mul HVI16:$Vs, HVI16:$Vt),
|
||||
(V6_vmpyih HvxVR:$Vs, HvxVR:$Vt)>;
|
||||
// i32: two-instruction sequence. The result of V6_vmpyieoh is passed as the
// first (accumulator) operand of V6_vmpyiewuh_acc, which is then applied to
// the same two inputs.
def: Pat<(mul HVI32:$Vs, HVI32:$Vt),
|
||||
(V6_vmpyiewuh_acc (V6_vmpyieoh HvxVR:$Vs, HvxVR:$Vt),
|
||||
HvxVR:$Vs, HvxVR:$Vt)>;
|
||||
}
|
||||
|
||||
let Predicates = [UseHVX] in {
|
||||
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
|
||||
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
|
||||
|
@ -223,16 +223,16 @@ define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
|
||||
; --- mul
|
||||
|
||||
; CHECK-LABEL: mpyb_64:
|
||||
; CHECK: v[[H00:[0-9]+]]:[[L00:[0-9]+]].h = vmpy(v0.b,v1.b)
|
||||
; CHECK: vshuffe(v[[H00]].b,v[[L00]].b)
|
||||
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
|
||||
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
|
||||
define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 {
|
||||
%p = mul <64 x i8> %v0, %v1
|
||||
ret <64 x i8> %p
|
||||
}
|
||||
|
||||
; CHECK-LABEL: mpyb_128:
|
||||
; CHECK: v[[H10:[0-9]+]]:[[L10:[0-9]+]].h = vmpy(v0.b,v1.b)
|
||||
; CHECK: vshuffe(v[[H10]].b,v[[L10]].b)
|
||||
; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b)
|
||||
; CHECK: vshuffe(v[[H0]].b,v[[L0]].b)
|
||||
define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 {
|
||||
%p = mul <128 x i8> %v0, %v1
|
||||
ret <128 x i8> %p
|
||||
@ -252,43 +252,21 @@ define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 {
|
||||
ret <64 x i16> %p
|
||||
}
|
||||
|
||||
; CHECK-LABEL: mpyw_64_v60:
|
||||
; CHECK-DAG: r[[T00:[0-9]+]] = #16
|
||||
; CHECK-DAG: v[[T01:[0-9]+]].w = vmpyio(v0.w,v1.h)
|
||||
; CHECK: v[[T02:[0-9]+]].w = vasl(v[[T01]].w,r[[T00]])
|
||||
; CHECK: v[[T02]].w += vmpyie(v0.w,v1.uh)
|
||||
define <16 x i32> @mpyw_64_v60(<16 x i32> %v0, <16 x i32> %v1) #0 {
|
||||
; CHECK-LABEL: mpyw_64:
|
||||
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
|
||||
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
|
||||
define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 {
|
||||
%p = mul <16 x i32> %v0, %v1
|
||||
ret <16 x i32> %p
|
||||
}
|
||||
|
||||
; CHECK-LABEL: mpyw_128_v60:
|
||||
; CHECK-DAG: r[[T10:[0-9]+]] = #16
|
||||
; CHECK-DAG: v[[T11:[0-9]+]].w = vmpyio(v0.w,v1.h)
|
||||
; CHECK: v[[T12:[0-9]+]].w = vasl(v[[T11]].w,r[[T10]])
|
||||
; CHECK: v[[T12]].w += vmpyie(v0.w,v1.uh)
|
||||
define <32 x i32> @mpyw_128_v60(<32 x i32> %v0, <32 x i32> %v1) #1 {
|
||||
%p = mul <32 x i32> %v0, %v1
|
||||
ret <32 x i32> %p
|
||||
}
|
||||
|
||||
; CHECK-LABEL: mpyw_64_v62:
|
||||
; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
|
||||
; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
|
||||
define <16 x i32> @mpyw_64_v62(<16 x i32> %v0, <16 x i32> %v1) #3 {
|
||||
%p = mul <16 x i32> %v0, %v1
|
||||
ret <16 x i32> %p
|
||||
}
|
||||
|
||||
; CHECK-LABEL: mpyw_128_v62:
|
||||
; CHECK: v[[T00:[0-9]+]]:[[T01:[0-9]+]] = vmpye(v0.w,v1.uh)
|
||||
; CHECK: v[[T00]]:[[T01]] += vmpyo(v0.w,v1.h)
|
||||
define <32 x i32> @mpyw_128_v62(<32 x i32> %v0, <32 x i32> %v1) #4 {
|
||||
; CHECK-LABEL: mpyw_128:
|
||||
; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h)
|
||||
; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh)
|
||||
define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 {
|
||||
%p = mul <32 x i32> %v0, %v1
|
||||
ret <32 x i32> %p
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" }
|
||||
attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" }
|
||||
attributes #3 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length64b" }
|
||||
attributes #4 = { nounwind "target-cpu"="hexagonv62" "target-features"="+hvxv62,+hvx-length128b" }
|
||||
|
Loading…
x
Reference in New Issue
Block a user