diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 97700bcd556..2a841b925ce 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -217,6 +217,10 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VCLE_U: return "MipsISD::VCLE_U"; case MipsISD::VCLT_S: return "MipsISD::VCLT_S"; case MipsISD::VCLT_U: return "MipsISD::VCLT_U"; + case MipsISD::VSMAX: return "MipsISD::VSMAX"; + case MipsISD::VSMIN: return "MipsISD::VSMIN"; + case MipsISD::VUMAX: return "MipsISD::VUMAX"; + case MipsISD::VUMIN: return "MipsISD::VUMIN"; case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; case MipsISD::VSPLATD: return "MipsISD::VSPLATD"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 730e97f3541..0d588c1d2a9 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -166,6 +166,12 @@ namespace llvm { VCLT_S, VCLT_U, + // Element-wise vector max/min. + VSMAX, + VSMIN, + VUMAX, + VUMIN, + // Special case of BUILD_VECTOR where all elements are the same. VSPLAT, // Special case of VSPLAT where the result is v2i64, the operand is diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 48bf8a27715..1909dc7464f 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -26,6 +26,14 @@ def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>; def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>; +def MipsVSMax : SDNode<"MipsISD::VSMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def MipsVSMin : SDNode<"MipsISD::VSMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def MipsVUMax : SDNode<"MipsISD::VUMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; +def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, @@ -970,17 +978,6 @@ class MSA_I5_DESC_BASE { - dag OutOperandList = (outs RCWD:$wd); - dag InOperandList = (ins RCWS:$ws, uimm5:$u5); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u5"); - list Pattern = [(set RCWD:$wd, (OpNode RCWS:$ws, immZExt5:$u5))]; - InstrItinClass Itinerary = itin; -} - class MSA_SI5_DESC_BASE; class MAX_A_W_DESC : MSA_3R_DESC_BASE<"max_a.w", int_mips_max_a_w, MSA128W>; class MAX_A_D_DESC : MSA_3R_DESC_BASE<"max_a.d", int_mips_max_a_d, MSA128D>; -class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", int_mips_max_s_b, MSA128B>; -class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", int_mips_max_s_h, MSA128H>; -class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", int_mips_max_s_w, MSA128W>; -class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", int_mips_max_s_d, MSA128D>; +class MAX_S_B_DESC : MSA_3R_DESC_BASE<"max_s.b", MipsVSMax, MSA128B>; +class MAX_S_H_DESC : MSA_3R_DESC_BASE<"max_s.h", MipsVSMax, MSA128H>; +class MAX_S_W_DESC : MSA_3R_DESC_BASE<"max_s.w", MipsVSMax, MSA128W>; +class MAX_S_D_DESC : MSA_3R_DESC_BASE<"max_s.d", MipsVSMax, MSA128D>; -class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", int_mips_max_u_b, MSA128B>; -class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", int_mips_max_u_h, MSA128H>; -class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", int_mips_max_u_w, MSA128W>; -class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", int_mips_max_u_d, MSA128D>; +class MAX_U_B_DESC : MSA_3R_DESC_BASE<"max_u.b", MipsVUMax, MSA128B>; +class MAX_U_H_DESC : MSA_3R_DESC_BASE<"max_u.h", MipsVUMax, MSA128H>; +class MAX_U_W_DESC : MSA_3R_DESC_BASE<"max_u.w", MipsVUMax, MSA128W>; +class MAX_U_D_DESC : MSA_3R_DESC_BASE<"max_u.d", MipsVUMax, MSA128D>; -class MAXI_S_B_DESC : MSA_I5_X_DESC_BASE<"maxi_s.b", int_mips_maxi_s_b, - MSA128B>; -class MAXI_S_H_DESC : MSA_I5_X_DESC_BASE<"maxi_s.h", int_mips_maxi_s_h, - MSA128H>; -class MAXI_S_W_DESC : MSA_I5_X_DESC_BASE<"maxi_s.w", int_mips_maxi_s_w, - MSA128W>; -class MAXI_S_D_DESC : MSA_I5_X_DESC_BASE<"maxi_s.d", int_mips_maxi_s_d, - MSA128D>; +class MAXI_S_B_DESC : MSA_I5_DESC_BASE<"maxi_s.b", MipsVSMax, vsplati8, + MSA128B>; +class MAXI_S_H_DESC : MSA_I5_DESC_BASE<"maxi_s.h", MipsVSMax, vsplati16, + MSA128H>; +class MAXI_S_W_DESC : MSA_I5_DESC_BASE<"maxi_s.w", MipsVSMax, vsplati32, + MSA128W>; +class MAXI_S_D_DESC : MSA_I5_DESC_BASE<"maxi_s.d", MipsVSMax, vsplati64, + MSA128D>; -class MAXI_U_B_DESC : MSA_I5_X_DESC_BASE<"maxi_u.b", int_mips_maxi_u_b, - MSA128B>; -class MAXI_U_H_DESC : MSA_I5_X_DESC_BASE<"maxi_u.h", int_mips_maxi_u_h, - MSA128H>; -class MAXI_U_W_DESC : MSA_I5_X_DESC_BASE<"maxi_u.w", int_mips_maxi_u_w, - MSA128W>; -class MAXI_U_D_DESC : MSA_I5_X_DESC_BASE<"maxi_u.d", int_mips_maxi_u_d, - MSA128D>; +class MAXI_U_B_DESC : MSA_I5_DESC_BASE<"maxi_u.b", MipsVUMax, vsplati8, + MSA128B>; +class MAXI_U_H_DESC : MSA_I5_DESC_BASE<"maxi_u.h", MipsVUMax, vsplati16, + MSA128H>; +class MAXI_U_W_DESC : MSA_I5_DESC_BASE<"maxi_u.w", MipsVUMax, vsplati32, + MSA128W>; +class MAXI_U_D_DESC : MSA_I5_DESC_BASE<"maxi_u.d", MipsVUMax, vsplati64, + MSA128D>; class MIN_A_B_DESC : MSA_3R_DESC_BASE<"min_a.b", int_mips_min_a_b, MSA128B>; class MIN_A_H_DESC : MSA_3R_DESC_BASE<"min_a.h", int_mips_min_a_h, MSA128H>; class MIN_A_W_DESC : MSA_3R_DESC_BASE<"min_a.w", int_mips_min_a_w, MSA128W>; class MIN_A_D_DESC : MSA_3R_DESC_BASE<"min_a.d", int_mips_min_a_d, MSA128D>; -class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", int_mips_min_s_b, MSA128B>; -class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", int_mips_min_s_h, MSA128H>; -class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", int_mips_min_s_w, MSA128W>; -class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", int_mips_min_s_d, MSA128D>; +class MIN_S_B_DESC : MSA_3R_DESC_BASE<"min_s.b", MipsVSMin, MSA128B>; +class MIN_S_H_DESC : MSA_3R_DESC_BASE<"min_s.h", MipsVSMin, MSA128H>; +class MIN_S_W_DESC : MSA_3R_DESC_BASE<"min_s.w", MipsVSMin, MSA128W>; +class MIN_S_D_DESC : MSA_3R_DESC_BASE<"min_s.d", MipsVSMin, MSA128D>; -class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", int_mips_min_u_b, MSA128B>; -class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", int_mips_min_u_h, MSA128H>; -class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", int_mips_min_u_w, MSA128W>; -class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", int_mips_min_u_d, MSA128D>; +class MIN_U_B_DESC : MSA_3R_DESC_BASE<"min_u.b", MipsVUMin, MSA128B>; +class MIN_U_H_DESC : MSA_3R_DESC_BASE<"min_u.h", MipsVUMin, MSA128H>; +class MIN_U_W_DESC : MSA_3R_DESC_BASE<"min_u.w", MipsVUMin, MSA128W>; +class MIN_U_D_DESC : MSA_3R_DESC_BASE<"min_u.d", MipsVUMin, MSA128D>; -class MINI_S_B_DESC : MSA_I5_X_DESC_BASE<"mini_s.b", int_mips_mini_s_b, - MSA128B>; -class MINI_S_H_DESC : MSA_I5_X_DESC_BASE<"mini_s.h", int_mips_mini_s_h, - MSA128H>; -class MINI_S_W_DESC : MSA_I5_X_DESC_BASE<"mini_s.w", int_mips_mini_s_w, - MSA128W>; -class MINI_S_D_DESC : MSA_I5_X_DESC_BASE<"mini_s.d", int_mips_mini_s_d, - MSA128D>; +class MINI_S_B_DESC : MSA_I5_DESC_BASE<"mini_s.b", MipsVSMin, vsplati8, + MSA128B>; +class MINI_S_H_DESC : MSA_I5_DESC_BASE<"mini_s.h", MipsVSMin, vsplati16, + MSA128H>; +class MINI_S_W_DESC : MSA_I5_DESC_BASE<"mini_s.w", MipsVSMin, vsplati32, + MSA128W>; +class MINI_S_D_DESC : MSA_I5_DESC_BASE<"mini_s.d", MipsVSMin, vsplati64, + MSA128D>; -class MINI_U_B_DESC : MSA_I5_X_DESC_BASE<"mini_u.b", int_mips_mini_u_b, - MSA128B>; -class MINI_U_H_DESC : MSA_I5_X_DESC_BASE<"mini_u.h", int_mips_mini_u_h, - MSA128H>; -class MINI_U_W_DESC : MSA_I5_X_DESC_BASE<"mini_u.w", int_mips_mini_u_w, - MSA128W>; -class MINI_U_D_DESC : MSA_I5_X_DESC_BASE<"mini_u.d", int_mips_mini_u_d, - MSA128D>; +class MINI_U_B_DESC : MSA_I5_DESC_BASE<"mini_u.b", MipsVUMin, vsplati8, + MSA128B>; +class MINI_U_H_DESC : MSA_I5_DESC_BASE<"mini_u.h", MipsVUMin, vsplati16, + MSA128H>; +class MINI_U_W_DESC : MSA_I5_DESC_BASE<"mini_u.w", MipsVUMin, vsplati32, + MSA128W>; +class MINI_U_D_DESC : MSA_I5_DESC_BASE<"mini_u.d", MipsVUMin, vsplati64, + MSA128D>; class MOD_S_B_DESC : MSA_3R_DESC_BASE<"mod_s.b", int_mips_mod_s_b, MSA128B>; class MOD_S_H_DESC : MSA_3R_DESC_BASE<"mod_s.h", int_mips_mod_s_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ffc5777cec8..9687bb90958 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -673,17 +673,57 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { EVT Ty = N->getValueType(0); - if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) - return SDValue(); + if (Ty.is128BitVector() && Ty.isInteger()) { + // Try the following combines: + // (vselect (setcc $a, $b, SETLT), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLE), $b, $a)) -> (vsmax $a, $b) + // (vselect (setcc $a, $b, SETLT), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETLE), $a, $b)) -> (vsmin $a, $b) + // (vselect (setcc $a, $b, SETULT), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULE), $b, $a)) -> (vumax $a, $b) + // (vselect (setcc $a, $b, SETULT), $a, $b)) -> (vumin $a, $b) + // (vselect (setcc $a, $b, SETULE), $a, $b)) -> (vumin $a, $b) + // SETGT/SETGE/SETUGT/SETUGE variants of these will show up initially but + // will be expanded to equivalent SETLT/SETLE/SETULT/SETULE versions by the + // legalizer. + SDValue Op0 = N->getOperand(0); - SDValue SetCC = N->getOperand(0); + if (Op0->getOpcode() != ISD::SETCC) + return SDValue(); - if (SetCC.getOpcode() != MipsISD::SETCC_DSP) - return SDValue(); + ISD::CondCode CondCode = cast(Op0->getOperand(2))->get(); + bool Signed; - return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, - SetCC.getOperand(0), SetCC.getOperand(1), N->getOperand(1), - N->getOperand(2), SetCC.getOperand(2)); + if (CondCode == ISD::SETLT || CondCode == ISD::SETLE) + Signed = true; + else if (CondCode == ISD::SETULT || CondCode == ISD::SETULE) + Signed = false; + else + return SDValue(); + + SDValue Op1 = N->getOperand(1); + SDValue Op2 = N->getOperand(2); + SDValue Op0Op0 = Op0->getOperand(0); + SDValue Op0Op1 = Op0->getOperand(1); + + if (Op1 == Op0Op0 && Op2 == Op0Op1) + return DAG.getNode(Signed ? MipsISD::VSMIN : MipsISD::VUMIN, SDLoc(N), + Ty, Op1, Op2); + else if (Op1 == Op0Op1 && Op2 == Op0Op0) + return DAG.getNode(Signed ? MipsISD::VSMAX : MipsISD::VUMAX, SDLoc(N), + Ty, Op1, Op2); + } else if ((Ty == MVT::v2i16) || (Ty == MVT::v4i8)) { + SDValue SetCC = N->getOperand(0); + + if (SetCC.getOpcode() != MipsISD::SETCC_DSP) + return SDValue(); + + return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, + SetCC.getOperand(0), SetCC.getOperand(1), + N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); + } + + return SDValue(); } static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, @@ -1288,6 +1328,50 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_ldi_w: case Intrinsic::mips_ldi_d: return lowerMSAUnaryIntr(Op, DAG, MipsISD::VSPLAT); + case Intrinsic::mips_max_s_b: + case Intrinsic::mips_max_s_h: + case Intrinsic::mips_max_s_w: + case Intrinsic::mips_max_s_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VSMAX); + case Intrinsic::mips_max_u_b: + case Intrinsic::mips_max_u_h: + case Intrinsic::mips_max_u_w: + case Intrinsic::mips_max_u_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VUMAX); + case Intrinsic::mips_maxi_s_b: + case Intrinsic::mips_maxi_s_h: + case Intrinsic::mips_maxi_s_w: + case Intrinsic::mips_maxi_s_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VSMAX, + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_maxi_u_b: + case Intrinsic::mips_maxi_u_h: + case Intrinsic::mips_maxi_u_w: + case Intrinsic::mips_maxi_u_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VUMAX, + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_min_s_b: + case Intrinsic::mips_min_s_h: + case Intrinsic::mips_min_s_w: + case Intrinsic::mips_min_s_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VSMIN); + case Intrinsic::mips_min_u_b: + case Intrinsic::mips_min_u_h: + case Intrinsic::mips_min_u_w: + case Intrinsic::mips_min_u_d: + return lowerMSABinaryIntr(Op, DAG, MipsISD::VUMIN); + case Intrinsic::mips_mini_s_b: + case Intrinsic::mips_mini_s_h: + case Intrinsic::mips_mini_s_w: + case Intrinsic::mips_mini_s_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VSMIN, + lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_mini_u_b: + case Intrinsic::mips_mini_u_h: + case Intrinsic::mips_mini_u_w: + case Intrinsic::mips_mini_u_d: + return lowerMSABinaryImmIntr(Op, DAG, MipsISD::VUMIN, + lowerMSASplatImm(Op, 2, DAG)); case Intrinsic::mips_mulv_b: case Intrinsic::mips_mulv_h: case Intrinsic::mips_mulv_w: diff --git a/test/CodeGen/Mips/msa/arithmetic.ll b/test/CodeGen/Mips/msa/arithmetic.ll index d695f1204fa..7dc758e35df 100644 --- a/test/CodeGen/Mips/msa/arithmetic.ll +++ b/test/CodeGen/Mips/msa/arithmetic.ll @@ -69,7 +69,8 @@ define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { %1 = load <16 x i8>* %a ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) - %2 = add <16 x i8> %1, + %2 = add <16 x i8> %1, ; CHECK-DAG: addvi.b [[R3:\$w[0-9]+]], [[R1]], 1 store <16 x i8> %2, <16 x i8>* %c ; CHECK-DAG: st.b [[R3]], 0($4) @@ -83,7 +84,8 @@ define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { %1 = load <8 x i16>* %a ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) - %2 = add <8 x i16> %1, + %2 = add <8 x i16> %1, ; CHECK-DAG: addvi.h [[R3:\$w[0-9]+]], [[R1]], 1 store <8 x i16> %2, <8 x i16>* %c ; CHECK-DAG: st.h [[R3]], 0($4) @@ -189,7 +191,8 @@ define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind { %1 = load <16 x i8>* %a ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) - %2 = sub <16 x i8> %1, + %2 = sub <16 x i8> %1, ; CHECK-DAG: subvi.b [[R3:\$w[0-9]+]], [[R1]], 1 store <16 x i8> %2, <16 x i8>* %c ; CHECK-DAG: st.b [[R3]], 0($4) @@ -203,7 +206,8 @@ define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind { %1 = load <8 x i16>* %a ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) - %2 = sub <8 x i16> %1, + %2 = sub <8 x i16> %1, ; CHECK-DAG: subvi.h [[R3:\$w[0-9]+]], [[R1]], 1 store <8 x i16> %2, <8 x i16>* %c ; CHECK-DAG: st.h [[R3]], 0($4) diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll index fc83f44cd40..2e66d9467da 100644 --- a/test/CodeGen/Mips/msa/compare.ll +++ b/test/CodeGen/Mips/msa/compare.ll @@ -965,3 +965,1027 @@ define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, ret void ; CHECK: .size bseli_u_v2i64 } + +define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v16i8 +} + +define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v8i16 +} + +define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v4i32 +} + +define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_v2i64 +} + +define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v16i8 +} + +define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v8i16 +} + +define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v4i32 +} + +define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_v2i64 +} + +define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v16i8 +} + +define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v8i16 +} + +define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v4i32 +} + +define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sge <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_s_eq_v2i64 +} + +define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: max_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: max_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v16i8 +} + +define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: max_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: max_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v8i16 +} + +define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: max_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: max_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v4i32 +} + +define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: max_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp uge <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: max_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_u_eq_v2i64 +} + +define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v16i8 +} + +define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v8i16 +} + +define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v4i32 +} + +define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sgt <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_v2i64 +} + +define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v16i8 +} + +define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v8i16 +} + +define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v4i32 +} + +define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ugt <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_v2i64 +} + +define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: maxi_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v16i8 +} + +define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: maxi_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v8i16 +} + +define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: maxi_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v4i32 +} + +define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sge <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: maxi_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_s_eq_v2i64 +} + +define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: maxi_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: maxi_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v16i8 +} + +define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: maxi_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: maxi_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v8i16 +} + +define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: maxi_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: maxi_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v4i32 +} + +define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: maxi_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp uge <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: maxi_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size maxi_u_eq_v2i64 +} + +define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v16i8 +} + +define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: min_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v8i16 +} + +define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v4i32 +} + +define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp slt <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_v2i64 +} + +define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v16i8 +} + +define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: min_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v8i16 +} + +define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v4i32 +} + +define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ult <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_v2i64 +} + +define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v16i8 +} + +define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: min_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v8i16 +} + +define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v4i32 +} + +define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sle <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_s_eq_v2i64 +} + +define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: min_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <16 x i8> %1, %2 + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2 + ; CHECK-DAG: min_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <16 x i8> %4, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v16i8 +} + +define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: min_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <8 x i16> %1, %2 + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2 + ; CHECK-DAG: min_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <8 x i16> %4, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v8i16 +} + +define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: min_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <4 x i32> %1, %2 + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2 + ; CHECK-DAG: min_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x i32> %4, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v4i32 +} + +define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: min_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ule <2 x i64> %1, %2 + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2 + ; CHECK-DAG: min_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x i64> %4, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_u_eq_v2i64 +} + +define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v16i8 +} + +define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v8i16 +} + +define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v4i32 +} + +define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp slt <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_v2i64 +} + +define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v16i8 +} + +define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v8i16 +} + +define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v4i32 +} + +define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ult <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_v2i64 +} + +define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_s_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: mini_s.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v16i8 +} + +define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_s_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: mini_s.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v8i16 +} + +define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_s_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: mini_s.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v4i32 +} + +define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_s_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp sle <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: mini_s.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_s_eq_v2i64 +} + +define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind { + ; CHECK: mini_u_eq_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <16 x i8> %1, + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: mini_u.b [[R3:\$w[0-9]+]], [[R1]], 1 + store <16 x i8> %3, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v16i8 +} + +define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind { + ; CHECK: mini_u_eq_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <8 x i16> %1, + %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: mini_u.h [[R3:\$w[0-9]+]], [[R1]], 1 + store <8 x i16> %3, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v8i16 +} + +define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind { + ; CHECK: mini_u_eq_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <4 x i32> %1, + %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: mini_u.w [[R3:\$w[0-9]+]], [[R1]], 1 + store <4 x i32> %3, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v4i32 +} + +define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { + ; CHECK: mini_u_eq_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = icmp ule <2 x i64> %1, + %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: mini_u.d [[R3:\$w[0-9]+]], [[R1]], 1 + store <2 x i64> %3, <2 x i64>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size mini_u_eq_v2i64 +} diff --git a/test/CodeGen/Mips/msa/compare_float.ll b/test/CodeGen/Mips/msa/compare_float.ll index 106653f47de..6bbcea05d58 100644 --- a/test/CodeGen/Mips/msa/compare_float.ll +++ b/test/CodeGen/Mips/msa/compare_float.ll @@ -596,3 +596,67 @@ define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, ret void ; CHECK: .size bseli_v2f64 } + +define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: max_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, <4 x float> %2) + ; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size max_v4f32 +} + +define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: max_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2) + ; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size max_v2f64 +} + +define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind { + ; CHECK: min_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2) + ; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <4 x float> %3, <4 x float>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size min_v4f32 +} + +define void @min_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind { + ; CHECK: min_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 x double> %2) + ; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]] + store <2 x double> %3, <2 x double>* %c + ; CHECK-DAG: st.d [[R3]], 0($4) + + ret void + ; CHECK: .size min_v2f64 +}