diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index 5cdabdeadae..110d55d562a 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -542,3 +542,180 @@ def int_ppc_vsx_xsmindp : PowerPC_VSX_Sca_DDD_Intrinsic<"xsmindp">; def int_ppc_vsx_xvdivdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvdivdp">; def int_ppc_vsx_xvdivsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvdivsp">; } + +//===----------------------------------------------------------------------===// +// PowerPC QPX Intrinsics. +// + +let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". + /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics. + class PowerPC_QPX_Intrinsic ret_types, + list param_types, + list properties> + : GCCBuiltin, + Intrinsic; +} + +//===----------------------------------------------------------------------===// +// PowerPC QPX Intrinsic Class Definitions. +// + +/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64 +/// vector and returns one. These intrinsics have no side effects. +class PowerPC_QPX_FF_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_QPX_FFF_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64 +/// vectors and returns one. These intrinsics have no side effects. +class PowerPC_QPX_FFFF_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer +/// and returns a v4f64. +class PowerPC_QPX_Load_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer +/// and returns a v4f64 permutation. +class PowerPC_QPX_LoadPerm_Intrinsic + : PowerPC_QPX_Intrinsic; + +/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer +/// and stores a v4f64. +class PowerPC_QPX_Store_Intrinsic + : PowerPC_QPX_Intrinsic; + +//===----------------------------------------------------------------------===// +// PowerPC QPX Intrinsic Definitions. + +let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
+ // Add Instructions + def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">; + def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">; + def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">; + def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">; + + // Estimate Instructions + def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">; + def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">; + def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">; + def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">; + + // Multiply Instructions + def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">; + def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">; + def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">; + def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">; + + // Multiply-add instructions + def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">; + def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">; + def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">; + def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">; + def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">; + def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">; + def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">; + def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">; + def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">; + def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">; + def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">; + def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">; + def int_ppc_qpx_qvfxxcpnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">; + def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">; + def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">; + def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">; + + // Select Instruction + def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">; + + // Permute Instruction + def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">; + + // Convert and Round Instructions + def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">; + def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">; + def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">; + def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">; + def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">; + def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">; + def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">; + def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">; + def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">; + def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">; + def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">; + def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">; + def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">; + def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">; + def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">; + def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">; + def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">; + + // Move Instructions + def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">; + def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">; + def 
int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">; + def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">; + + // Compare Instructions + def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">; + def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">; + def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">; + def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">; + + // Load instructions + def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">; + def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">; + def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">; + def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">; + + def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">; + def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">; + def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">; + def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">; + def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">; + def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">; + def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">; + def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">; + + def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">; + def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">; + def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">; + def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">; + + // Store instructions + def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">; + def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">; + def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">; + def int_ppc_qpx_qvstfsa : PowerPC_QPX_Store_Intrinsic<"qvstfsa">; + + def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">; + def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">; + def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">; + def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">; + def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">; + def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">; + + // Logical and permutation formation + def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical", + [llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci", + [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>; +} + diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index cd36e58b78d..bf00e7397be 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -132,6 +132,16 @@ static const MCPhysReg VSFRegs[64] = { PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 }; +static unsigned QFRegs[32] = { + PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, + PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, + PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15, + PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19, + PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23, + PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27, + PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 +}; static const MCPhysReg CRBITRegs[32] = { PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, @@ -429,6 +439,7 @@ public: bool isU8ImmX8() const { return Kind == Immediate 
&& isUInt<8>(getImm()) && (getImm() & 7) == 0; } + bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } bool isU16Imm() const { switch (Kind) { case Expression: @@ -564,6 +575,21 @@ public: Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()])); } + void addRegQFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); + } + + void addRegQSRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); + } + + void addRegQBRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(QFRegs[getReg()])); + } + void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()])); diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 5251b60f348..0ed07239327 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -164,6 +164,17 @@ static const unsigned G8Regs[] = { PPC::X28, PPC::X29, PPC::X30, PPC::X31 }; +static const unsigned QFRegs[] = { + PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3, + PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, + PPC::QF12, PPC::QF13, PPC::QF14, PPC::QF15, + PPC::QF16, PPC::QF17, PPC::QF18, PPC::QF19, + PPC::QF20, PPC::QF21, PPC::QF22, PPC::QF23, + PPC::QF24, PPC::QF25, PPC::QF26, PPC::QF27, + PPC::QF28, PPC::QF29, PPC::QF30, PPC::QF31 +}; + template static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, const unsigned (&Regs)[N]) { @@ -235,6 +246,15 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo, #define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass #define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass +static DecodeStatus DecodeQFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, QFRegs); +} + +#define DecodeQSRCRegisterClass DecodeQFRCRegisterClass +#define DecodeQBRCRegisterClass DecodeQFRCRegisterClass + template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, const void *Decoder) { @@ -335,6 +355,15 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint32_t Inst = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0); + if ((STI.getFeatureBits() & PPC::FeatureQPX) != 0) { + DecodeStatus result = + decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI); + if (result != MCDisassembler::Fail) + return result; + + MI.clear(); + } + return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 670c40a2a3b..c287fbe7c5b 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -34,7 +34,20 @@ FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false), #include "PPCGenAsmWriter.inc" void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); + const char *RegName = getRegisterName(RegNo); + if (RegName[0] == 'q' /* QPX */) { + // The system 
toolchain on the BG/Q does not understand QPX register names + // in .cfi_* directives, so print the name of the floating-point + // subregister instead. + std::string RN(RegName); + + RN[0] = 'f'; + OS << RN; + + return; + } + + OS << RegName; } void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O, @@ -236,6 +249,13 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo, O << (unsigned int)Value; } +void PPCInstPrinter::printU12ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned short Value = MI->getOperand(OpNo).getImm(); + assert(Value <= 4095 && "Invalid u12imm argument!"); + O << (unsigned short)Value; +} + void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).isImm()) @@ -338,6 +358,7 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': + case 'q': // for QPX case 'v': if (RegName[1] == 's') return RegName + 2; diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index b21aa22daa1..6ead19b33fe 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -48,6 +48,7 @@ public: void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU12ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 480b790a99b..13272908b12 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -151,6 +151,7 @@ static const char *stripRegisterPrefix(const char *RegName) { switch (RegName[0]) { case 'r': case 'f': + case 'q': // for QPX case 'v': if (RegName[1] == 's') return RegName + 2; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 3eaec6ba54d..045fca3c747 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -55,13 +55,17 @@ def RetCC_PPC : CallingConv<[ // only the ELFv2 ABI fully utilizes all these registers. CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - + + // QPX vectors are returned in QF1 and QF2. + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, + // Vector types returned as "direct" go into V2 .. V9; note that only the // ELFv2 ABI fully utilizes all these registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, - CCIfType<[v2f64, v2i64], - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> ]>; // No explicit register is specified for the AnyReg calling convention. 
The @@ -108,10 +112,12 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>, - CCIfType<[v2f64, v2i64], - CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>> + CCIfType<[v4f64, v4f32, v4i1], + CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>, + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>, + CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", + CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>> ]>; //===----------------------------------------------------------------------===// @@ -144,6 +150,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // alignment and size as doubles. CCIfType<[f32,f64], CCAssignToStack<8, 8>>, + // QPX vectors that are stored in double precision need 32-byte alignment. + CCIfType<[v4f64, v4i1], CCAssignToStack<32, 32>>, + // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>> ]>; @@ -158,12 +167,17 @@ def CC_PPC32_SVR4_VarArg : CallingConv<[ // In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to // put vector arguments in vector registers before putting them on the stack. def CC_PPC32_SVR4 : CallingConv<[ + // QPX vectors mirror the scalar FP convention. + CCIfType<[v4f64, v4f32, v4i1], CCIfSubtarget<"hasQPX()", + CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>, + // The first 12 Vector arguments are passed in AltiVec registers. - CCIfType<[v16i8, v8i16, v4i32, v4f32], - CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>, - CCIfType<[v2f64, v2i64], + CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()", + CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, + V10, V11, V12, V13]>>>, + CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()", CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9, - VSH10, VSH11, VSH12, VSH13]>>, + VSH10, VSH11, VSH12, VSH13]>>>, CCDelegateTo ]>; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 10429db9b90..f997fea4d93 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -83,7 +83,7 @@ static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0), + STI.getPlatformStackAlignment(), 0), Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), TOCSaveOffset(computeTOCSaveOffset(Subtarget)), FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 2418ca6b19a..0d553d32f31 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2293,6 +2293,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Altivec Vector compare instructions do not set any CR register by default and // vector compare operations return the same type as the operands. 
if (LHS.getValueType().isVector()) { + if (PPCSubTarget->hasQPX()) + return nullptr; + EVT VecVT = LHS.getValueType(); bool Swap, Negate; unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, @@ -2468,6 +2471,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); switch (LoadedVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Invalid PPC load type!"); + case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX + case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX case MVT::f64: Opcode = PPC::LFDUX; break; case MVT::f32: Opcode = PPC::LFSUX; break; case MVT::i32: Opcode = PPC::LWZUX; break; @@ -2711,6 +2716,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SelectCCOp = PPC::SELECT_CC_VSFRC; else SelectCCOp = PPC::SELECT_CC_F8; + else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) + SelectCCOp = PPC::SELECT_CC_QFRC; + else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) + SelectCCOp = PPC::SELECT_CC_QSRC; + else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) + SelectCCOp = PPC::SELECT_CC_QBRC; else if (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64) SelectCCOp = PPC::SELECT_CC_VSRC; @@ -3406,6 +3417,9 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: + case PPC::SELECT_QFRC: + case PPC::SELECT_QSRC: + case PPC::SELECT_QBRC: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSRC: { @@ -3713,6 +3727,9 @@ void PPCDAGToDAGISel::PeepholeCROps() { case PPC::SELECT_I8: case PPC::SELECT_F4: case PPC::SELECT_F8: + case PPC::SELECT_QFRC: + case PPC::SELECT_QSRC: + case PPC::SELECT_QBRC: case PPC::SELECT_VRRC: case PPC::SELECT_VSFRC: case PPC::SELECT_VSRC: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 7346dff8602..bb0eb399529 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -610,6 +610,162 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); } + if (Subtarget.hasQPX()) { + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FREM, MVT::v4f64, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal); + setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand); + + setOperationAction(ISD::LOAD , MVT::v4f64, Custom); + setOperationAction(ISD::STORE , MVT::v4f64, Custom); + + setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4f64, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + + setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal); + setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand); + + setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal); + 
setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + + setOperationAction(ISD::FNEG , MVT::v4f64, Legal); + setOperationAction(ISD::FABS , MVT::v4f64, Legal); + setOperationAction(ISD::FSIN , MVT::v4f64, Expand); + setOperationAction(ISD::FCOS , MVT::v4f64, Expand); + setOperationAction(ISD::FPOWI , MVT::v4f64, Expand); + setOperationAction(ISD::FPOW , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand); + setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand); + setOperationAction(ISD::FEXP , MVT::v4f64, Expand); + setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand); + + setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal); + + setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal); + setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal); + + addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass); + + setOperationAction(ISD::FADD, MVT::v4f32, Legal); + setOperationAction(ISD::FSUB, MVT::v4f32, Legal); + setOperationAction(ISD::FMUL, MVT::v4f32, Legal); + setOperationAction(ISD::FREM, MVT::v4f32, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); + setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand); + + setOperationAction(ISD::LOAD , MVT::v4f32, Custom); + setOperationAction(ISD::STORE , MVT::v4f32, Custom); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4f32, Expand); + setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal); + setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand); + + setOperationAction(ISD::FNEG , MVT::v4f32, Legal); + setOperationAction(ISD::FABS , MVT::v4f32, Legal); + setOperationAction(ISD::FSIN , MVT::v4f32, Expand); + setOperationAction(ISD::FCOS , MVT::v4f32, Expand); + setOperationAction(ISD::FPOWI , MVT::v4f32, Expand); + setOperationAction(ISD::FPOW , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand); + setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand); + setOperationAction(ISD::FEXP , MVT::v4f32, Expand); + setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand); + + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + + setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal); + setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal); + + addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass); + + setOperationAction(ISD::AND , MVT::v4i1, Legal); + setOperationAction(ISD::OR , MVT::v4i1, Legal); + setOperationAction(ISD::XOR , MVT::v4i1, Legal); + + if (!Subtarget.useCRBits()) + setOperationAction(ISD::SELECT, MVT::v4i1, Expand); + setOperationAction(ISD::VSELECT, MVT::v4i1, Legal); + + setOperationAction(ISD::LOAD , MVT::v4i1, Custom); + setOperationAction(ISD::STORE , MVT::v4i1, Custom); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom); + 
setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand); + setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand); + setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); + + setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom); + + addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass); + + setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal); + setOperationAction(ISD::FROUND, MVT::v4f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); + + // These need to set FE_INEXACT, and so cannot be vectorized here. + setOperationAction(ISD::FRINT, MVT::v4f64, Expand); + setOperationAction(ISD::FRINT, MVT::v4f32, Expand); + + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); + } else { + setOperationAction(ISD::FDIV, MVT::v4f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f64, Expand); + + setOperationAction(ISD::FDIV, MVT::v4f32, Expand); + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); + } + } + if (Subtarget.has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); @@ -621,8 +777,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } setBooleanContents(ZeroOrOneBooleanContent); - // Altivec instructions set fields to all zeros or all ones. - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + if (Subtarget.hasAltivec()) { + // Altivec instructions set fields to all zeros or all ones. + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + } if (!isPPC64) { // These libcalls are not available in 32-bit. @@ -851,12 +1010,22 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; + case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; + case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; + case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; + case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI"; + case PPCISD::QBFLT: return "PPCISD::QBFLT"; + case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; } } -EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; + + if (Subtarget.hasQPX()) + return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); + return VT.changeVectorElementTypeToInteger(); } @@ -1242,6 +1411,36 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { return SDValue(); } +/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift +/// amount, otherwise return -1. 
+int PPC::isQVALIGNIShuffleMask(SDNode *N) { + EVT VT = N->getValueType(0); + if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1) + return -1; + + ShuffleVectorSDNode *SVOp = cast(N); + + // Find the first non-undef value in the shuffle mask. + unsigned i; + for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i) + /*search*/; + + if (i == 4) return -1; // all undef. + + // Otherwise, check to see if the rest of the elements are consecutively + // numbered from this value. + unsigned ShiftAmt = SVOp->getMaskElt(i); + if (ShiftAmt < i) return -1; + ShiftAmt -= i; + + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 4; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) + return -1; + + return ShiftAmt; +} + //===----------------------------------------------------------------------===// // Addressing Mode Selection //===----------------------------------------------------------------------===// @@ -1501,9 +1700,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, } else return false; - // PowerPC doesn't have preinc load/store instructions for vectors. - if (VT.isVector()) - return false; + // PowerPC doesn't have preinc load/store instructions for vectors (except + // for QPX, which does have preinc r+r forms). + if (VT.isVector()) { + if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) { + return false; + } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) { + AM = ISD::PRE_INC; + return true; + } + } if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { @@ -2240,6 +2446,17 @@ static const MCPhysReg *GetFPR() { return FPR; } +/// GetQFPR - Get the set of QPX registers that should be allocated for +/// arguments. +static const MCPhysReg *GetQFPR() { + static const MCPhysReg QFPR[] = { + PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7, + PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13 + }; + + return QFPR; +} + /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, @@ -2268,6 +2485,10 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) Align = 16; + // QPX vector types stored in double-precision are padded to a 32 byte + // boundary. + else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1) + Align = 32; // ByVal parameters are aligned as requested. if (Flags.isByVal()) { @@ -2306,7 +2527,7 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, - unsigned &AvailableVRs) { + unsigned &AvailableVRs, bool HasQPX) { bool UseMemory = false; // Respect alignment of argument on the stack. @@ -2330,7 +2551,11 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. if (!Flags.isByVal()) { - if (ArgVT == MVT::f32 || ArgVT == MVT::f64) + if (ArgVT == MVT::f32 || ArgVT == MVT::f64 || + // QPX registers overlap with the scalar FP registers. 
+ (HasQPX && (ArgVT == MVT::v4f32 || + ArgVT == MVT::v4f64 || + ArgVT == MVT::v4i1))) if (AvailableFPRs > 0) { --AvailableFPRs; return false; @@ -2464,13 +2689,21 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: - case MVT::v4f32: RC = &PPC::VRRCRegClass; break; + case MVT::v4f32: + RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass; + break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VSHRCRegClass; break; + case MVT::v4f64: + RC = &PPC::QFRCRegClass; + break; + case MVT::v4i1: + RC = &PPC::QBRCRegClass; + break; } // Transform the arguments stored in physical registers into virtual ones. @@ -2658,9 +2891,12 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; + static const MCPhysReg *QFPR = GetQFPR(); + const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); + const unsigned Num_QFPR_Regs = Num_FPR_Regs; // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area @@ -2676,7 +2912,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( for (unsigned i = 0, e = Ins.size(); i != e; ++i) if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, - NumBytes, AvailableFPRs, AvailableVRs)) + NumBytes, AvailableFPRs, AvailableVRs, + Subtarget.hasQPX())) HasParameterArea = true; // Add DAG nodes to load the arguments or copy them out of registers. On @@ -2685,6 +2922,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned &QFPR_idx = FPR_idx; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; @@ -2908,6 +3146,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. @@ -2926,6 +3165,36 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; + } // not QPX + + assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 && + "Invalid QPX parameter type"); + /* fall through */ + + case MVT::v4f64: + case MVT::v4i1: + // QPX vectors are treated like their scalar floating-point subregisters + // (except that they're larger). + unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 
16 : 32; + if (QFPR_idx != Num_QFPR_Regs) { + const TargetRegisterClass *RC; + switch (ObjectVT.getSimpleVT().SimpleTy) { + case MVT::v4f64: RC = &PPC::QFRCRegClass; break; + case MVT::v4f32: RC = &PPC::QSRCRegClass; break; + default: RC = &PPC::QBRCRegClass; break; + } + + unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++QFPR_idx; + } else { + if (CallConv == CallingConv::Fast) + ComputeArgOffset(); + needsLoad = true; + } + if (CallConv != CallingConv::Fast || needsLoad) + ArgOffset += Sz; + break; } // We need to load the argument to a virtual register if we determined @@ -4306,6 +4575,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned &QFPR_idx = FPR_idx; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, @@ -4322,9 +4592,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; + static const MCPhysReg *QFPR = GetQFPR(); + const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); + const unsigned NumQFPRs = NumFPRs; // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. @@ -4348,12 +4621,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (++NumGPRsUsed <= NumGPRs) continue; break; - case MVT::f32: - case MVT::f64: - if (++NumFPRsUsed <= NumFPRs) - continue; - break; - case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: @@ -4362,6 +4629,24 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (++NumVRsUsed <= NumVRs) continue; break; + case MVT::v4f32: + // When using QPX, this is handled like a FP register, otherwise, it + // is an Altivec register. + if (Subtarget.hasQPX()) { + if (++NumFPRsUsed <= NumFPRs) + continue; + } else { + if (++NumVRsUsed <= NumVRs) + continue; + } + break; + case MVT::f32: + case MVT::f64: + case MVT::v4f64: // QPX + case MVT::v4i1: // QPX + if (++NumFPRsUsed <= NumFPRs) + continue; + break; } } @@ -4703,6 +4988,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + if (!Subtarget.hasQPX()) { // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. @@ -4766,6 +5052,60 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (CallConv != CallingConv::Fast) ArgOffset += 16; break; + } // not QPX + + assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 && + "Invalid QPX parameter type"); + + /* fall through */ + case MVT::v4f64: + case MVT::v4i1: { + bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32; + if (isVarArg) { + // We could elide this store in the case where the object fits + // entirely in R registers. Maybe later. + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), false, false, 0); + MemOpChains.push_back(Store); + if (QFPR_idx != NumQFPRs) { + SDValue Load = DAG.getLoad(IsF32 ? 
MVT::v4f32 : MVT::v4f64, dl, + Store, PtrOff, MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load)); + } + ArgOffset += (IsF32 ? 16 : 32); + for (unsigned i=0; i<(IsF32 ? 16 : 32); i+=PtrByteSize) { + if (GPR_idx == NumGPRs) + break; + SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, + DAG.getConstant(i, PtrVT)); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), + false, false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + } + break; + } + + // Non-varargs QPX params go into registers or on the stack. + if (QFPR_idx != NumQFPRs) { + RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg)); + } else { + if (CallConv == CallingConv::Fast) + ComputePtrOff(); + + LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, + true, isTailCall, true, MemOpChains, + TailCallArguments, dl); + if (CallConv == CallingConv::Fast) + ArgOffset += (IsF32 ? 16 : 32); + } + + if (CallConv != CallingConv::Fast) + ArgOffset += (IsF32 ? 16 : 32); + break; + } } } @@ -5384,6 +5724,9 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, } SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + if (Op.getValueType().isVector()) + return LowerVectorLoad(Op, DAG); + assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 loads"); @@ -5405,6 +5748,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + if (Op.getOperand(1).getValueType().isVector()) + return LowerVectorStore(Op, DAG); + assert(Op.getOperand(1).getValueType() == MVT::i1 && "Custom lowering only for i1 stores"); @@ -5674,6 +6020,29 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + + if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) { + if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64) + return SDValue(); + + SDValue Value = Op.getOperand(0); + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + if (Op.getValueType() != MVT::v4f64) + Value = DAG.getNode(ISD::FP_ROUND, dl, + Op.getValueType(), Value, DAG.getIntPtrConstant(1)); + return Value; + } + // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); @@ -6125,6 +6494,127 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) { + // We first build an i32 vector, load it into a QPX register, + // then convert it to a floating-point vector and compare it + // to a zero vector to get the boolean result. 
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + assert(BVN->getNumOperands() == 4 && + "BUILD_VECTOR for v4i1 does not have 4 operands"); + + bool IsConst = true; + for (unsigned i = 0; i < 4; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (!isa(BVN->getOperand(i))) { + IsConst = false; + break; + } + } + + if (IsConst) { + Constant *One = + ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0); + Constant *NegOne = + ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0); + + SmallVector CV(4, NegOne); + for (unsigned i = 0; i < 4; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) + CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); + else if (cast(BVN->getOperand(i))-> + getConstantIntValue()->isZero()) + continue; + else + CV[i] = One; + } + + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(), + 16 /* alignment */); + + SmallVector Ops; + Ops.push_back(DAG.getEntryNode()); + Ops.push_back(CPIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::v4i1); + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + return DAG.getMemIntrinsicNode(PPCISD::QVLFSb, + dl, VTs, Ops, MVT::v4f32, + MachinePointerInfo::getConstantPool()); + } + + SmallVector Stores; + for (unsigned i = 0; i < 4; ++i) { + if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = 4*i; + SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize(); + if (StoreSize > 4) { + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + BVN->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), + MVT::i32, false, false, 0)); + } else { + SDValue StoreValue = BVN->getOperand(i); + if (StoreSize < 4) + StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + StoreValue, Idx, + PtrInfo.getWithOffset(Offset), + false, false, 0)); + } + } + + SDValue StoreChain; + if (!Stores.empty()) + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + else + StoreChain = DAG.getEntryNode(); + + // Now load from v4i32 into the QPX register; this will extend it to + // v4i64 but not yet convert it to a floating point. Nevertheless, this + // is typed as v4f64 because the QPX register integer states are not + // explicitly represented. 
+ + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, MVT::i32)); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::v4f64); + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, MVT::i32), + LoadedVect); + + SDValue FPZeros = DAG.getConstantFP(0.0, MVT::f64); + FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPZeros, FPZeros, FPZeros, FPZeros); + + return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ); + } + + // All other QPX vectors are handled by generic code. + if (Subtarget.hasQPX()) + return SDValue(); + // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; @@ -6383,6 +6873,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); + if (Subtarget.hasQPX()) { + if (VT.getVectorNumElements() != 4) + return SDValue(); + + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + + int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp); + if (AlignIdx != -1) { + return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2, + DAG.getConstant(AlignIdx, MVT::i32)); + } else if (SVOp->isSplat()) { + int SplatIdx = SVOp->getSplatIndex(); + if (SplatIdx >= 4) { + std::swap(V1, V2); + SplatIdx -= 4; + } + + // FIXME: If SplatIdx == 0 and the input came from a load, then there is + // nothing to do. + + return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1, + DAG.getConstant(SplatIdx, MVT::i32)); + } + + // Lower this into a qvgpci/qvfperm pair. + + // Compute the qvgpci literal + unsigned idx = 0; + for (unsigned i = 0; i < 4; ++i) { + int m = SVOp->getMaskElt(i); + unsigned mm = m >= 0 ? (unsigned) m : i; + idx |= mm << (3-i)*3; + } + + SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64, + DAG.getConstant(idx, MVT::i32)); + return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3); + } + // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. @@ -6665,6 +7194,302 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, false, 0); } +SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + SDNode *N = Op.getNode(); + + assert(N->getOperand(0).getValueType() == MVT::v4i1 && + "Unknown extract_vector_elt type"); + + SDValue Value = N->getOperand(0); + + // The first part of this is like the store lowering except that we don't + // need to track the chain. + + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to + // understand how to form the extending load. 
+ SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + // Now convert to an integer and store. + Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32), + Value); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue StoreChain = DAG.getEntryNode(); + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32)); + Ops.push_back(Value); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + + // Extract the value requested. + unsigned Offset = 4*cast(N->getOperand(1))->getZExtValue(); + SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx); + + SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx, + PtrInfo.getWithOffset(Offset), + false, false, false, 0); + + if (!Subtarget.useCRBits()) + return IntVal; + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal); +} + +/// Lowering for QPX v4i1 loads +SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + LoadSDNode *LN = cast(Op.getNode()); + SDValue LoadChain = LN->getChain(); + SDValue BasePtr = LN->getBasePtr(); + + if (Op.getValueType() == MVT::v4f64 || + Op.getValueType() == MVT::v4f32) { + EVT MemVT = LN->getMemoryVT(); + unsigned Alignment = LN->getAlignment(); + + // If this load is properly aligned, then it is legal. 
+ if (Alignment >= MemVT.getStoreSize()) + return Op; + + EVT ScalarVT = Op.getValueType().getScalarType(), + ScalarMemVT = MemVT.getScalarType(); + unsigned Stride = ScalarMemVT.getStoreSize(); + + SmallVector Vals, LoadChains; + for (unsigned Idx = 0; Idx < 4; ++Idx) { + SDValue Load; + if (ScalarVT != ScalarMemVT) + Load = + DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain, + BasePtr, + LN->getPointerInfo().getWithOffset(Idx*Stride), + ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), MinAlign(Alignment, Idx*Stride), + LN->getAAInfo()); + else + Load = + DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr, + LN->getPointerInfo().getWithOffset(Idx*Stride), + LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), MinAlign(Alignment, Idx*Stride), + LN->getAAInfo()); + + if (Idx == 0 && LN->isIndexed()) { + assert(LN->getAddressingMode() == ISD::PRE_INC && + "Unknown addressing mode on vector load"); + Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(), + LN->getAddressingMode()); + } + + Vals.push_back(Load); + LoadChains.push_back(Load.getValue(1)); + + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, BasePtr.getValueType())); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getValueType(), Vals); + + if (LN->isIndexed()) { + SDValue RetOps[] = { Value, Vals[0].getValue(1), TF }; + return DAG.getMergeValues(RetOps, dl); + } + + SDValue RetOps[] = { Value, TF }; + return DAG.getMergeValues(RetOps, dl); + } + + assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower"); + assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported"); + + // To lower v4i1 from a byte array, we load the byte elements of the + // vector and then reuse the BUILD_VECTOR logic. + + SmallVector VectElmts, VectElmtChains; + for (unsigned i = 0; i < 4; ++i) { + SDValue Idx = DAG.getConstant(i, BasePtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); + + VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD, + dl, MVT::i32, LoadChain, Idx, + LN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, + LN->isVolatile(), LN->isNonTemporal(), + LN->isInvariant(), + 1 /* alignment */, LN->getAAInfo())); + VectElmtChains.push_back(VectElmts[i].getValue(1)); + } + + LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts); + + SDValue RVals[] = { Value, LoadChain }; + return DAG.getMergeValues(RVals, dl); +} + +/// Lowering for QPX v4i1 stores +SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + StoreSDNode *SN = cast(Op.getNode()); + SDValue StoreChain = SN->getChain(); + SDValue BasePtr = SN->getBasePtr(); + SDValue Value = SN->getValue(); + + if (Value.getValueType() == MVT::v4f64 || + Value.getValueType() == MVT::v4f32) { + EVT MemVT = SN->getMemoryVT(); + unsigned Alignment = SN->getAlignment(); + + // If this store is properly aligned, then it is legal. 
+ if (Alignment >= MemVT.getStoreSize()) + return Op; + + EVT ScalarVT = Value.getValueType().getScalarType(), + ScalarMemVT = MemVT.getScalarType(); + unsigned Stride = ScalarMemVT.getStoreSize(); + + SmallVector Stores; + for (unsigned Idx = 0; Idx < 4; ++Idx) { + SDValue Ex = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, + DAG.getConstant(Idx, getVectorIdxTy())); + SDValue Store; + if (ScalarVT != ScalarMemVT) + Store = + DAG.getTruncStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx*Stride), + ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(), + MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + else + Store = + DAG.getStore(StoreChain, dl, Ex, BasePtr, + SN->getPointerInfo().getWithOffset(Idx*Stride), + SN->isVolatile(), SN->isNonTemporal(), + MinAlign(Alignment, Idx*Stride), SN->getAAInfo()); + + if (Idx == 0 && SN->isIndexed()) { + assert(SN->getAddressingMode() == ISD::PRE_INC && + "Unknown addressing mode on vector store"); + Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(), + SN->getAddressingMode()); + } + + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, BasePtr.getValueType())); + Stores.push_back(Store); + } + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + if (SN->isIndexed()) { + SDValue RetOps[] = { TF, Stores[0].getValue(1) }; + return DAG.getMergeValues(RetOps, dl); + } + + return TF; + } + + assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported"); + assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower"); + + // The values are now known to be -1 (false) or 1 (true). To convert this + // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). + // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 + Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); + + // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to + // understand how to form the extending load. + SDValue FPHalfs = DAG.getConstantFP(0.5, MVT::f64); + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, + FPHalfs, FPHalfs, FPHalfs, FPHalfs); + + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + + // Now convert to an integer and store. + Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, + DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, MVT::i32), + Value); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + EVT PtrVT = getPointerTy(); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SmallVector Ops; + Ops.push_back(StoreChain); + Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, MVT::i32)); + Ops.push_back(Value); + Ops.push_back(FIdx); + + SmallVector ValueVTs; + ValueVTs.push_back(MVT::Other); // chain + SDVTList VTs = DAG.getVTList(ValueVTs); + + StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, + dl, VTs, Ops, MVT::v4i32, PtrInfo); + + // Move data into the byte array. 
+  SmallVector<SDValue, 4> Loads, LoadChains;
+  for (unsigned i = 0; i < 4; ++i) {
+    unsigned Offset = 4*i;
+    SDValue Idx = DAG.getConstant(Offset, FIdx.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
+
+    Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
+                                PtrInfo.getWithOffset(Offset),
+                                false, false, false, 0));
+    LoadChains.push_back(Loads[i].getValue(1));
+  }
+
+  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
+
+  SmallVector<SDValue, 4> Stores;
+  for (unsigned i = 0; i < 4; ++i) {
+    SDValue Idx = DAG.getConstant(i, BasePtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
+
+    Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
+                                       SN->getPointerInfo().getWithOffset(i),
+                                       MVT::i8 /* memory type */,
+                                       SN->isVolatile(), SN->isNonTemporal(),
+                                       1 /* alignment */, SN->getAAInfo()));
+  }
+
+  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
+
+  return StoreChain;
+}
+
 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
   if (Op.getValueType() == MVT::v4i32) {
@@ -6787,6 +7612,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);

   // For counter-based loop handling.
@@ -7411,6 +8237,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
           MI->getOpcode() == PPC::SELECT_CC_I8 ||
           MI->getOpcode() == PPC::SELECT_CC_F4 ||
           MI->getOpcode() == PPC::SELECT_CC_F8 ||
+          MI->getOpcode() == PPC::SELECT_CC_QFRC ||
+          MI->getOpcode() == PPC::SELECT_CC_QSRC ||
+          MI->getOpcode() == PPC::SELECT_CC_QBRC ||
           MI->getOpcode() == PPC::SELECT_CC_VRRC ||
           MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
           MI->getOpcode() == PPC::SELECT_CC_VSRC ||
@@ -7418,6 +8247,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
           MI->getOpcode() == PPC::SELECT_I8 ||
           MI->getOpcode() == PPC::SELECT_F4 ||
           MI->getOpcode() == PPC::SELECT_F8 ||
+          MI->getOpcode() == PPC::SELECT_QFRC ||
+          MI->getOpcode() == PPC::SELECT_QSRC ||
+          MI->getOpcode() == PPC::SELECT_QBRC ||
           MI->getOpcode() == PPC::SELECT_VRRC ||
           MI->getOpcode() == PPC::SELECT_VSFRC ||
           MI->getOpcode() == PPC::SELECT_VSRC) {
@@ -7451,6 +8283,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
         MI->getOpcode() == PPC::SELECT_I8 ||
         MI->getOpcode() == PPC::SELECT_F4 ||
         MI->getOpcode() == PPC::SELECT_F8 ||
+        MI->getOpcode() == PPC::SELECT_QFRC ||
+        MI->getOpcode() == PPC::SELECT_QSRC ||
+        MI->getOpcode() == PPC::SELECT_QBRC ||
         MI->getOpcode() == PPC::SELECT_VRRC ||
         MI->getOpcode() == PPC::SELECT_VSFRC ||
         MI->getOpcode() == PPC::SELECT_VSRC) {
@@ -7866,7 +8701,9 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
     // Convergence is quadratic, so we essentially double the number of digits
     // correct after every iteration. For both FRE and FRSQRTE, the minimum
     // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
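As an aside, the iteration counts implied by that accuracy bound are easy to tabulate. The sketch below is editorial (plain C++, not code from the patch and not LLVM API); it assumes the usual model in which each Newton-Raphson refinement step doubles the number of correct significand bits, starting from the architected 2^-5 estimate. Fewer steps are needed when hasRecipPrec() provides a more accurate starting estimate.

    #include <cstdio>

    // Steps needed for an estimate with EstBits correct bits to reach
    // SigBits correct bits, doubling the correct bits on every step.
    static int refinementSteps(int SigBits, int EstBits) {
      int Steps = 0;
      for (int Bits = EstBits; Bits < SigBits; Bits *= 2)
        ++Steps;
      return Steps;
    }

    int main() {
      std::printf("f32 (24 significand bits): %d steps\n",
                  refinementSteps(24, 5)); // 5 -> 10 -> 20 -> 40: 3 steps
      std::printf("f64 (53 significand bits): %d steps\n",
                  refinementSteps(53, 5)); // 5 -> 10 -> 20 -> 40 -> 80: 4 steps
      return 0;
    }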
@@ -7887,7 +8724,9 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
       (VT == MVT::f64 && Subtarget.hasFRE()) ||
       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
+      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
+      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
     // Convergence is quadratic, so we essentially double the number of digits
     // correct after every iteration. For both FRE and FRSQRTE, the minimum
     // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7973,6 +8812,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
     EVT VT;
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
     default: return false;
+    case Intrinsic::ppc_qpx_qvlfd:
+    case Intrinsic::ppc_qpx_qvlfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfs:
+    case Intrinsic::ppc_qpx_qvlfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcd:
+    case Intrinsic::ppc_qpx_qvlfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvlfcs:
+    case Intrinsic::ppc_qpx_qvlfcsa:
+      VT = MVT::v2f32;
+      break;
+    case Intrinsic::ppc_qpx_qvlfiwa:
+    case Intrinsic::ppc_qpx_qvlfiwz:
     case Intrinsic::ppc_altivec_lvx:
     case Intrinsic::ppc_altivec_lvxl:
     case Intrinsic::ppc_vsx_lxvw4x:
@@ -7999,6 +8856,24 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
     EVT VT;
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
     default: return false;
+    case Intrinsic::ppc_qpx_qvstfd:
+    case Intrinsic::ppc_qpx_qvstfda:
+      VT = MVT::v4f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfs:
+    case Intrinsic::ppc_qpx_qvstfsa:
+      VT = MVT::v4f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcd:
+    case Intrinsic::ppc_qpx_qvstfcda:
+      VT = MVT::v2f64;
+      break;
+    case Intrinsic::ppc_qpx_qvstfcs:
+    case Intrinsic::ppc_qpx_qvstfcsa:
+      VT = MVT::v2f32;
+      break;
+    case Intrinsic::ppc_qpx_qvstfiw:
+    case Intrinsic::ppc_qpx_qvstfiwa:
     case Intrinsic::ppc_altivec_stvx:
     case Intrinsic::ppc_altivec_stvxl:
     case Intrinsic::ppc_vsx_stxvw4x:
@@ -8927,14 +9802,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         return expandVSXLoadForLE(N, DCI);
       }

-      Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+      EVT MemVT = LD->getMemoryVT();
+      Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
       unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
-      if (ISD::isNON_EXTLoad(N) && VT.isVector() && Subtarget.hasAltivec() &&
-          // P8 and later hardware should just use LOAD.
-          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
-                                       VT == MVT::v4i32 || VT == MVT::v4f32) &&
+      Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
+      unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
+      if (LD->isUnindexed() && VT.isVector() &&
+          ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
+            // P8 and later hardware should just use LOAD.
+            !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+                                         VT == MVT::v4i32 || VT == MVT::v4f32)) ||
+           (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
+            LD->getAlignment() >= ScalarABIAlignment)) &&
          LD->getAlignment() < ABIAlignment) {
-        // This is a type-legal unaligned Altivec load.
+        // This is a type-legal unaligned Altivec or QPX load.
         SDValue Chain = LD->getChain();
         SDValue Ptr = LD->getBasePtr();
         bool isLittleEndian = Subtarget.isLittleEndian();
@@ -8963,10 +9844,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         // a different base address offset from this one by an aligned amount.
         // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
         // optimization later.
-        Intrinsic::ID Intr = (isLittleEndian ?
-                              Intrinsic::ppc_altivec_lvsr :
-                              Intrinsic::ppc_altivec_lvsl);
-        SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
+        Intrinsic::ID Intr, IntrLD, IntrPerm;
+        MVT PermCntlTy, PermTy, LDTy;
+        if (Subtarget.hasAltivec()) {
+          Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
+                                  Intrinsic::ppc_altivec_lvsl;
+          IntrLD = Intrinsic::ppc_altivec_lvx;
+          IntrPerm = Intrinsic::ppc_altivec_vperm;
+          PermCntlTy = MVT::v16i8;
+          PermTy = MVT::v4i32;
+          LDTy = MVT::v4i32;
+        } else {
+          Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
+                                       Intrinsic::ppc_qpx_qvlpcls;
+          IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
+                                         Intrinsic::ppc_qpx_qvlfs;
+          IntrPerm = Intrinsic::ppc_qpx_qvfperm;
+          PermCntlTy = MVT::v4f64;
+          PermTy = MVT::v4f64;
+          LDTy = MemVT.getSimpleVT();
+        }
+
+        SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

         // Create the new MMO for the new base load. It is like the original MMO,
         // but represents an area in memory almost twice the vector size centered
@@ -8975,18 +9874,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         // original unaligned load.
         MachineFunction &MF = DAG.getMachineFunction();
         MachineMemOperand *BaseMMO =
-          MF.getMachineMemOperand(LD->getMemOperand(),
-                                  -LD->getMemoryVT().getStoreSize()+1,
-                                  2*LD->getMemoryVT().getStoreSize()-1);
+          MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+                                  2*MemVT.getStoreSize()-1);

         // Create the new base load.
-        SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
-                                                 getPointerTy());
+        SDValue LDXIntID = DAG.getTargetConstant(IntrLD, getPointerTy());
         SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
         SDValue BaseLoad =
           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
-                                  DAG.getVTList(MVT::v4i32, MVT::Other),
-                                  BaseLoadOps, MVT::v4i32, BaseMMO);
+                                  DAG.getVTList(PermTy, MVT::Other),
+                                  BaseLoadOps, LDTy, BaseMMO);

         // Note that the value of IncOffset (which is provided to the next
         // load's pointer info offset value, and thus used to calculate the
@@ -9010,12 +9907,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,

         MachineMemOperand *ExtraMMO =
           MF.getMachineMemOperand(LD->getMemOperand(),
-                                  1, 2*LD->getMemoryVT().getStoreSize()-1);
+                                  1, 2*MemVT.getStoreSize()-1);
         SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
         SDValue ExtraLoad =
           DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
-                                  DAG.getVTList(MVT::v4i32, MVT::Other),
-                                  ExtraLoadOps, MVT::v4i32, ExtraMMO);
+                                  DAG.getVTList(PermTy, MVT::Other),
+                                  ExtraLoadOps, LDTy, ExtraMMO);

         SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                                  BaseLoad.getValue(1), ExtraLoad.getValue(1));
@@ -9027,14 +9924,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
         // and ExtraLoad here.
         SDValue Perm;
         if (isLittleEndian)
-          Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+          Perm = BuildIntrinsicOp(IntrPerm,
                                   ExtraLoad, BaseLoad, PermCntl, DAG, dl);
         else
-          Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+          Perm = BuildIntrinsicOp(IntrPerm,
                                   BaseLoad, ExtraLoad, PermCntl, DAG, dl);

-        if (VT != MVT::v4i32)
-          Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+        if (VT != PermTy)
+          Perm = Subtarget.hasAltivec() ?
+                   DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
+                   // For QPX, the permuted value is v4f64 and must be rounded
+                   // to v4f32. The FP_ROUND's second operand is 1 because this
+                   // rounding is always exact (the values were loaded as f32).
+                   DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
+                               DAG.getTargetConstant(1, MVT::i64));

         // The output of the permutation is our loaded result, the TokenFactor is
         // our new chain.
@@ -9045,15 +9947,21 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
     break;
   case ISD::INTRINSIC_WO_CHAIN: {
     bool isLittleEndian = Subtarget.isLittleEndian();
+    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
     Intrinsic::ID Intr = (isLittleEndian ?
                           Intrinsic::ppc_altivec_lvsr :
                           Intrinsic::ppc_altivec_lvsl);
-    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
-        N->getOperand(1)->getOpcode() == ISD::ADD) {
+    if ((IID == Intr ||
+         IID == Intrinsic::ppc_qpx_qvlpcld ||
+         IID == Intrinsic::ppc_qpx_qvlpcls) &&
+        N->getOperand(1)->getOpcode() == ISD::ADD) {
       SDValue Add = N->getOperand(1);

+      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
+                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
+
       if (DAG.MaskedValueIsZero(
               Add->getOperand(1),
-              APInt::getAllOnesValue(4 /* 16 byte alignment */)
+              APInt::getAllOnesValue(Bits /* alignment */)
                   .zext(
                       Add.getValueType().getScalarType().getSizeInBits()))) {
         SDNode *BasePtr = Add->getOperand(0).getNode();
@@ -9061,8 +9969,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
              UE = BasePtr->use_end(); UI != UE; ++UI) {
           if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
-                Intr) {
+              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
             // We've found another LVSL/LVSR, and this address is an aligned
             // multiple of that one. The results will be the same, so use the
             // one we've just found instead.
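As an aside, the shape of the transformation above is easier to see outside the DAG. The following stand-alone C++ sketch is editorial (the names are illustrative; none of this is LLVM API); it mimics what the emitted qvlpcld + two qvlfdx + qvfperm sequence computes for a misaligned v4f64 whose address is still 8-byte element aligned:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Form the four doubles starting at a possibly 32-byte-misaligned (but
    // 8-byte element-aligned) address using only 32-byte-aligned block loads
    // plus an element permutation, like the BaseLoad/ExtraLoad/Perm nodes.
    static void loadShifted4xf64(const double *P, double Out[4]) {
      uintptr_t Addr = reinterpret_cast<uintptr_t>(P);
      const double *Base =
          reinterpret_cast<const double *>(Addr & ~uintptr_t(31));
      unsigned Skip = (Addr & 31) / sizeof(double); // permute control (qvlpcld)

      double Lo[4], Hi[4];
      std::memcpy(Lo, Base, sizeof(Lo));     // "base load" (aligned)
      std::memcpy(Hi, Base + 4, sizeof(Hi)); // "extra load", next aligned block

      // "qvfperm": select elements Skip..Skip+3 of the concatenation Lo||Hi.
      for (unsigned i = 0; i != 4; ++i)
        Out[i] = (Skip + i < 4) ? Lo[Skip + i] : Hi[Skip + i - 4];
    }

    int main() {
      alignas(32) double Buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
      double Out[4];
      loadShifted4xf64(Buf + 1, Out); // misaligned by one element
      std::printf("%g %g %g %g\n", Out[0], Out[1], Out[2], Out[3]); // 1 2 3 4
    }

Like the real node sequence, the sketch can read past the last requested element; the combine accounts for that by widening the MachineMemOperand to cover almost twice the vector size, as the comments above describe.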
@@ -9071,6 +9978,27 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
             }
           }
         }
+
+        if (isa<ConstantSDNode>(Add->getOperand(1))) {
+          SDNode *BasePtr = Add->getOperand(0).getNode();
+          for (SDNode::use_iterator UI = BasePtr->use_begin(),
+               UE = BasePtr->use_end(); UI != UE; ++UI) {
+            if (UI->getOpcode() == ISD::ADD &&
+                isa<ConstantSDNode>(UI->getOperand(1)) &&
+                (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
+                 cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
+                (1 << Bits) == 0) {
+              SDNode *OtherAdd = *UI;
+              for (SDNode::use_iterator VI = OtherAdd->use_begin(),
+                   VE = OtherAdd->use_end(); VI != VE; ++VI) {
+                if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+                    cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
+                  return SDValue(*VI, 0);
+                }
+              }
+            }
+          }
+        }
       }
     }

@@ -9521,8 +10449,16 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
       return std::make_pair(0U, &PPC::F4RCRegClass);
     if (VT == MVT::f64 || VT == MVT::i64)
       return std::make_pair(0U, &PPC::F8RCRegClass);
+    if (VT == MVT::v4f64 && Subtarget.hasQPX())
+      return std::make_pair(0U, &PPC::QFRCRegClass);
+    if (VT == MVT::v4f32 && Subtarget.hasQPX())
+      return std::make_pair(0U, &PPC::QSRCRegClass);
     break;
   case 'v':
+    if (VT == MVT::v4f64 && Subtarget.hasQPX())
+      return std::make_pair(0U, &PPC::QFRCRegClass);
+    if (VT == MVT::v4f32 && Subtarget.hasQPX())
+      return std::make_pair(0U, &PPC::QSRCRegClass);
     return std::make_pair(0U, &PPC::VRRCRegClass);
   case 'y':   // crrc
     return std::make_pair(0U, &PPC::CRRCRegClass);
@@ -9642,7 +10578,9 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 // by AM is legal for this target, for a load/store of the specified type.
 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                               Type *Ty) const {
-  // FIXME: PPC does not allow r+i addressing modes for vectors!
+  // PPC does not allow r+i addressing modes for vectors!
+  if (Ty->isVectorTy() && AM.BaseOffs != 0)
+    return false;
+
   // PPC allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) @@ -9773,6 +10711,12 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned Intrinsic) const { switch (Intrinsic) { + case Intrinsic::ppc_qpx_qvlfd: + case Intrinsic::ppc_qpx_qvlfs: + case Intrinsic::ppc_qpx_qvlfcd: + case Intrinsic::ppc_qpx_qvlfcs: + case Intrinsic::ppc_qpx_qvlfiwa: + case Intrinsic::ppc_qpx_qvlfiwz: case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: @@ -9794,6 +10738,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::ppc_vsx_lxvd2x: VT = MVT::v2f64; break; + case Intrinsic::ppc_qpx_qvlfd: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvlfs: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvlfcd: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvlfcs: + VT = MVT::v2f32; + break; default: VT = MVT::v4i32; break; @@ -9810,6 +10766,47 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = false; return true; } + case Intrinsic::ppc_qpx_qvlfda: + case Intrinsic::ppc_qpx_qvlfsa: + case Intrinsic::ppc_qpx_qvlfcda: + case Intrinsic::ppc_qpx_qvlfcsa: + case Intrinsic::ppc_qpx_qvlfiwaa: + case Intrinsic::ppc_qpx_qvlfiwza: { + EVT VT; + switch (Intrinsic) { + case Intrinsic::ppc_qpx_qvlfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvlfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvlfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvlfcsa: + VT = MVT::v2f32; + break; + default: + VT = MVT::v4i32; + break; + } + + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = VT; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.size = VT.getStoreSize(); + Info.align = 1; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::ppc_qpx_qvstfd: + case Intrinsic::ppc_qpx_qvstfs: + case Intrinsic::ppc_qpx_qvstfcd: + case Intrinsic::ppc_qpx_qvstfcs: + case Intrinsic::ppc_qpx_qvstfiw: case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: @@ -9831,6 +10828,18 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; + case Intrinsic::ppc_qpx_qvstfd: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvstfs: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvstfcd: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvstfcs: + VT = MVT::v2f32; + break; default: VT = MVT::v4i32; break; @@ -9847,6 +10856,41 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::ppc_qpx_qvstfda: + case Intrinsic::ppc_qpx_qvstfsa: + case Intrinsic::ppc_qpx_qvstfcda: + case Intrinsic::ppc_qpx_qvstfcsa: + case Intrinsic::ppc_qpx_qvstfiwa: { + EVT VT; + switch (Intrinsic) { + case Intrinsic::ppc_qpx_qvstfda: + VT = MVT::v4f64; + break; + case Intrinsic::ppc_qpx_qvstfsa: + VT = MVT::v4f32; + break; + case Intrinsic::ppc_qpx_qvstfcda: + VT = MVT::v2f64; + break; + case Intrinsic::ppc_qpx_qvstfcsa: + VT = MVT::v2f32; + break; + default: + VT = MVT::v4i32; + break; + } + + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = VT; + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.size = VT.getStoreSize(); + Info.align = 1; + Info.vol = false; + Info.readMem = false; + Info.writeMem = true; + return true; + } default: break; } @@ -10009,6 +11053,11 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles( if (VT == 
MVT::v2i64) return false; + if (Subtarget.hasQPX()) { + if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) + return true; + } + return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6e12d9c097a..47d9c68f538 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -283,6 +283,22 @@ namespace llvm { /// of outputs. XXSWAPD, + /// QVFPERM = This corresponds to the QPX qvfperm instruction. + QVFPERM, + + /// QVGPCI = This corresponds to the QPX qvgpci instruction. + QVGPCI, + + /// QVALIGNI = This corresponds to the QPX qvaligni instruction. + QVALIGNI, + + /// QVESPLATI = This corresponds to the QPX qvesplati instruction. + QVESPLATI, + + /// QBFLT = Access the underlying QPX floating-point boolean + /// representation. + QBFLT, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or @@ -332,7 +348,11 @@ namespace llvm { /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd. - STXVD2X + STXVD2X, + + /// QBRC, CHAIN = QVLFSb CHAIN, Ptr + /// The 4xf32 load used for v4i1 constants. + QVLFSb }; } @@ -381,6 +401,10 @@ namespace llvm { /// size, return the constant being splatted. The ByteSize field indicates /// the number of bytes of each element [124] -> [bhw]. SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + + /// If this is a qvaligni shuffle mask, return the shift + /// amount, otherwise return -1. + int isQVALIGNIShuffleMask(SDNode *N); } class PPCTargetLowering : public TargetLowering { @@ -679,11 +703,15 @@ namespace llvm { SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 0410b1c7590..506a2d0c7ae 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -562,6 +562,47 @@ class XForm_17 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +// Used for QPX +class XForm_18 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + + let Pattern = pattern; + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-30} = xo; + let Inst{31} = 0; +} + +class XForm_19 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XForm_18 { + let FRA = 0; +} + +class XForm_20 opcode, bits<6> xo, dag OOL, dag IOL, string asmstr, + 
InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + bits<4> tttt; + + let Pattern = pattern; + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-24} = tttt; + let Inst{25-30} = xo; + let Inst{31} = 0; +} + class XForm_24 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> : I { @@ -1215,6 +1256,14 @@ class AForm_4 opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +// Used for QPX +class AForm_4a opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : AForm_1 { + let FRA = 0; + let FRC = 0; +} + // 1.7.13 M-Form class MForm_1 opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list pattern> @@ -1439,6 +1488,49 @@ class VXRForm_1 xo, dag OOL, dag IOL, string asmstr, let Inst{22-31} = xo; } +// Z23-Form (used by QPX) +class Z23Form_1 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<5> FRA; + bits<5> FRB; + bits<2> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-15} = FRA; + let Inst{16-20} = FRB; + let Inst{21-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + +class Z23Form_2 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : Z23Form_1 { + let FRB = 0; +} + +class Z23Form_3 opcode, bits<8> xo, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I { + bits<5> FRT; + bits<12> idx; + + let Pattern = pattern; + + bit RC = 0; // set by isDOT + + let Inst{6-10} = FRT; + let Inst{11-22} = idx; + let Inst{23-30} = xo; + let Inst{31} = RC; +} + //===----------------------------------------------------------------------===// class Pseudo pattern> : I<0, OOL, IOL, asmstr, NoItinerary> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d1c60a2e37c..fe9474a5de0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -181,6 +181,9 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case PPC::RESTORE_CRBIT: case PPC::LVX: case PPC::LXVD2X: + case PPC::QVLFDX: + case PPC::QVLFSXs: + case PPC::QVLFDXb: case PPC::RESTORE_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). @@ -207,6 +210,9 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, case PPC::SPILL_CRBIT: case PPC::STVX: case PPC::STXVD2X: + case PPC::QVSTFDX: + case PPC::QVSTFSXs: + case PPC::QVSTFDXb: case PPC::SPILL_VRSAVE: // Check for the operands added by addFrameReference (the immediate is the // offset which defaults to 0). 
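For readers sanity-checking the new encodings: the Z23-Form classes added in the PPCInstrFormats.td hunk above place FRT in bits 6-10, the immediate in bits 11-22, the extended opcode in bits 23-30, and RC in bit 31, where PowerPC numbers bits from the most significant end. A small illustrative C++ helper, not part of the patch, makes the Z23Form_3 packing concrete for qvgpci (primary opcode 4, xo 133 per PPCInstrQPX.td below):

    #include <cstdint>
    #include <cstdio>

    // Pack a Z23Form_3 instruction word (PPC bit 0 is the MSB):
    // Inst{0-5}=opcode, Inst{6-10}=FRT, Inst{11-22}=idx,
    // Inst{23-30}=xo, Inst{31}=RC.
    static uint32_t encodeZ23Form_3(unsigned Opcode, unsigned FRT,
                                    unsigned Idx, unsigned XO, unsigned RC) {
      return (Opcode & 0x3F) << 26 | (FRT & 0x1F) << 21 |
             (Idx & 0xFFF) << 9 | (XO & 0xFF) << 1 | (RC & 1);
    }

    int main() {
      // qvgpci with FRT = 0 and idx = 42 (RC = 0, since it is not a
      // record form): opcode 4, xo 133.
      std::printf("0x%08x\n", encodeZ23Form_3(4, 0, 42, 133, 0)); // 0x1000550a
    }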
@@ -759,6 +765,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = PPC::XXLOR; else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::XXLORf; + else if (PPC::QFRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::QVFMR; + else if (PPC::QSRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::QVFMRs; + else if (PPC::QBRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::QVFMRb; else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) Opc = PPC::CROR; else @@ -844,6 +856,24 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); SpillsVRS = true; + } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFSXs)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVSTFDXb)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -939,6 +969,18 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, DestReg), FrameIdx)); SpillsVRS = true; + } else if (PPC::QFRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::QSRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFSXs), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::QBRCRegClass.hasSubClassEq(RC)) { + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::QVLFDXb), DestReg), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4e3980dfc9b..c2c53355b6e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -61,6 +61,27 @@ def tocentry32 : Operand { let MIOperandInfo = (ops i32imm:$imm); } +def SDT_PPCqvfperm : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVec<3> +]>; +def SDT_PPCqvgpci : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisInt<1> +]>; +def SDT_PPCqvaligni : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3> +]>; +def SDT_PPCqvesplati : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> +]>; + +def SDT_PPCqbflt : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; + +def SDT_PPCqvlfsb : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisPtrTy<1> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
// @@ -127,6 +148,16 @@ def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; +def PPCqvfperm : SDNode<"PPCISD::QVFPERM", SDT_PPCqvfperm, []>; +def PPCqvgpci : SDNode<"PPCISD::QVGPCI", SDT_PPCqvgpci, []>; +def PPCqvaligni : SDNode<"PPCISD::QVALIGNI", SDT_PPCqvaligni, []>; +def PPCqvesplati : SDNode<"PPCISD::QVESPLATI", SDT_PPCqvesplati, []>; + +def PPCqbflt : SDNode<"PPCISD::QBFLT", SDT_PPCqbflt, []>; + +def PPCqvlfsb : SDNode<"PPCISD::QVLFSb", SDT_PPCqvlfsb, + [SDNPHasChain, SDNPMayLoad]>; + def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift @@ -464,6 +495,15 @@ def u6imm : Operand { let ParserMatchClass = PPCU6ImmAsmOperand; let DecoderMethod = "decodeUImmOperand<6>"; } +def PPCU12ImmAsmOperand : AsmOperandClass { + let Name = "U12Imm"; let PredicateMethod = "isU12Imm"; + let RenderMethod = "addImmOperands"; +} +def u12imm : Operand { + let PrintMethod = "printU12ImmOperand"; + let ParserMatchClass = PPCU12ImmAsmOperand; + let DecoderMethod = "decodeUImmOperand<12>"; +} def PPCS16ImmAsmOperand : AsmOperandClass { let Name = "S16Imm"; let PredicateMethod = "isS16Imm"; let RenderMethod = "addS16ImmOperands"; @@ -680,6 +720,10 @@ def IsPPC6xx : Predicate<"PPCSubTarget->isPPC6xx()">; def IsE500 : Predicate<"PPCSubTarget->isE500()">; def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; + +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; +def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; + //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. @@ -2643,6 +2687,7 @@ include "PPCInstrAltivec.td" include "PPCInstrSPE.td" include "PPCInstr64Bit.td" include "PPCInstrVSX.td" +include "PPCInstrQPX.td" def crnot : OutPatFrag<(ops node:$in), (CRNOR $in, $in)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td new file mode 100644 index 00000000000..c984d461d25 --- /dev/null +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -0,0 +1,1192 @@ +//===- PPCInstrQPX.td - The PowerPC QPX Extension --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the QPX extension to the PowerPC instruction set. +// Reference: +// Book Q: QPX Architecture Definition. IBM (as updated in) 2011. +// +//===----------------------------------------------------------------------===// + +def PPCRegQFRCAsmOperand : AsmOperandClass { + let Name = "RegQFRC"; let PredicateMethod = "isRegNumber"; +} +def qfrc : RegisterOperand { + let ParserMatchClass = PPCRegQFRCAsmOperand; +} +def PPCRegQSRCAsmOperand : AsmOperandClass { + let Name = "RegQSRC"; let PredicateMethod = "isRegNumber"; +} +def qsrc : RegisterOperand { + let ParserMatchClass = PPCRegQSRCAsmOperand; +} +def PPCRegQBRCAsmOperand : AsmOperandClass { + let Name = "RegQBRC"; let PredicateMethod = "isRegNumber"; +} +def qbrc : RegisterOperand { + let ParserMatchClass = PPCRegQBRCAsmOperand; +} + +//===----------------------------------------------------------------------===// +// Helpers for defining instructions that directly correspond to intrinsics. + +// QPXA1_Int - A AForm_1 intrinsic definition. 
+class QPXA1_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_1; +// QPXA1s_Int - A AForm_1 intrinsic definition (simple instructions). +class QPXA1s_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_1; +// QPXA2_Int - A AForm_2 intrinsic definition. +class QPXA2_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_2; +// QPXA3_Int - A AForm_3 intrinsic definition. +class QPXA3_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_3; +// QPXA4_Int - A AForm_4a intrinsic definition. +class QPXA4_Int opcode, bits<5> xo, string opc, Intrinsic IntID> + : AForm_4a; +// QPXX18_Int - A XForm_18 intrinsic definition. +class QPXX18_Int opcode, bits<10> xo, string opc, Intrinsic IntID> + : XForm_18; +// QPXX19_Int - A XForm_19 intrinsic definition. +class QPXX19_Int opcode, bits<10> xo, string opc, Intrinsic IntID> + : XForm_19; + +//===----------------------------------------------------------------------===// +// Pattern Frags. + +def extloadv4f32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; +def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset), + (pre_truncst node:$val, + node:$base, node:$offset), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{ + return cast(N->getOperand(1))->getZExtValue() == 0; +}]>; + +def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{ + return cast(N->getOperand(1))->getZExtValue() == 1; +}]>; + +let FastIselShouldIgnore = 1 in // FastIsel should ignore all u12 instrs. + def u12 : ImmLeaf; + +//===----------------------------------------------------------------------===// +// Instruction Definitions. + +def HasQPX : Predicate<"PPCSubTarget->hasQPX()">; +let Predicates = [HasQPX] in { +let DecoderNamespace = "QPX" in { +let hasSideEffects = 0 in { // QPX instructions don't have side effects. 
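+// RM is the pseudo-register modeling the FPSCR rounding-mode field; keeping
+// the arithmetic definitions below under "let Uses = [RM]" orders them with
+// respect to instructions that change the rounding mode.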
+let Uses = [RM] in { + // Add Instructions + let isCommutable = 1 in { + def QVFADD : AForm_2<4, 21, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfadd $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fadd v4f64:$FRA, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFADDS : QPXA2_Int<0, 21, "qvfadds", int_ppc_qpx_qvfadds>; + def QVFADDSs : AForm_2<0, 21, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfadds $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fadd v4f32:$FRA, v4f32:$FRB))]>; + } + def QVFSUB : AForm_2<4, 20, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfsub $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fsub v4f64:$FRA, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSUBS : QPXA2_Int<0, 20, "qvfsubs", int_ppc_qpx_qvfsubs>; + def QVFSUBSs : AForm_2<0, 20, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfsubs $FRT, $FRA, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fsub v4f32:$FRA, v4f32:$FRB))]>; + + // Estimate Instructions + def QVFRE : AForm_4a<4, 24, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfre $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (PPCfre v4f64:$FRB))]>; + def QVFRES : QPXA4_Int<0, 24, "qvfres", int_ppc_qpx_qvfres>; + let isCodeGenOnly = 1 in + def QVFRESs : AForm_4a<0, 24, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfres $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (PPCfre v4f32:$FRB))]>; + + def QVFRSQRTE : AForm_4a<4, 26, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrsqrte $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (PPCfrsqrte v4f64:$FRB))]>; + def QVFRSQRTES : QPXA4_Int<0, 26, "qvfrsqrtes", int_ppc_qpx_qvfrsqrtes>; + let isCodeGenOnly = 1 in + def QVFRSQRTESs : AForm_4a<0, 26, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrsqrtes $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (PPCfrsqrte v4f32:$FRB))]>; + + // Multiply Instructions + let isCommutable = 1 in { + def QVFMUL : AForm_3<4, 25, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRC), + "qvfmul $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f64:$FRT, (fmul v4f64:$FRA, v4f64:$FRC))]>; + let isCodeGenOnly = 1 in + def QVFMULS : QPXA3_Int<0, 25, "qvfmuls", int_ppc_qpx_qvfmuls>; + def QVFMULSs : AForm_3<0, 25, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRC), + "qvfmuls $FRT, $FRA, $FRC", IIC_FPGeneral, + [(set v4f32:$FRT, (fmul v4f32:$FRA, v4f32:$FRC))]>; + } + def QVFXMUL : QPXA3_Int<4, 17, "qvfxmul", int_ppc_qpx_qvfxmul>; + def QVFXMULS : QPXA3_Int<0, 17, "qvfxmuls", int_ppc_qpx_qvfxmuls>; + + // Multiply-add instructions + def QVFMADD : AForm_1<4, 29, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFMADDS : QPXA1_Int<0, 29, "qvfmadds", int_ppc_qpx_qvfmadds>; + def QVFMADDSs : AForm_1<0, 29, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, v4f32:$FRB))]>; + def QVFNMADD : AForm_1<4, 31, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfnmadd $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC, + v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFNMADDS : QPXA1_Int<0, 31, "qvfnmadds", int_ppc_qpx_qvfnmadds>; + def QVFNMADDSs : AForm_1<0, 31, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfnmadds $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC, + v4f32:$FRB)))]>; + def QVFMSUB : AForm_1<4, 
28, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fma v4f64:$FRA, v4f64:$FRC, + (fneg v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFMSUBS : QPXA1_Int<0, 28, "qvfmsubs", int_ppc_qpx_qvfmsubs>; + def QVFMSUBSs : AForm_1<0, 28, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fma v4f32:$FRA, v4f32:$FRC, + (fneg v4f32:$FRB)))]>; + def QVFNMSUB : AForm_1<4, 30, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfnmsub $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f64:$FRT, (fneg (fma v4f64:$FRA, v4f64:$FRC, + (fneg v4f64:$FRB))))]>; + let isCodeGenOnly = 1 in + def QVFNMSUBS : QPXA1_Int<0, 30, "qvfnmsubs", int_ppc_qpx_qvfnmsubs>; + def QVFNMSUBSs : AForm_1<0, 30, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfnmsubs $FRT, $FRA, $FRC, $FRB", IIC_FPFused, + [(set v4f32:$FRT, (fneg (fma v4f32:$FRA, v4f32:$FRC, + (fneg v4f32:$FRB))))]>; + def QVFXMADD : QPXA1_Int<4, 9, "qvfxmadd", int_ppc_qpx_qvfxmadd>; + def QVFXMADDS : QPXA1_Int<0, 9, "qvfxmadds", int_ppc_qpx_qvfxmadds>; + def QVFXXNPMADD : QPXA1_Int<4, 11, "qvfxxnpmadd", int_ppc_qpx_qvfxxnpmadd>; + def QVFXXNPMADDS : QPXA1_Int<0, 11, "qvfxxnpmadds", int_ppc_qpx_qvfxxnpmadds>; + def QVFXXCPNMADD : QPXA1_Int<4, 3, "qvfxxcpnmadd", int_ppc_qpx_qvfxxcpnmadd>; + def QVFXXCPNMADDS : QPXA1_Int<0, 3, "qvfxxcpnmadds", int_ppc_qpx_qvfxxcpnmadds>; + def QVFXXMADD : QPXA1_Int<4, 1, "qvfxxmadd", int_ppc_qpx_qvfxxmadd>; + def QVFXXMADDS : QPXA1_Int<0, 1, "qvfxxmadds", int_ppc_qpx_qvfxxmadds>; + + // Select Instruction + let isCodeGenOnly = 1 in + def QVFSEL : QPXA1s_Int<4, 23, "qvfsel", int_ppc_qpx_qvfsel>; + def QVFSELb : AForm_1<4, 23, (outs qfrc:$FRT), + (ins qbrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (vselect v4i1:$FRA, + v4f64:$FRC, v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSELbs : AForm_1<4, 23, (outs qsrc:$FRT), + (ins qbrc:$FRA, qsrc:$FRB, qsrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (vselect v4i1:$FRA, + v4f32:$FRC, v4f32:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFSELbb: AForm_1<4, 23, (outs qbrc:$FRT), + (ins qbrc:$FRA, qbrc:$FRB, qbrc:$FRC), + "qvfsel $FRT, $FRA, $FRC, $FRB", IIC_VecPerm, + [(set v4i1:$FRT, (vselect v4i1:$FRA, + v4i1:$FRC, v4i1:$FRB))]>; + + // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after + // instruction selection into a branch sequence. + let usesCustomInserter = 1 in { + def SELECT_CC_QFRC: Pseudo<(outs qfrc:$dst), (ins crrc:$cond, qfrc:$T, qfrc:$F, + i32imm:$BROPC), "#SELECT_CC_QFRC", + []>; + def SELECT_CC_QSRC: Pseudo<(outs qsrc:$dst), (ins crrc:$cond, qsrc:$T, qsrc:$F, + i32imm:$BROPC), "#SELECT_CC_QSRC", + []>; + def SELECT_CC_QBRC: Pseudo<(outs qbrc:$dst), (ins crrc:$cond, qbrc:$T, qbrc:$F, + i32imm:$BROPC), "#SELECT_CC_QBRC", + []>; + + // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition + // register bit directly. 
+ def SELECT_QFRC: Pseudo<(outs qfrc:$dst), (ins crbitrc:$cond, + qfrc:$T, qfrc:$F), "#SELECT_QFRC", + [(set v4f64:$dst, + (select i1:$cond, v4f64:$T, v4f64:$F))]>; + def SELECT_QSRC: Pseudo<(outs qsrc:$dst), (ins crbitrc:$cond, + qsrc:$T, qsrc:$F), "#SELECT_QSRC", + [(set v4f32:$dst, + (select i1:$cond, v4f32:$T, v4f32:$F))]>; + def SELECT_QBRC: Pseudo<(outs qbrc:$dst), (ins crbitrc:$cond, + qbrc:$T, qbrc:$F), "#SELECT_QBRC", + [(set v4i1:$dst, + (select i1:$cond, v4i1:$T, v4i1:$F))]>; + } + + // Convert and Round Instructions + def QVFCTID : QPXX19_Int<4, 814, "qvfctid", int_ppc_qpx_qvfctid>; + let isCodeGenOnly = 1 in + def QVFCTIDb : XForm_19<4, 814, (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfctid $FRT, $FRB", IIC_FPGeneral, []>; + + def QVFCTIDU : QPXX19_Int<4, 942, "qvfctidu", int_ppc_qpx_qvfctidu>; + def QVFCTIDZ : QPXX19_Int<4, 815, "qvfctidz", int_ppc_qpx_qvfctidz>; + def QVFCTIDUZ : QPXX19_Int<4, 943, "qvfctiduz", int_ppc_qpx_qvfctiduz>; + def QVFCTIW : QPXX19_Int<4, 14, "qvfctiw", int_ppc_qpx_qvfctiw>; + def QVFCTIWU : QPXX19_Int<4, 142, "qvfctiwu", int_ppc_qpx_qvfctiwu>; + def QVFCTIWZ : QPXX19_Int<4, 15, "qvfctiwz", int_ppc_qpx_qvfctiwz>; + def QVFCTIWUZ : QPXX19_Int<4, 143, "qvfctiwuz", int_ppc_qpx_qvfctiwuz>; + def QVFCFID : QPXX19_Int<4, 846, "qvfcfid", int_ppc_qpx_qvfcfid>; + let isCodeGenOnly = 1 in + def QVFCFIDb : XForm_19<4, 846, (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfcfid $FRT, $FRB", IIC_FPGeneral, []>; + + def QVFCFIDU : QPXX19_Int<4, 974, "qvfcfidu", int_ppc_qpx_qvfcfidu>; + def QVFCFIDS : QPXX19_Int<0, 846, "qvfcfids", int_ppc_qpx_qvfcfids>; + def QVFCFIDUS : QPXX19_Int<0, 974, "qvfcfidus", int_ppc_qpx_qvfcfidus>; + + let isCodeGenOnly = 1 in + def QVFRSP : QPXX19_Int<4, 12, "qvfrsp", int_ppc_qpx_qvfrsp>; + def QVFRSPs : XForm_19<4, 12, + (outs qsrc:$FRT), (ins qfrc:$FRB), + "qvfrsp $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fround_inexact v4f64:$FRB))]>; + + def QVFRIZ : XForm_19<4, 424, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfriz $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (ftrunc v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIZs : XForm_19<4, 424, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfriz $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (ftrunc v4f32:$FRB))]>; + + def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrin $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (frnd v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrin $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (frnd v4f32:$FRB))]>; + + def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrip $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (fceil v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIPs : XForm_19<4, 456, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrip $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (fceil v4f32:$FRB))]>; + + def QVFRIM : XForm_19<4, 488, (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfrim $FRT, $FRB", IIC_FPGeneral, + [(set v4f64:$FRT, (ffloor v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFRIMs : XForm_19<4, 488, (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfrim $FRT, $FRB", IIC_FPGeneral, + [(set v4f32:$FRT, (ffloor v4f32:$FRB))]>; + + // Move Instructions + def QVFMR : XForm_19<4, 72, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4f64:$FRT, v4f64:$FRB) */]>; + let isCodeGenOnly = 1 in { + def QVFMRs : XForm_19<4, 72, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4f32:$FRT, 
v4f32:$FRB) */]>; + def QVFMRb : XForm_19<4, 72, + (outs qbrc:$FRT), (ins qbrc:$FRB), + "qvfmr $FRT, $FRB", IIC_VecPerm, + [/* (set v4i1:$FRT, v4i1:$FRB) */]>; + } + def QVFNEG : XForm_19<4, 40, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfneg $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fneg v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFNEGs : XForm_19<4, 40, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfneg $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fneg v4f32:$FRB))]>; + def QVFABS : XForm_19<4, 264, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfabs $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fabs v4f64:$FRB))]>; + let isCodeGenOnly = 1 in + def QVFABSs : XForm_19<4, 264, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfabs $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fabs v4f32:$FRB))]>; + def QVFNABS : XForm_19<4, 136, + (outs qfrc:$FRT), (ins qfrc:$FRB), + "qvfnabs $FRT, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fneg (fabs v4f64:$FRB)))]>; + let isCodeGenOnly = 1 in + def QVFNABSs : XForm_19<4, 136, + (outs qsrc:$FRT), (ins qsrc:$FRB), + "qvfnabs $FRT, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fneg (fabs v4f32:$FRB)))]>; + def QVFCPSGN : XForm_18<4, 8, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm, + [(set v4f64:$FRT, (fcopysign v4f64:$FRB, v4f64:$FRA))]>; + let isCodeGenOnly = 1 in + def QVFCPSGNs : XForm_18<4, 8, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcpsgn $FRT, $FRA, $FRB", IIC_VecPerm, + [(set v4f32:$FRT, (fcopysign v4f32:$FRB, v4f32:$FRA))]>; + + def QVALIGNI : Z23Form_1<4, 5, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvaligni v4f64:$FRA, v4f64:$FRB, + (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVALIGNIs : Z23Form_1<4, 5, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvaligni v4f32:$FRA, v4f32:$FRB, + (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVALIGNIb : Z23Form_1<4, 5, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u2imm:$idx), + "qvaligni $FRT, $FRA, $FRB, $idx", IIC_VecPerm, + [(set v4i1:$FRT, + (PPCqvaligni v4i1:$FRA, v4i1:$FRB, + (i32 imm:$idx)))]>; + + def QVESPLATI : Z23Form_2<4, 37, + (outs qfrc:$FRT), (ins qfrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvesplati v4f64:$FRA, (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVESPLATIs : Z23Form_2<4, 37, + (outs qsrc:$FRT), (ins qsrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvesplati v4f32:$FRA, (i32 imm:$idx)))]>; + let isCodeGenOnly = 1 in + def QVESPLATIb : Z23Form_2<4, 37, + (outs qbrc:$FRT), (ins qbrc:$FRA, u2imm:$idx), + "qvesplati $FRT, $FRA, $idx", IIC_VecPerm, + [(set v4i1:$FRT, + (PPCqvesplati v4i1:$FRA, (i32 imm:$idx)))]>; + + def QVFPERM : AForm_1<4, 6, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, qfrc:$FRC), + "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm, + [(set v4f64:$FRT, + (PPCqvfperm v4f64:$FRA, v4f64:$FRB, v4f64:$FRC))]>; + let isCodeGenOnly = 1 in + def QVFPERMs : AForm_1<4, 6, + (outs qsrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB, qfrc:$FRC), + "qvfperm $FRT, $FRA, $FRB, $FRC", IIC_VecPerm, + [(set v4f32:$FRT, + (PPCqvfperm v4f32:$FRA, v4f32:$FRB, v4f64:$FRC))]>; + + let isReMaterializable = 1, isAsCheapAsAMove = 1 in + def QVGPCI : Z23Form_3<4, 133, + (outs qfrc:$FRT), (ins u12imm:$idx), + "qvgpci $FRT, $idx", IIC_VecPerm, + [(set v4f64:$FRT, 
(PPCqvgpci (u12:$idx)))]>; + + // Compare Instruction + let isCodeGenOnly = 1 in + def QVFTSTNAN : QPXX18_Int<4, 64, "qvftstnan", int_ppc_qpx_qvftstnan>; + def QVFTSTNANb : XForm_18<4, 64, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETUO))]>; + let isCodeGenOnly = 1 in + def QVFTSTNANbs : XForm_18<4, 64, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvftstnan $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETUO))]>; + let isCodeGenOnly = 1 in + def QVFCMPLT : QPXX18_Int<4, 96, "qvfcmplt", int_ppc_qpx_qvfcmplt>; + def QVFCMPLTb : XForm_18<4, 96, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOLT))]>; + let isCodeGenOnly = 1 in + def QVFCMPLTbs : XForm_18<4, 96, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmplt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOLT))]>; + let isCodeGenOnly = 1 in + def QVFCMPGT : QPXX18_Int<4, 32, "qvfcmpgt", int_ppc_qpx_qvfcmpgt>; + def QVFCMPGTb : XForm_18<4, 32, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOGT))]>; + let isCodeGenOnly = 1 in + def QVFCMPGTbs : XForm_18<4, 32, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmpgt $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOGT))]>; + let isCodeGenOnly = 1 in + def QVFCMPEQ : QPXX18_Int<4, 0, "qvfcmpeq", int_ppc_qpx_qvfcmpeq>; + def QVFCMPEQb : XForm_18<4, 0, (outs qbrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB), + "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f64:$FRA, v4f64:$FRB, SETOEQ))]>; + let isCodeGenOnly = 1 in + def QVFCMPEQbs : XForm_18<4, 0, (outs qbrc:$FRT), (ins qsrc:$FRA, qsrc:$FRB), + "qvfcmpeq $FRT, $FRA, $FRB", IIC_FPCompare, + [(set v4i1:$FRT, + (setcc v4f32:$FRA, v4f32:$FRB, SETOEQ))]>; + + let isCodeGenOnly = 1 in + def QVFLOGICAL : XForm_20<4, 4, + (outs qfrc:$FRT), (ins qfrc:$FRA, qfrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + def QVFLOGICALb : XForm_20<4, 4, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + let isCodeGenOnly = 1 in + def QVFLOGICALs : XForm_20<4, 4, + (outs qbrc:$FRT), (ins qbrc:$FRA, qbrc:$FRB, u12imm:$tttt), + "qvflogical $FRT, $FRA, $FRB, $tttt", IIC_VecPerm, []>; + + // Load indexed instructions + let mayLoad = 1, canFoldAsLoad = 1 in { + def QVLFDX : XForm_1<31, 583, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfdx $FRT, $src", IIC_LdStLFD, + [(set v4f64:$FRT, (load xoaddr:$src))]>; + let isCodeGenOnly = 1 in + def QVLFDXb : XForm_1<31, 583, + (outs qbrc:$FRT), (ins memrr:$src), + "qvlfdx $FRT, $src", IIC_LdStLFD, []>; + + let RC = 1 in + def QVLFDXA : XForm_1<31, 583, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfdxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFDUX : XForm_1<31, 615, + (outs qfrc:$FRT, ptr_rc_nor0:$ea_result), + (ins memrr:$src), + "qvlfdux $FRT, $src", IIC_LdStLFDU, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + let RC = 1 in + def QVLFDUXA : XForm_1<31, 615, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfduxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFSX : XForm_1<31, 519, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set 
v4f64:$FRT, (extloadv4f32 xoaddr:$src))]>; + + let isCodeGenOnly = 1 in + def QVLFSXb : XForm_1<31, 519, + (outs qbrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4i1:$FRT, (PPCqvlfsb xoaddr:$src))]>; + let isCodeGenOnly = 1 in + def QVLFSXs : XForm_1<31, 519, + (outs qsrc:$FRT), (ins memrr:$src), + "qvlfsx $FRT, $src", IIC_LdStLFD, + [(set v4f32:$FRT, (load xoaddr:$src))]>; + + let RC = 1 in + def QVLFSXA : XForm_1<31, 519, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFSUX : XForm_1<31, 551, + (outs qsrc:$FRT, ptr_rc_nor0:$ea_result), + (ins memrr:$src), + "qvlfsux $FRT, $src", IIC_LdStLFDU, []>, + RegConstraint<"$src.ptrreg = $ea_result">, + NoEncode<"$ea_result">; + + let RC = 1 in + def QVLFSUXA : XForm_1<31, 551, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfsuxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCDX : XForm_1<31, 71, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdx $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCDXA : XForm_1<31, 71, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCDUX : XForm_1<31, 103, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcdux $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCDUXA : XForm_1<31, 103, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcduxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCSX : XForm_1<31, 7, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsx $FRT, $src", IIC_LdStLFD, []>; + let isCodeGenOnly = 1 in + def QVLFCSXs : XForm_1<31, 7, + (outs qsrc:$FRT), (ins memrr:$src), + "qvlfcsx $FRT, $src", IIC_LdStLFD, []>; + + let RC = 1 in + def QVLFCSXA : XForm_1<31, 7, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFCSUX : XForm_1<31, 39, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsux $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFCSUXA : XForm_1<31, 39, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfcsuxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFIWAX : XForm_1<31, 871, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwax $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFIWAXA : XForm_1<31, 871, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwaxa $FRT, $src", IIC_LdStLFD, []>; + + def QVLFIWZX : XForm_1<31, 839, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwzx $FRT, $src", IIC_LdStLFD, []>; + let RC = 1 in + def QVLFIWZXA : XForm_1<31, 839, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlfiwzxa $FRT, $src", IIC_LdStLFD, []>; + } + + + def QVLPCLDX : XForm_1<31, 582, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcldx $FRT, $src", IIC_LdStLFD, []>; + def QVLPCLSX : XForm_1<31, 518, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpclsx $FRT, $src", IIC_LdStLFD, []>; + let isCodeGenOnly = 1 in + def QVLPCLSXint : XForm_11<31, 518, + (outs qfrc:$FRT), (ins G8RC:$src), + "qvlpclsx $FRT, 0, $src", IIC_LdStLFD, []>; + def QVLPCRDX : XForm_1<31, 70, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcrdx $FRT, $src", IIC_LdStLFD, []>; + def QVLPCRSX : XForm_1<31, 6, + (outs qfrc:$FRT), (ins memrr:$src), + "qvlpcrsx $FRT, $src", IIC_LdStLFD, []>; + + // Store indexed instructions + let mayStore = 1 in { + def QVSTFDX : XForm_8<31, 711, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdx $FRT, $dst", IIC_LdStSTFD, + [(store qfrc:$FRT, xoaddr:$dst)]>; + let isCodeGenOnly = 1 in + def QVSTFDXb : XForm_8<31, 711, + (outs), (ins qbrc:$FRT, memrr:$dst), + "qvstfdx $FRT, $dst", IIC_LdStSTFD, []>; + + let RC = 1 in + def QVSTFDXA : XForm_8<31, 711, + (outs), (ins qfrc:$FRT, 
memrr:$dst), + "qvstfdxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDUX : XForm_8<31, 743, (outs ptr_rc_nor0:$ea_res), + (ins qfrc:$FRT, memrr:$dst), + "qvstfdux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + + let RC = 1 in + def QVSTFDUXA : XForm_8<31, 743, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDXI : XForm_8<31, 709, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFDXIA : XForm_8<31, 709, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfdxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFDUXI : XForm_8<31, 741, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFDUXIA : XForm_8<31, 741, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfduxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSX : XForm_8<31, 647, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsx $FRT, $dst", IIC_LdStSTFD, + [(truncstorev4f32 qfrc:$FRT, xoaddr:$dst)]>; + let isCodeGenOnly = 1 in + def QVSTFSXs : XForm_8<31, 647, + (outs), (ins qsrc:$FRT, memrr:$dst), + "qvstfsx $FRT, $dst", IIC_LdStSTFD, + [(store qsrc:$FRT, xoaddr:$dst)]>; + + let RC = 1 in + def QVSTFSXA : XForm_8<31, 647, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSUX : XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res), + (ins qsrc:$FRT, memrr:$dst), + "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + let isCodeGenOnly = 1 in + def QVSTFSUXs: XForm_8<31, 679, (outs ptr_rc_nor0:$ea_res), + (ins qfrc:$FRT, memrr:$dst), + "qvstfsux $FRT, $dst", IIC_LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, + NoEncode<"$ea_res">; + + let RC = 1 in + def QVSTFSUXA : XForm_8<31, 679, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSXI : XForm_8<31, 645, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFSXIA : XForm_8<31, 645, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFSUXI : XForm_8<31, 677, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFSUXIA : XForm_8<31, 677, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfsuxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDX : XForm_8<31, 199, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdx $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDXA : XForm_8<31, 199, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSX : XForm_8<31, 135, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>; + let isCodeGenOnly = 1 in + def QVSTFCSXs : XForm_8<31, 135, + (outs), (ins qsrc:$FRT, memrr:$dst), + "qvstfcsx $FRT, $dst", IIC_LdStSTFD, []>; + + let RC = 1 in + def QVSTFCSXA : XForm_8<31, 135, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDUX : XForm_8<31, 231, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdux $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDUXA : XForm_8<31, 231, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSUX : XForm_8<31, 167, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsux $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSUXA : XForm_8<31, 167, + 
(outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxa $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDXI : XForm_8<31, 197, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDXIA : XForm_8<31, 197, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcdxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSXI : XForm_8<31, 133, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSXIA : XForm_8<31, 133, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCDUXI : XForm_8<31, 229, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCDUXIA : XForm_8<31, 229, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcduxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFCSUXI : XForm_8<31, 165, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxi $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFCSUXIA : XForm_8<31, 165, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfcsuxia $FRT, $dst", IIC_LdStSTFD, []>; + + def QVSTFIWX : XForm_8<31, 967, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfiwx $FRT, $dst", IIC_LdStSTFD, []>; + let RC = 1 in + def QVSTFIWXA : XForm_8<31, 967, + (outs), (ins qfrc:$FRT, memrr:$dst), + "qvstfiwxa $FRT, $dst", IIC_LdStSTFD, []>; + } +} + +} // neverHasSideEffects +} + +def : InstAlias<"qvfclr $FRT", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 0)>; +def : InstAlias<"qvfand $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 1)>; +def : InstAlias<"qvfandc $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 4)>; +def : InstAlias<"qvfctfb $FRT, $FRA", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 5)>; +def : InstAlias<"qvfxor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 6)>; +def : InstAlias<"qvfor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 7)>; +def : InstAlias<"qvfnor $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 8)>; +def : InstAlias<"qvfequ $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 9)>; +def : InstAlias<"qvfnot $FRT, $FRA", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRA, 10)>; +def : InstAlias<"qvforc $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 13)>; +def : InstAlias<"qvfnand $FRT, $FRA, $FRB", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRA, qbrc:$FRB, 14)>; +def : InstAlias<"qvfset $FRT", + (QVFLOGICALb qbrc:$FRT, qbrc:$FRT, qbrc:$FRT, 15)>; + +//===----------------------------------------------------------------------===// +// Additional QPX Patterns +// + +def : Pat<(v4f64 (scalar_to_vector f64:$A)), + (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), $A, sub_64)>; +def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>; + +def : Pat<(f64 (vector_extract v4f64:$S, 0)), + (EXTRACT_SUBREG $S, sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (EXTRACT_SUBREG $S, sub_64)>; + +def : Pat<(f64 (vector_extract v4f64:$S, 1)), + (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>; +def : Pat<(f64 (vector_extract v4f64:$S, 2)), + (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>; +def : Pat<(f64 (vector_extract v4f64:$S, 3)), + (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>; + +def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, 3)), 
+ (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>; + +def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)), + (EXTRACT_SUBREG (QVFPERM $S, $S, + (QVLPCLSXint (RLDICR $F, 2, + /* 63-2 = */ 61))), + sub_64)>; +def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)), + (EXTRACT_SUBREG (QVFPERMs $S, $S, + (QVLPCLSXint (RLDICR $F, 2, + /* 63-2 = */ 61))), + sub_64)>; + +def : Pat<(int_ppc_qpx_qvfperm v4f64:$A, v4f64:$B, v4f64:$C), + (QVFPERM $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvfcpsgn v4f64:$A, v4f64:$B), + (QVFCPSGN $A, $B)>; + +// FCOPYSIGN's operand types need not agree. +def : Pat<(fcopysign v4f64:$frB, v4f32:$frA), + (QVFCPSGN (COPY_TO_REGCLASS $frA, QFRC), $frB)>; +def : Pat<(fcopysign QSRC:$frB, QFRC:$frA), + (QVFCPSGNs (COPY_TO_REGCLASS $frA, QSRC), $frB)>; + +def : Pat<(int_ppc_qpx_qvfneg v4f64:$A), (QVFNEG $A)>; +def : Pat<(int_ppc_qpx_qvfabs v4f64:$A), (QVFABS $A)>; +def : Pat<(int_ppc_qpx_qvfnabs v4f64:$A), (QVFNABS $A)>; + +def : Pat<(int_ppc_qpx_qvfriz v4f64:$A), (QVFRIZ $A)>; +def : Pat<(int_ppc_qpx_qvfrin v4f64:$A), (QVFRIN $A)>; +def : Pat<(int_ppc_qpx_qvfrip v4f64:$A), (QVFRIP $A)>; +def : Pat<(int_ppc_qpx_qvfrim v4f64:$A), (QVFRIM $A)>; + +def : Pat<(int_ppc_qpx_qvfre v4f64:$A), (QVFRE $A)>; +def : Pat<(int_ppc_qpx_qvfrsqrte v4f64:$A), (QVFRSQRTE $A)>; + +def : Pat<(int_ppc_qpx_qvfadd v4f64:$A, v4f64:$B), + (QVFADD $A, $B)>; +def : Pat<(int_ppc_qpx_qvfsub v4f64:$A, v4f64:$B), + (QVFSUB $A, $B)>; +def : Pat<(int_ppc_qpx_qvfmul v4f64:$A, v4f64:$B), + (QVFMUL $A, $B)>; + +// Additional QVFNMSUB patterns: -a*c + b == -(a*c - b) +def : Pat<(fma (fneg v4f64:$A), v4f64:$C, v4f64:$B), + (QVFNMSUB $A, $B, $C)>; +def : Pat<(fma v4f64:$A, (fneg v4f64:$C), v4f64:$B), + (QVFNMSUB $A, $B, $C)>; +def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B), + (QVFNMSUBSs $A, $B, $C)>; +def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B), + (QVFNMSUBSs $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvfmadd v4f64:$A, v4f64:$B, v4f64:$C), + (QVFMADD $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfnmadd v4f64:$A, v4f64:$B, v4f64:$C), + (QVFNMADD $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfmsub v4f64:$A, v4f64:$B, v4f64:$C), + (QVFMSUB $A, $B, $C)>; +def : Pat<(int_ppc_qpx_qvfnmsub v4f64:$A, v4f64:$B, v4f64:$C), + (QVFNMSUB $A, $B, $C)>; + +def : Pat<(int_ppc_qpx_qvlfd xoaddr:$src), + (QVLFDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src), + (QVLFDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfs xoaddr:$src), + (QVLFSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src), + (QVLFSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcda xoaddr:$src), + (QVLFCDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcd xoaddr:$src), + (QVLFCDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcsa xoaddr:$src), + (QVLFCSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfcs xoaddr:$src), + (QVLFCSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfda xoaddr:$src), + (QVLFDXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwaa xoaddr:$src), + (QVLFIWAXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwa xoaddr:$src), + (QVLFIWAX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwza xoaddr:$src), + (QVLFIWZXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfiwz xoaddr:$src), + (QVLFIWZX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlfsa xoaddr:$src), + (QVLFSXA xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcld xoaddr:$src), + (QVLPCLDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcls xoaddr:$src), + (QVLPCLSX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcrd xoaddr:$src), + (QVLPCRDX xoaddr:$src)>; +def : Pat<(int_ppc_qpx_qvlpcrs xoaddr:$src), + (QVLPCRSX xoaddr:$src)>; + +def : 
Pat<(int_ppc_qpx_qvstfd v4f64:$T, xoaddr:$dst), + (QVSTFDX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfs v4f64:$T, xoaddr:$dst), + (QVSTFSX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcda v4f64:$T, xoaddr:$dst), + (QVSTFCDXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcd v4f64:$T, xoaddr:$dst), + (QVSTFCDX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcsa v4f64:$T, xoaddr:$dst), + (QVSTFCSXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfcs v4f64:$T, xoaddr:$dst), + (QVSTFCSX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfda v4f64:$T, xoaddr:$dst), + (QVSTFDXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfiwa v4f64:$T, xoaddr:$dst), + (QVSTFIWXA $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfiw v4f64:$T, xoaddr:$dst), + (QVSTFIWX $T, xoaddr:$dst)>; +def : Pat<(int_ppc_qpx_qvstfsa v4f64:$T, xoaddr:$dst), + (QVSTFSXA $T, xoaddr:$dst)>; + +def : Pat<(pre_store v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFDUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store v4f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncstv4f32 v4f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (QVSTFSUXs $rS, $ptrreg, $ptroff)>; + +def : Pat<(int_ppc_qpx_qvflogical v4f64:$A, v4f64:$B, (i32 imm:$idx)), + (QVFLOGICAL $A, $B, imm:$idx)>; +def : Pat<(int_ppc_qpx_qvgpci (u12:$idx)), + (QVGPCI imm:$idx)>; + +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOGE), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETOLE), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETONE), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETO), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUEQ), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGT), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUGE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPLTb $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULT), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFTSTNANb $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETULE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPGTb $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETUNE), + (QVFLOGICALb (QVFTSTNANb $FRA, $FRB), + (QVFCMPEQb $FRA, $FRB), (i32 13))>; + +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETEQ), + (QVFCMPEQb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGT), + (QVFCMPGTb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETGE), + (QVFLOGICALb (QVFCMPLTb $FRA, $FRB), + (QVFCMPLTb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLT), + (QVFCMPLTb $FRA, $FRB)>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETLE), + (QVFLOGICALb (QVFCMPGTb $FRA, $FRB), + (QVFCMPGTb $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f64:$FRA, v4f64:$FRB, SETNE), + (QVFLOGICALb (QVFCMPEQb $FRA, $FRB), + (QVFCMPEQb $FRA, $FRB), (i32 10))>; + +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOGE), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETOLE), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, 
SETONE), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 8))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETO), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUEQ), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGT), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUGE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPLTbs $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULT), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFTSTNANbs $FRA, $FRB), (i32 7))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETULE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPGTbs $FRA, $FRB), (i32 13))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETUNE), + (QVFLOGICALb (QVFTSTNANbs $FRA, $FRB), + (QVFCMPEQbs $FRA, $FRB), (i32 13))>; + +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETEQ), + (QVFCMPEQbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGT), + (QVFCMPGTbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETGE), + (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB), + (QVFCMPLTbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLT), + (QVFCMPLTbs $FRA, $FRB)>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETLE), + (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB), + (QVFCMPGTbs $FRA, $FRB), (i32 10))>; +def : Pat<(setcc v4f32:$FRA, v4f32:$FRB, SETNE), + (QVFLOGICALb (QVFCMPEQbs $FRA, $FRB), + (QVFCMPEQbs $FRA, $FRB), (i32 10))>; + +def : Pat<(and v4i1:$FRA, (not v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 4))>; +def : Pat<(not (or v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 8))>; +def : Pat<(not (xor v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 9))>; +def : Pat<(or v4i1:$FRA, (not v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 13))>; +def : Pat<(not (and v4i1:$FRA, v4i1:$FRB)), + (QVFLOGICALb $FRA, $FRB, (i32 14))>; + +def : Pat<(and v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 1))>; +def : Pat<(or v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 7))>; +def : Pat<(xor v4i1:$FRA, v4i1:$FRB), + (QVFLOGICALb $FRA, $FRB, (i32 6))>; +def : Pat<(not v4i1:$FRA), + (QVFLOGICALb $FRA, $FRA, (i32 10))>; + +def : Pat<(v4f64 (fextend v4f32:$src)), + (COPY_TO_REGCLASS $src, QFRC)>; + +def : Pat<(v4f32 (fround_exact v4f64:$src)), + (COPY_TO_REGCLASS $src, QSRC)>; + +// Extract the underlying floating-point values from the +// QPX (-1.0, 1.0) boolean representation. 
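+// In this representation each lane of a v4i1 value is held as a double:
+// 1.0 encodes true and -1.0 encodes false, so the conversion below is just
+// a register-class copy and needs no instruction.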
+def : Pat<(v4f64 (PPCqbflt v4i1:$src)),
+          (COPY_TO_REGCLASS $src, QFRC)>;
+
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLT)),
+          (SELECT_QFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETLE)),
+          (SELECT_QFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETEQ)),
+          (SELECT_QFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGE)),
+          (SELECT_QFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETGT)),
+          (SELECT_QFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f64 (selectcc i1:$lhs, i1:$rhs, v4f64:$tval, v4f64:$fval, SETNE)),
+          (SELECT_QFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLT)),
+          (SELECT_QSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETLE)),
+          (SELECT_QSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETEQ)),
+          (SELECT_QSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGE)),
+          (SELECT_QSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETGT)),
+          (SELECT_QSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4f32 (selectcc i1:$lhs, i1:$rhs, v4f32:$tval, v4f32:$fval, SETNE)),
+          (SELECT_QSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLT)),
+          (SELECT_QBRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETLE)),
+          (SELECT_QBRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETEQ)),
+          (SELECT_QBRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGE)),
+          (SELECT_QBRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETGT)),
+          (SELECT_QBRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i1 (selectcc i1:$lhs, i1:$rhs, v4i1:$tval, v4i1:$fval, SETNE)),
+          (SELECT_QBRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+} // end HasQPX
+
+let Predicates = [HasQPX, NoNaNsFPMath] in {
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFCMPLTb $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFCMPGTb $FRA, $FRB), $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFCMPLTbs $FRA, $FRB), $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFCMPGTbs $FRA, $FRB), $FRB, $FRA)>;
+}
+
+let Predicates = [HasQPX, NaNsFPMath] in {
+// When either of these operands is NaN, we should return the other operand.
+// QVFCMPLT/QVFCMPGT return false if either operand is NaN, which means we
+// need to explicitly OR in a NaN test on the second operand.
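+// That is, the select mask is computed as cmplt(a, b) | isnan(b), with the
+// OR done by QVFLOGICAL code 7 (the qvfor encoding), so a NaN in the second
+// operand forces the mask true and the first operand is returned.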
+def : Pat<(fminnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFLOGICALb (QVFCMPLTb $FRA, $FRB),
+                                (QVFTSTNANb $FRB, $FRB), (i32 7)),
+                   $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f64:$FRA, v4f64:$FRB),
+          (QVFSELb (QVFLOGICALb (QVFCMPGTb $FRA, $FRB),
+                                (QVFTSTNANb $FRB, $FRB), (i32 7)),
+                   $FRB, $FRA)>;
+
+def : Pat<(fminnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFLOGICALb (QVFCMPLTbs $FRA, $FRB),
+                                 (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+                    $FRB, $FRA)>;
+def : Pat<(fmaxnum v4f32:$FRA, v4f32:$FRB),
+          (QVFSELbs (QVFLOGICALb (QVFCMPGTbs $FRA, $FRB),
+                                 (QVFTSTNANbs $FRB, $FRB), (i32 7)),
+                    $FRB, $FRA)>;
+}
+
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 41bb11f47a0..c9a96840a9b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -275,6 +275,9 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   }
   case PPC::F8RCRegClassID:
   case PPC::F4RCRegClassID:
+  case PPC::QFRCRegClassID:
+  case PPC::QSRCRegClassID:
+  case PPC::QBRCRegClassID:
   case PPC::VRRCRegClassID:
   case PPC::VFRCRegClassID:
   case PPC::VSLRCRegClassID:
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 62416bc5d9f..9a7df9615cc 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -49,6 +49,13 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
   let HWEncoding{4-0} = num;
 }
 
+// QFPR - One of the 32 256-bit floating-point vector registers (used for QPX)
+class QFPR<FPR SubReg, string n> : PPCReg<n> {
+  let HWEncoding = SubReg.HWEncoding;
+  let SubRegs = [SubReg];
+  let SubRegIndices = [sub_64];
+}
+
 // VF - One of the 32 64-bit floating-point subregisters of the vector
 // registers (used by VSX).
 class VF<bits<5> num, string n> : PPCReg<n> {
@@ -114,6 +121,12 @@ foreach Index = 0-31 in {
   def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
 }
 
+// QPX Floating-point registers
+foreach Index = 0-31 in {
+  def QF#Index : QFPR<!cast<FPR>("F"#Index), "q"#Index>,
+                 DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
 // Vector registers
 foreach Index = 0-31 in {
   def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
@@ -303,6 +316,18 @@ def VFRC : RegisterClass<"PPC", [f64], 64,
                          VF22, VF21, VF20)>;
 def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
 
+// For QPX
+def QFRC : RegisterClass<"PPC", [v4f64], 256, (add (sequence "QF%u", 0, 13),
+                                                   (sequence "QF%u", 31, 14))>;
+def QSRC : RegisterClass<"PPC", [v4f32], 128, (add QFRC)>;
+def QBRC : RegisterClass<"PPC", [v4i1], 256, (add QFRC)> {
+  // These are actually stored as floating-point values where a positive
+  // number is true and anything else (including NaN) is false.
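+  // For example, a QPX compare writes 1.0 into a lane for true and -1.0 for
+  // false, and all v4i1 logic on these values is done with qvflogical.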
+  let Size = 256;
+}
+
 def CRBITRC : RegisterClass<"PPC", [i1], 32,
   (add CR2LT, CR2GT, CR2EQ, CR2UN,
        CR3LT, CR3GT, CR3EQ, CR3UN,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 8d3d5c4e408..c91428db3a9 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -37,6 +37,10 @@ using namespace llvm;
 static cl::opt<bool> UseSubRegLiveness("ppc-track-subreg-liveness",
 cl::desc("Enable subregister liveness tracking for PPC"), cl::Hidden);
 
+static cl::opt<bool> QPXStackUnaligned("qpx-stack-unaligned",
+  cl::desc("Even when QPX is enabled the stack is not 32-byte aligned"),
+  cl::Hidden);
+
 PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                             StringRef FS) {
   initializeEnvironment();
@@ -90,6 +94,7 @@ void PPCSubtarget::initializeEnvironment() {
   HasLazyResolverStubs = false;
   HasICBT = false;
   HasInvariantFunctionDescriptors = false;
+  IsQPXStackUnaligned = false;
 }
 
 void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -126,8 +131,8 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
   // QPX requires a 32-byte aligned stack. Note that we need to do this if
   // we're compiling for a BG/Q system regardless of whether or not QPX
   // is enabled because external functions will assume this alignment.
-  if (hasQPX() || isBGQ())
-    StackAlignment = 32;
+  IsQPXStackUnaligned = QPXStackUnaligned;
+  StackAlignment = getPlatformStackAlignment();
 
   // Determine endianness.
   // FIXME: Part of the TargetMachine.
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 704a226ed33..247a96d405e 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -114,6 +114,11 @@ protected:
   bool HasICBT;
   bool HasInvariantFunctionDescriptors;
 
+  /// When targeting QPX on a stock PPC64 Linux kernel, where the stack
+  /// alignment has not been changed, we need to keep the stack's 16-byte
+  /// alignment.
+  bool IsQPXStackUnaligned;
+
   const PPCTargetMachine &TM;
   PPCFrameLowering FrameLowering;
   PPCInstrInfo InstrInfo;
@@ -230,6 +235,14 @@ public:
     return HasInvariantFunctionDescriptors;
   }
 
+  bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
+  unsigned getPlatformStackAlignment() const {
+    if ((hasQPX() || isBGQ()) && !isQPXStackUnaligned())
+      return 32;
+
+    return 16;
+  }
+
   const Triple &getTargetTriple() const { return TargetTriple; }
 
   /// isDarwin - True if this is any darwin platform.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e1d46f72542..073bbb0c556 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -193,13 +193,14 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
 }
 
 unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
-  if (Vector && !ST->hasAltivec())
+  if (Vector && !ST->hasAltivec() && !ST->hasQPX())
     return 0;
   return ST->hasVSX() ? 64 : 32;
 }
 
 unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) {
   if (Vector) {
+    if (ST->hasQPX()) return 256;
     if (ST->hasAltivec()) return 128;
     return 0;
   }
@@ -276,6 +277,15 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     if (Index == 0)
       return 0;
 
+    return BaseT::getVectorInstrCost(Opcode, Val, Index);
+  } else if (ST->hasQPX() && Val->getScalarType()->isFloatingPointTy()) {
+    // Floating point scalars are already located in index #0.
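+    // Lane 0 lives in the underlying FPR (the QPX patterns extract it with
+    // an EXTRACT_SUBREG of sub_64), so reading it is free; other lanes need
+    // a splat or permute first and are given the base cost below.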
+ if (Index == 0) + return 0; + return BaseT::getVectorInstrCost(Opcode, Val, Index); } diff --git a/test/CodeGen/PowerPC/qpx-bv-sint.ll b/test/CodeGen/PowerPC/qpx-bv-sint.ll new file mode 100644 index 00000000000..0bc14ed4351 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-bv-sint.ll @@ -0,0 +1,33 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @s452() nounwind { +entry: + br label %for.body4 + +for.body4: ; preds = %for.body4, %entry + %conv.4 = sitofp i32 undef to double + %conv.5 = sitofp i32 undef to double + %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0 + %mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1 + %mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef + %add7.4 = fadd <2 x double> undef, %mul.4 + store <2 x double> %add7.4, <2 x double>* undef, align 16 + br i1 undef, label %for.end, label %for.body4 + +for.end: ; preds = %for.body4 + unreachable +; CHECK-LABEL: @s452 +; CHECK: lfiwax [[REG1:[0-9]+]], +; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]] +; FIXME: We could 'promote' this to a vector earlier and remove this splat. +; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0 +; CHECK: qvfmul +; CHECK: qvfadd +; CHECK: qvesplati {{[0-9]+}}, +; FIXME: We can use qvstfcdx here instead of two stores. +; CHECK: stfd +; CHECK: stfd +} + diff --git a/test/CodeGen/PowerPC/qpx-bv.ll b/test/CodeGen/PowerPC/qpx-bv.ll new file mode 100644 index 00000000000..ae181de383b --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-bv.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=a2q | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) { + %v1 = insertelement <4 x double> undef, double %f1, i32 0 + %v2 = insertelement <4 x double> %v1, double %f2, i32 1 + %v3 = insertelement <4 x double> %v2, double %f3, i32 2 + %v4 = insertelement <4 x double> %v3, double %f4, i32 3 + ret <4 x double> %v4 + +; CHECK-LABEL: @foo +; CHECK: qvgpci [[REG1:[0-9]+]], 275 +; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101 +; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]] +; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]] +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) { + %v1 = insertelement <4 x float> undef, float %f1, i32 0 + %v2 = insertelement <4 x float> %v1, float %f2, i32 1 + %v3 = insertelement <4 x float> %v2, float %f3, i32 2 + %v4 = insertelement <4 x float> %v3, float %f4, i32 3 + ret <4 x float> %v4 + +; CHECK-LABEL: @goo +; CHECK: qvgpci [[REG1:[0-9]+]], 275 +; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101 +; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]] +; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]] +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]] +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/qpx-func-clobber.ll b/test/CodeGen/PowerPC/qpx-func-clobber.ll new file mode 100644 index 00000000000..c1b808aa7c4 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-func-clobber.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +declare <4 x double> @foo(<4 x double> %p) + +define <4 x double> @bar(<4 x double> %p, <4 x double> %q) { +entry: + %v = call <4 x double> 
@foo(<4 x double> %p)
+  %w = call <4 x double> @foo(<4 x double> %q)
+  %x = fadd <4 x double> %v, %w
+  ret <4 x double> %x
+
+; CHECK-LABEL: @bar
+; CHECK: qvstfdx 2,
+; CHECK: bl foo
+; CHECK: qvstfdx 1,
+; CHECK: qvlfdx 1,
+; CHECK: bl foo
+; CHECK: qvlfdx [[REG:[0-9]+]],
+; CHECK: qvfadd 1, [[REG]], 1
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-load.ll b/test/CodeGen/PowerPC/qpx-load.ll
new file mode 100644
index 00000000000..2eb29081e26
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-load.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define <4 x double> @foo(<4 x double>* %p) {
+entry:
+  %v = load <4 x double>* %p, align 8
+  ret <4 x double> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 31
+; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x double> @bar(<4 x double>* %p) {
+entry:
+  %v = load <4 x double>* %p, align 32
+  ret <4 x double> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfdx
+
diff --git a/test/CodeGen/PowerPC/qpx-recipest.ll b/test/CodeGen/PowerPC/qpx-recipest.ll
new file mode 100644
index 00000000000..0e01358e579
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-recipest.ll
@@ -0,0 +1,194 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+define <4 x double> @foo(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %r = fdiv <4 x double> %a, %x
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foo
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind {
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %y = fpext <4 x float> %x to <4 x double>
+  %r = fdiv <4 x double> %a, %y
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foof
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foof
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind {
+entry:
+  %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b)
+  %y = fptrunc <4 x double> %x to <4 x float>
+  %r = fdiv <4 x float> %a, %y
+  ret <4 x float> %r
+
+; CHECK-LABEL: @food
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfmul
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: qvfrsp
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @food
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
+  %r = fdiv <4 x float> %a, %x
+  ret <4 x float> %r
+
+; CHECK-LABEL: @goo
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind {
+entry:
+  %r = fdiv <4 x double> %a, %b
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foo2
+; CHECK: qvfre
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfnmsub
+; CHECK: qvfmadd
+; CHECK: qvfmul
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo2
+; CHECK-SAFE: fdiv
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+  %r = fdiv <4 x float> %a, %b
+  ret <4 x float> %r
+
+; CHECK-LABEL: @goo2
+; CHECK: qvfres
+; CHECK: qvfnmsubs
+; CHECK: qvfmadds
+; CHECK: qvfmuls
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo2
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
+define <4 x double> @foo3(<4 x double> %a) nounwind {
+entry:
+  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
+  ret <4 x double> %r
+
+; CHECK-LABEL: @foo3
+; CHECK: qvfrsqrte
+; CHECK: qvfmul
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadd instead of a qvfnmsub
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmadd
+; CHECK-DAG: qvfmul
+; CHECK-DAG: qvfmul
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @foo3
+; CHECK-SAFE: fsqrt
+; CHECK-SAFE: blr
+}
+
+define <4 x float> @goo3(<4 x float> %a) nounwind {
+entry:
+  %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+  ret <4 x float> %r
+
+; CHECK-LABEL: @goo3
+; CHECK: qvfrsqrtes
+; CHECK: qvfmuls
+; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
+; a qvfmadds instead of a qvfnmsubs
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfcmpeq
+; CHECK-DAG: qvfmadds
+; CHECK-DAG: qvfmuls
+; CHECK-DAG: qvfmuls
+; CHECK: qvfsel
+; CHECK: blr
+
+; CHECK-SAFE-LABEL: @goo3
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-rounding-ops.ll b/test/CodeGen/PowerPC/qpx-rounding-ops.ll
new file mode 100644
index 00000000000..6fdd8e6a714
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-rounding-ops.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define <4 x float> @test1(<4 x float> %x) nounwind {
+  %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test1:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test1:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test2(<4 x double> %x) nounwind {
+  %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test2:
+; CHECK: qvfrim 1, 1
+
+; CHECK-FM: test2:
+; CHECK-FM: qvfrim 1, 1
+}
+
+declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test3(<4 x float> %x) nounwind {
+  %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test3:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test3:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test4(<4 x double> %x) nounwind {
+  %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test4:
+; CHECK-NOT: qvfrin
+
+; CHECK-FM: test4:
+; CHECK-FM-NOT: qvfrin
+}
+
+declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test5(<4 x float> %x) nounwind {
+  %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test5:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test5:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test6(<4 x double> %x) nounwind {
+  %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test6:
+; CHECK: qvfrip 1, 1
+
+; CHECK-FM: test6:
+; CHECK-FM: qvfrip 1, 1
+}
+
+declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
+
+define <4 x float> @test9(<4 x float> %x) nounwind {
+  %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
+  ret <4 x float> %call
+
+; CHECK: test9:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test9:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
+
+define <4 x double> @test10(<4 x double> %x) nounwind {
+  %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+  ret <4 x double> %call
+
+; CHECK: test10:
+; CHECK: qvfriz 1, 1
+
+; CHECK-FM: test10:
+; CHECK-FM: qvfriz 1, 1
+}
+
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+
diff --git a/test/CodeGen/PowerPC/qpx-s-load.ll b/test/CodeGen/PowerPC/qpx-s-load.ll
new file mode 100644
index 00000000000..8dfab1385dd
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-load.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define <4 x float> @foo(<4 x float>* %p) {
+entry:
+  %v = load <4 x float>* %p, align 4
+  ret <4 x float> %v
+}
+
+; CHECK: @foo
+; CHECK-DAG: li [[REG1:[0-9]+]], 15
+; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3
+; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, [[REG1]]
+; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3
+; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]]
+; CHECK: blr
+
+define <4 x float> @bar(<4 x float>* %p) {
+entry:
+  %v = load <4 x float>* %p, align 16
+  ret <4 x float> %v
+}
+
+; CHECK: @bar
+; CHECK: qvlfsx
+
diff --git a/test/CodeGen/PowerPC/qpx-s-sel.ll b/test/CodeGen/PowerPC/qpx-s-sel.ll
new file mode 100644
index 00000000000..008efea5da4
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-sel.ll
@@ -0,0 +1,147 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+@Q = constant <4 x i1> <i1 0, i1 0, i1 0, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone {
+entry:
+  %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
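+; Note that the v4i1 mask operand arrives in a QPX register here (QPX
+; booleans are kept in the floating-point register file), so the select
+; lowers to a single qvfsel; contrast test2 below, where the mask is built
+; from scalar i1 values and must first be converted via memory.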
+
+define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+  %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+  %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+  %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b
+  ret <4 x float> %r
+
+; CHECK-LABEL: @test2
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+  %v = and <4 x i1> %a, <i1 0, i1 0, i1 1, i1 1>
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+  %q = load <4 x i1>* @Q, align 16
+  %v = and <4 x i1> %a, %q
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+  store <4 x i1> %a, <4 x i1>* @R
+  ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  %s = extractelement <4 x i1> %a, i32 3
+  %q = and i1 %r, %s
+  ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+  %r = extractelement <3 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+  %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+  %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b
+  ret <3 x float> %r
+
+; CHECK-LABEL: @test9
+; CHECK: stw
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/qpx-s-store.ll b/test/CodeGen/PowerPC/qpx-s-store.ll
new file mode 100644
index 00000000000..d2ca45814da
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-s-store.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+define void @foo(<4 x float> %v, <4 x float>* %p) {
+entry:
+  store <4 x float> %v, <4 x float>* %p, align 4
+  ret void
+}
+
+; CHECK: @foo
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: stfs
+; CHECK: blr
+
+define void @bar(<4 x float> %v, <4 x float>* %p) {
+entry:
+  store <4 x float> %v, <4 x float>* %p, align 16
+  ret void
+}
+
+; CHECK: @bar
+; CHECK: qvstfsx
+
diff --git a/test/CodeGen/PowerPC/qpx-sel.ll b/test/CodeGen/PowerPC/qpx-sel.ll
new file mode 100644
index 00000000000..15ae57352c3
--- /dev/null
+++ b/test/CodeGen/PowerPC/qpx-sel.ll
@@ -0,0 +1,151 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s
+
+@Q = constant <4 x i1> <i1 0, i1 0, i1 0, i1 1>, align 16
+@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16
+
+define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone {
+entry:
+  %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b
+  ret <4 x double> %r
+
+; CHECK-LABEL: @test1
+; CHECK: qvfsel 1, 3, 1, 2
+; CHECK: blr
+}
+
+define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone {
+entry:
+  %v = insertelement <4 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2
+  %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3
+  %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b
+  ret <4 x double> %r
+
+; CHECK-LABEL: @test2
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz
+; CHECK-DAG: stw
+
+; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]],
+; CHECK-DAG: qvlfdx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]]
+; CHECK: qvfsel 1, [[REG4]], 1, 2
+; CHECK: blr
+}
+
+define <4 x i1> @test3(<4 x i1> %a) nounwind readnone {
+entry:
+  %v = and <4 x i1> %a, <i1 0, i1 0, i1 1, i1 1>
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test3
+; CHECK: qvlfsx [[REG:[0-9]+]],
+; qvflogical 1, 1, [[REG]], 1
+; blr
+}
+
+define <4 x i1> @test4(<4 x i1> %a) nounwind {
+entry:
+  %q = load <4 x i1>* @Q, align 16
+  %v = and <4 x i1> %a, %q
+  ret <4 x i1> %v
+
+; CHECK-LABEL: @test4
+; CHECK-DAG: lbz
+; CHECK-DAG: qvlfdx [[REG1:[0-9]+]],
+; CHECK-DAG: stw
+; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]],
+; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]]
+; CHECK: qvflogical 1, 1, [[REG4]], 1
+; CHECK: blr
+}
+
+define void @test5(<4 x i1> %a) nounwind {
+entry:
+  store <4 x i1> %a, <4 x i1>* @R
+  ret void
+
+; CHECK-LABEL: @test5
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: stb
+; CHECK: blr
+}
+
+define i1 @test6(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test6
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define i1 @test7(<4 x i1> %a) nounwind {
+entry:
+  %r = extractelement <4 x i1> %a, i32 2
+  %s = extractelement <4 x i1> %a, i32 3
+  %q = and i1 %r, %s
+  ret i1 %q
+
+; CHECK-LABEL: @test7
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG4:[0-9]+]],
+; FIXME: We're storing the vector twice, and that's silly.
+; CHECK-DAG: qvstfiwx [[REG3]],
+; CHECK-DAG: lwz [[REG5:[0-9]+]],
+; CHECK: and 3,
+; CHECK: blr
+}
+
+define i1 @test8(<3 x i1> %a) nounwind {
+entry:
+  %r = extractelement <3 x i1> %a, i32 2
+  ret i1 %r
+
+; CHECK-LABEL: @test8
+; CHECK: qvlfdx [[REG1:[0-9]+]],
+; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]]
+; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]]
+; CHECK: qvstfiwx [[REG3]],
+; CHECK: lwz
+; CHECK: blr
+}
+
+define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone {
+entry:
+  %v = insertelement <3 x i1> undef, i1 %c1, i32 0
+  %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1
+  %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2
+  %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b
+  ret <3 x double> %r
+
+; CHECK-LABEL: @test9
+
+; FIXME: This load/store sequence is unnecessary.
+; CHECK-DAG: lbz +; CHECK-DAG: stw + +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/qpx-store.ll b/test/CodeGen/PowerPC/qpx-store.ll new file mode 100644 index 00000000000..c29cc225b05 --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-store.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @foo(<4 x double> %v, <4 x double>* %p) { +entry: + store <4 x double> %v, <4 x double>* %p, align 8 + ret void +} + +; CHECK: @foo +; CHECK: stfd +; CHECK: stfd +; CHECK: stfd +; CHECK: stfd +; CHECK: blr + +define void @bar(<4 x double> %v, <4 x double>* %p) { +entry: + store <4 x double> %v, <4 x double>* %p, align 32 + ret void +} + +; CHECK: @bar +; CHECK: qvstfdx + diff --git a/test/CodeGen/PowerPC/qpx-unalperm.ll b/test/CodeGen/PowerPC/qpx-unalperm.ll new file mode 100644 index 00000000000..e765b46a7cf --- /dev/null +++ b/test/CodeGen/PowerPC/qpx-unalperm.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mcpu=a2q | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define <4 x double> @foo(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 32 + ret <4 x double> %r +; CHECK: qvlfdx +; CHECK: blr +} + +define <4 x double> @bar(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 16 + %s = load <4 x double>* %b, align 32 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +; CHECK: qvlpcldx +; CHECK: qvlfdx +; CHECK: qvfperm +; CHECK: blr +} + +define <4 x double> @bar1(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 16 + %s = load <4 x double>* %b, align 8 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar2(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 32 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar3(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 8 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar4(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 8 + %c = getelementptr <4 x double>* %b, i32 1 + %t = load <4 x double>* %c, align 8 + %u = fadd <4 x double> %r, %s + %v = fadd <4 x double> %u, %t + ret <4 x double> %v +} + diff --git a/test/CodeGen/PowerPC/vsx-infl-copy2.ll b/test/CodeGen/PowerPC/vsx-infl-copy2.ll index 037473bdec8..0f279067159 100644 --- a/test/CodeGen/PowerPC/vsx-infl-copy2.ll +++ b/test/CodeGen/PowerPC/vsx-infl-copy2.ll @@ -8,7 +8,6 @@ entry: br i1 false, label %loop2_start, label %if.end5 ; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc -; CHECK: xxlor loop2_start: ; preds = %loop2_start, %entry br i1 undef, label %loop2_start, label %if.then.i31 diff --git a/test/MC/Disassembler/PowerPC/qpx.txt b/test/MC/Disassembler/PowerPC/qpx.txt new file mode 100644 index 00000000000..b53bb4c4d37 --- /dev/null +++ b/test/MC/Disassembler/PowerPC/qpx.txt @@ -0,0 +1,383 @@ 
+# RUN: llvm-mc --disassemble %s -triple powerpc64-bgq-linux -mcpu=a2q | FileCheck %s + +# CHECK: qvfabs 3, 5 +0x10 0x60 0x2a 0x10 + +# CHECK: qvfadd 3, 4, 5 +0x10 0x64 0x28 0x2a + +# CHECK: qvfadds 3, 4, 5 +0x00 0x64 0x28 0x2a + +# FIXME: decode as qvfandc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 4 +0x10 0x64 0x2a 0x08 + +# FIXME: decode as qvfand 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 1 +0x10 0x64 0x28 0x88 + +# CHECK: qvfcfid 3, 5 +0x10 0x60 0x2e 0x9c + +# CHECK: qvfcfids 3, 5 +0x00 0x60 0x2e 0x9c + +# CHECK: qvfcfidu 3, 5 +0x10 0x60 0x2f 0x9c + +# CHECK: qvfcfidus 3, 5 +0x00 0x60 0x2f 0x9c + +# FIXME: decode as qvfclr 3 +# CHECK: qvflogical 3, 3, 3, 0 +0x10 0x63 0x18 0x08 + +# CHECK: qvfcpsgn 3, 4, 5 +0x10 0x64 0x28 0x10 + +# FIXME: decode as qvfctfb 3, 4 +# CHECK: qvflogical 3, 4, 4, 5 +0x10 0x64 0x22 0x88 + +# CHECK: qvfctid 3, 5 +0x10 0x60 0x2e 0x5c + +# CHECK: qvfctidu 3, 5 +0x10 0x60 0x2f 0x5c + +# CHECK: qvfctiduz 3, 5 +0x10 0x60 0x2f 0x5e + +# CHECK: qvfctidz 3, 5 +0x10 0x60 0x2e 0x5e + +# CHECK: qvfctiw 3, 5 +0x10 0x60 0x28 0x1c + +# CHECK: qvfctiwu 3, 5 +0x10 0x60 0x29 0x1c + +# CHECK: qvfctiwuz 3, 5 +0x10 0x60 0x29 0x1e + +# CHECK: qvfctiwz 3, 5 +0x10 0x60 0x28 0x1e + +# FIXME: decode as qvfequ 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 9 +0x10 0x64 0x2c 0x88 + +# CHECK: qvflogical 3, 4, 5, 12 +0x10 0x64 0x2e 0x08 + +# CHECK: qvfmadd 3, 4, 6, 5 +0x10 0x64 0x29 0xba + +# CHECK: qvfmadds 3, 4, 6, 5 +0x00 0x64 0x29 0xba + +# CHECK: qvfmr 3, 5 +0x10 0x60 0x28 0x90 + +# CHECK: qvfmsub 3, 4, 6, 5 +0x10 0x64 0x29 0xb8 + +# CHECK: qvfmsubs 3, 4, 6, 5 +0x00 0x64 0x29 0xb8 + +# CHECK: qvfmul 3, 4, 6 +0x10 0x64 0x01 0xb2 + +# CHECK: qvfmuls 3, 4, 6 +0x00 0x64 0x01 0xb2 + +# CHECK: qvfnabs 3, 5 +0x10 0x60 0x29 0x10 + +# FIXME: decode as qvfnand 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 14 +0x10 0x64 0x2f 0x08 + +# CHECK: qvfneg 3, 5 +0x10 0x60 0x28 0x50 + +# CHECK: qvfnmadd 3, 4, 6, 5 +0x10 0x64 0x29 0xbe + +# CHECK: qvfnmadds 3, 4, 6, 5 +0x00 0x64 0x29 0xbe + +# CHECK: qvfnmsub 3, 4, 6, 5 +0x10 0x64 0x29 0xbc + +# CHECK: qvfnmsubs 3, 4, 6, 5 +0x00 0x64 0x29 0xbc + +# FIXME: decode as qvfnor 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 8 +0x10 0x64 0x2c 0x08 + +# FIXME: decode as qvfnot 3, 4 +# CHECK: qvflogical 3, 4, 4, 10 +0x10 0x64 0x25 0x08 + +# FIXME: decode as qvforc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 13 +0x10 0x64 0x2e 0x88 + +# FIXME: decode as qvfor 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 7 +0x10 0x64 0x2b 0x88 + +# CHECK: qvfperm 3, 4, 5, 6 +0x10 0x64 0x29 0x8c + +# CHECK: qvfre 3, 5 +0x10 0x60 0x28 0x30 + +# CHECK: qvfres 3, 5 +0x00 0x60 0x28 0x30 + +# CHECK: qvfrim 3, 5 +0x10 0x60 0x2b 0xd0 + +# CHECK: qvfrin 3, 5 +0x10 0x60 0x2b 0x10 + +# CHECK: qvfrip 3, 5 +0x10 0x60 0x2b 0x90 + +# CHECK: qvfriz 3, 5 +0x10 0x60 0x2b 0x50 + +# CHECK: qvfrsp 3, 5 +0x10 0x60 0x28 0x18 + +# CHECK: qvfrsqrte 3, 5 +0x10 0x60 0x28 0x34 + +# CHECK: qvfrsqrtes 3, 5 +0x00 0x60 0x28 0x34 + +# CHECK: qvfsel 3, 4, 6, 5 +0x10 0x64 0x29 0xae + +# FIXME: decode as qvfset 3 +# CHECK: qvflogical 3, 3, 3, 15 +0x10 0x63 0x1f 0x88 + +# CHECK: qvfsub 3, 4, 5 +0x10 0x64 0x28 0x28 + +# CHECK: qvfsubs 3, 4, 5 +0x00 0x64 0x28 0x28 + +# CHECK: qvfxmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x92 + +# CHECK: qvfxmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x92 + +# CHECK: qvfxmul 3, 4, 6 +0x10 0x64 0x01 0xa2 + +# CHECK: qvfxmuls 3, 4, 6 +0x00 0x64 0x01 0xa2 + +# FIXME: decode as qvfxor 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 6 +0x10 0x64 0x2b 0x08 + +# CHECK: qvfxxcpnmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x86 + +# CHECK: qvfxxcpnmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x86 
+ +# CHECK: qvfxxmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x82 + +# CHECK: qvfxxmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x82 + +# CHECK: qvfxxnpmadd 3, 4, 6, 5 +0x10 0x64 0x29 0x96 + +# CHECK: qvfxxnpmadds 3, 4, 6, 5 +0x00 0x64 0x29 0x96 + +# CHECK: qvlfcduxa 3, 9, 11 +0x7c 0x69 0x58 0xcf + +# CHECK: qvlfcdux 3, 9, 11 +0x7c 0x69 0x58 0xce + +# CHECK: qvlfcdxa 3, 10, 11 +0x7c 0x6a 0x58 0x8f + +# CHECK: qvlfcdx 3, 10, 11 +0x7c 0x6a 0x58 0x8e + +# CHECK: qvlfcsuxa 3, 9, 11 +0x7c 0x69 0x58 0x4f + +# CHECK: qvlfcsux 3, 9, 11 +0x7c 0x69 0x58 0x4e + +# CHECK: qvlfcsxa 3, 10, 11 +0x7c 0x6a 0x58 0x0f + +# CHECK: qvlfcsx 3, 10, 11 +0x7c 0x6a 0x58 0x0e + +# CHECK: qvlfduxa 3, 9, 11 +0x7c 0x69 0x5c 0xcf + +# CHECK: qvlfdux 3, 9, 11 +0x7c 0x69 0x5c 0xce + +# CHECK: qvlfdxa 3, 10, 11 +0x7c 0x6a 0x5c 0x8f + +# CHECK: qvlfdx 3, 10, 11 +0x7c 0x6a 0x5c 0x8e + +# CHECK: qvlfiwaxa 3, 10, 11 +0x7c 0x6a 0x5e 0xcf + +# CHECK: qvlfiwax 3, 10, 11 +0x7c 0x6a 0x5e 0xce + +# CHECK: qvlfiwzxa 3, 10, 11 +0x7c 0x6a 0x5e 0x8f + +# CHECK: qvlfiwzx 3, 10, 11 +0x7c 0x6a 0x5e 0x8e + +# CHECK: qvlfsuxa 3, 9, 11 +0x7c 0x69 0x5c 0x4f + +# CHECK: qvlfsux 3, 9, 11 +0x7c 0x69 0x5c 0x4e + +# CHECK: qvlfsxa 3, 10, 11 +0x7c 0x6a 0x5c 0x0f + +# CHECK: qvlfsx 3, 10, 11 +0x7c 0x6a 0x5c 0x0e + +# CHECK: qvlpcldx 3, 10, 11 +0x7c 0x6a 0x5c 0x8c + +# CHECK: qvlpclsx 3, 10, 11 +0x7c 0x6a 0x5c 0x0c + +# CHECK: qvlpcrdx 3, 10, 11 +0x7c 0x6a 0x58 0x8c + +# CHECK: qvlpcrsx 3, 10, 11 +0x7c 0x6a 0x58 0x0c + +# CHECK: qvstfcduxa 2, 9, 11 +0x7c 0x49 0x59 0xcf + +# CHECK: qvstfcduxia 2, 9, 11 +0x7c 0x49 0x59 0xcb + +# CHECK: qvstfcduxi 2, 9, 11 +0x7c 0x49 0x59 0xca + +# CHECK: qvstfcdux 2, 9, 11 +0x7c 0x49 0x59 0xce + +# CHECK: qvstfcdxa 2, 10, 11 +0x7c 0x4a 0x59 0x8f + +# CHECK: qvstfcdxia 2, 10, 11 +0x7c 0x4a 0x59 0x8b + +# CHECK: qvstfcdxi 2, 10, 11 +0x7c 0x4a 0x59 0x8a + +# CHECK: qvstfcdx 2, 10, 11 +0x7c 0x4a 0x59 0x8e + +# CHECK: qvstfcsuxa 2, 9, 11 +0x7c 0x49 0x59 0x4f + +# CHECK: qvstfcsuxia 2, 9, 11 +0x7c 0x49 0x59 0x4b + +# CHECK: qvstfcsuxi 2, 9, 11 +0x7c 0x49 0x59 0x4a + +# CHECK: qvstfcsux 2, 9, 11 +0x7c 0x49 0x59 0x4e + +# CHECK: qvstfcsxa 2, 10, 11 +0x7c 0x4a 0x59 0x0f + +# CHECK: qvstfcsxia 2, 10, 11 +0x7c 0x4a 0x59 0x0b + +# CHECK: qvstfcsxi 2, 10, 11 +0x7c 0x4a 0x59 0x0a + +# CHECK: qvstfcsx 2, 10, 11 +0x7c 0x4a 0x59 0x0e + +# CHECK: qvstfduxa 2, 9, 11 +0x7c 0x49 0x5d 0xcf + +# CHECK: qvstfduxia 2, 9, 11 +0x7c 0x49 0x5d 0xcb + +# CHECK: qvstfduxi 2, 9, 11 +0x7c 0x49 0x5d 0xca + +# CHECK: qvstfdux 2, 9, 11 +0x7c 0x49 0x5d 0xce + +# CHECK: qvstfdxa 2, 10, 11 +0x7c 0x4a 0x5d 0x8f + +# CHECK: qvstfdxia 2, 10, 11 +0x7c 0x4a 0x5d 0x8b + +# CHECK: qvstfdxi 2, 10, 11 +0x7c 0x4a 0x5d 0x8a + +# CHECK: qvstfdx 2, 10, 11 +0x7c 0x4a 0x5d 0x8e + +# CHECK: qvstfiwxa 2, 10, 11 +0x7c 0x4a 0x5f 0x8f + +# CHECK: qvstfiwx 2, 10, 11 +0x7c 0x4a 0x5f 0x8e + +# CHECK: qvstfsuxa 2, 9, 11 +0x7c 0x49 0x5d 0x4f + +# CHECK: qvstfsuxia 2, 9, 11 +0x7c 0x49 0x5d 0x4b + +# CHECK: qvstfsuxi 2, 9, 11 +0x7c 0x49 0x5d 0x4a + +# CHECK: qvstfsux 2, 9, 11 +0x7c 0x49 0x5d 0x4e + +# CHECK: qvstfsxa 2, 10, 11 +0x7c 0x4a 0x5d 0x0f + +# CHECK: qvstfsxia 2, 10, 11 +0x7c 0x4a 0x5d 0x0b + +# CHECK: qvstfsxi 2, 10, 11 +0x7c 0x4a 0x5d 0x0a + +# CHECK: qvstfsx 2, 10, 11 +0x7c 0x4a 0x5d 0x0e + diff --git a/test/MC/PowerPC/qpx.s b/test/MC/PowerPC/qpx.s new file mode 100644 index 00000000000..6c92d715877 --- /dev/null +++ b/test/MC/PowerPC/qpx.s @@ -0,0 +1,251 @@ +# RUN: llvm-mc -triple powerpc64-bgq-linux --show-encoding %s | FileCheck %s + +# FIXME: print qvflogical aliases. 
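+# The qvflogical mask operand is a 4-bit truth table over the two inputs;
+# the extended mnemonics accepted below map onto it (per the InstAlias
+# definitions in PPCInstrQPX.td) as: qvfclr=0, qvfand=1, qvfandc=4,
+# qvfctfb=5, qvfxor=6, qvfor=7, qvfnor=8, qvfequ=9, qvfnot=10, qvforc=13,
+# qvfnand=14, qvfset=15.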
+ +# CHECK: qvfabs 3, 5 # encoding: [0x10,0x60,0x2a,0x10] + qvfabs 3, 5 +# CHECK: qvfadd 3, 4, 5 # encoding: [0x10,0x64,0x28,0x2a] + qvfadd 3, 4, 5 +# CHECK: qvfadds 3, 4, 5 # encoding: [0x00,0x64,0x28,0x2a] + qvfadds 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 4 # encoding: [0x10,0x64,0x2a,0x08] + qvfandc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 1 # encoding: [0x10,0x64,0x28,0x88] + qvfand 3, 4, 5 +# CHECK: qvfcfid 3, 5 # encoding: [0x10,0x60,0x2e,0x9c] + qvfcfid 3, 5 +# CHECK: qvfcfids 3, 5 # encoding: [0x00,0x60,0x2e,0x9c] + qvfcfids 3, 5 +# CHECK: qvfcfidu 3, 5 # encoding: [0x10,0x60,0x2f,0x9c] + qvfcfidu 3, 5 +# CHECK: qvfcfidus 3, 5 # encoding: [0x00,0x60,0x2f,0x9c] + qvfcfidus 3, 5 +# CHECK: qvflogical 3, 3, 3, 0 # encoding: [0x10,0x63,0x18,0x08] + qvfclr 3 +# CHECK: qvfcpsgn 3, 4, 5 # encoding: [0x10,0x64,0x28,0x10] + qvfcpsgn 3, 4, 5 +# CHECK: qvflogical 3, 4, 4, 5 # encoding: [0x10,0x64,0x22,0x88] + qvfctfb 3, 4 +# CHECK: qvfctid 3, 5 # encoding: [0x10,0x60,0x2e,0x5c] + qvfctid 3, 5 +# CHECK: qvfctidu 3, 5 # encoding: [0x10,0x60,0x2f,0x5c] + qvfctidu 3, 5 +# CHECK: qvfctiduz 3, 5 # encoding: [0x10,0x60,0x2f,0x5e] + qvfctiduz 3, 5 +# CHECK: qvfctidz 3, 5 # encoding: [0x10,0x60,0x2e,0x5e] + qvfctidz 3, 5 +# CHECK: qvfctiw 3, 5 # encoding: [0x10,0x60,0x28,0x1c] + qvfctiw 3, 5 +# CHECK: qvfctiwu 3, 5 # encoding: [0x10,0x60,0x29,0x1c] + qvfctiwu 3, 5 +# CHECK: qvfctiwuz 3, 5 # encoding: [0x10,0x60,0x29,0x1e] + qvfctiwuz 3, 5 +# CHECK: qvfctiwz 3, 5 # encoding: [0x10,0x60,0x28,0x1e] + qvfctiwz 3, 5 +# CHECK: qvflogical 3, 4, 5, 9 # encoding: [0x10,0x64,0x2c,0x88] + qvfequ 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 12 # encoding: [0x10,0x64,0x2e,0x08] + qvflogical 3, 4, 5, 12 +# CHECK: qvfmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xba] + qvfmadd 3, 4, 6, 5 +# CHECK: qvfmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xba] + qvfmadds 3, 4, 6, 5 +# CHECK: qvfmr 3, 5 # encoding: [0x10,0x60,0x28,0x90] + qvfmr 3, 5 +# CHECK: qvfmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xb8] + qvfmsub 3, 4, 6, 5 +# CHECK: qvfmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xb8] + qvfmsubs 3, 4, 6, 5 +# CHECK: qvfmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xb2] + qvfmul 3, 4, 6 +# CHECK: qvfmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xb2] + qvfmuls 3, 4, 6 +# CHECK: qvfnabs 3, 5 # encoding: [0x10,0x60,0x29,0x10] + qvfnabs 3, 5 +# CHECK: qvflogical 3, 4, 5, 14 # encoding: [0x10,0x64,0x2f,0x08] + qvfnand 3, 4, 5 +# CHECK: qvfneg 3, 5 # encoding: [0x10,0x60,0x28,0x50] + qvfneg 3, 5 +# CHECK: qvfnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbe] + qvfnmadd 3, 4, 6, 5 +# CHECK: qvfnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbe] + qvfnmadds 3, 4, 6, 5 +# CHECK: qvfnmsub 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xbc] + qvfnmsub 3, 4, 6, 5 +# CHECK: qvfnmsubs 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0xbc] + qvfnmsubs 3, 4, 6, 5 +# CHECK: qvflogical 3, 4, 5, 8 # encoding: [0x10,0x64,0x2c,0x08] + qvfnor 3, 4, 5 +# CHECK: qvflogical 3, 4, 4, 10 # encoding: [0x10,0x64,0x25,0x08] + qvfnot 3, 4 +# CHECK: qvflogical 3, 4, 5, 13 # encoding: [0x10,0x64,0x2e,0x88] + qvforc 3, 4, 5 +# CHECK: qvflogical 3, 4, 5, 7 # encoding: [0x10,0x64,0x2b,0x88] + qvfor 3, 4, 5 +# CHECK: qvfperm 3, 4, 5, 6 # encoding: [0x10,0x64,0x29,0x8c] + qvfperm 3, 4, 5, 6 +# CHECK: qvfre 3, 5 # encoding: [0x10,0x60,0x28,0x30] + qvfre 3, 5 +# CHECK: qvfres 3, 5 # encoding: [0x00,0x60,0x28,0x30] + qvfres 3, 5 +# CHECK: qvfrim 3, 5 # encoding: [0x10,0x60,0x2b,0xd0] + qvfrim 3, 5 +# CHECK: qvfrin 3, 5 # encoding: [0x10,0x60,0x2b,0x10] + qvfrin 3, 5 +# CHECK: qvfrip 3, 5 # 
encoding: [0x10,0x60,0x2b,0x90] + qvfrip 3, 5 +# CHECK: qvfriz 3, 5 # encoding: [0x10,0x60,0x2b,0x50] + qvfriz 3, 5 +# CHECK: qvfrsp 3, 5 # encoding: [0x10,0x60,0x28,0x18] + qvfrsp 3, 5 +# CHECK: qvfrsqrte 3, 5 # encoding: [0x10,0x60,0x28,0x34] + qvfrsqrte 3, 5 +# CHECK: qvfrsqrtes 3, 5 # encoding: [0x00,0x60,0x28,0x34] + qvfrsqrtes 3, 5 +# CHECK: qvfsel 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0xae] + qvfsel 3, 4, 6, 5 +# CHECK: qvflogical 3, 3, 3, 15 # encoding: [0x10,0x63,0x1f,0x88] + qvfset 3 +# CHECK: qvfsub 3, 4, 5 # encoding: [0x10,0x64,0x28,0x28] + qvfsub 3, 4, 5 +# CHECK: qvfsubs 3, 4, 5 # encoding: [0x00,0x64,0x28,0x28] + qvfsubs 3, 4, 5 +# CHECK: qvfxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x92] + qvfxmadd 3, 4, 6, 5 +# CHECK: qvfxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x92] + qvfxmadds 3, 4, 6, 5 +# CHECK: qvfxmul 3, 4, 6 # encoding: [0x10,0x64,0x01,0xa2] + qvfxmul 3, 4, 6 +# CHECK: qvfxmuls 3, 4, 6 # encoding: [0x00,0x64,0x01,0xa2] + qvfxmuls 3, 4, 6 +# CHECK: qvflogical 3, 4, 5, 6 # encoding: [0x10,0x64,0x2b,0x08] + qvfxor 3, 4, 5 +# CHECK: qvfxxcpnmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x86] + qvfxxcpnmadd 3, 4, 6, 5 +# CHECK: qvfxxcpnmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x86] + qvfxxcpnmadds 3, 4, 6, 5 +# CHECK: qvfxxmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x82] + qvfxxmadd 3, 4, 6, 5 +# CHECK: qvfxxmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x82] + qvfxxmadds 3, 4, 6, 5 +# CHECK: qvfxxnpmadd 3, 4, 6, 5 # encoding: [0x10,0x64,0x29,0x96] + qvfxxnpmadd 3, 4, 6, 5 +# CHECK: qvfxxnpmadds 3, 4, 6, 5 # encoding: [0x00,0x64,0x29,0x96] + qvfxxnpmadds 3, 4, 6, 5 +# CHECK: qvlfcduxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xcf] + qvlfcduxa 3, 9, 11 +# CHECK: qvlfcdux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0xce] + qvlfcdux 3, 9, 11 +# CHECK: qvlfcdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8f] + qvlfcdxa 3, 10, 11 +# CHECK: qvlfcdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8e] + qvlfcdx 3, 10, 11 +# CHECK: qvlfcsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4f] + qvlfcsuxa 3, 9, 11 +# CHECK: qvlfcsux 3, 9, 11 # encoding: [0x7c,0x69,0x58,0x4e] + qvlfcsux 3, 9, 11 +# CHECK: qvlfcsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0f] + qvlfcsxa 3, 10, 11 +# CHECK: qvlfcsx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x0e] + qvlfcsx 3, 10, 11 +# CHECK: qvlfduxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xcf] + qvlfduxa 3, 9, 11 +# CHECK: qvlfdux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0xce] + qvlfdux 3, 9, 11 +# CHECK: qvlfdxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8f] + qvlfdxa 3, 10, 11 +# CHECK: qvlfdx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8e] + qvlfdx 3, 10, 11 +# CHECK: qvlfiwaxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xcf] + qvlfiwaxa 3, 10, 11 +# CHECK: qvlfiwax 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0xce] + qvlfiwax 3, 10, 11 +# CHECK: qvlfiwzxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8f] + qvlfiwzxa 3, 10, 11 +# CHECK: qvlfiwzx 3, 10, 11 # encoding: [0x7c,0x6a,0x5e,0x8e] + qvlfiwzx 3, 10, 11 +# CHECK: qvlfsuxa 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4f] + qvlfsuxa 3, 9, 11 +# CHECK: qvlfsux 3, 9, 11 # encoding: [0x7c,0x69,0x5c,0x4e] + qvlfsux 3, 9, 11 +# CHECK: qvlfsxa 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0f] + qvlfsxa 3, 10, 11 +# CHECK: qvlfsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0e] + qvlfsx 3, 10, 11 +# CHECK: qvlpcldx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x8c] + qvlpcldx 3, 10, 11 +# CHECK: qvlpclsx 3, 10, 11 # encoding: [0x7c,0x6a,0x5c,0x0c] + qvlpclsx 3, 10, 11 +# CHECK: qvlpcrdx 3, 10, 11 # encoding: [0x7c,0x6a,0x58,0x8c] + qvlpcrdx 3, 10, 11 +# CHECK: qvlpcrsx 3, 10, 
11 # encoding: [0x7c,0x6a,0x58,0x0c] + qvlpcrsx 3, 10, 11 +# CHECK: qvstfcduxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcf] + qvstfcduxa 2, 9, 11 +# CHECK: qvstfcduxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xcb] + qvstfcduxia 2, 9, 11 +# CHECK: qvstfcduxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xca] + qvstfcduxi 2, 9, 11 +# CHECK: qvstfcdux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0xce] + qvstfcdux 2, 9, 11 +# CHECK: qvstfcdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8f] + qvstfcdxa 2, 10, 11 +# CHECK: qvstfcdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8b] + qvstfcdxia 2, 10, 11 +# CHECK: qvstfcdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8a] + qvstfcdxi 2, 10, 11 +# CHECK: qvstfcdx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x8e] + qvstfcdx 2, 10, 11 +# CHECK: qvstfcsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4f] + qvstfcsuxa 2, 9, 11 +# CHECK: qvstfcsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4b] + qvstfcsuxia 2, 9, 11 +# CHECK: qvstfcsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4a] + qvstfcsuxi 2, 9, 11 +# CHECK: qvstfcsux 2, 9, 11 # encoding: [0x7c,0x49,0x59,0x4e] + qvstfcsux 2, 9, 11 +# CHECK: qvstfcsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0f] + qvstfcsxa 2, 10, 11 +# CHECK: qvstfcsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0b] + qvstfcsxia 2, 10, 11 +# CHECK: qvstfcsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0a] + qvstfcsxi 2, 10, 11 +# CHECK: qvstfcsx 2, 10, 11 # encoding: [0x7c,0x4a,0x59,0x0e] + qvstfcsx 2, 10, 11 +# CHECK: qvstfduxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcf] + qvstfduxa 2, 9, 11 +# CHECK: qvstfduxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xcb] + qvstfduxia 2, 9, 11 +# CHECK: qvstfduxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xca] + qvstfduxi 2, 9, 11 +# CHECK: qvstfdux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0xce] + qvstfdux 2, 9, 11 +# CHECK: qvstfdxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8f] + qvstfdxa 2, 10, 11 +# CHECK: qvstfdxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8b] + qvstfdxia 2, 10, 11 +# CHECK: qvstfdxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8a] + qvstfdxi 2, 10, 11 +# CHECK: qvstfdx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x8e] + qvstfdx 2, 10, 11 +# CHECK: qvstfiwxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8f] + qvstfiwxa 2, 10, 11 +# CHECK: qvstfiwx 2, 10, 11 # encoding: [0x7c,0x4a,0x5f,0x8e] + qvstfiwx 2, 10, 11 +# CHECK: qvstfsuxa 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4f] + qvstfsuxa 2, 9, 11 +# CHECK: qvstfsuxia 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4b] + qvstfsuxia 2, 9, 11 +# CHECK: qvstfsuxi 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4a] + qvstfsuxi 2, 9, 11 +# CHECK: qvstfsux 2, 9, 11 # encoding: [0x7c,0x49,0x5d,0x4e] + qvstfsux 2, 9, 11 +# CHECK: qvstfsxa 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0f] + qvstfsxa 2, 10, 11 +# CHECK: qvstfsxia 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0b] + qvstfsxia 2, 10, 11 +# CHECK: qvstfsxi 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0a] + qvstfsxi 2, 10, 11 +# CHECK: qvstfsx 2, 10, 11 # encoding: [0x7c,0x4a,0x5d,0x0e] + qvstfsx 2, 10, 11 +