mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-05 19:29:01 +00:00
X86-FMA3: Implemented commute transformations for FMA*_Int instructions.
It made it possible to apply the memory folding optimization for the 2nd operand of FMA*_Int instructions. Reviewer: Quentin Colombet Differential Revision: http://reviews.llvm.org/D14550 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@252973 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e4296ea350
commit
c81095daa7
@ -164,15 +164,15 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
|
||||
// sense they are similar to existing ADD*_Int, SUB*_Int, MUL*_Int, etc.
|
||||
// instructions.
|
||||
//
|
||||
// FIXME: The FMA*_Int instructions are TEMPORARILY defined as NOT commutable.
|
||||
// All of the FMA*_Int opcodes are defined as commutable here.
|
||||
// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
|
||||
// and the corresponding optimization has been developed (please see
|
||||
// http://reviews.llvm.org/D13269 for details). The optimization though needs
|
||||
// some minor tuning to enable it for FMA*_Int opcodes.
|
||||
// and the corresponding optimizations have been developed.
|
||||
// Commuting the 1st operand of FMA*_Int requires some additional analysis,
|
||||
// the commute optimization is legal only if all users of FMA*_Int use only
|
||||
// the lowest element of the FMA*_Int instruction.
|
||||
let Constraints = "$src1 = $dst", isCommutable = 0, isCodeGenOnly =1,
|
||||
// the lowest element of the FMA*_Int instruction. Even though such analysis
|
||||
// may not be implemented yet, we allow the routines doing the actual commute
|
||||
// transformation to decide if one or another instruction is commutable or not.
|
||||
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
|
||||
hasSideEffects = 0 in
|
||||
multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
|
||||
Operand memopr, RegisterClass RC> {
|
||||
|
@ -2973,112 +2973,149 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
|
||||
/// Returns true if the given instruction opcode is FMA3.
|
||||
/// Otherwise, returns false.
|
||||
static bool isFMA3(unsigned Opcode) {
|
||||
/// The second parameter is optional and is used as the second return from
|
||||
/// the function. It is set to true if the given instruction has FMA3 opcode
|
||||
/// that is used for lowering of scalar FMA intrinsics, and it is set to false
|
||||
/// otherwise.
|
||||
static bool isFMA3(unsigned Opcode, bool *IsIntrinsic = nullptr) {
|
||||
if (IsIntrinsic)
|
||||
*IsIntrinsic = false;
|
||||
|
||||
switch (Opcode) {
|
||||
case X86::VFMADDSDr132r: case X86::VFMADDSDr132m:
|
||||
case X86::VFMADDSSr132r: case X86::VFMADDSSr132m:
|
||||
case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m:
|
||||
case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m:
|
||||
case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m:
|
||||
case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m:
|
||||
case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m:
|
||||
case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m:
|
||||
case X86::VFMADDSDr132r: case X86::VFMADDSDr132m:
|
||||
case X86::VFMADDSSr132r: case X86::VFMADDSSr132m:
|
||||
case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m:
|
||||
case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m:
|
||||
case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m:
|
||||
case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m:
|
||||
case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m:
|
||||
case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m:
|
||||
|
||||
case X86::VFMADDSDr213r: case X86::VFMADDSDr213m:
|
||||
case X86::VFMADDSSr213r: case X86::VFMADDSSr213m:
|
||||
case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m:
|
||||
case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m:
|
||||
case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m:
|
||||
case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m:
|
||||
case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m:
|
||||
case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m:
|
||||
case X86::VFMADDSDr213r: case X86::VFMADDSDr213m:
|
||||
case X86::VFMADDSSr213r: case X86::VFMADDSSr213m:
|
||||
case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m:
|
||||
case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m:
|
||||
case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m:
|
||||
case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m:
|
||||
case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m:
|
||||
case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m:
|
||||
|
||||
case X86::VFMADDSDr231r: case X86::VFMADDSDr231m:
|
||||
case X86::VFMADDSSr231r: case X86::VFMADDSSr231m:
|
||||
case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m:
|
||||
case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m:
|
||||
case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m:
|
||||
case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m:
|
||||
case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m:
|
||||
case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m:
|
||||
case X86::VFMADDSDr231r: case X86::VFMADDSDr231m:
|
||||
case X86::VFMADDSSr231r: case X86::VFMADDSSr231m:
|
||||
case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m:
|
||||
case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m:
|
||||
case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m:
|
||||
case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m:
|
||||
case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m:
|
||||
case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m:
|
||||
|
||||
case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m:
|
||||
case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m:
|
||||
case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m:
|
||||
case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m:
|
||||
case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY:
|
||||
case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY:
|
||||
case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY:
|
||||
case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY:
|
||||
case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m:
|
||||
case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m:
|
||||
case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m:
|
||||
case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m:
|
||||
case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY:
|
||||
case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY:
|
||||
case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY:
|
||||
case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY:
|
||||
|
||||
case X86::VFMADDPDr132r: case X86::VFMADDPDr132m:
|
||||
case X86::VFMADDPSr132r: case X86::VFMADDPSr132m:
|
||||
case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m:
|
||||
case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m:
|
||||
case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m:
|
||||
case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m:
|
||||
case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m:
|
||||
case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m:
|
||||
case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY:
|
||||
case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY:
|
||||
case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY:
|
||||
case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY:
|
||||
case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY:
|
||||
case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY:
|
||||
case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY:
|
||||
case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY:
|
||||
case X86::VFMADDPDr132r: case X86::VFMADDPDr132m:
|
||||
case X86::VFMADDPSr132r: case X86::VFMADDPSr132m:
|
||||
case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m:
|
||||
case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m:
|
||||
case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m:
|
||||
case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m:
|
||||
case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m:
|
||||
case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m:
|
||||
case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY:
|
||||
case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY:
|
||||
case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY:
|
||||
case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY:
|
||||
case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY:
|
||||
case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY:
|
||||
case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY:
|
||||
case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY:
|
||||
|
||||
case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m:
|
||||
case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m:
|
||||
case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m:
|
||||
case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m:
|
||||
case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY:
|
||||
case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY:
|
||||
case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY:
|
||||
case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY:
|
||||
case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m:
|
||||
case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m:
|
||||
case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m:
|
||||
case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m:
|
||||
case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY:
|
||||
case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY:
|
||||
case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY:
|
||||
case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY:
|
||||
|
||||
case X86::VFMADDPDr213r: case X86::VFMADDPDr213m:
|
||||
case X86::VFMADDPSr213r: case X86::VFMADDPSr213m:
|
||||
case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m:
|
||||
case X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m:
|
||||
case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m:
|
||||
case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m:
|
||||
case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m:
|
||||
case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m:
|
||||
case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY:
|
||||
case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY:
|
||||
case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY:
|
||||
case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY:
|
||||
case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY:
|
||||
case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY:
|
||||
case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY:
|
||||
case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY:
|
||||
case X86::VFMADDPDr213r: case X86::VFMADDPDr213m:
|
||||
case X86::VFMADDPSr213r: case X86::VFMADDPSr213m:
|
||||
case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m:
|
||||
case X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m:
|
||||
case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m:
|
||||
case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m:
|
||||
case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m:
|
||||
case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m:
|
||||
case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY:
|
||||
case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY:
|
||||
case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY:
|
||||
case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY:
|
||||
case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY:
|
||||
case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY:
|
||||
case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY:
|
||||
case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY:
|
||||
|
||||
case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m:
|
||||
case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m:
|
||||
case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m:
|
||||
case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m:
|
||||
case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY:
|
||||
case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY:
|
||||
case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY:
|
||||
case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY:
|
||||
case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m:
|
||||
case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m:
|
||||
case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m:
|
||||
case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m:
|
||||
case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY:
|
||||
case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY:
|
||||
case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY:
|
||||
case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY:
|
||||
|
||||
case X86::VFMADDPDr231r: case X86::VFMADDPDr231m:
|
||||
case X86::VFMADDPSr231r: case X86::VFMADDPSr231m:
|
||||
case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m:
|
||||
case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m:
|
||||
case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m:
|
||||
case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m:
|
||||
case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m:
|
||||
case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m:
|
||||
case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY:
|
||||
case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY:
|
||||
case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY:
|
||||
case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY:
|
||||
case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY:
|
||||
case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY:
|
||||
case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY:
|
||||
case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY:
|
||||
case X86::VFMADDPDr231r: case X86::VFMADDPDr231m:
|
||||
case X86::VFMADDPSr231r: case X86::VFMADDPSr231m:
|
||||
case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m:
|
||||
case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m:
|
||||
case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m:
|
||||
case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m:
|
||||
case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m:
|
||||
case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m:
|
||||
case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY:
|
||||
case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY:
|
||||
case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY:
|
||||
case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY:
|
||||
case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY:
|
||||
case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY:
|
||||
case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY:
|
||||
case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY:
|
||||
return true;
|
||||
|
||||
case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int:
|
||||
case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int:
|
||||
case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int:
|
||||
case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int:
|
||||
case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int:
|
||||
case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int:
|
||||
case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int:
|
||||
case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int:
|
||||
|
||||
case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int:
|
||||
case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int:
|
||||
case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int:
|
||||
case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int:
|
||||
case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int:
|
||||
case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int:
|
||||
case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int:
|
||||
case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int:
|
||||
|
||||
case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int:
|
||||
case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int:
|
||||
case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int:
|
||||
case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int:
|
||||
case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int:
|
||||
case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int:
|
||||
case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int:
|
||||
case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int:
|
||||
if (IsIntrinsic)
|
||||
*IsIntrinsic = true;
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -3378,7 +3415,7 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
|
||||
|
||||
// Define the array that holds FMA opcodes in groups
|
||||
// of 3 opcodes(132, 213, 231) in each group.
|
||||
static const unsigned OpcodeGroups[][3] = {
|
||||
static const unsigned RegularOpcodeGroups[][3] = {
|
||||
{ X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r },
|
||||
{ X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r },
|
||||
{ X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r },
|
||||
@ -3449,32 +3486,83 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
|
||||
{ X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY },
|
||||
{ X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY }
|
||||
};
|
||||
|
||||
// Define the array that holds FMA*_Int opcodes in groups
|
||||
// of 3 opcodes(132, 213, 231) in each group.
|
||||
static const unsigned IntrinOpcodeGroups[][3] = {
|
||||
{ X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int },
|
||||
{ X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int },
|
||||
{ X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int },
|
||||
{ X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int },
|
||||
|
||||
{ X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int },
|
||||
{ X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int },
|
||||
{ X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int },
|
||||
{ X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int },
|
||||
|
||||
{ X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int },
|
||||
{ X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int },
|
||||
{ X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int },
|
||||
{ X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int },
|
||||
|
||||
{ X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int },
|
||||
{ X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int },
|
||||
{ X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int },
|
||||
{ X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int },
|
||||
};
|
||||
|
||||
const unsigned Form132Index = 0;
|
||||
const unsigned Form213Index = 1;
|
||||
const unsigned Form231Index = 2;
|
||||
const unsigned FormsNum = 3;
|
||||
|
||||
// Look for the input opcode in the OpcodeGroups table.
|
||||
unsigned OpcodeGroupsNum = sizeof(OpcodeGroups) / sizeof(OpcodeGroups[0]);
|
||||
unsigned GroupIndex = 0, FormIndex = FormsNum;
|
||||
for (; GroupIndex < OpcodeGroupsNum && FormIndex == FormsNum; GroupIndex++) {
|
||||
bool IsIntrinOpcode;
|
||||
isFMA3(Opc, &IsIntrinOpcode);
|
||||
|
||||
unsigned GroupsNum;
|
||||
const unsigned (*OpcodeGroups)[3];
|
||||
if (IsIntrinOpcode) {
|
||||
GroupsNum = sizeof(IntrinOpcodeGroups) / sizeof(IntrinOpcodeGroups[0]);
|
||||
OpcodeGroups = IntrinOpcodeGroups;
|
||||
} else {
|
||||
GroupsNum = sizeof(RegularOpcodeGroups) / sizeof(RegularOpcodeGroups[0]);
|
||||
OpcodeGroups = RegularOpcodeGroups;
|
||||
}
|
||||
|
||||
const unsigned *FoundOpcodesGroup = nullptr;
|
||||
unsigned FormIndex;
|
||||
|
||||
// Look for the input opcode in the corresponding opcodes table.
|
||||
unsigned GroupIndex = 0;
|
||||
for (; GroupIndex < GroupsNum && !FoundOpcodesGroup; GroupIndex++) {
|
||||
for (FormIndex = 0; FormIndex < FormsNum; FormIndex++) {
|
||||
if (OpcodeGroups[GroupIndex][FormIndex] == Opc)
|
||||
if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
|
||||
FoundOpcodesGroup = OpcodeGroups[GroupIndex];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Input opcode does not match with any of the opcodes from the table.
|
||||
if (FormIndex == FormsNum)
|
||||
return 0;
|
||||
// Do not forget to fix the GroupIndex after the loop.
|
||||
GroupIndex--;
|
||||
|
||||
// The input opcode does not match any of the opcodes from the tables.
|
||||
// The unsupported FMA opcode must be added to one of the two opcode groups
|
||||
// defined above.
|
||||
assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode");
|
||||
|
||||
// Put the lowest index to SrcOpIdx1 to simplify the checks below.
|
||||
if (SrcOpIdx1 > SrcOpIdx2)
|
||||
std::swap(SrcOpIdx1, SrcOpIdx2);
|
||||
|
||||
// TODO: Commuting the 1st operand of FMA*_Int requires some additional
|
||||
// analysis. The commute optimization is legal only if all users of FMA*_Int
|
||||
// use only the lowest element of the FMA*_Int instruction. Such analysis are
|
||||
// not implemented yet. So, just return 0 in that case.
|
||||
// When such analysis are available this place will be the right place for
|
||||
// calling it.
|
||||
if (IsIntrinOpcode && SrcOpIdx1 == 1)
|
||||
return 0;
|
||||
|
||||
unsigned Case;
|
||||
if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2)
|
||||
if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2)
|
||||
Case = 0;
|
||||
else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3)
|
||||
Case = 1;
|
||||
@ -3506,7 +3594,7 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
|
||||
|
||||
// Everything is ready, just adjust the FMA opcode and return it.
|
||||
FormIndex = FormMapping[Case][FormIndex];
|
||||
return OpcodeGroups[GroupIndex][FormIndex];
|
||||
return FoundOpcodesGroup[FormIndex];
|
||||
}
|
||||
|
||||
bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI,
|
||||
|
@ -4,6 +4,38 @@
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_baa_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfmadd213ss %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_aba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfmadd132ss (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_bba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfmadd213ss (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_baa_ps:
|
||||
@ -66,6 +98,38 @@ define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_baa_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfmadd213sd %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_aba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_bba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmadd_baa_pd:
|
||||
@ -129,6 +193,37 @@ define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #
|
||||
}
|
||||
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_baa_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfnmadd213ss %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_aba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfnmadd132ss (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_bba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfnmadd213ss (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
||||
@ -192,6 +287,38 @@ define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_baa_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfnmadd213sd %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_aba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_bba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmadd_baa_pd:
|
||||
@ -255,6 +382,38 @@ define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b)
|
||||
}
|
||||
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_baa_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfmsub213ss %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_aba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfmsub132ss (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_bba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfmsub213ss (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_baa_ps:
|
||||
@ -317,6 +476,38 @@ define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_baa_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfmsub213sd %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_aba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_bba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fmsub_baa_pd:
|
||||
@ -380,6 +571,38 @@ define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #
|
||||
}
|
||||
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
; Scalar-single FNMSUB intrinsic invoked with operands (b, a, a).
; Expected code: two vector loads (each line's regex accepts either the
; (%rdx)->%xmm0 or the (%rcx)->%xmm1 load, so their order is not fixed),
; followed by the 213 form using register operands only, then the return.
; No memory operand is expected to be folded into the FMA in this case.
define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_baa_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfnmsub213ss %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Scalar-single FNMSUB intrinsic invoked with operands (a, b, a).
; Expected code: a single load from (%rcx) into %xmm0, then the 132 form with
; (%rdx) as a memory operand, i.e. one input is folded into the FMA instead of
; being loaded by a separate move.
define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_aba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfnmsub132ss (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Scalar-single FNMSUB intrinsic invoked with operands (b, b, a).
; Expected code: a single load from (%rdx) into %xmm0, then the 213 form with
; (%rcx) as a memory operand; the remaining input is folded into the FMA
; rather than loaded separately.
define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_bba_ss:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfnmsub213ss (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
|
||||
define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_baa_ps:
|
||||
@ -442,6 +665,38 @@ define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0
|
||||
ret <8 x float> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
; Scalar-double FNMSUB intrinsic invoked with operands (b, a, a).
; Expected code: two vector loads (each line's regex accepts either the
; (%rdx)->%xmm0 or the (%rcx)->%xmm1 load, so their order is not fixed),
; followed by the 213 form using register operands only, then the return.
; No memory operand is expected to be folded into the FMA in this case.
define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_baa_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}}
|
||||
; CHECK-NEXT: vfnmsub213sd %xmm1, %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Scalar-double FNMSUB intrinsic invoked with operands (a, b, a).
; Expected code: a single load from (%rcx) into %xmm0, then the 132 form with
; (%rdx) as a memory operand, i.e. one input is folded into the FMA instead of
; being loaded by a separate move.
define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_aba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rcx), %xmm0
|
||||
; CHECK-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Scalar-double FNMSUB intrinsic invoked with operands (b, b, a).
; Expected code: a single load from (%rdx) into %xmm0, then the 213 form with
; (%rcx) as a memory operand; the remaining input is folded into the FMA
; rather than loaded separately.
define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_bba_sd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps (%rdx), %xmm0
|
||||
; CHECK-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
|
||||
define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: test_x86_fnmsub_baa_pd:
|
||||
|
@ -10,9 +10,9 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -27,9 +27,9 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -45,9 +45,9 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -62,9 +62,9 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %
|
||||
; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -154,9 +154,9 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -171,9 +171,9 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1,
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -190,9 +190,9 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -207,9 +207,9 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %
|
||||
; CHECK-LABEL: test_x86_fma_vfmsub_bac_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -299,9 +299,9 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -316,9 +316,9 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -335,9 +335,9 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -352,9 +352,9 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double>
|
||||
; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -444,9 +444,9 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -461,9 +461,9 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
@ -480,9 +480,9 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1,
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rcx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0
|
||||
;
|
||||
@ -497,9 +497,9 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double>
|
||||
; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd:
|
||||
; CHECK-NEXT: # BB#0:
|
||||
;
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovaps (%{{(rcx|rdx)}}), %xmm{{0|1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vmovap{{s|d}} {{\(%rdx\), %xmm0|\(%r8\), %xmm1}}
|
||||
; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0
|
||||
;
|
||||
; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1
|
||||
; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0
|
||||
|
Loading…
Reference in New Issue
Block a user