Add support for VSX FMA single-precision instructions to the PPC back end

This patch corresponds to review: http://reviews.llvm.org/D9941 It adds the various FMA instructions introduced in the version 2.07 of the ISA along with the testing for them. These are operations on single precision scalar values in VSX registers. llvm-svn: 238578
2024-12-12 05:56:28 +00:00 · 2015-05-29 17:13:25 +00:00 · 2015-05-29 17:13:25 +00:00 · d9fdd9c5e6
commit d9fdd9c5e6
parent 63329afd38
6 changed files with 392 additions and 9 deletions
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@ -148,6 +148,9 @@ class PPCFastISel final : public FastISel {
    bool isVSFRCRegister(unsigned Register) const {
      return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
    }
+    bool isVSSRCRegister(unsigned Register) const {
+      return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
+    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt, unsigned DestReg);
    bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
@ -503,8 +506,11 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
+  bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
  bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
-  if (IsVSFRC && (Opc == PPC::LFD) && 
+  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
+  bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
+  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
@ -518,7 +524,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
-    if (IsVSFRC && Opc == PPC::LFD) return false;
+    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
@ -532,7 +538,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
-    if (IsVSFRC && Opc == PPC::LFD) return false;
+    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);
@ -555,7 +561,7 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
      case PPC::LWA:    Opc = PPC::LWAX;    break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD:     Opc = PPC::LDX;     break;
-      case PPC::LFS:    Opc = PPC::LFSX;    break;
+      case PPC::LFS:    Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD:    Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
@ -636,9 +642,12 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
+  bool IsVSSRC = isVSSRCRegister(SrcReg);
  bool IsVSFRC = isVSFRCRegister(SrcReg);
-  if (IsVSFRC && (Opc == PPC::STFD) && 
-      (Addr.BaseType != Address::FrameIndexBase) && UseOffset && 
+  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
+  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
+  if ((Is32VSXStore || Is64VSXStore) &&
+      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }
@ -648,7 +657,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
-    if (IsVSFRC && Opc == PPC::STFD) return false;
+    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
@ -665,7 +674,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
-    if (IsVSFRC && Opc == PPC::STFD) return false;
+    if (Is32VSXStore || Is64VSXStore) return false;
    
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
@ -684,7 +693,7 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD:  Opc = PPC::STDX;  break;
-      case PPC::STFS: Opc = PPC::STFSX; break;
+      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    }

--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@ -1078,6 +1078,82 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
+
+  // FMA Instructions
+  let BaseName = "XSMADDASP" in {
+  let isCommutable = 1 in
+  def XSMADDASP : XX3Form<60, 1,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSMADDMSP : XX3Form<60, 9,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
+
+  let BaseName = "XSMSUBASP" in {
+  let isCommutable = 1 in
+  def XSMSUBASP : XX3Form<60, 17,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fma f32:$XA, f32:$XB,
+                                              (fneg f32:$XTi)))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSMSUBMSP : XX3Form<60, 25,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
+
+  let BaseName = "XSNMADDASP" in {
+  let isCommutable = 1 in
+  def XSNMADDASP : XX3Form<60, 129,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                                                    f32:$XTi)))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSNMADDMSP : XX3Form<60, 137,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
+
+  let BaseName = "XSNMSUBASP" in {
+  let isCommutable = 1 in
+  def XSNMSUBASP : XX3Form<60, 145,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+                                                    (fneg f32:$XTi))))]>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  let IsVSXFMAAlt = 1 in
+  def XSNMSUBMSP : XX3Form<60, 153,
+                          (outs vssrc:$XT),
+                          (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+                          "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+                          RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+                          AltVSXFMARel;
+  }
 } // AddedComplexity = 400
 } // HasP8Vector

--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@ -1,9 +1,12 @@
 ; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s

 declare double @dummy1(double) #0
 declare double @dummy2(double, double) #0
 declare double @dummy3(double, double, double) #0
+declare float @dummy4(float, float) #0

 define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
@ -126,3 +129,83 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
 ; CHECK-VSX: fnmsubs
 ; CHECK-VSX-NEXT: blr
 }
+
+define float @test_XSMADDMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fadd float %C, %D		; <float> [#uses=1]
+	ret float %E
+; CHECK-P8-LABEL: test_XSMADDMSP:
+; CHECK-P8: xsmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMSUBMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	ret float %E
+; CHECK-P8-LABEL: test_XSMSUBMSP:
+; CHECK-P8: xsmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMADDASP(float %A, float %B, float %C, float %D) {
+	%E = fmul float %A, %B 	; <float> [#uses=2]
+	%F = fadd float %E, %C 	; <float> [#uses=1]
+	%G = fsub float %E, %D 	; <float> [#uses=1]
+	%H = call float @dummy4(float %F, float %G)      ; <float> [#uses=1]
+	ret float %H
+; CHECK-P8-LABEL: test_XSMADDASP:
+; CHECK-P8: xsmaddasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSMSUBASP(float %A, float %B, float %C, float %D) {
+	%E = fmul float %A, %B 	; <float> [#uses=2]
+	%F = fsub float %E, %C 	; <float> [#uses=1]
+	%G = fsub float %E, %D 	; <float> [#uses=1]
+	%H = call float @dummy4(float %F, float %G)      ; <float> [#uses=1]
+	ret float %H
+; CHECK-P8-LABEL: test_XSMSUBASP:
+; CHECK-P8: xsmsubasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSNMADDMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fadd float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMADDMSP:
+; CHECK-P8: xsnmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMSUBMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBMSP:
+; CHECK-P8: xsnmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMADDASP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fadd float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	%H = call float @dummy4(float %E, float %F)      ; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMADDASP:
+; CHECK-P8: xsnmaddasp
+}
+
+define float @test_XSNMSUBASP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	%H = call float @dummy4(float %E, float %F)      ; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBASP:
+; CHECK-P8: xsnmsubasp
+}
--- a/test/CodeGen/PowerPC/vsx-fma-sp.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-sp.ll
@ -0,0 +1,167 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
+define void @test1sp(float %a, float %b, float %c, float %e, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  ret void
+
+; CHECK-LABEL: @test1sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 4
+; CHECK-DAG: stxsspx 3, 0, 7
+; CHECK-DAG: stxsspx 1, 7, [[C1]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test1sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 7
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 1, 7, [[C1]]
+; CHECK-FISL: blr
+}
+
+define void @test2sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  %2 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+  store float %2, float* %arrayidx2, align 4
+  ret void
+
+; CHECK-LABEL: @test2sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; CHECK-DAG: stxsspx 3, 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test2sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+define void @test3sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 3
+  store float %2, float* %arrayidx1, align 4
+  %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+  store float %3, float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx3, align 4
+  ret void
+
+; CHECK-LABEL: @test3sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 12
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: li [[C3:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+
+; CHECK-DAG: xsmaddmsp 3, 2, 4
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 3, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test3sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: fmr 4, [[F1]]
+; CHECK-FISL-DAG: xsmaddasp 4, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL: blr
+}
+
+define void @test4sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+  %arrayidx3 = getelementptr inbounds float, float* %d, i64 3
+  store float %2, float* %arrayidx3, align 4
+  %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx4 = getelementptr inbounds float, float* %d, i64 2
+  store float %3, float* %arrayidx4, align 4
+  ret void
+
+; CHECK-LABEL: @test4sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 12
+; CHECK-DAG: xsmaddasp 4, 2, 3
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test4sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr [[F1]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+declare float @llvm.fma.f32(float, float, float) #0
--- a/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/test/MC/Disassembler/PowerPC/vsx.txt
@ -90,6 +90,12 @@
 # CHECK: xsmaddmdp 7, 63, 27
 0xf0 0xff 0xd9 0x4c

+# CHECK: xsmaddasp 7, 63, 27
+0xf0 0xff 0xd8 0x0c
+
+# CHECK: xsmaddmsp 7, 63, 27
+0xf0 0xff 0xd8 0x4c
+
 # CHECK: xsmaxdp 7, 63, 27
 0xf0 0xff 0xdd 0x04

@ -102,6 +108,12 @@
 # CHECK: xsmsubmdp 7, 63, 27
 0xf0 0xff 0xd9 0xcc

+# CHECK: xsmsubasp 7, 63, 27
+0xf0 0xff 0xd8 0x8c
+
+# CHECK: xsmsubmsp 7, 63, 27
+0xf0 0xff 0xd8 0xcc
+
 # CHECK: xsmulsp 7, 63, 27
 0xf0 0xff 0xd8 0x84

@ -126,6 +138,18 @@
 # CHECK: xsnmsubmdp 7, 63, 27
 0xf0 0xff 0xdd 0xcc

+# CHECK: xsnmaddasp 7, 63, 27
+0xf0 0xff 0xdc 0x0c
+
+# CHECK: xsnmaddmsp 7, 63, 27
+0xf0 0xff 0xdc 0x4c
+
+# CHECK: xsnmsubasp 7, 63, 27
+0xf0 0xff 0xdc 0x8c
+
+# CHECK: xsnmsubmsp 7, 63, 27
+0xf0 0xff 0xdc 0xcc
+
 # CHECK: xsrdpi 7, 27
 0xf0 0xe0 0xd9 0x24

--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@ -95,6 +95,12 @@
 # CHECK-BE: xsmaddmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0x4c]
 # CHECK-LE: xsmaddmdp 7, 63, 27                # encoding: [0x4c,0xd9,0xff,0xf0]
            xsmaddmdp 7, 63, 27
+# CHECK-BE: xsmaddasp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x0c]
+# CHECK-LE: xsmaddasp 7, 63, 27                # encoding: [0x0c,0xd8,0xff,0xf0]
+            xsmaddasp 7, 63, 27
+# CHECK-BE: xsmaddmsp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x4c]
+# CHECK-LE: xsmaddmsp 7, 63, 27                # encoding: [0x4c,0xd8,0xff,0xf0]
+            xsmaddmsp 7, 63, 27
 # CHECK-BE: xsmaxdp 7, 63, 27                  # encoding: [0xf0,0xff,0xdd,0x04]
 # CHECK-LE: xsmaxdp 7, 63, 27                  # encoding: [0x04,0xdd,0xff,0xf0]
            xsmaxdp 7, 63, 27
@ -107,6 +113,12 @@
 # CHECK-BE: xsmsubmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0xcc]
 # CHECK-LE: xsmsubmdp 7, 63, 27                # encoding: [0xcc,0xd9,0xff,0xf0]
            xsmsubmdp 7, 63, 27
+# CHECK-BE: xsmsubasp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x8c]
+# CHECK-LE: xsmsubasp 7, 63, 27                # encoding: [0x8c,0xd8,0xff,0xf0]
+            xsmsubasp 7, 63, 27
+# CHECK-BE: xsmsubmsp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0xcc]
+# CHECK-LE: xsmsubmsp 7, 63, 27                # encoding: [0xcc,0xd8,0xff,0xf0]
+            xsmsubmsp 7, 63, 27
 # CHECK-BE: xsmulsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0x84]
 # CHECK-LE: xsmulsp 7, 63, 27                  # encoding: [0x84,0xd8,0xff,0xf0]
            xsmulsp 7, 63, 27
@ -131,6 +143,18 @@
 # CHECK-BE: xsnmsubmdp 7, 63, 27               # encoding: [0xf0,0xff,0xdd,0xcc]
 # CHECK-LE: xsnmsubmdp 7, 63, 27               # encoding: [0xcc,0xdd,0xff,0xf0]
            xsnmsubmdp 7, 63, 27
+# CHECK-BE: xsnmaddasp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x0c]
+# CHECK-LE: xsnmaddasp 7, 63, 27               # encoding: [0x0c,0xdc,0xff,0xf0]
+            xsnmaddasp 7, 63, 27
+# CHECK-BE: xsnmaddmsp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x4c]
+# CHECK-LE: xsnmaddmsp 7, 63, 27               # encoding: [0x4c,0xdc,0xff,0xf0]
+            xsnmaddmsp 7, 63, 27
+# CHECK-BE: xsnmsubasp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x8c]
+# CHECK-LE: xsnmsubasp 7, 63, 27               # encoding: [0x8c,0xdc,0xff,0xf0]
+            xsnmsubasp 7, 63, 27
+# CHECK-BE: xsnmsubmsp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0xcc]
+# CHECK-LE: xsnmsubmsp 7, 63, 27               # encoding: [0xcc,0xdc,0xff,0xf0]
+            xsnmsubmsp 7, 63, 27
 # CHECK-BE: xsrdpi 7, 27                       # encoding: [0xf0,0xe0,0xd9,0x24]
 # CHECK-LE: xsrdpi 7, 27                       # encoding: [0x24,0xd9,0xe0,0xf0]
            xsrdpi 7, 27