From dd12c3433ee9b4ef15c633bd325ab5a0c9c5e03b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 17 May 2022 16:54:13 +0100 Subject: [PATCH] [AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10 Differential Revision: https://reviews.llvm.org/D125803 --- .../Target/AMDGPU/SIShrinkInstructions.cpp | 17 ++++++++-- .../CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir | 32 +++++++++---------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 9f00c78b256e..d2a8cf7945a0 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -359,6 +359,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { case AMDGPU::V_FMA_F32_e64: NewOpcode = AMDGPU::V_FMAAK_F32; break; + case AMDGPU::V_MAD_F16_e64: + NewOpcode = AMDGPU::V_MADAK_F16; + break; + case AMDGPU::V_FMA_F16_e64: + NewOpcode = AMDGPU::V_FMAAK_F16; + break; } } @@ -380,6 +386,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { case AMDGPU::V_FMA_F32_e64: NewOpcode = AMDGPU::V_FMAMK_F32; break; + case AMDGPU::V_MAD_F16_e64: + NewOpcode = AMDGPU::V_MADMK_F16; + break; + case AMDGPU::V_FMA_F16_e64: + NewOpcode = AMDGPU::V_FMAMK_F16; + break; } } @@ -806,9 +818,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (!TII->isVOP3(MI)) continue; - // TODO: Also shrink F16 forms. if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 || - MI.getOpcode() == AMDGPU::V_FMA_F32_e64) { + MI.getOpcode() == AMDGPU::V_FMA_F32_e64 || + MI.getOpcode() == AMDGPU::V_MAD_F16_e64 || + MI.getOpcode() == AMDGPU::V_FMA_F16_e64) { shrinkMadFma(MI); continue; } diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir index 8150769ef40b..198c5cb82a61 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-shrink-mad-fma.mir @@ -128,8 +128,8 @@ body: | ; GFX10-LABEL: name: mad_cvv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -143,8 +143,8 @@ body: | ; GFX10-LABEL: name: mad_vcv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -158,8 +158,8 @@ body: | ; GFX10-LABEL: name: mad_vvc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -173,8 +173,8 @@ body: | ; GFX10-LABEL: name: mad_vsc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]] + ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -188,8 +188,8 @@ body: | ; GFX10-LABEL: name: fma_cvv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -203,8 +203,8 @@ body: | ; GFX10-LABEL: name: fma_vcv_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec @@ -218,8 +218,8 @@ body: | ; GFX10-LABEL: name: fma_vvc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:vgpr_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec @@ -233,8 +233,8 @@ body: | ; GFX10-LABEL: name: fma_vsc_f16 ; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]] + ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]] %0:vgpr_32 = IMPLICIT_DEF %1:sreg_32 = IMPLICIT_DEF %2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec