[AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10

Differential Revision: https://reviews.llvm.org/D125803
This commit is contained in:
Jay Foad 2022-05-17 16:54:13 +01:00
parent 7d8ec4dc44
commit dd12c3433e
2 changed files with 31 additions and 18 deletions

View File

@ -359,6 +359,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F32_e64:
NewOpcode = AMDGPU::V_FMAAK_F32;
break;
case AMDGPU::V_MAD_F16_e64:
NewOpcode = AMDGPU::V_MADAK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
NewOpcode = AMDGPU::V_FMAAK_F16;
break;
}
}
@ -380,6 +386,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F32_e64:
NewOpcode = AMDGPU::V_FMAMK_F32;
break;
case AMDGPU::V_MAD_F16_e64:
NewOpcode = AMDGPU::V_MADMK_F16;
break;
case AMDGPU::V_FMA_F16_e64:
NewOpcode = AMDGPU::V_FMAMK_F16;
break;
}
}
@ -806,9 +818,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (!TII->isVOP3(MI))
continue;
// TODO: Also shrink F16 forms.
if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F32_e64) {
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
shrinkMadFma(MI);
continue;
}

View File

@ -128,8 +128,8 @@ body: |
; GFX10-LABEL: name: mad_cvv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
@ -143,8 +143,8 @@ body: |
; GFX10-LABEL: name: mad_vcv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
@ -158,8 +158,8 @@ body: |
; GFX10-LABEL: name: mad_vvc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@ -173,8 +173,8 @@ body: |
; GFX10-LABEL: name: mad_vsc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@ -188,8 +188,8 @@ body: |
; GFX10-LABEL: name: fma_cvv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
@ -203,8 +203,8 @@ body: |
; GFX10-LABEL: name: fma_vcv_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
@ -218,8 +218,8 @@ body: |
; GFX10-LABEL: name: fma_vvc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
@ -233,8 +233,8 @@ body: |
; GFX10-LABEL: name: fma_vsc_f16
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
%0:vgpr_32 = IMPLICIT_DEF
%1:sreg_32 = IMPLICIT_DEF
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec