mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 06:10:12 +00:00
[AMDGPU] Shrink F16 MAD/FMA to MADAK/MADMK/FMAAK/FMAMK on GFX10
Differential Revision: https://reviews.llvm.org/D125803
This commit is contained in:
parent
7d8ec4dc44
commit
dd12c3433e
@ -359,6 +359,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
|
||||
case AMDGPU::V_FMA_F32_e64:
|
||||
NewOpcode = AMDGPU::V_FMAAK_F32;
|
||||
break;
|
||||
case AMDGPU::V_MAD_F16_e64:
|
||||
NewOpcode = AMDGPU::V_MADAK_F16;
|
||||
break;
|
||||
case AMDGPU::V_FMA_F16_e64:
|
||||
NewOpcode = AMDGPU::V_FMAAK_F16;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -380,6 +386,12 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
|
||||
case AMDGPU::V_FMA_F32_e64:
|
||||
NewOpcode = AMDGPU::V_FMAMK_F32;
|
||||
break;
|
||||
case AMDGPU::V_MAD_F16_e64:
|
||||
NewOpcode = AMDGPU::V_MADMK_F16;
|
||||
break;
|
||||
case AMDGPU::V_FMA_F16_e64:
|
||||
NewOpcode = AMDGPU::V_FMAMK_F16;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -806,9 +818,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (!TII->isVOP3(MI))
|
||||
continue;
|
||||
|
||||
- // TODO: Also shrink F16 forms.
|
||||
if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
|
||||
- MI.getOpcode() == AMDGPU::V_FMA_F32_e64) {
|
||||
+ MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
|
||||
+ MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
|
||||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_e64) {
|
||||
shrinkMadFma(MI);
|
||||
continue;
|
||||
}
|
||||
|
@ -128,8 +128,8 @@ body: |
|
||||
; GFX10-LABEL: name: mad_cvv_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_MAD_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
@ -143,8 +143,8 @@ body: |
|
||||
; GFX10-LABEL: name: mad_vcv_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = V_MADMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADMK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
@ -158,8 +158,8 @@ body: |
|
||||
; GFX10-LABEL: name: mad_vvc_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
@ -173,8 +173,8 @@ body: |
|
||||
; GFX10-LABEL: name: mad_vsc_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_MAD_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAD_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_MAD_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = V_MADAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_MADAK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:sreg_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_MAD_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
@ -188,8 +188,8 @@ body: |
|
||||
; GFX10-LABEL: name: fma_cvv_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, [[DEF]], 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_FMA_F16_e64 0, 18688, 0, %0, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
@ -203,8 +203,8 @@ body: |
|
||||
; GFX10-LABEL: name: fma_vcv_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, 18688, 0, [[DEF1]], 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = V_FMAMK_F16 [[DEF]], 18688, [[DEF1]], implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAMK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, 18688, 0, %1, 0, 0, implicit $mode, implicit $exec
|
||||
@ -218,8 +218,8 @@ body: |
|
||||
; GFX10-LABEL: name: fma_vvc_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF]], [[DEF1]], 18688, implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:vgpr_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
@ -233,8 +233,8 @@ body: |
|
||||
; GFX10-LABEL: name: fma_vsc_f16
|
||||
; GFX10: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
|
||||
- ; GFX10-NEXT: [[V_FMA_F16_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_e64 0, [[DEF]], 0, [[DEF1]], 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
- ; GFX10-NEXT: SI_RETURN implicit [[V_FMA_F16_e64_]]
|
||||
+ ; GFX10-NEXT: [[V_FMAAK_F16_:%[0-9]+]]:vgpr_32 = V_FMAAK_F16 [[DEF1]], [[DEF]], 18688, implicit $mode, implicit $exec
|
||||
+ ; GFX10-NEXT: SI_RETURN implicit [[V_FMAAK_F16_]]
|
||||
%0:vgpr_32 = IMPLICIT_DEF
|
||||
%1:sreg_32 = IMPLICIT_DEF
|
||||
%2:vgpr_32 = V_FMA_F16_e64 0, %0, 0, %1, 0, 18688, 0, 0, implicit $mode, implicit $exec
|
||||
|
Loading…
Reference in New Issue
Block a user