[MIPS GlobalISel] Select MSA vector generic and builtin fabs

selectImpl is able to select G_FABS once the register bank for vector
operands is set to fprb. Add detailed tests.
Note: G_FABS is generated from the LLVM IR intrinsics llvm.fabs.*,
and at the moment MIPS is not able to generate this intrinsic for
vector types (some targets generate vector llvm.fabs.* from calls
to a builtin function).
We can handle fabs using __builtin_msa_fmax_a_<format> and passing
the same vector as both arguments. __builtin_msa_fmax_a_<format> will
be directly selected into FMAX_A_<format> in legalizeIntrinsic.

Differential Revision: https://reviews.llvm.org/D69346
This commit is contained in:
Petar Avramovic 2019-10-24 13:45:26 +02:00
parent 1ae8e8d25f
commit e3b49df50e
8 changed files with 309 additions and 3 deletions

View File

@ -188,10 +188,10 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
getActionDefinitionsBuilder(G_FCONSTANT)
.legalFor({s32, s64});
getActionDefinitionsBuilder({G_FABS, G_FSQRT})
getActionDefinitionsBuilder(G_FSQRT)
.legalFor({s32, s64});
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FABS})
.legalIf([=, &ST](const LegalityQuery &Query) {
if (CheckTyN(0, Query, {s32, s64}))
return true;
@ -425,6 +425,10 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
case Intrinsic::mips_fdiv_w:
case Intrinsic::mips_fdiv_d:
return MSA3OpIntrinsicToGeneric(MI, TargetOpcode::G_FDIV, MIRBuilder, ST);
case Intrinsic::mips_fmax_a_w:
return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_W, MIRBuilder, ST);
case Intrinsic::mips_fmax_a_d:
return SelectMSA3OpIntrinsic(MI, Mips::FMAX_A_D, MIRBuilder, ST);
default:
break;
}

View File

@ -539,7 +539,6 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
&Mips::ValueMappings[Mips::GPRIdx]});
MappingID = CustomMappingID;
break;
case G_FABS:
case G_FSQRT:
OperandsMapping = getFprbMapping(Op0Size);
break;
@ -547,6 +546,7 @@ MipsRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case G_FSUB:
case G_FMUL:
case G_FDIV:
case G_FABS:
OperandsMapping = getFprbMapping(Op0Size);
if (Op0Size == 128)
OperandsMapping = getMSAMapping(MF);

View File

@ -0,0 +1,60 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
--- |
define void @fabs_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
define void @fabs_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
...
---
# Instruction selection for G_FABS on <4 x s32>. The expected result is
# FMAX_A_W with the loaded vector used as both source operands (fmax_a of a
# vector with itself is how the absolute value is computed here); the load and
# store are expected to select to LD_W / ST_W on msa128w registers.
name: fabs_v4f32
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v4f32
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
; P5600: [[LD_W:%[0-9]+]]:msa128w = LD_W [[COPY]], 0 :: (load 16 from %ir.a)
; P5600: [[FMAX_A_W:%[0-9]+]]:msa128w = FMAX_A_W [[LD_W]], [[LD_W]]
; P5600: ST_W [[FMAX_A_W]], [[COPY1]], 0 :: (store 16 into %ir.c)
; P5600: RetRA
%0:gprb(p0) = COPY $a0
%1:gprb(p0) = COPY $a1
%2:fprb(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:fprb(<4 x s32>) = G_FABS %2
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
RetRA
...
---
# Instruction selection for G_FABS on <2 x s64>. The expected result is
# FMAX_A_D with the loaded vector used as both source operands (fmax_a of a
# vector with itself is how the absolute value is computed here); the load and
# store are expected to select to LD_D / ST_D on msa128d registers.
name: fabs_v2f64
alignment: 4
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v2f64
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:gpr32 = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:gpr32 = COPY $a1
; P5600: [[LD_D:%[0-9]+]]:msa128d = LD_D [[COPY]], 0 :: (load 16 from %ir.a)
; P5600: [[FMAX_A_D:%[0-9]+]]:msa128d = FMAX_A_D [[LD_D]], [[LD_D]]
; P5600: ST_D [[FMAX_A_D]], [[COPY1]], 0 :: (store 16 into %ir.c)
; P5600: RetRA
%0:gprb(p0) = COPY $a0
%1:gprb(p0) = COPY $a1
%2:fprb(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:fprb(<2 x s64>) = G_FABS %2
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
RetRA
...

View File

@ -0,0 +1,56 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
--- |
define void @fabs_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
define void @fabs_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
...
---
# Legalizer check for G_FABS on <4 x s32> with MSA enabled: the expected
# output is identical to the input, i.e. the instruction is left as-is.
name: fabs_v4f32
alignment: 4
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v4f32
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
; P5600: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
; P5600: [[FABS:%[0-9]+]]:_(<4 x s32>) = G_FABS [[LOAD]]
; P5600: G_STORE [[FABS]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c)
; P5600: RetRA
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:_(<4 x s32>) = G_FABS %2
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
RetRA
...
---
# Legalizer check for G_FABS on <2 x s64> with MSA enabled: the expected
# output is identical to the input, i.e. the instruction is left as-is.
name: fabs_v2f64
alignment: 4
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v2f64
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
; P5600: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
; P5600: [[FABS:%[0-9]+]]:_(<2 x s64>) = G_FABS [[LOAD]]
; P5600: G_STORE [[FABS]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c)
; P5600: RetRA
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:_(<2 x s64>) = G_FABS %2
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
RetRA
...

View File

@ -0,0 +1,59 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
--- |
declare <4 x float> @llvm.mips.fmax.a.w(<4 x float>, <4 x float>)
define void @fabs_v4f32_builtin(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
declare <2 x double> @llvm.mips.fmax.a.d(<2 x double>, <2 x double>)
define void @fabs_v2f64_builtin(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
...
---
# Legalizer check for the llvm.mips.fmax.a.w intrinsic called with the same
# <4 x s32> vector for both operands: the G_INTRINSIC is expected to be
# replaced by the FMAX_A_W machine instruction, and the vector registers it
# uses are expected to carry the msa128w register class.
name: fabs_v4f32_builtin
alignment: 4
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v4f32_builtin
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
; P5600: [[LOAD:%[0-9]+]]:msa128w(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
; P5600: [[FMAX_A_W:%[0-9]+]]:msa128w(<4 x s32>) = FMAX_A_W [[LOAD]](<4 x s32>), [[LOAD]](<4 x s32>)
; P5600: G_STORE [[FMAX_A_W]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c)
; P5600: RetRA
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:_(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.mips.fmax.a.w), %2(<4 x s32>), %2(<4 x s32>)
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
RetRA
...
---
# Legalizer check for the llvm.mips.fmax.a.d intrinsic called with the same
# <2 x s64> vector for both operands: the G_INTRINSIC is expected to be
# replaced by the FMAX_A_D machine instruction, and the vector registers it
# uses are expected to carry the msa128d register class.
name: fabs_v2f64_builtin
alignment: 4
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v2f64_builtin
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:_(p0) = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1
; P5600: [[LOAD:%[0-9]+]]:msa128d(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
; P5600: [[FMAX_A_D:%[0-9]+]]:msa128d(<2 x s64>) = FMAX_A_D [[LOAD]](<2 x s64>), [[LOAD]](<2 x s64>)
; P5600: G_STORE [[FMAX_A_D]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c)
; P5600: RetRA
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:_(<2 x s64>) = G_INTRINSIC intrinsic(@llvm.mips.fmax.a.d), %2(<2 x s64>), %2(<2 x s64>)
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
RetRA
...

View File

@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %Val)
; End-to-end GlobalISel check for the generic llvm.fabs.v4f32 intrinsic: the
; expected assembly is a vector load, one fmax_a.w whose two source operands
; are the loaded register, and a vector store.
define void @fabs_v4f32(<4 x float>* %a, <4 x float>* %c) {
; P5600-LABEL: fabs_v4f32:
; P5600: # %bb.0: # %entry
; P5600-NEXT: ld.w $w0, 0($4)
; P5600-NEXT: fmax_a.w $w0, $w0, $w0
; P5600-NEXT: st.w $w0, 0($5)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
entry:
%0 = load <4 x float>, <4 x float>* %a, align 16
%fabs = call <4 x float> @llvm.fabs.v4f32 (<4 x float> %0)
store <4 x float> %fabs, <4 x float>* %c, align 16
ret void
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
; End-to-end GlobalISel check for the generic llvm.fabs.v2f64 intrinsic: the
; expected assembly is a vector load, one fmax_a.d whose two source operands
; are the loaded register, and a vector store.
define void @fabs_v2f64(<2 x double>* %a, <2 x double>* %c) {
; P5600-LABEL: fabs_v2f64:
; P5600: # %bb.0: # %entry
; P5600-NEXT: ld.d $w0, 0($4)
; P5600-NEXT: fmax_a.d $w0, $w0, $w0
; P5600-NEXT: st.d $w0, 0($5)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
entry:
%0 = load <2 x double>, <2 x double>* %a, align 16
%fabs = call <2 x double> @llvm.fabs.v2f64 (<2 x double> %0)
store <2 x double> %fabs, <2 x double>* %c, align 16
ret void
}

View File

@ -0,0 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=P5600
declare <4 x float> @llvm.mips.fmax.a.w(<4 x float>, <4 x float>)
; End-to-end GlobalISel check for llvm.mips.fmax.a.w called with the same
; vector as both arguments: the expected assembly is a vector load, one
; fmax_a.w on the loaded register, and a vector store.
define void @fabs_v4f32_builtin(<4 x float>* %a, <4 x float>* %c) {
; P5600-LABEL: fabs_v4f32_builtin:
; P5600: # %bb.0: # %entry
; P5600-NEXT: ld.w $w0, 0($4)
; P5600-NEXT: fmax_a.w $w0, $w0, $w0
; P5600-NEXT: st.w $w0, 0($5)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
entry:
%0 = load <4 x float>, <4 x float>* %a, align 16
%1 = tail call <4 x float> @llvm.mips.fmax.a.w(<4 x float> %0, <4 x float> %0)
store <4 x float> %1, <4 x float>* %c, align 16
ret void
}
declare <2 x double> @llvm.mips.fmax.a.d(<2 x double>, <2 x double>)
; End-to-end GlobalISel check for llvm.mips.fmax.a.d called with the same
; vector as both arguments: the expected assembly is a vector load, one
; fmax_a.d on the loaded register, and a vector store.
define void @fabs_v2f64_builtin(<2 x double>* %a, <2 x double>* %c) {
; P5600-LABEL: fabs_v2f64_builtin:
; P5600: # %bb.0: # %entry
; P5600-NEXT: ld.d $w0, 0($4)
; P5600-NEXT: fmax_a.d $w0, $w0, $w0
; P5600-NEXT: st.d $w0, 0($5)
; P5600-NEXT: jr $ra
; P5600-NEXT: nop
entry:
%0 = load <2 x double>, <2 x double>* %a, align 16
%1 = tail call <2 x double> @llvm.mips.fmax.a.d(<2 x double> %0, <2 x double> %0)
store <2 x double> %1, <2 x double>* %c, align 16
ret void
}

View File

@ -0,0 +1,58 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=mipsel-linux-gnu -mcpu=mips32r5 -mattr=+msa,+fp64,+nan2008 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=P5600
--- |
define void @fabs_v4f32(<4 x float>* %a, <4 x float>* %c) { entry: ret void }
define void @fabs_v2f64(<2 x double>* %a, <2 x double>* %c) { entry: ret void }
...
---
# Register bank selection check for G_FABS on <4 x s32>: the pointer copies
# are expected on gprb, and both the vector load result and the G_FABS result
# are expected on fprb.
name: fabs_v4f32
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v4f32
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
; P5600: [[LOAD:%[0-9]+]]:fprb(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
; P5600: [[FABS:%[0-9]+]]:fprb(<4 x s32>) = G_FABS [[LOAD]]
; P5600: G_STORE [[FABS]](<4 x s32>), [[COPY1]](p0) :: (store 16 into %ir.c)
; P5600: RetRA
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(<4 x s32>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:_(<4 x s32>) = G_FABS %2
G_STORE %3(<4 x s32>), %1(p0) :: (store 16 into %ir.c)
RetRA
...
---
# Register bank selection check for G_FABS on <2 x s64>: the pointer copies
# are expected on gprb, and both the vector load result and the G_FABS result
# are expected on fprb.
name: fabs_v2f64
alignment: 4
legalized: true
tracksRegLiveness: true
body: |
bb.1.entry:
liveins: $a0, $a1
; P5600-LABEL: name: fabs_v2f64
; P5600: liveins: $a0, $a1
; P5600: [[COPY:%[0-9]+]]:gprb(p0) = COPY $a0
; P5600: [[COPY1:%[0-9]+]]:gprb(p0) = COPY $a1
; P5600: [[LOAD:%[0-9]+]]:fprb(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load 16 from %ir.a)
; P5600: [[FABS:%[0-9]+]]:fprb(<2 x s64>) = G_FABS [[LOAD]]
; P5600: G_STORE [[FABS]](<2 x s64>), [[COPY1]](p0) :: (store 16 into %ir.c)
; P5600: RetRA
%0:_(p0) = COPY $a0
%1:_(p0) = COPY $a1
%2:_(<2 x s64>) = G_LOAD %0(p0) :: (load 16 from %ir.a)
%3:_(<2 x s64>) = G_FABS %2
G_STORE %3(<2 x s64>), %1(p0) :: (store 16 into %ir.c)
RetRA
...