mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-30 23:00:36 +00:00
parent
7f0d01b1a0
commit
f54424411f
@ -298,6 +298,19 @@ public:
|
||||
bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
|
||||
bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
|
||||
|
||||
/// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
|
||||
bool matchCombineTruncOfExt(MachineInstr &MI,
|
||||
std::pair<Register, unsigned> &MatchInfo);
|
||||
bool applyCombineTruncOfExt(MachineInstr &MI,
|
||||
std::pair<Register, unsigned> &MatchInfo);
|
||||
|
||||
/// Transform trunc (shl x, K) to shl (trunc x),
|
||||
/// K if K < VT.getScalarSizeInBits().
|
||||
bool matchCombineTruncOfShl(MachineInstr &MI,
|
||||
std::pair<Register, Register> &MatchInfo);
|
||||
bool applyCombineTruncOfShl(MachineInstr &MI,
|
||||
std::pair<Register, Register> &MatchInfo);
|
||||
|
||||
/// Return true if any explicit use operand on \p MI is defined by a
|
||||
/// G_IMPLICIT_DEF.
|
||||
bool matchAnyExplicitUseIsUndef(MachineInstr &MI);
|
||||
|
@ -202,7 +202,7 @@ def binop_left_undef_to_zero: GICombineRule<
|
||||
// replaced with undef.
|
||||
def propagate_undef_any_op: GICombineRule<
|
||||
(defs root:$root),
|
||||
(match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR):$root,
|
||||
(match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC):$root,
|
||||
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
|
||||
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
|
||||
|
||||
@ -437,6 +437,24 @@ def unmerge_zext_to_zext : GICombineRule<
|
||||
(apply [{ return Helper.applyCombineUnmergeZExtToZExt(*${d}); }])
|
||||
>;
|
||||
|
||||
// Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x).
|
||||
def trunc_ext_fold_matchinfo : GIDefMatchData<"std::pair<Register, unsigned>">;
|
||||
def trunc_ext_fold: GICombineRule <
|
||||
(defs root:$root, trunc_ext_fold_matchinfo:$matchinfo),
|
||||
(match (wip_match_opcode G_TRUNC):$root,
|
||||
[{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]),
|
||||
(apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
// Fold trunc (shl x, K) -> shl (trunc x), K if K < VT.getScalarSizeInBits().
|
||||
def trunc_shl_matchinfo : GIDefMatchData<"std::pair<Register, Register>">;
|
||||
def trunc_shl: GICombineRule <
|
||||
(defs root:$root, trunc_shl_matchinfo:$matchinfo),
|
||||
(match (wip_match_opcode G_TRUNC):$root,
|
||||
[{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]),
|
||||
(apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }])
|
||||
>;
|
||||
|
||||
// FIXME: These should use the custom predicate feature once it lands.
|
||||
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
|
||||
undef_to_negative_one,
|
||||
@ -469,4 +487,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
|
||||
known_bits_simplifications, ext_ext_fold,
|
||||
not_cmp_fold, opt_brcond_by_inverting_cond,
|
||||
unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
|
||||
unmerge_zext_to_zext]>;
|
||||
unmerge_zext_to_zext, trunc_ext_fold, trunc_shl]>;
|
||||
|
@ -2029,6 +2029,83 @@ bool CombinerHelper::applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Match a G_TRUNC whose source register is defined by a G_ANYEXT, G_SEXT or
/// G_ZEXT.
///
/// \param MI the G_TRUNC under inspection.
/// \param [out] MatchInfo on success, the extension's source register paired
///        with the extension opcode.
/// \returns true if the defining instruction is one of the three extensions.
bool CombinerHelper::matchCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  MachineInstr *ExtMI = MRI.getVRegDef(MI.getOperand(1).getReg());
  unsigned ExtOpc = ExtMI->getOpcode();
  switch (ExtOpc) {
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
    break;
  default:
    // The truncated value does not come from an extension.
    return false;
  }
  MatchInfo = std::make_pair(ExtMI->getOperand(1).getReg(), ExtOpc);
  return true;
}
|
||||
|
||||
/// Rewrite trunc ([asz]ext x) using the information recorded by
/// matchCombineTruncOfExt.
///
/// Three outcomes, chosen by comparing the type of x with the truncation's
/// result type:
///  - identical types: uses of the result are redirected to x;
///  - x is narrower:   the original extension opcode is rebuilt directly to
///                     the result register;
///  - otherwise:       a single G_TRUNC from x to the result register.
/// In every case \p MI is erased.
///
/// \param MI the G_TRUNC being replaced.
/// \param MatchInfo (extension source register, extension opcode).
/// \returns true always.
bool CombinerHelper::applyCombineTruncOfExt(
    MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register ExtSrc = MatchInfo.first;
  unsigned ExtOpc = MatchInfo.second;
  Register TruncDst = MI.getOperand(0).getReg();
  LLT ExtSrcTy = MRI.getType(ExtSrc);
  LLT TruncDstTy = MRI.getType(TruncDst);

  if (!(ExtSrcTy == TruncDstTy)) {
    // Types differ: emit one replacement instruction in front of the trunc.
    Builder.setInstrAndDebugLoc(MI);
    if (ExtSrcTy.getSizeInBits() < TruncDstTy.getSizeInBits())
      Builder.buildInstr(ExtOpc, {TruncDst}, {ExtSrc});
    else
      Builder.buildTrunc(TruncDst, ExtSrc);
    MI.eraseFromParent();
    return true;
  }

  // The extension and truncation cancel out: forward x to all users.
  MI.eraseFromParent();
  replaceRegWith(MRI, TruncDst, ExtSrc);
  return true;
}
|
||||
|
||||
/// Match trunc (shl x, K) where the shift can be performed in the narrower
/// destination type.
///
/// The match requires that:
///  - the G_SHL result has exactly one non-debug use,
///  - G_SHL on the destination type is legal (or we are before the
///    legalizer), and
///  - known-bits analysis shows K fits in floor(log2(lane width)) bits, so K
///    is strictly smaller than the destination's per-lane width.
///
/// \param MI the G_TRUNC under inspection.
/// \param [out] MatchInfo on success, (shifted value x, shift amount K).
/// \returns true if the combine is applicable.
bool CombinerHelper::matchCombineTruncOfShl(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Register ShiftSrc;
  Register ShiftAmt;

  if (!MRI.hasOneNonDBGUse(SrcReg) ||
      !mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) ||
      !isLegalOrBeforeLegalizer(
          {TargetOpcode::G_SHL,
           {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}}))
    return false;

  KnownBits Known = KB->getKnownBits(ShiftAmt);
  // The bound must be the per-lane width: for a vector destination the total
  // width would admit shift amounts that are out of range for each narrowed
  // element. For scalars this is the same value as getSizeInBits().
  unsigned Size = DstTy.getScalarSizeInBits();
  if (Known.getBitWidth() - Known.countMinLeadingZeros() > Log2_32(Size))
    return false;

  MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
  return true;
}
|
||||
|
||||
/// Rewrite trunc (shl x, K) as shl (trunc x), (trunc K) using the registers
/// recorded by matchCombineTruncOfShl.
///
/// Both operands are truncated to the destination type, the new G_SHL is
/// given the flags of the original shift, and \p MI is erased.
///
/// \param MI the G_TRUNC being replaced.
/// \param MatchInfo (shifted value x, shift amount K).
/// \returns true always.
bool CombinerHelper::applyCombineTruncOfShl(
    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);

  Register ShiftSrc = MatchInfo.first;
  Register ShiftAmt = MatchInfo.second;
  Builder.setInstrAndDebugLoc(MI);
  // Build the two truncs as separate statements. As call arguments their
  // evaluation order is unspecified, which would make the order of the
  // emitted instructions depend on the host compiler.
  auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
  auto TruncShiftAmt = Builder.buildTrunc(DstTy, ShiftAmt);
  Builder.buildShl(DstReg, TruncShiftSrc, TruncShiftAmt, SrcMI->getFlags());
  MI.eraseFromParent();
  return true;
}
|
||||
|
||||
bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
|
||||
return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
|
||||
return MO.isReg() &&
|
||||
|
@ -107,8 +107,8 @@ end:
|
||||
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_ADD %{{[0-9]+}}:_, %{{[0-9]+}}:_ (in function: nonpow2_add_narrowing)
|
||||
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_add_narrowing
|
||||
; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_add_narrowing:
|
||||
define void @nonpow2_add_narrowing() {
|
||||
%a = add i128 undef, undef
|
||||
define void @nonpow2_add_narrowing(i128 %x, i128 %y) {
|
||||
%a = add i128 %x, %y
|
||||
%b = trunc i128 %a to i96
|
||||
%dummy = add i96 %b, %b
|
||||
store i96 %dummy, i96* undef
|
||||
|
142
test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
Normal file
142
test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
Normal file
@ -0,0 +1,142 @@
|
||||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
|
||||
---
|
||||
name: test_combine_trunc_undef
|
||||
body: |
|
||||
bb.1:
|
||||
; CHECK-LABEL: name: test_combine_trunc_undef
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: $w0 = COPY [[DEF]](s32)
|
||||
%0:_(s64) = G_IMPLICIT_DEF
|
||||
%1:_(s32) = G_TRUNC %0(s64)
|
||||
$w0 = COPY %1(s32)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_undef_vec
|
||||
body: |
|
||||
bb.1:
|
||||
; CHECK-LABEL: name: test_combine_trunc_undef_vec
|
||||
; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
|
||||
; CHECK: $x0 = COPY [[DEF]](<2 x s32>)
|
||||
%0:_(<2 x s64>) = G_IMPLICIT_DEF
|
||||
%1:_(<2 x s32>) = G_TRUNC %0(<2 x s64>)
|
||||
$x0 = COPY %1(<2 x s32>)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_anyext_s32_s16
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $h0
|
||||
; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
|
||||
; CHECK: $w0 = COPY [[ANYEXT]](s32)
|
||||
%0:_(s16) = COPY $h0
|
||||
%1:_(s64) = G_ANYEXT %0(s16)
|
||||
%2:_(s32) = G_TRUNC %1(s64)
|
||||
$w0 = COPY %2(s32)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_anyext_s32_s16_vec
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $s0
|
||||
; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16_vec
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $s0
|
||||
; CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[COPY]](<2 x s16>)
|
||||
; CHECK: $x0 = COPY [[ANYEXT]](<2 x s32>)
|
||||
%0:_(<2 x s16>) = COPY $s0
|
||||
%1:_(<2 x s64>) = G_ANYEXT %0(<2 x s16>)
|
||||
%2:_(<2 x s32>) = G_TRUNC %1(<2 x s64>)
|
||||
$x0 = COPY %2(<2 x s32>)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_sext_s32_s16
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $h0
|
||||
; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
|
||||
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
|
||||
; CHECK: $w0 = COPY [[SEXT]](s32)
|
||||
%0:_(s16) = COPY $h0
|
||||
%1:_(s64) = G_SEXT %0(s16)
|
||||
%2:_(s32) = G_TRUNC %1(s64)
|
||||
$w0 = COPY %2(s32)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_zext_s32_s16
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $h0
|
||||
; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
|
||||
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
|
||||
; CHECK: $w0 = COPY [[ZEXT]](s32)
|
||||
%0:_(s16) = COPY $h0
|
||||
%1:_(s64) = G_ZEXT %0(s16)
|
||||
%2:_(s32) = G_TRUNC %1(s64)
|
||||
$w0 = COPY %2(s32)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_anyext_s32_s32
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: $w0 = COPY [[COPY]](s32)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s64) = G_ANYEXT %0(s32)
|
||||
%2:_(s32) = G_TRUNC %1(s64)
|
||||
$w0 = COPY %2(s32)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_anyext_s32_s64
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $x0
|
||||
; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
|
||||
; CHECK: $w0 = COPY [[TRUNC]](s32)
|
||||
%0:_(s64) = COPY $x0
|
||||
%1:_(s128) = G_ANYEXT %0(s64)
|
||||
%2:_(s32) = G_TRUNC %1(s128)
|
||||
$w0 = COPY %2(s32)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_shl_s32_by_2
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
|
||||
; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s32)
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16)
|
||||
; CHECK: $h0 = COPY [[SHL]](s16)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 2
|
||||
%2:_(s32) = G_SHL %0(s32), %1(s32)
|
||||
%3:_(s16) = G_TRUNC %2(s32)
|
||||
$h0 = COPY %3(s16)
|
||||
...
|
||||
---
|
||||
name: test_combine_trunc_shl_s32_by_17
|
||||
body: |
|
||||
bb.1:
|
||||
liveins: $w0
|
||||
; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_17
|
||||
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
|
||||
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 17
|
||||
; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
|
||||
; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
|
||||
; CHECK: $h0 = COPY [[TRUNC]](s16)
|
||||
%0:_(s32) = COPY $w0
|
||||
%1:_(s32) = G_CONSTANT i32 17
|
||||
%2:_(s32) = G_SHL %0(s32), %1(s32)
|
||||
%3:_(s16) = G_TRUNC %2(s32)
|
||||
$h0 = COPY %3(s16)
|
||||
...
|
@ -82,14 +82,14 @@ define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
|
||||
;
|
||||
; GFX8-LABEL: s_shl_i8_7:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX8-NEXT: s_bfe_u32 s1, 7, 0x100000
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
|
||||
; GFX8-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_shl_i8_7:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 7
|
||||
; GFX9-NEXT: s_bfe_u32 s1, 7, 0x100000
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
%result = shl i8 %value, 7
|
||||
ret i8 %result
|
||||
@ -426,14 +426,14 @@ define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
|
||||
;
|
||||
; GFX8-LABEL: s_shl_i16_15:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, 15
|
||||
; GFX8-NEXT: s_bfe_u32 s1, 15, 0x100000
|
||||
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
|
||||
; GFX8-NEXT: ; return to shader part epilog
|
||||
;
|
||||
; GFX9-LABEL: s_shl_i16_15:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, 15
|
||||
; GFX9-NEXT: s_bfe_u32 s1, 15, 0x100000
|
||||
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
|
||||
; GFX9-NEXT: ; return to shader part epilog
|
||||
%result = shl i16 %value, 15
|
||||
ret i16 %result
|
||||
|
@ -37,7 +37,6 @@ define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> in
|
||||
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
|
||||
; GFX8-NEXT: s_mov_b32 s3, s2
|
||||
; GFX8-NEXT: s_and_b32 s0, s0, s2
|
||||
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3]
|
||||
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
|
||||
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
|
||||
; GFX8-NEXT: s_and_b32 s0, s0, s2
|
||||
@ -121,10 +120,8 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
|
||||
; GFX8-NEXT: s_mov_b32 s5, s4
|
||||
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
|
||||
; GFX8-NEXT: s_and_b32 s6, s1, s4
|
||||
; GFX8-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
|
||||
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
|
||||
; GFX8-NEXT: s_and_b64 s[2:3], s[6:7], s[4:5]
|
||||
; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5]
|
||||
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]
|
||||
; GFX8-NEXT: s_xor_b64 s[2:3], s[6:7], s[4:5]
|
||||
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
|
||||
; GFX8-NEXT: s_and_b32 s0, s0, s4
|
||||
; GFX8-NEXT: s_or_b32 s0, s1, s0
|
||||
|
Loading…
Reference in New Issue
Block a user