From 6ed3fad87238f3df8657aa45b2eedc4fb07cf751 Mon Sep 17 00:00:00 2001
From: Jessica Paquette
Date: Thu, 19 Mar 2020 18:12:18 -0700
Subject: [PATCH] [GlobalISel] Combine G_SELECTs of the form (cond ? x : x)
 into x

When we find something like this:

```
%a:_(s32) = G_SOMETHING ...
...
%select:_(s32) = G_SELECT %cond(s1), %a, %a
```

we can remove the select entirely and replace it with `%a`, because it
always produces `%a`. The same holds if we have

```
%select:_(s32) = G_SELECT %cond(s1), %a, %b
```

where we can deduce that `%a == %b`.

This implements the following cases:

- `%select:_(s32) = G_SELECT %cond(s1), %a, %a` -> `%a`
- `%select:_(s32) = G_SELECT %cond(s1), %a, %some_copy_from_a` -> `%a`
- `%select:_(s32) = G_SELECT %cond(s1), %a, %b` -> `%a` when `%a` and
  `%b` are defined by identical instructions

This gives a few minor code size improvements on CTMark at -O3 for
AArch64.

Differential Revision: https://reviews.llvm.org/D76523
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  | 10 ++
 include/llvm/Target/GlobalISel/Combine.td     | 13 ++-
 lib/CodeGen/GlobalISel/CombinerHelper.cpp     | 35 +++++++
 .../prelegalizercombiner-select.mir           | 95 +++++++++++++++++++
 4 files changed, 152 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-select.mir

diff --git a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 36b80276ac4..f97c22ac420 100644
--- a/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -210,6 +210,16 @@ public:
   /// Replace an instruction with a G_IMPLICIT_DEF.
   bool replaceInstWithUndef(MachineInstr &MI);
 
+  /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
+  bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
+
+  /// Return true if \p MOP1 and \p MOP2 are register operands defined by
+  /// equivalent instructions.
+  bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
+
+  /// Optimize (cond ? x : x) -> x
+  bool matchSelectSameVal(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);
diff --git a/include/llvm/Target/GlobalISel/Combine.td b/include/llvm/Target/GlobalISel/Combine.td
index ebe7ea35b54..b0f189a3cd0 100644
--- a/include/llvm/Target/GlobalISel/Combine.td
+++ b/include/llvm/Target/GlobalISel/Combine.td
@@ -184,6 +184,14 @@ def propagate_undef_shuffle_mask: GICombineRule<
          [{ return Helper.matchUndefShuffleVectorMask(*${root}); }]),
   (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
 
+// Fold (cond ? x : x) -> x
+def select_same_val: GICombineRule<
+  (defs root:$root),
+  (match (wip_match_opcode G_SELECT):$root,
+    [{ return Helper.matchSelectSameVal(*${root}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -191,6 +199,9 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      propagate_undef_all_ops,
                                      propagate_undef_shuffle_mask]>;
 
+def identity_combines : GICombineGroup<[select_same_val]>;
+
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>;
 def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
-    combines_for_extload, combine_indexed_load_store, undef_combines]>;
+    combines_for_extload, combine_indexed_load_store, undef_combines,
+    identity_combines]>;
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 195698245fd..e9e4e2b3069 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1503,6 +1503,41 @@ bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
   return all_of(Mask, [](int Elt) { return Elt < 0; });
 }
 
+bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
+                                    const MachineOperand &MOP2) {
+  if (!MOP1.isReg() || !MOP2.isReg())
+    return false;
+  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
+  if (!I1)
+    return false;
+  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
+  if (!I2)
+    return false;
+
+  // On the off-chance that there's some target instruction feeding into the
+  // select, let's use produceSameValue instead of isIdenticalTo.
+  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+}
+
+bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+                                                     unsigned OpIdx) {
+  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+  Register OldReg = MI.getOperand(0).getReg();
+  Register Replacement = MI.getOperand(OpIdx).getReg();
+  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+  MI.eraseFromParent();
+  replaceRegWith(MRI, OldReg, Replacement);
+  return true;
+}
+
+bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
+  // Match (cond ? x : x)
+  return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
+         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
+                       MRI);
+}
+
 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
   assert(MI.getNumDefs() == 1 && "Expected only one def?");
   Builder.setInstr(MI);
diff --git a/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-select.mir b/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-select.mir
new file mode 100644
index 00000000000..646d7f8eea3
--- /dev/null
+++ b/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-select.mir
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+name: self
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; Optimize (cond ? %a : %a) -> %a
+    ; CHECK-LABEL: name: self
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY %a(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %a
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: self_with_copy
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; Optimize (cond ? %a : %b) -> %a
+    ;
+    ; This shows that we look through copies correctly and deduce that
+    ; %b is a copy from %a.
+    ;
+    ; CHECK-LABEL: name: self_with_copy
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY %a(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY %a
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: self_with_equivalent
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; Optimize (cond ? %a : %b) -> %a
+    ;
+    ; This shows that we can detect when %a == %b, even though they define
+    ; different virtual registers.
+    ;
+    ; CHECK-LABEL: name: self_with_equivalent
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: $w0 = COPY %a(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w0
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: self_not_equivalent
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; In this case, the copies are not equivalent, so there is no optimization.
+    ; CHECK-LABEL: name: self_not_equivalent
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: %b:_(s32) = COPY $w1
+    ; CHECK: %cond_wide:gpr(s32) = COPY $w1
+    ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    ; CHECK: %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    ; CHECK: $w0 = COPY %select(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w1
+    %cond_wide:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+    %select:_(s32) = G_SELECT %cond(s1), %a, %b
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
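
A note on how the pieces fit together: the TableGen rule is only glue, and the real work is the two new CombinerHelper entry points. Below is a minimal hand-rolled sketch of the same fold, assuming only the API added in this patch; `tryFoldSelectSameVal` is a hypothetical wrapper for illustration, not something the patch adds.

```
// Hand-rolled equivalent of the select_same_val rule: match, then apply.
// tryFoldSelectSameVal is a hypothetical helper for illustration only.
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetOpcodes.h"

static bool tryFoldSelectSameVal(llvm::CombinerHelper &Helper,
                                 llvm::MachineInstr &MI) {
  // G_SELECT operands: 0 = dst, 1 = cond, 2 = true value, 3 = false value.
  if (MI.getOpcode() != llvm::TargetOpcode::G_SELECT)
    return false;
  // Match step: the true and false values must be defined by equivalent
  // instructions (looking through copies), and replacing the destination
  // must be legal with respect to register class/bank constraints.
  if (!Helper.matchSelectSameVal(MI))
    return false;
  // Apply step: forward the true value (operand 2) and erase the select.
  return Helper.replaceSingleDefInstWithOperand(MI, 2);
}
```

Because `select_same_val` is reached from `all_combines` through the new `identity_combines` group, any target whose combiner pulls in `all_combines` picks this fold up with no per-target changes, which is where the AArch64 CTMark improvements come from.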
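
The comment in `matchEqualDefs` prefers `produceSameValue` over `isIdenticalTo` so that targets can override what "same value" means for target-specific instructions. For reference, the default `TargetInstrInfo::produceSameValue` at the time of this patch reduces to `isIdenticalTo` with virtual register defs ignored; this is a sketch from memory, so verify it against your tree:

```
// Default produceSameValue (sketch): two instructions produce the same value
// if they are identical except for the virtual registers they define. This is
// why the self_with_equivalent test folds: both COPYs read $w0.
bool TargetInstrInfo::produceSameValue(const MachineInstr &MI0,
                                       const MachineInstr &MI1,
                                       const MachineRegisterInfo *MRI) const {
  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
```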