mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-07 11:51:13 +00:00
0599d58aba
Summary: Rework the code that was sinking/duplicating (icmp and, 0) sequences into blocks where they were being used by conditional branches to form more tbz instructions on AArch64. The new code is more general in that it just looks for 'and's whose users are all icmp-with-0 instructions, with a target hook used to select which subset of 'and' instructions to consider. This change also enables 'and' sinking for X86, where it is more widely beneficial than on AArch64. The 'and' sinking/duplicating code is moved into the optimizeInst phase of CodeGenPrepare, where it can take advantage of the fact that OptimizeCmpExpression has already sunk/duplicated any icmps into the blocks where they are used. One minor complication from this change is that optimizeLoadExt needed to be updated to always record, in the InsertedInsts set, any 'and' it has determined should stay in the same block as its feeding load, to avoid an infinite loop of hoisting and sinking the same 'and'. This change fixes a regression on X86 in the tsan runtime caused by moving GVNHoist to a later place in the optimization pipeline (see PR31382). Reviewers: t.p.northover, qcolombet, MatzeB Subscribers: aemerson, mcrosier, sebpop, llvm-commits Differential Revision: https://reviews.llvm.org/D28813 llvm-svn: 295746
91 lines
2.2 KiB
LLVM
91 lines
2.2 KiB
LLVM
; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s
; RUN: opt -S -codegenprepare -mtriple=aarch64-linux %s | FileCheck --check-prefix=CHECK-CGP %s

; The same IR is verified twice (see the two run lines above):
;   * llc output (AArch64 assembly) is matched with the default check prefix.
;   * opt output after only -codegenprepare (LLVM IR) is matched with the
;     prefix passed via --check-prefix.

; Zero-initialized i32 globals; the test functions in this file store 0 to
; them, which gives each basic block a recognizable side effect to match on.
@A = global i32 zeroinitializer
@B = global i32 zeroinitializer
@C = global i32 zeroinitializer

; Test that the 'and' is sunk into the block of its icmp user to form tbz.
;
; %and is computed in the entry block, but its only user is the
; compare-with-zero in bb0 that feeds bb0's conditional branch. The mask 4 has
; exactly one bit set (bit 2).
;
; What the llc checks pin, in order:
;   * a test of bit 0 of w1 (the i1 %c argument) for the entry branch,
;   * the store of wzr to A,
;   * a tbnz on bit #2 of some w register for the branch on %cmp.
;
; What the CodeGenPrepare checks pin:
;   * no 'and i32' between the function header and the bb0 label,
;   * inside bb0, an 'and i32' followed on consecutive lines by the
;     'icmp eq i32', the store, and the branch.
define i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
; CHECK: tbz w1, #0
; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A]
; CHECK: tbnz {{w[0-9]+}}, #2

; CHECK-CGP-LABEL: @and_sink1(
; CHECK-CGP-NOT: and i32
  %and = and i32 %a, 4
  br i1 %c, label %bb0, label %bb2
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP: and i32
; CHECK-CGP-NEXT: icmp eq i32
; CHECK-CGP-NEXT: store
; CHECK-CGP-NEXT: br
  %cmp = icmp eq i32 %and, 0
  store i32 0, i32* @A
  br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 0
}

; Test that both 'and' and cmp get sunk to form tbz.
;
; Here the three pieces live in three different blocks: %and in the entry
; block, %cmp in bb0, and the conditional branch that consumes %cmp in bb1.
; bb1 branches back to bb0 when %cmp is false, so bb0/bb1 form a loop.
;
; What the llc checks pin, in order:
;   * store of wzr to A, then a bit-0 test of w1 (the i1 %c argument),
;   * store of wzr to B, then a bit-0 test of w2 (the i1 %c2 argument),
;   * store of wzr to C, then a tbnz on bit #2 (the mask 4 is bit 2).
;
; What the CodeGenPrepare checks pin:
;   * no 'and i32' between the function header and the bb0 label,
;   * neither an 'and i32' nor an 'icmp' between the bb0 and bb1 labels,
;   * inside bb1, an 'and i32' followed on consecutive lines by the
;     'icmp eq i32', the store, and the branch.
define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A]
; CHECK: tbz w1, #0
; CHECK: str wzr, [x{{[0-9]+}}, :lo12:B]
; CHECK: tbz w2, #0
; CHECK: str wzr, [x{{[0-9]+}}, :lo12:C]
; CHECK: tbnz {{w[0-9]+}}, #2

; CHECK-CGP-LABEL: @and_sink2(
; CHECK-CGP-NOT: and i32
  %and = and i32 %a, 4
  store i32 0, i32* @A
  br i1 %c, label %bb0, label %bb3
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP-NOT: and i32
; CHECK-CGP-NOT: icmp
  %cmp = icmp eq i32 %and, 0
  store i32 0, i32* @B
  br i1 %c2, label %bb1, label %bb3
bb1:
; CHECK-CGP-LABEL: bb1:
; CHECK-CGP: and i32
; CHECK-CGP-NEXT: icmp eq i32
; CHECK-CGP-NEXT: store
; CHECK-CGP-NEXT: br
  store i32 0, i32* @C
  br i1 %cmp, label %bb2, label %bb0
bb2:
  ret i32 1
bb3:
  ret i32 0
}

; Test that 'and' is not sunk since cbz is a better alternative.
;
; The mask here is 3 rather than a single-bit value; presumably that is why
; a single-bit tbz is not applicable and the 'and' should stay where it is
; (NOTE(review): confirm against the target hook that selects sinkable masks).
; bb0 branches back to itself while %cmp is true, so it is a loop and %and is
; loop-invariant.
;
; What the llc checks pin, in order:
;   * an 'and' of w0 with #0x3 into some register, captured as REG,
;   * a local label, captured as LOOP,
;   * the store of wzr to A,
;   * a cbz on REG that targets LOOP.
;
; What the CodeGenPrepare checks pin:
;   * the 'and i32' is on the line right after the function header,
;   * no 'and i32' appears after the bb0 label.
define i32 @and_sink3(i32 %a) {
; CHECK-LABEL: and_sink3:
; CHECK: and [[REG:w[0-9]+]], w0, #0x3
; CHECK: [[LOOP:.L[A-Z0-9_]+]]:
; CHECK: str wzr, [x{{[0-9]+}}, :lo12:A]
; CHECK: cbz [[REG]], [[LOOP]]

; CHECK-CGP-LABEL: @and_sink3(
; CHECK-CGP-NEXT: and i32
  %and = and i32 %a, 3
  br label %bb0
bb0:
; CHECK-CGP-LABEL: bb0:
; CHECK-CGP-NOT: and i32
  %cmp = icmp eq i32 %and, 0
  store i32 0, i32* @A
  br i1 %cmp, label %bb0, label %bb2
bb2:
  ret i32 0
}