[RISCV] Ensure PseudoLA* can be hoisted

Since we mark the PseudoLA* pseudos as mayLoad but do not provide any
MachineMemOperands (MMOs), isSafeToMove conservatively returns false,
stopping MachineLICM from hoisting the instructions. PseudoLA_TLS_GD does
not actually expand to a load, so stop marking it as mayLoad to allow it
to be hoisted. For the others (PseudoLA and PseudoLA_TLS_IE), add MMOs
during lowering to indicate that they are invariant, dereferenceable GOT
loads and thus can be freely moved.

Fixes https://github.com/llvm/llvm-project/issues/54372

Reviewed By: MaskRay, arichardson

Differential Revision: https://reviews.llvm.org/D121654
This commit is contained in:
Jessica Clarke 2022-03-16 18:45:34 +00:00
parent 883f755639
commit 659363c0cc
4 changed files with 66 additions and 51 deletions

View File

@ -3736,7 +3736,16 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
SDValue Load =
SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
return Load;
}
switch (getTargetMachine().getCodeModel()) {
@ -3819,6 +3828,13 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
SDValue Load =
SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
MachinePointerInfo::getGOT(MF),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);

View File

@ -1337,7 +1337,7 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;

View File

@ -5,7 +5,6 @@
; RUN: | FileCheck -check-prefixes=RV64I %s
; Verifies that MachineLICM can hoist address generation pseudos out of loops.
; TODO: Does not currently work for anything other than PseudoLLA.
@l = protected global i32 0, align 4
@ -58,33 +57,31 @@ ret:
define void @test_la(i32 signext %n) {
; RV32I-LABEL: test_la:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: .LBB1_3: # %entry
; RV32I-NEXT: # Label of block must be emitted
; RV32I-NEXT: auipc a1, %got_pcrel_hi(g)
; RV32I-NEXT: lw a1, %pcrel_lo(.LBB1_3)(a1)
; RV32I-NEXT: li a2, 0
; RV32I-NEXT: .LBB1_1: # %loop
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: .LBB1_3: # %loop
; RV32I-NEXT: # in Loop: Header=BB1_1 Depth=1
; RV32I-NEXT: # Label of block must be emitted
; RV32I-NEXT: auipc a2, %got_pcrel_hi(g)
; RV32I-NEXT: lw a2, %pcrel_lo(.LBB1_3)(a2)
; RV32I-NEXT: lw a2, 0(a2)
; RV32I-NEXT: addi a1, a1, 1
; RV32I-NEXT: blt a1, a0, .LBB1_1
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: addi a2, a2, 1
; RV32I-NEXT: blt a2, a0, .LBB1_1
; RV32I-NEXT: # %bb.2: # %ret
; RV32I-NEXT: ret
;
; RV64I-LABEL: test_la:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: .LBB1_3: # %entry
; RV64I-NEXT: # Label of block must be emitted
; RV64I-NEXT: auipc a1, %got_pcrel_hi(g)
; RV64I-NEXT: ld a1, %pcrel_lo(.LBB1_3)(a1)
; RV64I-NEXT: li a2, 0
; RV64I-NEXT: .LBB1_1: # %loop
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: .LBB1_3: # %loop
; RV64I-NEXT: # in Loop: Header=BB1_1 Depth=1
; RV64I-NEXT: # Label of block must be emitted
; RV64I-NEXT: auipc a2, %got_pcrel_hi(g)
; RV64I-NEXT: ld a2, %pcrel_lo(.LBB1_3)(a2)
; RV64I-NEXT: lw a2, 0(a2)
; RV64I-NEXT: addiw a1, a1, 1
; RV64I-NEXT: blt a1, a0, .LBB1_1
; RV64I-NEXT: lw a3, 0(a1)
; RV64I-NEXT: addiw a2, a2, 1
; RV64I-NEXT: blt a2, a0, .LBB1_1
; RV64I-NEXT: # %bb.2: # %ret
; RV64I-NEXT: ret
entry:
@ -106,16 +103,15 @@ ret:
define void @test_la_tls_ie(i32 signext %n) {
; RV32I-LABEL: test_la_tls_ie:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: .LBB2_1: # %loop
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: .LBB2_3: # %loop
; RV32I-NEXT: # in Loop: Header=BB2_1 Depth=1
; RV32I-NEXT: .LBB2_3: # %entry
; RV32I-NEXT: # Label of block must be emitted
; RV32I-NEXT: auipc a2, %tls_ie_pcrel_hi(ie)
; RV32I-NEXT: lw a2, %pcrel_lo(.LBB2_3)(a2)
; RV32I-NEXT: li a1, 0
; RV32I-NEXT: add a2, a2, tp
; RV32I-NEXT: lw a2, 0(a2)
; RV32I-NEXT: .LBB2_1: # %loop
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: lw a3, 0(a2)
; RV32I-NEXT: addi a1, a1, 1
; RV32I-NEXT: blt a1, a0, .LBB2_1
; RV32I-NEXT: # %bb.2: # %ret
@ -123,16 +119,15 @@ define void @test_la_tls_ie(i32 signext %n) {
;
; RV64I-LABEL: test_la_tls_ie:
; RV64I: # %bb.0: # %entry
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: .LBB2_1: # %loop
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: .LBB2_3: # %loop
; RV64I-NEXT: # in Loop: Header=BB2_1 Depth=1
; RV64I-NEXT: .LBB2_3: # %entry
; RV64I-NEXT: # Label of block must be emitted
; RV64I-NEXT: auipc a2, %tls_ie_pcrel_hi(ie)
; RV64I-NEXT: ld a2, %pcrel_lo(.LBB2_3)(a2)
; RV64I-NEXT: li a1, 0
; RV64I-NEXT: add a2, a2, tp
; RV64I-NEXT: lw a2, 0(a2)
; RV64I-NEXT: .LBB2_1: # %loop
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: lw a3, 0(a2)
; RV64I-NEXT: addiw a1, a1, 1
; RV64I-NEXT: blt a1, a0, .LBB2_1
; RV64I-NEXT: # %bb.2: # %ret
@ -160,23 +155,25 @@ define void @test_la_tls_gd(i32 signext %n) nounwind {
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: li s1, 0
; RV32I-NEXT: li s2, 0
; RV32I-NEXT: .LBB3_3: # %entry
; RV32I-NEXT: # Label of block must be emitted
; RV32I-NEXT: auipc s1, %tls_gd_pcrel_hi(gd)
; RV32I-NEXT: addi s1, s1, %pcrel_lo(.LBB3_3)
; RV32I-NEXT: .LBB3_1: # %loop
; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
; RV32I-NEXT: .LBB3_3: # %loop
; RV32I-NEXT: # in Loop: Header=BB3_1 Depth=1
; RV32I-NEXT: # Label of block must be emitted
; RV32I-NEXT: auipc a0, %tls_gd_pcrel_hi(gd)
; RV32I-NEXT: addi a0, a0, %pcrel_lo(.LBB3_3)
; RV32I-NEXT: mv a0, s1
; RV32I-NEXT: call __tls_get_addr@plt
; RV32I-NEXT: lw a0, 0(a0)
; RV32I-NEXT: addi s1, s1, 1
; RV32I-NEXT: blt s1, s0, .LBB3_1
; RV32I-NEXT: addi s2, s2, 1
; RV32I-NEXT: blt s2, s0, .LBB3_1
; RV32I-NEXT: # %bb.2: # %ret
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
@ -186,23 +183,25 @@ define void @test_la_tls_gd(i32 signext %n) nounwind {
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT: mv s0, a0
; RV64I-NEXT: li s1, 0
; RV64I-NEXT: li s2, 0
; RV64I-NEXT: .LBB3_3: # %entry
; RV64I-NEXT: # Label of block must be emitted
; RV64I-NEXT: auipc s1, %tls_gd_pcrel_hi(gd)
; RV64I-NEXT: addi s1, s1, %pcrel_lo(.LBB3_3)
; RV64I-NEXT: .LBB3_1: # %loop
; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
; RV64I-NEXT: .LBB3_3: # %loop
; RV64I-NEXT: # in Loop: Header=BB3_1 Depth=1
; RV64I-NEXT: # Label of block must be emitted
; RV64I-NEXT: auipc a0, %tls_gd_pcrel_hi(gd)
; RV64I-NEXT: addi a0, a0, %pcrel_lo(.LBB3_3)
; RV64I-NEXT: mv a0, s1
; RV64I-NEXT: call __tls_get_addr@plt
; RV64I-NEXT: lw a0, 0(a0)
; RV64I-NEXT: addiw s1, s1, 1
; RV64I-NEXT: blt s1, s0, .LBB3_1
; RV64I-NEXT: addiw s2, s2, 1
; RV64I-NEXT: blt s2, s0, .LBB3_1
; RV64I-NEXT: # %bb.2: # %ret
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: ret
entry:

View File

@ -24,10 +24,10 @@ define i32 @caller(i32 %a) nounwind {
; RV32-SMALL-LABEL: name: caller
; RV32-SMALL: target-flags(riscv-hi) @g_e
; RV32-SMALL-NEXT: target-flags(riscv-lo) @g_e
; RV32-SMALL-NEXT: target-flags(riscv-hi) @g_i
; RV32-SMALL-NEXT: target-flags(riscv-lo) @g_i
; RV32-SMALL: target-flags(riscv-tls-got-hi) @t_un
; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) %bb.1
; RV32-SMALL: target-flags(riscv-hi) @g_i
; RV32-SMALL-NEXT: target-flags(riscv-lo) @g_i
; RV32-SMALL: target-flags(riscv-tls-got-hi) @t_ld
; RV32-SMALL-NEXT: target-flags(riscv-pcrel-lo) %bb.2
; RV32-SMALL: target-flags(riscv-tls-got-hi) @t_ie