mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-02 10:32:56 +00:00
eb92f5a745
This is the same change on PPC64 as r255821 on AArch64. I have even borrowed his commit message. The access function has a short entry and a short exit, the initialization block is only run the first time. To improve the performance, we want to have a short frame at the entry and exit. We explicitly handle most of the CSRs via copies. Only the CSRs that are not handled via copies will be in CSR_SaveList. Frame lowering and prologue/epilogue insertion will generate a short frame in the entry and exit according to CSR_SaveList. The majority of the CSRs will be handled by the register allocator. The register allocator will try to spill and reload them in the initialization block. We add CSRsViaCopy, which will be explicitly handled during lowering. 1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target supports it for the given machine function and the function has only return exits). We also call TLI->initializeSplitCSR to perform initialization. 2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to virtual registers at the beginning of the entry block and copies from virtual registers to CSRsViaCopy at the beginning of the exit blocks. 3> we also need to make sure the explicit copies will not be eliminated. Author: Tom Jablin (tjablin) Reviewers: hfinkel kbarton cycheng http://reviews.llvm.org/D17533 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@265781 91177308-0d34-0410-b5e6-96231b3b80d8
43 lines
1.4 KiB
LLVM
43 lines
1.4 KiB
LLVM
; RUN: llc < %s --enable-shrink-wrap=false -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s
; The command line above compiles this file with llc for the
; powerpc64le-unknown-linux-gnu triple, with shrink wrapping explicitly
; disabled, and pipes the generated assembly into FileCheck, which matches it
; against the check directives embedded in the comments further down.
|
|
; One-byte aggregate; the type of the thread-local object @sg.
%struct.S = type { i8 }
|
|
|
|
; Thread-local object returned by the accessor @_ZTW2sg.
@sg = internal thread_local global %struct.S zeroinitializer, align 1
|
|
; External symbol passed as the last argument of the @_tlv_atexit call.
@__dso_handle = external global i8
|
|
; Thread-local i1 flag, initially false. @_ZTW2sg loads it to decide whether
; the init.i block must run, and init.i stores true to it.
@__tls_guard = internal thread_local unnamed_addr global i1 false
|
|
; Thread-local i32 whose address is returned by @_ZTW4sum1 and @_ZTW4sum2.
@sum1 = internal thread_local global i32 0, align 4
|
|
|
|
; External functions taking a %struct.S*. The names look like the
; Itanium-mangled constructor (below) and destructor (next declaration) of S;
; that reading is an assumption from the mangling, the IR only needs the symbols.
declare void @_ZN1SC1Ev(%struct.S*)
|
|
declare void @_ZN1SD1Ev(%struct.S*)
|
|
; External routine called from init.i in @_ZTW2sg with a callback pointer, an
; i8* argument and @__dso_handle; its i32 result is unused there.
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
|
|
|
|
; Accessor for the thread-local object @sg, using the cxx_fast_tlscc calling
; convention. It loads @__tls_guard and, when the flag is already set, branches
; straight to the exit block and returns @sg. Otherwise it runs init.i once:
; sets the guard, calls @_ZN1SC1Ev on @sg and registers @_ZN1SD1Ev through
; @_tlv_atexit.
;
; The directives below are matched in order against the llc output. They
; require a conditional branch to the exit label, then (in the init path) a
; "Folded Spill", the call to _ZN1SC1Ev, a "Folded Reload" and a reference to
; _tlv_atexit, and finally the exit label captured as BB_end. The spill/reload
; pair is therefore expected after the guard branch, i.e. in the
; initialization path rather than on the fast path.
; CHECK-LABEL: _ZTW2sg
|
|
define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
|
|
; Read the guard flag: true means initialization has already happened.
%.b.i = load i1, i1* @__tls_guard, align 1
|
|
; The branch target label is captured as BB_end for the later label match.
; CHECK: bc 12, 1, [[BB_end:.?LBB0_[0-9]+]]
|
|
br i1 %.b.i, label %__tls_init.exit, label %init.i
|
|
|
|
; Slow path, taken only while the guard is still false.
init.i:
|
|
; CHECK: Folded Spill
|
|
; Mark the object as initialized before calling out.
store i1 true, i1* @__tls_guard, align 1
|
|
tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
|
|
; CHECK: bl _ZN1SC1Ev
|
|
; Register @_ZN1SD1Ev (cast to void (i8*)*) with the address of @sg's first
; field and @__dso_handle; the returned i32 (%1) is not used.
%1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
|
|
; CHECK: Folded Reload
|
|
; CHECK: _tlv_atexit
|
|
br label %__tls_init.exit
|
|
|
|
; The exit block must carry the label captured by the branch match above.
; CHECK: [[BB_end]]:
|
|
; Common exit for both paths: return the address of @sg.
__tls_init.exit:
|
|
ret %struct.S* @sg
|
|
}
|
|
|
|
; Accessor with the cxx_fast_tlscc calling convention that does nothing but
; return the address of the thread-local @sum1. Only its label is matched in
; the llc output; no instruction patterns are checked for it.
; CHECK-LABEL: _ZTW4sum1
|
|
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
|
|
ret i32* @sum1
|
|
}
|
|
|
|
; Same body as @_ZTW4sum1 (returns the address of @sum1), but the return value
; is not marked nonnull and the function uses attribute group #0 instead of a
; bare nounwind. No FileCheck directive refers to this function, so the test
; only requires that llc compiles it.
define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
|
|
ret i32* @sum1
|
|
}
|
|
|
|
; Attribute group used by @_ZTW4sum2: nounwind plus the string attribute
; "no-frame-pointer-elim"="true".
attributes #0 = { nounwind "no-frame-pointer-elim"="true" } |