This option allows selecting the TLS size in the local exec TLS model,

which is the default TLS model for non-PIC objects. This allows large/
many thread local variables or a compact/fast code in an executable.

Specification is same as that of GCC. For example, the code model
option precedes the TLS size option.

TLS access models other than local-exec are not changed. It means
supoort of the large code model is only in the local exec TLS model.

Patch By KAWASHIMA Takahiro (kawashima-fj <t-kawashima@fujitsu.com>)
Reviewers: dmgreen, mstorsjo, t.p.northover, peter.smith, ostannard
Reviewd By: peter.smith
Committed by: peter.smith

Differential Revision: https://reviews.llvm.org/D71688
This commit is contained in:
KAWASHIMA Takahiro 2020-01-13 09:28:02 +00:00 committed by Peter Smith
parent 4897d2be46
commit 78130ba3e6
7 changed files with 231 additions and 71 deletions

View File

@ -238,6 +238,10 @@ static cl::opt<bool>
cl::desc("Emit functions into separate sections"),
cl::init(false));
static cl::opt<unsigned> TLSSize("tls-size",
cl::desc("Bit size of immediate TLS offsets"),
cl::init(0));
static cl::opt<bool> EmulatedTLS("emulated-tls",
cl::desc("Use emulated TLS model"),
cl::init(false));
@ -305,6 +309,7 @@ static TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.DataSections = DataSections;
Options.FunctionSections = FunctionSections;
Options.UniqueSectionNames = UniqueSectionNames;
Options.TLSSize = TLSSize;
Options.EmulatedTLS = EmulatedTLS;
Options.ExplicitEmulatedTLS = EmulatedTLS.getNumOccurrences() > 0;
Options.ExceptionModel = ExceptionModel;

View File

@ -231,6 +231,9 @@ namespace llvm {
/// noreturn calls, even if TrapUnreachable is true.
unsigned NoTrapAfterNoreturn : 1;
/// Bit size of immediate TLS offsets (0 == use the default).
unsigned TLSSize : 8;
/// EmulatedTLS - This flag enables emulated TLS model, using emutls
/// function in the runtime library..
unsigned EmulatedTLS : 1;

View File

@ -4591,6 +4591,97 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
/// Convert a thread-local variable reference into a sequence of instructions to
/// compute the variable's address for the local exec TLS model of ELF targets.
/// The sequence depends on the maximum TLS area size.
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
SDValue ThreadBase,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue TPOff, Addr;
switch (DAG.getTarget().Options.TLSSize) {
default:
llvm_unreachable("Unexpected TLS size");
case 12: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_lo12:a
SDValue Var = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
Var,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 24: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_hi12:a
// add x0, x0, :tprel_lo12_nc:a
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 32: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g1:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
case 48: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g2:a
// movk x0, #:tprel_g1_nc:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
SDValue MiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(32, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
}
}
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
@ -4628,15 +4719,7 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
if (getTargetMachine().getCodeModel() == CodeModel::Large)
report_fatal_error("ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add -mtls-size command line option and make it control the 16MiB
// vs. 4GiB code sequence generation.
// FIXME: add tiny codemodel support. We currently generate the same code as
// small, which may be larger than needed.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@ -4646,6 +4729,17 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
Model = TLSModel::GeneralDynamic;
}
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Model != TLSModel::LocalExec)
report_fatal_error("ELF TLS only supported in small memory model or "
"in local exec TLS model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add tiny and large code model support for TLS access models other
// than local exec. We currently generate the same code as small for tiny,
// which may be larger than needed.
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
@ -4654,23 +4748,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
SDValue TPWithOff_lo =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
SDValue TPWithOff =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return TPWithOff;
return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);

View File

@ -696,6 +696,8 @@ private:
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;

View File

@ -288,6 +288,17 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
this->Options.TrapUnreachable = true;
}
if (this->Options.TLSSize == 0) // default
this->Options.TLSSize = 24;
if ((getCodeModel() == CodeModel::Small ||
getCodeModel() == CodeModel::Kernel) &&
this->Options.TLSSize > 32)
// for the small (and kernel) code model, the maximum TLS size is 4GiB
this->Options.TLSSize = 32;
else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
// for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
this->Options.TLSSize = 24;
// Enable GlobalISel at or below EnableGlobalISelAt0, unless this is
// MachO/CodeModel::Large, which GlobalISel does not support.
if (getOptLevel() <= EnableGlobalISelAtO &&

View File

@ -49,48 +49,3 @@ define i32* @test_initial_exec_addr() {
; CHECK-TINY-RELOC: R_AARCH64_TLSIE_LD_GOTTPREL_PREL19
}
@local_exec_var = thread_local(localexec) global i32 0
define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
%val = load i32, i32* @local_exec_var
; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
; CHECK: ldr w0, [x[[R3]]]
; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
; CHECK-TINY: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-TINY: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
; CHECK-TINY: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
; CHECK-TINY: ldr w0, [x[[R3]]]
; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
ret i32 %val
}
define i32* @test_local_exec_addr() {
; CHECK-LABEL: test_local_exec_addr:
ret i32* @local_exec_var
; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
; CHECK: ret
; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
; CHECK-TINY: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-TINY: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
; CHECK-TINY: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
; CHECK-TINY: ret
; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
; CHECK-TINY-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
}

View File

@ -0,0 +1,106 @@
; Test each TLS size option
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -tls-size=12 < %s | FileCheck %s --check-prefix=CHECK-12
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -tls-size=12 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-12-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=tiny -tls-size=24 < %s | FileCheck %s --check-prefix=CHECK-24
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=tiny -tls-size=24 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=small -tls-size=32 < %s | FileCheck %s --check-prefix=CHECK-32
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=small -tls-size=32 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-32-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=large -tls-size=48 < %s | FileCheck %s --check-prefix=CHECK-48
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=large -tls-size=48 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-48-RELOC %s
;
; Test the maximum TLS size for each code model (fallback to a smaller size from the specified size)
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -tls-size=32 < %s | FileCheck %s --check-prefix=CHECK-32
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -tls-size=32 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-32-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=tiny -tls-size=32 < %s | FileCheck %s --check-prefix=CHECK-24
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=tiny -tls-size=32 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=small -tls-size=48 < %s | FileCheck %s --check-prefix=CHECK-32
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=small -tls-size=48 | llvm-objdump -r - | FileCheck --check-prefix=CHECK-32-RELOC %s
;
; Test the default TLS size for each code model
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding < %s | FileCheck --check-prefix=CHECK-24 %s
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=tiny < %s | FileCheck %s --check-prefix=CHECK-24
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=tiny | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=small < %s | FileCheck %s --check-prefix=CHECK-24
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=small | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
; RUN: llc -mtriple=arm64-none-linux-gnu -verify-machineinstrs -show-mc-encoding -code-model=large < %s | FileCheck %s --check-prefix=CHECK-24
; RUN: llc -mtriple=arm64-none-linux-gnu -filetype=obj < %s -code-model=large | llvm-objdump -r - | FileCheck --check-prefix=CHECK-24-RELOC %s
@local_exec_var = thread_local(localexec) global i32 0
define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
%val = load i32, i32* @local_exec_var
; CHECK-12: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-12: add x[[R2:[0-9]+]], x[[R1]], :tprel_lo12:local_exec_var
; CHECK-12: ldr w0, [x[[R2]]]
; CHECK-12-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12
; CHECK-24: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-24: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
; CHECK-24: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
; CHECK-24: ldr w0, [x[[R3]]]
; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
; CHECK-32: movz x[[R2:[0-9]+]], #:tprel_g1:local_exec_var
; CHECK-32: movk x[[R2]], #:tprel_g0_nc:local_exec_var
; CHECK-32: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-32: ldr w0, [x[[R1]], x[[R2]]]
; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
; CHECK-48: movz x[[R2:[0-9]+]], #:tprel_g2:local_exec_var
; CHECK-48: movk x[[R2]], #:tprel_g1_nc:local_exec_var
; CHECK-48: movk x[[R2]], #:tprel_g0_nc:local_exec_var
; CHECK-48: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-48: ldr w0, [x[[R1]], x[[R2]]]
; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G2
; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
ret i32 %val
}
define i32* @test_local_exec_addr() {
; CHECK-LABEL: test_local_exec_addr:
ret i32* @local_exec_var
; CHECK-12: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-12: add x0, x[[R1]], :tprel_lo12:local_exec_var
; CHECK-12: ret
; CHECK-12-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12
; CHECK-24: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-24: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
; CHECK-24: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
; CHECK-24: ret
; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
; CHECK-24-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
; CHECK-32: movz x[[R2:[0-9]+]], #:tprel_g1:local_exec_var
; CHECK-32: movk x[[R2]], #:tprel_g0_nc:local_exec_var
; CHECK-32: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-32: add x0, x[[R1]], x[[R2]]
; CHECK-32: ret
; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
; CHECK-32-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
; CHECK-48: movz x[[R2:[0-9]+]], #:tprel_g2:local_exec_var
; CHECK-48: movk x[[R2]], #:tprel_g1_nc:local_exec_var
; CHECK-48: movk x[[R2]], #:tprel_g0_nc:local_exec_var
; CHECK-48: mrs x[[R1:[0-9]+]], TPIDR_EL0
; CHECK-48: add x0, x[[R1]], x[[R2]]
; CHECK-48: ret
; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G2
; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1_NC
; CHECK-48-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
}