ARM: support TLS accesses on Darwin platforms

Darwin TLS accesses most closely resemble ELF's general-dynamic situation,
since they have to be able to handle all possible situations. The descriptors
and so on are obviously slightly different though.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257039 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2016-01-07 09:03:03 +00:00
parent 8b10213fae
commit 928410cd12
11 changed files with 297 additions and 8 deletions

View File

@ -105,6 +105,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
const uint32_t *
ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() &&
"only know about special TLS call on Darwin");
return CSR_iOS_TLSCall_RegMask;
}
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {

View File

@ -95,6 +95,7 @@ public:
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
/// getTLSCallPreservedMask - Returns the register mask used for the special
/// Darwin TLS accessor call. The definition asserts that \p MF is being
/// compiled for a Darwin subtarget.
const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention

View File

@ -225,6 +225,10 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
// Registers preserved across the Darwin TLS accessor call: LR, SP, R12 down to
// R1, and D31 down to D0. R0 is not in the list.
def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
(sequence "R%u", 12, 1),
(sequence "D%u", 31, 0))>;
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
// instruction (handled elsewhere) and affects which registers we must return to

View File

@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32) return 0;
if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);

View File

@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else
Base = N;
@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
if (N.getOpcode() == ISD::ADD) {
return false; // We want to select register offset instead
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.

View File

@ -2530,6 +2530,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
/// \brief Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address for Darwin, and return an
/// SDValue containing the final node.
///
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
///     + "extern __thread" declaration.
///     + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i32] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first word, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "r0".
///
/// Since this descriptor may be in a different unit, in general access must
/// proceed along the usual ARM rules. A common sequence to produce is:
///
///     movw rT1, :lower16:_var$non_lazy_ptr
///     movt rT1, :upper16:_var$non_lazy_ptr
///     ldr r0, [rT1]
///     ldr rT2, [r0]
///     blx rT2
///     [...address now in r0...]
///
/// \param Op  The TLS global address node to lower.
/// \param DAG The SelectionDAG in which the replacement nodes are created.
/// \returns A CopyFromReg of R0 holding the variable's address in the current
///          thread.
SDValue
ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
  SDLoc DL(Op);
  MachineFunction &F = DAG.getMachineFunction();

  // First step is to get the address of the actual global symbol. This is where
  // the TLS descriptor lives.
  SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);

  // The first entry in the descriptor is a function pointer that we must call
  // to obtain the address of the variable.
  SDValue Chain = DAG.getEntryNode();
  SDValue FuncTLVGet =
      DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
                  MachinePointerInfo::getGOT(F),
                  /*isVolatile=*/false, /*isNonTemporal=*/true,
                  /*isInvariant=*/true, /*Alignment=*/4);
  Chain = FuncTLVGet.getValue(1);

  // Record on the frame info that this function adjusts the stack, since the
  // call built below is created directly rather than through the normal call
  // lowering path.
  MachineFrameInfo *MFI = F.getFrameInfo();
  MFI->setAdjustsStack(true);

  // TLS calls preserve all registers except those that absolutely must be
  // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
  // silly).
  //
  // Cast only as far as ARMBaseRegisterInfo, where getTLSCallPreservedMask is
  // defined, rather than assuming a more derived register-info class.
  auto TRI =
      getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
  auto ARI = static_cast<const ARMBaseRegisterInfo *>(TRI);
  const uint32_t *Mask = ARI->getTLSCallPreservedMask(F);

  // Finally, we can make the call. This is just a degenerate version of a
  // normal ARM call node: r0 takes the address of the descriptor, and
  // returns the address of the variable in this thread.
  Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
  Chain =
      DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
                  Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
                  DAG.getRegisterMask(Mask), Chain.getValue(1));
  return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@ -2631,9 +2697,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerGlobalTLSAddressDarwin(Op, DAG);
// TODO: implement the "local dynamic" model
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);

View File

@ -526,6 +526,8 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
/// Lower a TLS global address reference for Darwin targets; the definition
/// asserts that the subtarget is Darwin.
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;

View File

@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
Requires<[IsARM, UseMovt]>;
} // isReMaterializable
// The many different faces of TLS access.
// Each pattern below selects a wrapped TLS global address (tglobaltlsaddr) to
// a pseudo-instruction in ARM mode, keyed on the wrapper kind (ARMWrapper vs.
// ARMWrapperPIC) and on the UseMovt / DontUseMovt predicates.
// ARMWrapper + UseMovt -> MOVi32imm.
def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst),
(MOVi32imm tglobaltlsaddr :$dst)>,
Requires<[IsARM, UseMovt]>;
// ARMWrapper + DontUseMovt -> LDRLIT_ga_abs.
def : Pat<(ARMWrapper tglobaltlsaddr:$src),
(LDRLIT_ga_abs tglobaltlsaddr:$src)>,
Requires<[IsARM, DontUseMovt]>;
// ARMWrapperPIC + UseMovt -> MOV_ga_pcrel.
def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
(MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>;
// ARMWrapperPIC + DontUseMovt -> LDRLIT_ga_pcrel.
def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
(LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
Requires<[IsARM, DontUseMovt]>;
// A load of the PIC-wrapped address is selected to MOV_ga_pcrel_ldr; the
// AddedComplexity gives this pattern priority over the separate
// address + load selection.
let AddedComplexity = 10 in
def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
(MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>,
Requires<[IsARM, UseMovt]>;
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,

View File

@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
(ARMWrapper tglobaladdr:$src))]>,
Requires<[IsThumb, DontUseMovt]>;
// TLS globals
// Under DontUseMovt, a wrapped TLS global address is selected to the
// literal-load pseudos: tLDRLIT_ga_pcrel for ARMWrapperPIC and tLDRLIT_ga_abs
// for ARMWrapper.
def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
(tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
Requires<[IsThumb, DontUseMovt]>;
def : Pat<(ARMWrapper tglobaltlsaddr:$addr),
(tLDRLIT_ga_abs tglobaltlsaddr:$addr)>,
Requires<[IsThumb, DontUseMovt]>;
// JumpTable
def : T1Pat<(ARMWrapperJT tjumptable:$dst),

View File

@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
}
// TLS globals
// Under UseMovt in Thumb2, a wrapped TLS global address is selected to
// t2MOV_ga_pcrel for ARMWrapperPIC and to t2MOVi32imm for ARMWrapper.
def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst),
(t2MOV_ga_pcrel tglobaltlsaddr:$dst)>,
Requires<[IsThumb2, UseMovt]>;
def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
(t2MOVi32imm tglobaltlsaddr:$dst)>,
Requires<[IsThumb2, UseMovt]>;
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,

View File

@ -0,0 +1,165 @@
; Checks the code generated for Darwin thread-local variable accesses on
; thumbv7s and armv7s, with and without +no-movt, and with and without
; -relocation-model=static. Runs that do not pass -relocation-model use the
; PIC-named check prefixes. The first RUN line additionally passes -fast-isel.
; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - -fast-isel %s | FileCheck %s --check-prefix=T2-MOVT-PIC
; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=T2-LIT-PIC
; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=T2-MOVT-STATIC
; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=T2-LIT-STATIC
; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=ARM-MOVT-PIC
; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=ARM-LIT-PIC
; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=ARM-MOVT-STATIC
; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=ARM-LIT-STATIC
; One thread-local variable defined in this module and one declared external.
@local_tls_var = thread_local global i32 0
@external_tls_var = external thread_local global i32
; Load from a thread-local variable defined in this module. In every
; configuration the checks expect the address of _local_tls_var (the TLS
; descriptor) to be formed directly, the first word of the descriptor to be
; loaded and called with blx, and the value then loaded through the pointer
; left in r0. In the ARM movt PIC run the checks expect the symbol address to
; be materialised twice, once into r0 and once folded into the pc-relative
; load of the function pointer.
define i32 @test_local_tls() {
; T2-MOVT-PIC-LABEL: test_local_tls:
; T2-MOVT-PIC: movw r0, :lower16:(_local_tls_var-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4))
; T2-MOVT-PIC: movt r0, :upper16:(_local_tls_var-([[PCREL_LOC]]+4))
; T2-MOVT-PIC: [[PCREL_LOC]]:
; T2-MOVT-PIC-NEXT: add r0, pc
; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-MOVT-PIC: blx [[TLV_GET_ADDR]]
; T2-MOVT-PIC: ldr r0, [r0]
; T2-LIT-PIC-LABEL: test_local_tls:
; T2-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
; T2-LIT-PIC-NEXT: add r0, pc
; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-LIT-PIC: blx [[TLV_GET_ADDR]]
; T2-LIT-PIC: ldr r0, [r0]
; T2-LIT-PIC: [[LOCAL_VAR_ADDR]]:
; T2-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+4)
; T2-MOVT-STATIC-LABEL: test_local_tls:
; T2-MOVT-STATIC: movw r0, :lower16:_local_tls_var
; T2-MOVT-STATIC: movt r0, :upper16:_local_tls_var
; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]]
; T2-MOVT-STATIC: ldr r0, [r0]
; T2-LIT-STATIC-LABEL: test_local_tls:
; T2-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-LIT-STATIC: blx [[TLV_GET_ADDR]]
; T2-LIT-STATIC: ldr r0, [r0]
; T2-LIT-STATIC: [[LOCAL_VAR_ADDR]]:
; T2-LIT-STATIC-NEXT: .long _local_tls_var
; ARM-MOVT-PIC-LABEL: test_local_tls:
; ARM-MOVT-PIC: movw [[VARPC1:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC1:LPC[0-9]+_[0-9]+]]+8))
; ARM-MOVT-PIC: movt [[VARPC1]], :upper16:(_local_tls_var-([[PCREL_LOC1]]+8))
; ARM-MOVT-PIC: [[PCREL_LOC1]]:
; ARM-MOVT-PIC: add r0, pc, [[VARPC1]]
; ARM-MOVT-PIC: movw [[VARPC2:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC2:LPC[0-9]+_[0-9]+]]+8))
; ARM-MOVT-PIC: movt [[VARPC2]], :upper16:(_local_tls_var-([[PCREL_LOC2]]+8))
; ARM-MOVT-PIC: [[PCREL_LOC2]]:
; ARM-MOVT-PIC-NEXT: ldr [[TLV_GET_ADDR:r[0-9]+]], [pc, [[VARPC2]]]
; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]]
; ARM-MOVT-PIC: ldr r0, [r0]
; ARM-LIT-PIC-LABEL: test_local_tls:
; ARM-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
; ARM-LIT-PIC-NEXT: add r0, pc
; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-LIT-PIC: blx [[TLV_GET_ADDR]]
; ARM-LIT-PIC: ldr r0, [r0]
; ARM-LIT-PIC: [[LOCAL_VAR_ADDR]]:
; ARM-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+8)
; ARM-MOVT-STATIC-LABEL: test_local_tls:
; ARM-MOVT-STATIC: movw r0, :lower16:_local_tls_var
; ARM-MOVT-STATIC: movt r0, :upper16:_local_tls_var
; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]]
; ARM-MOVT-STATIC: ldr r0, [r0]
; ARM-LIT-STATIC-LABEL: test_local_tls:
; ARM-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]]
; ARM-LIT-STATIC: ldr r0, [r0]
; ARM-LIT-STATIC: [[LOCAL_VAR_ADDR]]:
; ARM-LIT-STATIC-NEXT: .long _local_tls_var
%val = load i32, i32* @local_tls_var, align 4
ret i32 %val
}
; Load from a thread-local variable declared external. In the PIC runs the
; checks expect the descriptor address to be loaded into r0 through
; L_external_tls_var$non_lazy_ptr first; in the static runs _external_tls_var
; is referenced directly. After that the sequence matches the local case: load
; the first word of the descriptor, call it with blx, then load through r0.
define i32 @test_external_tls() {
; T2-MOVT-PIC-LABEL: test_external_tls:
; T2-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4))
; T2-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4))
; T2-MOVT-PIC: [[PCREL_LOC]]:
; T2-MOVT-PIC-NEXT: add r[[EXTGOT]], pc
; T2-MOVT-PIC: ldr r0, [r[[EXTGOT]]]
; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-MOVT-PIC: blx [[TLV_GET_ADDR]]
; T2-MOVT-PIC: ldr r0, [r0]
; T2-LIT-PIC-LABEL: test_external_tls:
; T2-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
; T2-LIT-PIC-NEXT: add r[[EXTGOT]], pc
; T2-LIT-PIC: ldr r0, [r[[EXTGOT]]]
; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-LIT-PIC: blx [[TLV_GET_ADDR]]
; T2-LIT-PIC: ldr r0, [r0]
; T2-LIT-PIC: [[EXTERNAL_VAR_ADDR]]:
; T2-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4)
; T2-MOVT-STATIC-LABEL: test_external_tls:
; T2-MOVT-STATIC: movw r0, :lower16:_external_tls_var
; T2-MOVT-STATIC: movt r0, :upper16:_external_tls_var
; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]]
; T2-MOVT-STATIC: ldr r0, [r0]
; T2-LIT-STATIC-LABEL: test_external_tls:
; T2-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; T2-LIT-STATIC: blx [[TLV_GET_ADDR]]
; T2-LIT-STATIC: ldr r0, [r0]
; T2-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]:
; T2-LIT-STATIC-NEXT: .long _external_tls_var
; ARM-MOVT-PIC-LABEL: test_external_tls:
; ARM-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+8))
; ARM-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8))
; ARM-MOVT-PIC: [[PCREL_LOC]]:
; ARM-MOVT-PIC-NEXT: ldr r0, [pc, r[[EXTGOT]]]
; ARM-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]]
; ARM-MOVT-PIC: ldr r0, [r0]
; ARM-LIT-PIC-LABEL: test_external_tls:
; ARM-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
; ARM-LIT-PIC-NEXT: add r[[EXTGOT]], pc
; ARM-LIT-PIC: ldr r0, [r[[EXTGOT]]]
; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-LIT-PIC: blx [[TLV_GET_ADDR]]
; ARM-LIT-PIC: ldr r0, [r0]
; ARM-LIT-PIC: [[EXTERNAL_VAR_ADDR]]:
; ARM-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8)
; ARM-MOVT-STATIC-LABEL: test_external_tls:
; ARM-MOVT-STATIC: movw r0, :lower16:_external_tls_var
; ARM-MOVT-STATIC: movt r0, :upper16:_external_tls_var
; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]]
; ARM-MOVT-STATIC: ldr r0, [r0]
; ARM-LIT-STATIC-LABEL: test_external_tls:
; ARM-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]]
; ARM-LIT-STATIC: ldr r0, [r0]
; ARM-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]:
; ARM-LIT-STATIC-NEXT: .long _external_tls_var
%val = load i32, i32* @external_tls_var, align 4
ret i32 %val
}