[TLS] New lower emutls pass, fix linkage bugs.

The previous implementation in http://reviews.llvm.org/D10522 created external references to __emutls_v.* variables. Such references are inaccurate and cannot be handled by all linkers, e.g. Android dynamic and gold linkers for aarch64. Now a new LowerEmuTLS pass goes through all global variables and adds __emutls_v.* and __emutls_t.* variables. These __emutls* variables have the same linkage and visibility as the associated user-defined TLS variable. Also removed old code that dumps __emutls* variables in AsmPrinter.cpp, and updated TLS unit tests. Differential Revision: http://reviews.llvm.org/D15300 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257718 91177308-0d34-0410-b5e6-96231b3b80d8
2024-11-27 21:50:40 +00:00 · 2016-01-13 23:56:37 +00:00 · 2016-01-13 23:56:37 +00:00 · 08ba2ca688
commit 08ba2ca688
parent c40a833c41
14 changed files with 372 additions and 139 deletions
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@ -238,11 +238,6 @@ public:
  ///
  virtual void EmitJumpTableInfo();

-  /// Emit the control variable for an emulated TLS variable.
-  virtual void EmitEmulatedTLSControlVariable(const GlobalVariable *GV,
-                                              MCSymbol *EmittedSym,
-                                              bool AllZeroInitValue);
-
  /// Emit the specified global variable to the .s file.
  virtual void EmitGlobalVariable(const GlobalVariable *GV);

--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@ -182,6 +182,7 @@ void initializeLowerExpectIntrinsicPass(PassRegistry&);
 void initializeLowerIntrinsicsPass(PassRegistry&);
 void initializeLowerInvokePass(PassRegistry&);
 void initializeLowerSwitchPass(PassRegistry&);
+void initializeLowerEmuTLSPass(PassRegistry&);
 void initializeMachineBlockFrequencyInfoPass(PassRegistry&);
 void initializeMachineBlockPlacementPass(PassRegistry&);
 void initializeMachineBlockPlacementStatsPass(PassRegistry&);
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@ -380,6 +380,13 @@ FunctionPass *createSinkingPass();
 //
 Pass *createLowerAtomicPass();

+//===----------------------------------------------------------------------===//
+//
+// LowerEmuTLS - This pass generates __emutls_[vt].xyz variables for all
+// TLS variables for the emulated TLS model.
+//
+ModulePass *createLowerEmuTLSPass(const TargetMachine *TM);
+
 //===----------------------------------------------------------------------===//
 //
 // ValuePropagation - Propagate CFG-derived value information
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -347,51 +347,17 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
  return TM.getSymbol(GV, *Mang);
 }

-static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) {
-  return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName());
-}
-
-static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) {
-  return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName());
-}
-
-/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable.
-void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV,
-                                                MCSymbol *EmittedSym,
-                                                bool AllZeroInitValue) {
-  MCSection *TLSVarSection = getObjFileLowering().getDataSection();
-  OutStreamer->SwitchSection(TLSVarSection);
-  MCSymbol *GVSym = getSymbol(GV);
-  EmitLinkage(GV, EmittedSym);  // same linkage as GV
-  const DataLayout &DL = GV->getParent()->getDataLayout();
-  uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
-  unsigned AlignLog = getGVAlignmentLog2(GV, DL);
-  unsigned WordSize = DL.getPointerSize();
-  unsigned Alignment = DL.getPointerABIAlignment();
-  EmitAlignment(Log2_32(Alignment));
-  OutStreamer->EmitLabel(EmittedSym);
-  OutStreamer->EmitIntValue(Size, WordSize);
-  OutStreamer->EmitIntValue((1 << AlignLog), WordSize);
-  OutStreamer->EmitIntValue(0, WordSize);
-  if (GV->hasInitializer() && !AllZeroInitValue) {
-    OutStreamer->EmitSymbolValue(
-        getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize);
-  } else
-    OutStreamer->EmitIntValue(0, WordSize);
-  if (MAI->hasDotTypeDotSizeDirective())
-    OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym),
-                             MCConstantExpr::create(4 * WordSize, OutContext));
-  OutStreamer->AddBlankLine();  // End of the __emutls_v.* variable.
-}
-
 /// EmitGlobalVariable - Emit the specified global variable to the .s file.
 void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
-  bool IsEmuTLSVar =
-      GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal &&
-      TM.Options.EmulatedTLS;
+  bool IsEmuTLSVar = TM.Options.EmulatedTLS && GV->isThreadLocal();
  assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) &&
         "No emulated TLS variables in the common section");

+  // Never emit TLS variable xyz in emulated TLS model.
+  // The initialization value is in __emutls_t.xyz instead of xyz.
+  if (IsEmuTLSVar)
+    return;
+
  if (GV->hasInitializer()) {
    // Check to see if this is a special global used by LLVM, if so, emit it.
    if (EmitSpecialLLVMGlobal(GV))
@ -402,7 +368,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
    if (GlobalGOTEquivs.count(getSymbol(GV)))
      return;

-    if (isVerbose() && !IsEmuTLSVar) {
+    if (isVerbose()) {
      // When printing the control variable __emutls_v.*,
      // we don't need to print the original TLS variable name.
      GV->printAsOperand(OutStreamer->GetCommentOS(),
@ -412,8 +378,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
  }

  MCSymbol *GVSym = getSymbol(GV);
-  MCSymbol *EmittedSym = IsEmuTLSVar ?
-      getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym;
+  MCSymbol *EmittedSym = GVSym;
  // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes.
  // GV's or GVSym's attributes will be used for the EmittedSym.

@ -440,18 +405,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
  // sections and expected to be contiguous (e.g. ObjC metadata).
  unsigned AlignLog = getGVAlignmentLog2(GV, DL);

-  bool AllZeroInitValue = false;
-  const Constant *InitValue = GV->getInitializer();
-  if (isa<ConstantAggregateZero>(InitValue))
-    AllZeroInitValue = true;
-  else {
-    const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
-    if (InitIntValue && InitIntValue->isZero())
-      AllZeroInitValue = true;
-  }
-  if (IsEmuTLSVar)
-    EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue);
-
  for (const HandlerInfo &HI : Handlers) {
    NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
    HI.Handler->setSymbolSize(GVSym, Size);
@ -459,8 +412,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {

  // Handle common and BSS local symbols (.lcomm).
  if (GVKind.isCommon() || GVKind.isBSSLocal()) {
-    assert(!(IsEmuTLSVar && GVKind.isCommon()) &&
-           "No emulated TLS variables in the common section");
    if (Size == 0) Size = 1;   // .comm Foo, 0 is undefined, avoid it.
    unsigned Align = 1 << AlignLog;

@ -505,21 +456,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
    return;
  }

-  if (IsEmuTLSVar && AllZeroInitValue)
-    return;  // No need of initialization values.
+  MCSymbol *EmittedInitSym = GVSym;

-  MCSymbol *EmittedInitSym = IsEmuTLSVar ?
-      getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym;
-  // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes.
-  // GV's or GVSym's attributes will be used for the EmittedInitSym.
-
-  MCSection *TheSection = IsEmuTLSVar ?
-      getObjFileLowering().getReadOnlySection() :
+  MCSection *TheSection =
      getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);

  // Handle the zerofill directive on darwin, which is a special form of BSS
  // emission.
-  if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) {
+  if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
    if (Size == 0) Size = 1;  // zerofill of 0 bytes is undefined.

    // .globl _foo
@ -539,7 +483,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
  // TLOF class.  This will also make it more obvious that stuff like
  // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
  // specific code.
-  if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) {
+  if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
    // Emit the .tbss symbol
    MCSymbol *MangSym =
      OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
@ -583,9 +527,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {

  OutStreamer->SwitchSection(TheSection);

-  // emutls_t.* symbols are only used in the current compilation unit.
-  if (!IsEmuTLSVar)
-    EmitLinkage(GV, EmittedInitSym);
+  EmitLinkage(GV, EmittedInitSym);
  EmitAlignment(AlignLog, GV);

  OutStreamer->EmitLabel(EmittedInitSym);
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@ -53,6 +53,7 @@ add_llvm_library(LLVMCodeGen
  LiveStackAnalysis.cpp
  LiveVariables.cpp
  LocalStackSlotAllocation.cpp
+  LowerEmuTLS.cpp
  MachineBasicBlock.cpp
  MachineBlockFrequencyInfo.cpp
  MachineBlockPlacement.cpp
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@ -94,6 +94,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
                        AnalysisID StartAfter, AnalysisID StopAfter,
                        MachineFunctionInitializer *MFInitializer = nullptr) {

+  // When in emulated TLS mode, add the LowerEmuTLS pass.
+  if (TM->Options.EmulatedTLS)
+    PM.add(createLowerEmuTLSPass(TM));
+
  // Add internal analysis passes from the target machine.
  PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));

--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@ -0,0 +1,159 @@
+//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is required for targets depending on libgcc style
+// emulated thread local storage variables. For every defined TLS variable xyz,
+// an __emutls_v.xyz is generated. If xyz has a non-zero initial value,
+// an __emutls_t.xyz is also generated.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loweremutls"
+
+namespace {
+
+class LowerEmuTLS : public ModulePass {
+  const TargetMachine *TM; // Null when default-constructed; gates runOnModule.
+public:
+  static char ID; // Pass identification, replacement for typeid
+  explicit LowerEmuTLS() : ModulePass(ID), TM(nullptr) { }
+  explicit LowerEmuTLS(const TargetMachine *TM)
+      : ModulePass(ID), TM(TM) {
+    initializeLowerEmuTLSPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnModule(Module &M) override;
+private:
+  bool addEmuTlsVar(Module &M, const GlobalVariable *GV);
+  static void copyLinkageVisibility(Module &M,
+                                    const GlobalVariable *from,
+                                    GlobalVariable *to) {
+    to->setLinkage(from->getLinkage());
+    to->setVisibility(from->getVisibility());
+    if (from->hasComdat()) { // 'to' gets its own comdat, named after itself.
+      to->setComdat(M.getOrInsertComdat(to->getName()));
+      to->getComdat()->setSelectionKind(from->getComdat()->getSelectionKind());
+    }
+  }
+};
+}
+
+char LowerEmuTLS::ID = 0;
+
+INITIALIZE_PASS(LowerEmuTLS, "loweremutls",
+                "Add __emutls_[vt]. variables for emulated TLS model",
+                false, false) // CFG-only: no; analysis pass: no.
+
+ModulePass *llvm::createLowerEmuTLSPass(const TargetMachine *TM) {
+  return new LowerEmuTLS(TM); // TM's Options.EmulatedTLS gates runOnModule.
+}
+
+bool LowerEmuTLS::runOnModule(Module &M) {
+  if (!TM || !TM->Options.EmulatedTLS)
+    return false; // No TargetMachine, or emulated TLS is not enabled.
+
+  bool Changed = false;
+  SmallVector<const GlobalVariable*, 8> TlsVars;
+  for (const auto &G : M.globals()) {
+    if (G.isThreadLocal())
+      TlsVars.push_back(&G);
+  }
+  for (const auto *G : TlsVars) // addEmuTlsVar adds globals to M.
+    Changed |= addEmuTlsVar(M, G); // True when a new __emutls_v.* was added.
+  return Changed;
+}
+
+bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
+  LLVMContext &C = M.getContext();
+  PointerType *VoidPtrType = Type::getInt8PtrTy(C);
+
+  std::string EmuTlsVarName = ("__emutls_v." + GV->getName()).str();
+  GlobalVariable *EmuTlsVar = M.getNamedGlobal(EmuTlsVarName);
+  if (EmuTlsVar)
+    return false;  // Already added earlier; M is unchanged.
+
+  const DataLayout &DL = M.getDataLayout();
+  Constant *NullPtr = ConstantPointerNull::get(VoidPtrType);
+
+  // InitValue is GV's initializer, or null if absent or recognized as zero.
+  const Constant *InitValue = nullptr;
+  if (GV->hasInitializer()) {
+    InitValue = GV->getInitializer();
+    const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue);
+    // When GV's initializer is a zero ConstantInt or ConstantAggregateZero,
+    // omit EmuTlsTmplVar and let the emutls library reset new TLS variables.
+    if (isa<ConstantAggregateZero>(InitValue) ||
+        (InitIntValue && InitIntValue->isZero()))
+      InitValue = nullptr;
+  }
+
+  // Create the __emutls_v. symbol, whose type has 4 fields:
+  //     word size;   // size of GV in bytes
+  //     word align;  // alignment of GV
+  //     void *ptr;   // initialized to 0; set at run time per thread.
+  //     void *templ; // 0 or pointer to __emutls_t.*
+  // sizeof(word) should be the same as sizeof(void*) on target.
+  IntegerType *WordType = DL.getIntPtrType(C);
+  PointerType *InitPtrType = InitValue ?
+      PointerType::getUnqual(InitValue->getType()) : VoidPtrType;
+  Type *ElementTypes[4] = {WordType, WordType, VoidPtrType, InitPtrType};
+  ArrayRef<Type*> ElementTypeArray(ElementTypes, 4);
+  StructType *EmuTlsVarType = StructType::create(ElementTypeArray);
+  EmuTlsVar = cast<GlobalVariable>(
+      M.getOrInsertGlobal(EmuTlsVarName, EmuTlsVarType));
+  copyLinkageVisibility(M, GV, EmuTlsVar);
+
+  // Define "__emutls_t.*" and "__emutls_v.*" only if GV is defined.
+  if (!GV->hasInitializer())
+    return true; // GV is only declared; leave __emutls_v.* undefined too.
+
+  Type *GVType = GV->getValueType();
+  unsigned GVAlignment = GV->getAlignment();
+  if (!GVAlignment) {
+    // When LLVM IR declares a variable without alignment, use
+    // the ABI default alignment for the type.
+    GVAlignment = DL.getABITypeAlignment(GVType);
+  }
+
+  // Define "__emutls_t.*" only when there is a non-null InitValue.
+  GlobalVariable *EmuTlsTmplVar = nullptr;
+  if (InitValue) {
+    std::string EmuTlsTmplName = ("__emutls_t." + GV->getName()).str();
+    EmuTlsTmplVar = dyn_cast_or_null<GlobalVariable>(
+        M.getOrInsertGlobal(EmuTlsTmplName, GVType));
+    assert(EmuTlsTmplVar && "Failed to create emualted TLS initializer");
+    EmuTlsTmplVar->setConstant(true);
+    EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
+    EmuTlsTmplVar->setAlignment(GVAlignment);
+    copyLinkageVisibility(M, GV, EmuTlsTmplVar);
+  }
+
+  // Define "__emutls_v.*" with initializer and alignment.
+  Constant *ElementValues[4] = {
+      ConstantInt::get(WordType, DL.getTypeStoreSize(GVType)),
+      ConstantInt::get(WordType, GVAlignment),
+      NullPtr, EmuTlsTmplVar ? EmuTlsTmplVar : NullPtr
+  };
+  ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
+  EmuTlsVar->setInitializer(
+      ConstantStruct::get(EmuTlsVarType, ElementValueArray));
+  unsigned MaxAlignment = std::max(
+      DL.getABITypeAlignment(WordType),
+      DL.getABITypeAlignment(VoidPtrType));
+  EmuTlsVar->setAlignment(MaxAlignment);
+  return true;
+}
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@ -3057,9 +3057,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
-  if (!EmuTlsVar)
-    EmuTlsVar = dyn_cast_or_null<GlobalVariable>(
-        VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType));
+  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);
--- a/test/CodeGen/AArch64/emutls.ll
+++ b/test/CodeGen/AArch64/emutls.ll
@ -54,63 +54,160 @@ entry:
  ret i32* @i1
 }

+define i32 @f5() nounwind {
+; ARM64-LABEL: f5:
+; ARM64:        adrp x0, __emutls_v.i3
+; ARM64:        add x0, x0, :lo12:__emutls_v.i3
+; ARM64:        bl __emutls_get_address
+; ARM64-NEXT:   ldr w0, [x0]
+
+entry:
+  %tmp1 = load i32, i32* @i3
+  ret i32 %tmp1
+}
+
+define i32* @f6() {
+; ARM64-LABEL: f6:
+; ARM64:        adrp x0, __emutls_v.i3
+; ARM64:        add x0, x0, :lo12:__emutls_v.i3
+; ARM64-NEXT:   bl __emutls_get_address
+; ARM64-NEXT:   ldp x29, x30, [sp]
+
+entry:
+  ret i32* @i3
+}
+
+; Simple test of comdat __thread variables.
+; template <class T> struct A { static __thread T x; };
+; template <class T> T __thread A<T>::x;
+; int getIntX() { return A<int>::x++; }
+; float getFloatX() { return A<float>::x++; }
+
+$_ZN1AIiE1xE = comdat any
+$_ZN1AIfE1xE = comdat any
+@_ZN1AIiE1xE = linkonce_odr thread_local global i32 0, comdat, align 4
+@_ZN1AIfE1xE = linkonce_odr thread_local global float 0.000000e+00, comdat, align 4
+
+define i32 @_Z7getIntXv() {
+; ARM64-LABEL: _Z7getIntXv:
+; ARM64:        adrp x0, :got:__emutls_v._ZN1AIiE1xE
+; ARM64:        ldr x0, [x0, :got_lo12:__emutls_v._ZN1AIiE1xE]
+; ARM64-NEXT:   bl __emutls_get_address
+; ARM64-NEXT:   ldr {{.*}}, [x0]
+; ARM64:        add
+; ARM64:        str {{.*}}, [x0]
+
+entry:
+  %0 = load i32, i32* @_ZN1AIiE1xE, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @_ZN1AIiE1xE, align 4
+  ret i32 %0
+}
+
+define float @_Z9getFloatXv() {
+; ARM64-LABEL: _Z9getFloatXv:
+; ARM64:        adrp x0, :got:__emutls_v._ZN1AIfE1xE
+; ARM64:        ldr x0, [x0, :got_lo12:__emutls_v._ZN1AIfE1xE]
+; ARM64-NEXT:   bl __emutls_get_address
+; ARM64-NEXT:   ldr {{.*}}, [x0]
+; ARM64:        fadd s{{.*}}, s
+; ARM64:        str s{{.*}}, [x0]
+
+entry:
+  %0 = load float, float* @_ZN1AIfE1xE, align 4
+  %inc = fadd float %0, 1.000000e+00
+  store float %inc, float* @_ZN1AIfE1xE, align 4
+  ret float %0
+}
+
+
 ;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t.

-; ARM64       .section .data.rel.local,
+; ARM64:      .data{{$}}
+; ARM64:      .globl __emutls_v.i1
 ; ARM64-LABEL: __emutls_v.i1:
 ; ARM64-NEXT: .xword 4
 ; ARM64-NEXT: .xword 4
 ; ARM64-NEXT: .xword 0
 ; ARM64-NEXT: .xword __emutls_t.i1

-; ARM64       .section .rodata,
+; ARM64:      .section .rodata,
 ; ARM64-LABEL: __emutls_t.i1:
 ; ARM64-NEXT: .word 15

 ; ARM64-NOT:   __emutls_v.i2

-; ARM64       .section .data.rel.local,
+; ARM64:      .data{{$}}
+; ARM64-NOT:  .globl
 ; ARM64-LABEL: __emutls_v.i3:
 ; ARM64-NEXT: .xword 4
 ; ARM64-NEXT: .xword 4
 ; ARM64-NEXT: .xword 0
 ; ARM64-NEXT: .xword __emutls_t.i3

-; ARM64       .section .rodata,
+; ARM64:      .section .rodata,
 ; ARM64-LABEL: __emutls_t.i3:
 ; ARM64-NEXT: .word 15

-; ARM64       .section .data.rel.local,
+; ARM64:      .hidden __emutls_v.i4
+; ARM64:      .data{{$}}
+; ARM64:      .globl __emutls_v.i4
 ; ARM64-LABEL: __emutls_v.i4:
 ; ARM64-NEXT: .xword 4
 ; ARM64-NEXT: .xword 4
 ; ARM64-NEXT: .xword 0
 ; ARM64-NEXT: .xword __emutls_t.i4

-; ARM64       .section .rodata,
+; ARM64:      .section .rodata,
 ; ARM64-LABEL: __emutls_t.i4:
 ; ARM64-NEXT: .word 15

 ; ARM64-NOT:   __emutls_v.i5:
-; ARM64       .hidden __emutls_v.i5
+; ARM64:      .hidden __emutls_v.i5
 ; ARM64-NOT:   __emutls_v.i5:

-; ARM64       .section .data.rel.local,
+; ARM64:      .data{{$}}
+; ARM64:      .globl __emutls_v.s1
 ; ARM64-LABEL: __emutls_v.s1:
 ; ARM64-NEXT: .xword 2
 ; ARM64-NEXT: .xword 2
 ; ARM64-NEXT: .xword 0
 ; ARM64-NEXT: .xword __emutls_t.s1

-; ARM64       .section .rodata,
+; ARM64:      .section .rodata,
 ; ARM64-LABEL: __emutls_t.s1:
 ; ARM64-NEXT: .hword 15

-; ARM64       .section .data.rel.local,
+; ARM64:      .data{{$}}
 ; ARM64-LABEL: __emutls_v.b1:
 ; ARM64-NEXT: .xword 1
 ; ARM64-NEXT: .xword 1
 ; ARM64-NEXT: .xword 0
 ; ARM64-NEXT: .xword 0

-; ARM64-NOT:  __emutls_t.b1
+; ARM64-NOT:   __emutls_t.b1
+
+; ARM64:      .section .data.__emutls_v._ZN1AIiE1xE,{{.*}},__emutls_v._ZN1AIiE1xE,comdat
+; ARM64:      .weak __emutls_v._ZN1AIiE1xE
+; ARM64:      .align 3
+; ARM64-LABEL: __emutls_v._ZN1AIiE1xE:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword 0
+
+; ARM64:      .section .data.__emutls_v._ZN1AIfE1xE,{{.*}},__emutls_v._ZN1AIfE1xE,comdat
+; ARM64:      .weak __emutls_v._ZN1AIfE1xE
+; ARM64:      .align 3
+; ARM64-LABEL: __emutls_v._ZN1AIfE1xE:
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 4
+; ARM64-NEXT: .xword 0
+; ARM64-NEXT: .xword __emutls_t._ZN1AIfE1xE
+
+; ARM64:      .section .rodata.__emutls_t._ZN1AIfE1xE,{{.*}},__emutls_t._ZN1AIfE1xE,comdat
+; ARM64:      .weak __emutls_t._ZN1AIfE1xE
+; ARM64:      .align 2
+; ARM64-LABEL: __emutls_t._ZN1AIfE1xE:
+; ARM64-NEXT: .word 0
+; ARM64-NEXT: .size
--- a/test/CodeGen/AArch64/emutls_generic.ll
+++ b/test/CodeGen/AArch64/emutls_generic.ll
@ -37,6 +37,8 @@ entry:
 ; ARM_64:      __emutls_get_address
 ; ARM_64-NOT:   __emutls_t.external_x
 ; ARM_64-NOT:   __emutls_v.external_x:
+; ARM_64:        .data{{$}}
+; ARM_64:        .globl __emutls_v.external_y
 ; ARM_64:        .align 3
 ; ARM_64-LABEL:  __emutls_v.external_y:
 ; ARM_64-NEXT:   .xword 1
@ -47,7 +49,8 @@ entry:
 ; ARM_64:        .section .rodata,
 ; ARM_64-LABEL:  __emutls_t.external_y:
 ; ARM_64-NEXT:   .byte 7
-; ARM_64:        .data
+; ARM_64:        .data{{$}}
+; ARM_64-NOT:    .globl __emutls_v
 ; ARM_64:        .align 3
 ; ARM_64-LABEL:  __emutls_v.internal_y:
 ; ARM_64-NEXT:   .xword 8
--- a/test/CodeGen/ARM/emutls.ll
+++ b/test/CodeGen/ARM/emutls.ll
@ -13,6 +13,7 @@ define i32 @my_get_xyz() {
 ; ARM32:        ldr r0, [pc, r0]
 ; ARM32-NEXT:   bl my_emutls_get_address(PLT)
 ; ARM32-NEXT:   ldr r0, [r0]
+; ARM32:        .long my_emutls_v_xyz(GOT_PREL)

 entry:
  %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*))
@ -35,6 +36,7 @@ define i32 @f1() {
 ; ARM32:        ldr r0, [pc, r0]
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   ldr r0, [r0]
+; ARM32:        .long __emutls_v.i1(GOT_PREL)

 entry:
  %tmp1 = load i32, i32* @i1
@ -47,6 +49,7 @@ define i32* @f2() {
 ; ARM32:        ldr r0, [pc, r0]
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   pop
+; ARM32:        .long __emutls_v.i1(GOT_PREL)

 entry:
  ret i32* @i1
@ -58,6 +61,7 @@ define i32 @f3() nounwind {
 ; ARM32:        ldr r0, [pc, r0]
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   ldr r0, [r0]
+; ARM32:        .long __emutls_v.i2(GOT_PREL)

 entry:
  %tmp1 = load i32, i32* @i2
@ -70,6 +74,7 @@ define i32* @f4() {
 ; ARM32:        ldr r0, [pc, r0]
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   pop
+; ARM32:        .long __emutls_v.i2(GOT_PREL)

 entry:
  ret i32* @i2
@ -78,9 +83,10 @@ entry:
 define i32 @f5() nounwind {
 ; ARM32-LABEL: f5:
 ; ARM32:        ldr r0,
-; ARM32:        ldr r0, [pc, r0]
+; ARM32:        add	r0, pc, r0
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   ldr r0, [r0]
+; ARM32:        .long __emutls_v.i3-

 entry:
  %tmp1 = load i32, i32* @i3
@ -90,9 +96,10 @@ entry:
 define i32* @f6() {
 ; ARM32-LABEL: f6:
 ; ARM32:        ldr r0,
-; ARM32:        ldr r0, [pc, r0]
+; ARM32:        add	r0, pc, r0
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   pop
+; ARM32:        .long __emutls_v.i3-

 entry:
  ret i32* @i3
@ -101,9 +108,10 @@ entry:
 define i32 @f7() {
 ; ARM32-LABEL: f7:
 ; ARM32:        ldr r0,
-; ARM32:        ldr r0, [pc, r0]
+; ARM32:        add r0, pc, r0
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   ldr r0, [r0]
+; ARM32:        .long __emutls_v.i4-(.LPC

 entry:
  %tmp1 = load i32, i32* @i4
@ -113,9 +121,10 @@ entry:
 define i32* @f8() {
 ; ARM32-LABEL: f8:
 ; ARM32:        ldr r0,
-; ARM32:        ldr r0, [pc, r0]
+; ARM32:        add r0, pc, r0
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   pop
+; ARM32:        .long __emutls_v.i4-(.LPC

 entry:
  ret i32* @i4
@ -124,7 +133,7 @@ entry:
 define i32 @f9() {
 ; ARM32-LABEL: f9:
 ; ARM32:        ldr r0,
-; ARM32:        ldr r0, [pc, r0]
+; ARM32:        add r0, pc, r0
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   ldr r0, [r0]

@ -136,7 +145,7 @@ entry:
 define i32* @f10() {
 ; ARM32-LABEL: f10:
 ; ARM32:        ldr r0,
-; ARM32:        ldr r0, [pc, r0]
+; ARM32:        add r0, pc, r0
 ; ARM32-NEXT:   bl __emutls_get_address(PLT)
 ; ARM32-NEXT:   pop

@ -198,46 +207,50 @@ entry:

 ;;;;;;;;;;;;;; 32-bit __emutls_v. and __emutls_t.

-; ARM32       .section .data.rel.local,
+; ARM32:      .data{{$}}
+; ARM32:      .globl __emutls_v.i1
 ; ARM32-LABEL: __emutls_v.i1:
 ; ARM32-NEXT: .long 4
 ; ARM32-NEXT: .long 4
 ; ARM32-NEXT: .long 0
 ; ARM32-NEXT: .long __emutls_t.i1

-; ARM32       .section .rodata,
+; ARM32:      .section .rodata,
 ; ARM32-LABEL: __emutls_t.i1:
 ; ARM32-NEXT: .long 15

 ; ARM32-NOT:   __emutls_v.i2

-; ARM32       .section .data.rel.local,
+; ARM32:      .data{{$}}
+; ARM32-NOT:  .globl
 ; ARM32-LABEL: __emutls_v.i3:
 ; ARM32-NEXT: .long 4
 ; ARM32-NEXT: .long 4
 ; ARM32-NEXT: .long 0
 ; ARM32-NEXT: .long __emutls_t.i3

-; ARM32       .section .rodata,
+; ARM32:      .section .rodata,
 ; ARM32-LABEL: __emutls_t.i3:
 ; ARM32-NEXT: .long 15

-; ARM32       .section .data.rel.local,
+; ARM32:      .data{{$}}
+; ARM32:      .globl __emutls_v.i4
 ; ARM32-LABEL: __emutls_v.i4:
 ; ARM32-NEXT: .long 4
 ; ARM32-NEXT: .long 4
 ; ARM32-NEXT: .long 0
 ; ARM32-NEXT: .long __emutls_t.i4

-; ARM32       .section .rodata,
+; ARM32:      .section .rodata,
 ; ARM32-LABEL: __emutls_t.i4:
 ; ARM32-NEXT: .long 15

 ; ARM32-NOT:   __emutls_v.i5:
-; ARM32       .hidden __emutls_v.i5
+; ARM32:      .hidden __emutls_v.i5
 ; ARM32-NOT:   __emutls_v.i5:

-; ARM32 .section .data.rel.local,
+; ARM32:      .data{{$}}
+; ARM32:      .globl __emutls_v.s1
 ; ARM32-LABEL: __emutls_v.s1:
 ; ARM32-NEXT: .long 2
 ; ARM32-NEXT: .long 2
@ -248,7 +261,8 @@ entry:
 ; ARM32-LABEL: __emutls_t.s1:
 ; ARM32-NEXT: .short 15

-; ARM32 .section .data.rel.local,
+; ARM32:      .data{{$}}
+; ARM32:      .globl __emutls_v.b1
 ; ARM32-LABEL: __emutls_v.b1:
 ; ARM32-NEXT: .long 1
 ; ARM32-NEXT: .long 1
--- a/test/CodeGen/ARM/emutls_generic.ll
+++ b/test/CodeGen/ARM/emutls_generic.ll
@ -35,11 +35,12 @@ entry:
 ; ARM_32:        bl __emutls_get_address
 ; ARM_32:        .long __emutls_v.external_y
 ; ARM_32-LABEL:  get_internal_y:
-; ARM_32:      bl __emutls_get_address
-; ARM_32:      .long __emutls_v.internal_y
-; ARM_32-NOT:   __emutls_t.external_x
-; ARM_32-NOT:   __emutls_v.external_x:
-; ARM_32:        .data
+; ARM_32:        bl __emutls_get_address
+; ARM_32:        .long __emutls_v.internal_y
+; ARM_32-NOT:    __emutls_t.external_x
+; ARM_32-NOT:    __emutls_v.external_x:
+; ARM_32:        .data{{$}}
+; ARM_32:        .globl __emutls_v.external_y
 ; ARM_32:        .align 2
 ; ARM_32-LABEL:  __emutls_v.external_y:
 ; ARM_32-NEXT:   .long 1
@ -49,7 +50,8 @@ entry:
 ; ARM_32:        .section .rodata,
 ; ARM_32-LABEL:  __emutls_t.external_y:
 ; ARM_32-NEXT:   .byte 7
-; ARM_32:        .data
+; ARM_32:        .data{{$}}
+; ARM_32-NOT:    .globl
 ; ARM_32:        .align 2
 ; ARM_32-LABEL:  __emutls_v.internal_y:
 ; ARM_32-NEXT:   .long 8
--- a/test/CodeGen/X86/emutls-pic.ll
+++ b/test/CodeGen/X86/emutls-pic.ll
@ -82,28 +82,29 @@ entry:
 }

 ; X32-LABEL: f5:
-; X32:      movl __emutls_v.j@GOT(%ebx), %eax
+; X32:      leal __emutls_v.j@GOTOFF(%ebx), %eax
 ; X32-NEXT: movl %eax, (%esp)
 ; X32-NEXT: calll __emutls_get_address@PLT
 ; X32-NEXT: movl (%eax), %esi
-; X32-NEXT: movl __emutls_v.k@GOT(%ebx), %eax
+; X32-NEXT: leal __emutls_v.k@GOTOFF(%ebx), %eax
 ; X32-NEXT: movl %eax, (%esp)
 ; X32-NEXT: calll __emutls_get_address@PLT
 ; X32-NEXT: addl (%eax), %esi
 ; X32-NEXT: movl %esi, %eax

 ; X64-LABEL: f5:
-; X64:      movq __emutls_v.j@GOTPCREL(%rip), %rdi
+; X64:      leaq __emutls_v.j(%rip), %rdi
 ; X64-NEXT: callq __emutls_get_address@PLT
 ; X64-NEXT: movl (%rax), %ebx
-; X64-NEXT: movq __emutls_v.k@GOTPCREL(%rip), %rdi
+; X64-NEXT: leaq __emutls_v.k(%rip), %rdi
 ; X64-NEXT: callq __emutls_get_address@PLT
 ; X64-NEXT: addl (%rax), %ebx
 ; X64-NEXT: movl %ebx, %eax

 ;;;;; 32-bit targets

-; X32:      .data
+; X32:      .data{{$}}
+; X32:      .globl __emutls_v.i
 ; X32-LABEL: __emutls_v.i:
 ; X32-NEXT: .long 4
 ; X32-NEXT: .long 4
@ -114,7 +115,8 @@ entry:
 ; X32-LABEL: __emutls_t.i:
 ; X32-NEXT: .long 15

-; X32:      .data
+; X32:      .data{{$}}
+; X32-NOT:  .globl
 ; X32-LABEL: __emutls_v.j:
 ; X32-NEXT: .long 4
 ; X32-NEXT: .long 4
@ -125,7 +127,8 @@ entry:
 ; X32-LABEL: __emutls_t.j:
 ; X32-NEXT: .long 42

-; X32:      .data
+; X32:      .data{{$}}
+; X32-NOT:  .globl
 ; X32-LABEL: __emutls_v.k:
 ; X32-NEXT: .long 4
 ; X32-NEXT: .long 8
@ -136,7 +139,8 @@ entry:

 ;;;;; 64-bit targets

-; X64:      .data
+; X64:      .data{{$}}
+; X64:      .globl __emutls_v.i
 ; X64-LABEL: __emutls_v.i:
 ; X64-NEXT: .quad 4
 ; X64-NEXT: .quad 4
@ -147,7 +151,8 @@ entry:
 ; X64-LABEL: __emutls_t.i:
 ; X64-NEXT: .long 15

-; X64:      .data
+; X64:      .data{{$}}
+; X64-NOT:  .globl
 ; X64-LABEL: __emutls_v.j:
 ; X64-NEXT: .quad 4
 ; X64-NEXT: .quad 4
@ -158,7 +163,8 @@ entry:
 ; X64-LABEL: __emutls_t.j:
 ; X64-NEXT: .long 42

-; X64:      .data
+; X64:      .data{{$}}
+; X64-NOT:  .globl
 ; X64-LABEL: __emutls_v.k:
 ; X64-NEXT: .quad 4
 ; X64-NEXT: .quad 8
--- a/test/CodeGen/X86/emutls_generic.ll
+++ b/test/CodeGen/X86/emutls_generic.ll
@ -45,17 +45,18 @@ entry:
 ; CHECK:       __emutls_t.internal_y

 ; X86_32-LABEL:  get_external_x:
-; X86_32:        movl __emutls_v.external_x
+; X86_32:        movl __emutls_v.external_x@GOT(%ebx)
 ; X86_32:        calll __emutls_get_address
 ; X86_32-LABEL:  get_external_y:
-; X86_32:        movl __emutls_v.external_y
+; X86_32:        movl __emutls_v.external_y@GOT(%ebx)
 ; X86_32:        calll __emutls_get_address
 ; X86_32-LABEL:  get_internal_y:
-; X86_32:      movl __emutls_v.internal_y
-; X86_32:      calll __emutls_get_address
-; X86_32-NOT:   __emutls_t.external_x
-; X86_32-NOT:   __emutls_v.external_x:
-; X86_32:        .data
+; X86_32:        leal __emutls_v.internal_y@GOTOFF(%ebx)
+; X86_32:        calll __emutls_get_address
+; X86_32-NOT:    __emutls_t.external_x
+; X86_32-NOT:    __emutls_v.external_x:
+; X86_32:        .data{{$}}
+; X86_32:        .globl __emutls_v.external_y
 ; X86_32:        .align 4
 ; X86_32-LABEL:  __emutls_v.external_y:
 ; X86_32-NEXT:   .long 1
@ -65,7 +66,8 @@ entry:
 ; X86_32:        .section .rodata,
 ; X86_32-LABEL:  __emutls_t.external_y:
 ; X86_32-NEXT:   .byte 7
-; X86_32:        .data
+; X86_32:        .data{{$}}
+; X86_32-NOT:    .globl
 ; X86_32:        .align 4
 ; X86_32-LABEL:  __emutls_v.internal_y:
 ; X86_32-NEXT:   .long 8
@ -75,16 +77,17 @@ entry:
 ; X86_32-LABEL:  __emutls_t.internal_y:
 ; X86_32-NEXT:   .quad 9
 ; X86_64-LABEL:  get_external_x:
-; X86_64:      __emutls_v.external_x
-; X86_64:      __emutls_get_address
+; X86_64:        __emutls_v.external_x@GOTPCREL(%rip)
+; X86_64:        __emutls_get_address
 ; X86_64-LABEL:  get_external_y:
-; X86_64:      __emutls_v.external_y
-; X86_64:      __emutls_get_address
+; X86_64:        __emutls_v.external_y@GOTPCREL(%rip)
+; X86_64:        __emutls_get_address
 ; X86_64-LABEL:  get_internal_y:
-; X86_64:      __emutls_v.internal_y
-; X86_64:      __emutls_get_address
-; X86_64-NOT:   __emutls_t.external_x
-; X86_64-NOT:   __emutls_v.external_x:
+; X86_64:        __emutls_v.internal_y(%rip)
+; X86_64:        __emutls_get_address
+; X86_64-NOT:    __emutls_t.external_x
+; X86_64-NOT:    __emutls_v.external_x:
+; X86_64:        .globl __emutls_v.external_y
 ; X86_64:        .align 8
 ; X86_64-LABEL:  __emutls_v.external_y:
 ; X86_64-NEXT:   .quad 1
@ -95,7 +98,8 @@ entry:
 ; X86_64:        .section .rodata,
 ; X86_64-LABEL:  __emutls_t.external_y:
 ; X86_64-NEXT:   .byte 7
-; X86_64:        .data
+; X86_64:        .data{{$}}
+; X86_64-NOT:    .globl
 ; X86_64:        .align 8
 ; X86_64-LABEL:  __emutls_v.internal_y:
 ; X86_64-NEXT:   .quad 8