[sanitizer] Simplify GetTls with dl_iterate_phdr on Linux and use it on musl/FreeBSD

... so that FreeBSD specific GetTls/glibc specific pthread_self code can be
removed. This also helps FreeBSD arm64/powerpc64 which don't have GetTls
implementation yet.

GetTls is the range of

* thread control block and optional TLS_PRE_TCB_SIZE
* static TLS blocks plus static TLS surplus

On glibc, lsan requires the range to include
`pthread::{specific_1stblock,specific}` so that allocations only referenced by
`pthread_setspecific` can be scanned.

This patch uses `dl_iterate_phdr` to collect TLS blocks. Find the one
with `dlpi_tls_modid==1` as one of the initially loaded modules, then find
consecutive ranges. The boundaries give us addr and size.

This allows us to drop the glibc internal `_dl_get_tls_static_info` and
`InitTlsSize`. However, huge glibc x86-64 binaries with numerous shared objects
may observe a time complexity penalty, so exclude them for now. Use the simplified
method with non-Android Linux for now, but in theory this can be used with *BSD
and potentially other ELF OSes.

This removal of RISC-V `__builtin_thread_pointer` makes the code compilable with
more compiler versions (added in Clang in 2020-03, added in GCC in 2020-07).

This simplification enables D99566 for TLS Variant I architectures.

Note: as of musl 1.2.2 and FreeBSD 12.2, dlpi_tls_data returned by
dl_iterate_phdr is not desired: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=254774
This can be worked around by using `__tls_get_addr({modid,0})` instead
of `dlpi_tls_data`. The workaround can be shared with the workaround for glibc<2.25.

This fixes some tests on Alpine Linux x86-64 (musl)

```
test/lsan/Linux/cleanup_in_tsd_destructor.c
test/lsan/Linux/fork.cpp
test/lsan/Linux/fork_threaded.cpp
test/lsan/Linux/use_tls_static.cpp
test/lsan/many_tls_keys_thread.cpp

test/msan/tls_reuse.cpp
```

and `test/lsan/TestCases/many_tls_keys_pthread.cpp` on glibc aarch64.

The number of sanitizer test failures does not change on FreeBSD/amd64 12.2.

Differential Revision: https://reviews.llvm.org/D98926
This commit is contained in:
Fangrui Song 2021-04-15 15:34:43 -07:00
parent 8639e2aaaf
commit afec953857
8 changed files with 151 additions and 188 deletions

View File

@ -568,7 +568,7 @@ void UnpoisonStack(uptr bottom, uptr top, const char *type) {
type, top, bottom, top - bottom, top - bottom);
return;
}
PoisonShadow(bottom, top - bottom, 0);
PoisonShadow(bottom, RoundUpTo(top - bottom, SHADOW_GRANULARITY), 0);
}
static void UnpoisonDefaultStack() {

View File

@ -307,7 +307,7 @@ void AsanThread::SetThreadStackAndTls(const InitOptions *options) {
uptr stack_size = 0;
GetThreadStackAndTls(tid() == 0, &stack_bottom_, &stack_size, &tls_begin_,
&tls_size);
stack_top_ = stack_bottom_ + stack_size;
stack_top_ = RoundDownTo(stack_bottom_ + stack_size, SHADOW_GRANULARITY);
tls_end_ = tls_begin_ + tls_size;
dtls_ = DTLS_Get();

View File

@ -98,7 +98,6 @@ class ThreadLister {
// Exposed for testing.
uptr ThreadDescriptorSize();
uptr ThreadSelf();
uptr ThreadSelfOffset();
// Matches a library's file name against a base name (stripping path and version
// information).

View File

@ -188,84 +188,40 @@ __attribute__((unused)) static bool GetLibcVersion(int *major, int *minor,
#endif
}
// True if we can use dlpi_tls_data. glibc before 2.25 may leave NULL (BZ
// #19826) so dlpi_tls_data cannot be used.
//
// musl before 1.2.3 and FreeBSD as of 12.2 incorrectly set dlpi_tls_data to
// the TLS initialization image
// https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=254774
#if !SANITIZER_GO
static int g_use_dlpi_tls_data;
#endif
#if SANITIZER_GLIBC && !SANITIZER_GO
static uptr g_tls_size;
#ifdef __i386__
#define CHECK_GET_TLS_STATIC_INFO_VERSION (!__GLIBC_PREREQ(2, 27))
#else
#define CHECK_GET_TLS_STATIC_INFO_VERSION 0
#endif
#if CHECK_GET_TLS_STATIC_INFO_VERSION
#define DL_INTERNAL_FUNCTION __attribute__((regparm(3), stdcall))
#else
#define DL_INTERNAL_FUNCTION
#endif
namespace {
// Signature of glibc's _dl_get_tls_static_info with the normal calling
// convention (used on glibc >= 2.27 on i386, and on all other targets).
struct GetTlsStaticInfoCall {
  typedef void (*get_tls_func)(size_t*, size_t*);
};
// Same function, but with the internal_function (regparm(3), stdcall)
// convention that glibc < 2.27 used on i386 (see CHECK_GET_TLS_STATIC_INFO_VERSION).
struct GetTlsStaticInfoRegparmCall {
  typedef void (*get_tls_func)(size_t*, size_t*) DL_INTERNAL_FUNCTION;
};
// Calls _dl_get_tls_static_info through a function pointer of type
// T::get_tls_func, writing the static TLS size and alignment into
// *size / *align. The dlsym result (a void*) is moved into the function
// pointer with memcpy rather than a cast, which keeps the conversion
// well-defined regardless of the pointer's calling convention.
template <typename T>
void CallGetTls(void* ptr, size_t* size, size_t* align) {
  typename T::get_tls_func get_tls;
  // A function pointer must have the same representation size as void*
  // for the memcpy below to be meaningful.
  CHECK_EQ(sizeof(get_tls), sizeof(ptr));
  internal_memcpy(&get_tls, &ptr, sizeof(ptr));
  // dlsym returned null: _dl_get_tls_static_info was not found.
  CHECK_NE(get_tls, 0);
  get_tls(size, align);
}
// Returns true iff the detected libc version is >= major.minor.patch
// (lexicographic comparison). Returns false when the version cannot be
// determined at all.
bool CmpLibcVersion(int major, int minor, int patch) {
  int ma;
  int mi;
  int pa;
  if (!GetLibcVersion(&ma, &mi, &pa))
    return false;
  // Compare (ma, mi, pa) against (major, minor, patch) component-wise;
  // the first differing component decides.
  if (ma != major)
    return ma > major;
  if (mi != minor)
    return mi > minor;
  return pa >= patch;
}
} // namespace
__attribute__((unused)) static uptr g_tls_size;
void InitTlsSize() {
// all current supported platforms have 16 bytes stack alignment
const size_t kStackAlign = 16;
void *get_tls_static_info_ptr = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
size_t tls_size = 0;
size_t tls_align = 0;
// On i?86, _dl_get_tls_static_info used to be internal_function, i.e.
// __attribute__((regparm(3), stdcall)) before glibc 2.27 and is normal
// function in 2.27 and later.
if (CHECK_GET_TLS_STATIC_INFO_VERSION && !CmpLibcVersion(2, 27, 0))
CallGetTls<GetTlsStaticInfoRegparmCall>(get_tls_static_info_ptr,
&tls_size, &tls_align);
else
CallGetTls<GetTlsStaticInfoCall>(get_tls_static_info_ptr,
&tls_size, &tls_align);
if (tls_align < kStackAlign)
tls_align = kStackAlign;
g_tls_size = RoundUpTo(tls_size, tls_align);
int major, minor, patch;
g_use_dlpi_tls_data =
GetLibcVersion(&major, &minor, &patch) && major == 2 && minor >= 25;
#ifdef __x86_64__
void *get_tls_static_info = dlsym(RTLD_NEXT, "_dl_get_tls_static_info");
size_t tls_align;
((void (*)(size_t *, size_t *))get_tls_static_info)(&g_tls_size, &tls_align);
#endif
}
#else
void InitTlsSize() { }
#endif // SANITIZER_GLIBC && !SANITIZER_GO
// On glibc x86_64, ThreadDescriptorSize() needs to be precise due to the usage
// of g_tls_size. On other targets, ThreadDescriptorSize() is only used by lsan
// to get the pointer to thread-specific data keys in the thread control block.
#if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \
defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \
defined(__arm__) || SANITIZER_RISCV64) && \
SANITIZER_LINUX && !SANITIZER_ANDROID
(SANITIZER_FREEBSD || SANITIZER_LINUX) && !SANITIZER_ANDROID
// sizeof(struct pthread) from glibc.
static atomic_uintptr_t thread_descriptor_size;
@ -334,13 +290,6 @@ uptr ThreadDescriptorSize() {
return val;
}
// The offset at which pointer to self is located in the thread descriptor.
const uptr kThreadSelfOffset = FIRST_32_SECOND_64(8, 16);
uptr ThreadSelfOffset() {
return kThreadSelfOffset;
}
#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
// TlsPreTcbSize includes size of struct pthread_descr and size of tcb
// head structure. It lies before the static tls blocks.
@ -359,68 +308,72 @@ static uptr TlsPreTcbSize() {
}
#endif
// Returns the address of the calling thread's descriptor (glibc's
// struct pthread). Computed per-architecture from the thread pointer and
// the TLS variant layout; on i386/x86_64 it is read directly from the
// self-pointer stored at kThreadSelfOffset in the TCB (see
// ThreadSelfOffset above).
uptr ThreadSelf() {
  uptr descr_addr;
#if defined(__i386__)
  // TLS variant II: %gs points at the TCB; the self pointer lives at
  // kThreadSelfOffset within it.
  asm("mov %%gs:%c1,%0" : "=r"(descr_addr) : "i"(kThreadSelfOffset));
#elif defined(__x86_64__)
  // Same as i386 but the thread register is %fs.
  asm("mov %%fs:%c1,%0" : "=r"(descr_addr) : "i"(kThreadSelfOffset));
#elif defined(__mips__)
  // MIPS uses TLS variant I. The thread pointer (in hardware register $29)
  // points to the end of the TCB + 0x7000. The pthread_descr structure is
  // immediately in front of the TCB. TlsPreTcbSize() includes the size of the
  // TCB and the size of pthread_descr.
  const uptr kTlsTcbOffset = 0x7000;
  uptr thread_pointer;
  // rdhwr $29 requires mips64r2; temporarily switch the assembler ISA.
  asm volatile(".set push;\
.set mips64r2;\
rdhwr %0,$29;\
.set pop" : "=r" (thread_pointer));
  descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize();
#elif defined(__aarch64__) || defined(__arm__)
  // The descriptor sits immediately before the static TLS area pointed to
  // (modulo the reserved words) by the thread pointer.
  descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer()) -
               ThreadDescriptorSize();
#elif SANITIZER_RISCV64
  // https://github.com/riscv/riscv-elf-psabi-doc/issues/53
  uptr thread_pointer = reinterpret_cast<uptr>(__builtin_thread_pointer());
  descr_addr = thread_pointer - TlsPreTcbSize();
#elif defined(__s390__)
  // On s390 the thread pointer is the descriptor address itself.
  descr_addr = reinterpret_cast<uptr>(__builtin_thread_pointer());
#elif defined(__powerpc64__)
  // PPC64LE uses TLS variant I. The thread pointer (in GPR 13)
  // points to the end of the TCB + 0x7000. The pthread_descr structure is
  // immediately in front of the TCB. TlsPreTcbSize() includes the size of the
  // TCB and the size of pthread_descr.
  const uptr kTlsTcbOffset = 0x7000;
  uptr thread_pointer;
  asm("addi %0,13,%1" : "=r"(thread_pointer) : "I"(-kTlsTcbOffset));
  descr_addr = thread_pointer - TlsPreTcbSize();
#else
#error "unsupported CPU arch"
#endif
  return descr_addr;
}
#endif // (x86_64 || i386 || MIPS) && SANITIZER_LINUX
#if !SANITIZER_GO
namespace {
// One module's static TLS block as reported by dl_iterate_phdr.
struct TlsBlock {
  // [begin, end) address range of the block and its required alignment.
  uptr begin, end, align;
  // dlpi_tls_modid of the owning module; 1 marks an initially loaded module.
  size_t tls_modid;
  // Order blocks by address so consecutive ranges can be coalesced.
  bool operator<(const TlsBlock &rhs) const { return begin < rhs.begin; }
};
} // namespace
#if SANITIZER_FREEBSD
static void **ThreadSelfSegbase() {
void **segbase = 0;
#if defined(__i386__)
// sysarch(I386_GET_GSBASE, segbase);
__asm __volatile("mov %%gs:0, %0" : "=r" (segbase));
#elif defined(__x86_64__)
// sysarch(AMD64_GET_FSBASE, segbase);
__asm __volatile("movq %%fs:0, %0" : "=r" (segbase));
#else
#error "unsupported CPU arch"
#endif
return segbase;
extern "C" void *__tls_get_addr(size_t *);
static int CollectStaticTlsBlocks(struct dl_phdr_info *info, size_t size,
void *data) {
if (!info->dlpi_tls_modid)
return 0;
uptr begin = (uptr)info->dlpi_tls_data;
if (!g_use_dlpi_tls_data) {
// Call __tls_get_addr as a fallback. This forces TLS allocation on glibc
// and FreeBSD.
size_t mod_and_off[2] = {info->dlpi_tls_modid, 0};
begin = (uptr)__tls_get_addr(mod_and_off);
}
for (unsigned i = 0; i != info->dlpi_phnum; ++i)
if (info->dlpi_phdr[i].p_type == PT_TLS) {
static_cast<InternalMmapVector<TlsBlock> *>(data)->push_back(
TlsBlock{begin, begin + info->dlpi_phdr[i].p_memsz,
info->dlpi_phdr[i].p_align, info->dlpi_tls_modid});
break;
}
return 0;
}
uptr ThreadSelf() {
return (uptr)ThreadSelfSegbase()[2];
// Computes the [*addr, *addr + *size) boundary of the static TLS area and
// its maximum alignment by collecting every module's TLS block via
// dl_iterate_phdr and merging the blocks that are laid out consecutively
// around the module with tls_modid == 1.
__attribute__((unused)) static void GetStaticTlsBoundary(uptr *addr, uptr *size,
                                                         uptr *align) {
  InternalMmapVector<TlsBlock> ranges;
  dl_iterate_phdr(CollectStaticTlsBlocks, &ranges);
  uptr len = ranges.size();
  // Sort blocks by begin address so adjacency can be tested pairwise.
  Sort(ranges.begin(), len);
  // Find the range with tls_modid=1. For glibc, because libc.so uses PT_TLS,
  // this module is guaranteed to exist and is one of the initially loaded
  // modules.
  uptr one = 0;
  while (one != len && ranges[one].tls_modid != 1) ++one;
  if (one == len) {
    // This may happen with musl if no module uses PT_TLS.
    *addr = 0;
    *size = 0;
    *align = 1;
    return;
  }
  // Find the maximum consecutive ranges. We consider two modules consecutive if
  // the gap is smaller than the alignment. The dynamic loader places static TLS
  // blocks this way not to waste space.
  // Walk left from the tls_modid==1 block while each neighbor abuts it...
  uptr l = one;
  *align = ranges[l].align;
  while (l != 0 && ranges[l].begin < ranges[l - 1].end + ranges[l - 1].align)
    *align = Max(*align, ranges[--l].align);
  // ...and walk right the same way; both walks fold in each block's alignment.
  uptr r = one + 1;
  while (r != len && ranges[r].begin < ranges[r - 1].end + ranges[r - 1].align)
    *align = Max(*align, ranges[r++].align);
  // The merged run [l, r) bounds the static TLS area.
  *addr = ranges[l].begin;
  *size = ranges[r - 1].end - ranges[l].begin;
}
#endif // SANITIZER_FREEBSD
#endif // !SANITIZER_GO
#endif // (x86_64 || i386 || mips || ...) && (SANITIZER_FREEBSD ||
// SANITIZER_LINUX) && !SANITIZER_ANDROID
#if SANITIZER_NETBSD
static struct tls_tcb * ThreadSelfTlsTcb() {
@ -471,33 +424,59 @@ static void GetTls(uptr *addr, uptr *size) {
*addr = 0;
*size = 0;
}
#elif SANITIZER_LINUX
#if defined(__x86_64__) || defined(__i386__) || defined(__s390__)
*addr = ThreadSelf();
*size = GetTlsSize();
#elif SANITIZER_GLIBC && defined(__x86_64__)
// For x86-64, use an O(1) approach which requires precise
// ThreadDescriptorSize. g_tls_size was initialized in InitTlsSize.
asm("mov %%fs:16,%0" : "=r"(*addr));
*size = g_tls_size;
*addr -= *size;
*addr += ThreadDescriptorSize();
#elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) || \
defined(__arm__) || SANITIZER_RISCV64
*addr = ThreadSelf();
*size = GetTlsSize();
#elif SANITIZER_FREEBSD || SANITIZER_LINUX
uptr align;
GetStaticTlsBoundary(addr, size, &align);
#if defined(__x86_64__) || defined(__i386__) || defined(__s390__)
if (SANITIZER_GLIBC) {
#if defined(__s390__)
align = Max<uptr>(align, 16);
#else
*addr = 0;
*size = 0;
align = Max<uptr>(align, 64);
#endif
#elif SANITIZER_FREEBSD
void** segbase = ThreadSelfSegbase();
*addr = 0;
*size = 0;
if (segbase != 0) {
// tcbalign = 16
// tls_size = round(tls_static_space, tcbalign);
// dtv = segbase[1];
// dtv[2] = segbase - tls_static_space;
void **dtv = (void**) segbase[1];
*addr = (uptr) dtv[2];
*size = (*addr == 0) ? 0 : ((uptr) segbase[0] - (uptr) dtv[2]);
}
const uptr tp = RoundUpTo(*addr + *size, align);
// lsan requires the range to additionally cover the static TLS surplus
// (elf/dl-tls.c defines 1664). Otherwise there may be false positives for
// allocations only referenced by tls in dynamically loaded modules.
if (SANITIZER_GLIBC)
*size += 1664;
else if (SANITIZER_FREEBSD)
*size += 128; // RTLD_STATIC_TLS_EXTRA
// Extend the range to include the thread control block. On glibc, lsan needs
// the range to include pthread::{specific_1stblock,specific} so that
// allocations only referenced by pthread_setspecific can be scanned. This may
// underestimate by at most TLS_TCB_ALIGN-1 bytes but it should be fine
// because the number of bytes after pthread::specific is larger.
*addr = tp - RoundUpTo(*size, align);
*size = tp - *addr + ThreadDescriptorSize();
#else
if (SANITIZER_GLIBC)
*size += 1664;
else if (SANITIZER_FREEBSD)
*size += 128; // RTLD_STATIC_TLS_EXTRA
#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
const uptr pre_tcb_size = TlsPreTcbSize();
*addr -= pre_tcb_size;
*size += pre_tcb_size;
#else
// arm and aarch64 reserve two words at TP, so this underestimates the range.
// However, this is sufficient for the purpose of finding the pointers to
// thread-specific data keys.
const uptr tcb_size = ThreadDescriptorSize();
*addr -= tcb_size;
*size += tcb_size;
#endif
#endif
#elif SANITIZER_NETBSD
struct tls_tcb * const tcb = ThreadSelfTlsTcb();
*addr = 0;
@ -524,17 +503,11 @@ static void GetTls(uptr *addr, uptr *size) {
#if !SANITIZER_GO
uptr GetTlsSize() {
#if SANITIZER_FREEBSD || SANITIZER_ANDROID || SANITIZER_NETBSD || \
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
SANITIZER_SOLARIS
uptr addr, size;
GetTls(&addr, &size);
return size;
#elif SANITIZER_GLIBC
#if defined(__mips__) || defined(__powerpc64__) || SANITIZER_RISCV64
return RoundUpTo(g_tls_size + TlsPreTcbSize(), 16);
#else
return g_tls_size;
#endif
#else
return 0;
#endif
@ -557,10 +530,9 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size,
if (!main) {
// If stack and tls intersect, make them non-intersecting.
if (*tls_addr > *stk_addr && *tls_addr < *stk_addr + *stk_size) {
CHECK_GT(*tls_addr + *tls_size, *stk_addr);
CHECK_LE(*tls_addr + *tls_size, *stk_addr + *stk_size);
*stk_size -= *tls_size;
*tls_addr = *stk_addr + *stk_size;
if (*stk_addr + *stk_size < *tls_addr + *tls_size)
*tls_size = *stk_addr + *stk_size - *tls_addr;
*stk_size = *tls_addr - *stk_addr;
}
}
#endif

View File

@ -188,24 +188,9 @@ TEST(SanitizerCommon, SetEnvTest) {
}
#if (defined(__x86_64__) || defined(__i386__)) && !SANITIZER_ANDROID
void *thread_self_offset_test_func(void *arg) {
bool result =
*(uptr *)((char *)ThreadSelf() + ThreadSelfOffset()) == ThreadSelf();
return (void *)result;
}
TEST(SanitizerLinux, ThreadSelfOffset) {
EXPECT_TRUE((bool)thread_self_offset_test_func(0));
pthread_t tid;
void *result;
ASSERT_EQ(0, pthread_create(&tid, 0, thread_self_offset_test_func, 0));
ASSERT_EQ(0, pthread_join(tid, &result));
EXPECT_TRUE((bool)result);
}
// libpthread puts the thread descriptor at the end of stack space.
void *thread_descriptor_size_test_func(void *arg) {
uptr descr_addr = ThreadSelf();
uptr descr_addr = (uptr)pthread_self();
pthread_attr_t attr;
pthread_getattr_np(pthread_self(), &attr);
void *stackaddr;

View File

@ -655,8 +655,11 @@ TSAN_INTERCEPTOR(void*, malloc, uptr size) {
return p;
}
// In glibc<2.25, dynamic TLS blocks are allocated by __libc_memalign. Intercept
// __libc_memalign so that (1) we can detect races (2) free will not be called
// on libc internally allocated blocks.
TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) {
SCOPED_TSAN_INTERCEPTOR(__libc_memalign, align, sz);
SCOPED_INTERCEPTOR_RAW(__libc_memalign, align, sz);
return user_memalign(thr, pc, align, sz);
}

View File

@ -11,7 +11,11 @@
// XFAIL: aarch64
// binutils 2.26 has a change that causes this test to fail on powerpc64.
// UNSUPPORTED: powerpc64
// UNSUPPORTED: powerpc64
/// We call __tls_get_addr early in GetTls to work around an issue for glibc<2.25,
/// so we don't get a log for f().
// REQUIRES: glibc-2.27
#ifndef SHARED
#include <stdio.h>

View File

@ -7,7 +7,7 @@
// On glibc, this requires the range returned by GetTLS to include
// specific_1stblock and specific in `struct pthread`.
// UNSUPPORTED: arm-linux, armhf-linux, aarch64
// UNSUPPORTED: arm-linux, armhf-linux
// TSD on NetBSD does not use TLS
// UNSUPPORTED: netbsd