[ASan][libc++] Turn on ASan annotations for short strings (#79049)

Originally merged here: https://github.com/llvm/llvm-project/pull/75882
Reverted here: https://github.com/llvm/llvm-project/pull/78627

Reverted due to failing buildbots. The problem was not caused by the
annotations code, but by code in the `UniqueFunctionBase` class and in
the `JSON.h` file. That code caused the program to write to memory that
was already being used by string objects, which resulted in an ASan
error.

Fixes are implemented in:
- https://github.com/llvm/llvm-project/pull/79065
- https://github.com/llvm/llvm-project/pull/79066

Problematic code from `UniqueFunctionBase` for example:
```cpp
#ifndef NDEBUG
    // In debug builds, we also scribble across the rest of the storage.
    memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize);
#endif
```

---

Original description:

This commit turns on ASan annotations in `std::basic_string` for short
stings (SSO case).

Originally suggested here: https://reviews.llvm.org/D147680

String annotations added here:
https://github.com/llvm/llvm-project/pull/72677

Requires to pass CI without fails:
- https://github.com/llvm/llvm-project/pull/75845
- https://github.com/llvm/llvm-project/pull/75858

Annotating `std::basic_string` with default allocator is implemented in
https://github.com/llvm/llvm-project/pull/72677 but annotations for
short strings (SSO - Short String Optimization) are turned off there.
This commit turns them on. This also removes
`_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to
support turning on and off short string annotations.

Support in ASan API exists since
dd1b7b797a.
You can turn off annotations for a specific allocator based on changes
from
2fa1bec7a2.

This PR is a part of a series of patches extending AddressSanitizer C++
container overflow detection capabilities by adding annotations, similar
to those existing in `std::vector` and `std::deque` collections. These
enhancements empower ASan to effectively detect instances where the
instrumented program attempts to access memory within a collection's
internal allocation that remains unused. This includes cases where
access occurs before or after the stored elements in `std::deque`, or
between the `std::basic_string`'s size (including the null terminator)
and capacity bounds.

The introduction of these annotations was spurred by a real-world
software bug discovered by Trail of Bits, involving an out-of-bounds
memory access during the comparison of two strings using the
`std::equals` function. This function was taking iterators
(`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison,
using a custom comparison function. When the `iter1` object exceeded the
length of `iter2`, an out-of-bounds read could occur on the `iter2`
object. Container sanitization, upon enabling these annotations, would
effectively identify and flag this potential vulnerability.

If you have any questions, please email:

    advenam.tacet@trailofbits.com
    disconnect3d@trailofbits.com
This commit is contained in:
Tacet 2024-01-23 19:18:53 +01:00 committed by GitHub
parent c416b2efe8
commit cb528ec5e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 429 additions and 34 deletions

View File

@ -659,7 +659,6 @@ _LIBCPP_PUSH_MACROS
#else
# define _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
#endif
#define _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED false
_LIBCPP_BEGIN_NAMESPACE_STD
@ -1896,22 +1895,17 @@ private:
#endif
}
// ASan: short string is poisoned if and only if this function returns true.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __asan_short_string_is_annotated() const _NOEXCEPT {
return _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED && !__libcpp_is_constant_evaluated();
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_new(size_type __current_size) const _NOEXCEPT {
(void) __current_size;
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + capacity() + 1, data() + __current_size + 1);
#endif
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_delete() const _NOEXCEPT {
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + size() + 1, data() + capacity() + 1);
#endif
}
@ -1919,7 +1913,7 @@ private:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_increase(size_type __n) const _NOEXCEPT {
(void) __n;
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + size() + 1, data() + size() + 1 + __n);
#endif
}
@ -1927,7 +1921,7 @@ private:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_shrink(size_type __old_size) const _NOEXCEPT {
(void) __old_size;
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
if (!__libcpp_is_constant_evaluated())
__annotate_contiguous_container(data() + __old_size + 1, data() + size() + 1);
#endif
}

View File

@ -0,0 +1,182 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: asan
// UNSUPPORTED: c++03
#include <cassert>
#include <string>
#include <array>
#include <deque>
#include "test_macros.h"
#include "asan_testing.h"
#include "min_allocator.h"
// This tests exists to check if strings work well with deque, as those
// may be partialy annotated, we cannot simply call
// is_double_ended_contiguous_container_asan_correct, as it assumes that
// object memory inside is not annotated, so we check everything in a more careful way.
template <typename D>
void verify_inside(D const& d) {
for (size_t i = 0; i < d.size(); ++i) {
assert(is_string_asan_correct(d[i]));
}
}
template <typename S, size_t N>
S get_s(char c) {
S s;
for (size_t i = 0; i < N; ++i)
s.push_back(c);
return s;
}
template <class C, class S>
void test_string() {
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
{
C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N);
verify_inside(d1a);
verify_inside(d1b);
verify_inside(d1c);
verify_inside(d1d);
}
{
C d2;
for (size_t i = 0; i < 3 * N + 2; ++i) {
d2.push_back(get_s<S, 1>(i % 10 + 'a'));
verify_inside(d2);
d2.push_back(get_s<S, 22>(i % 10 + 'b'));
verify_inside(d2);
d2.pop_front();
verify_inside(d2);
}
}
{
C d3;
for (size_t i = 0; i < 3 * N + 2; ++i) {
d3.push_front(get_s<S, 1>(i % 10 + 'a'));
verify_inside(d3);
d3.push_front(get_s<S, 28>(i % 10 + 'b'));
verify_inside(d3);
d3.pop_back();
verify_inside(d3);
}
}
{
C d4;
for (size_t i = 0; i < 3 * N + 2; ++i) {
// When there is no SSO, all elements inside should not be poisoned,
// so we can verify deque poisoning.
d4.push_front(get_s<S, 33>(i % 10 + 'a'));
verify_inside(d4);
assert(is_double_ended_contiguous_container_asan_correct(d4));
d4.push_back(get_s<S, 28>(i % 10 + 'b'));
verify_inside(d4);
assert(is_double_ended_contiguous_container_asan_correct(d4));
}
}
{
C d5;
for (size_t i = 0; i < 3 * N + 2; ++i) {
// In d4 we never had poisoned memory inside deque.
// Here we start with SSO, so part of the inside of the container,
// will be poisoned.
d5.push_front(S());
verify_inside(d5);
}
for (size_t i = 0; i < d5.size(); ++i) {
// We change the size to have long string.
// Memory owne by deque should not be poisoned by string.
d5[i].resize(100);
verify_inside(d5);
}
assert(is_double_ended_contiguous_container_asan_correct(d5));
d5.erase(d5.begin() + 2);
verify_inside(d5);
d5.erase(d5.end() - 2);
verify_inside(d5);
assert(is_double_ended_contiguous_container_asan_correct(d5));
}
{
C d6a;
assert(is_double_ended_contiguous_container_asan_correct(d6a));
C d6b(N + 2, get_s<S, 100>('a'));
d6b.push_front(get_s<S, 101>('b'));
while (!d6b.empty()) {
d6b.pop_back();
assert(is_double_ended_contiguous_container_asan_correct(d6b));
}
C d6c(N + 2, get_s<S, 102>('c'));
while (!d6c.empty()) {
d6c.pop_back();
assert(is_double_ended_contiguous_container_asan_correct(d6c));
}
}
{
C d7(9 * N + 2);
d7.insert(d7.begin() + 1, S());
verify_inside(d7);
d7.insert(d7.end() - 3, S());
verify_inside(d7);
d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
verify_inside(d7);
d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
verify_inside(d7);
d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
verify_inside(d7);
// It may not be short for big element types, but it will be checked correctly:
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
verify_inside(d7);
d7.erase(d7.begin() + 2);
verify_inside(d7);
d7.erase(d7.end() - 2);
verify_inside(d7);
}
}
template <class S>
void test_container() {
test_string<std::deque<S, std::allocator<S>>, S>();
test_string<std::deque<S, min_allocator<S>>, S>();
test_string<std::deque<S, safe_allocator<S>>, S>();
}
int main(int, char**) {
// Those tests support only types based on std::basic_string.
test_container<std::string>();
test_container<std::wstring>();
#if TEST_STD_VER >= 11
test_container<std::u16string>();
test_container<std::u32string>();
#endif
#if TEST_STD_VER >= 20
test_container<std::u8string>();
#endif
return 0;
}

View File

@ -0,0 +1,56 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: asan
// UNSUPPORTED: c++03
// <string>
// Basic test if ASan annotations work for short strings.
#include <string>
#include <cassert>
#include <cstdlib>
#include "asan_testing.h"
#include "min_allocator.h"
#include "test_iterators.h"
#include "test_macros.h"
extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
void do_exit() { exit(0); }
int main(int, char**) {
{
typedef cpp17_input_iterator<char*> MyInputIter;
// Should not trigger ASan.
std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
char i[] = {'a', 'b', 'c', 'd'};
v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4));
assert(v[0] == 'a');
assert(is_string_asan_correct(v));
}
__sanitizer_set_death_callback(do_exit);
{
using T = char;
using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
C c(std::begin(t), std::end(t));
assert(is_string_asan_correct(c));
assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
0);
volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
assert(false); // if we got here, ASAN didn't trigger
((void)foo);
}
return 0;
}

View File

@ -0,0 +1,182 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// REQUIRES: asan
// UNSUPPORTED: c++03
#include <cassert>
#include <string>
#include <vector>
#include <array>
#include "test_macros.h"
#include "asan_testing.h"
#include "min_allocator.h"
// This tests exists to check if strings work well with vector, as those
// may be partialy annotated, we cannot simply call
// is_contiguous_container_asan_correct, as it assumes that
// object memory inside is not annotated, so we check everything in a more careful way.
template <typename D>
void verify_inside(D const& d) {
for (size_t i = 0; i < d.size(); ++i) {
assert(is_string_asan_correct(d[i]));
}
}
template <typename S, size_t N>
S get_s(char c) {
S s;
for (size_t i = 0; i < N; ++i)
s.push_back(c);
return s;
}
template <class C, class S>
void test_string() {
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
{
C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N);
verify_inside(d1a);
verify_inside(d1b);
verify_inside(d1c);
verify_inside(d1d);
}
{
C d2;
for (size_t i = 0; i < 3 * N + 2; ++i) {
d2.push_back(get_s<S, 1>(i % 10 + 'a'));
verify_inside(d2);
d2.push_back(get_s<S, 28>(i % 10 + 'b'));
verify_inside(d2);
d2.erase(d2.cbegin());
verify_inside(d2);
}
}
{
C d3;
for (size_t i = 0; i < 3 * N + 2; ++i) {
d3.push_back(get_s<S, 1>(i % 10 + 'a'));
verify_inside(d3);
d3.push_back(get_s<S, 28>(i % 10 + 'b'));
verify_inside(d3);
d3.pop_back();
verify_inside(d3);
}
}
{
C d4;
for (size_t i = 0; i < 3 * N + 2; ++i) {
// When there is no SSO, all elements inside should not be poisoned,
// so we can verify vector poisoning.
d4.push_back(get_s<S, 33>(i % 10 + 'a'));
verify_inside(d4);
assert(is_contiguous_container_asan_correct(d4));
d4.push_back(get_s<S, 28>(i % 10 + 'b'));
verify_inside(d4);
assert(is_contiguous_container_asan_correct(d4));
}
}
{
C d5;
for (size_t i = 0; i < 3 * N + 2; ++i) {
// In d4 we never had poisoned memory inside vector.
// Here we start with SSO, so part of the inside of the container,
// will be poisoned.
d5.push_back(S());
verify_inside(d5);
}
for (size_t i = 0; i < d5.size(); ++i) {
// We change the size to have long string.
// Memory owne by vector should not be poisoned by string.
d5[i].resize(100);
verify_inside(d5);
}
assert(is_contiguous_container_asan_correct(d5));
d5.erase(d5.begin() + 2);
verify_inside(d5);
d5.erase(d5.end() - 2);
verify_inside(d5);
assert(is_contiguous_container_asan_correct(d5));
}
{
C d6a;
assert(is_contiguous_container_asan_correct(d6a));
C d6b(N + 2, get_s<S, 100>('a'));
d6b.push_back(get_s<S, 101>('b'));
while (!d6b.empty()) {
d6b.pop_back();
assert(is_contiguous_container_asan_correct(d6b));
}
C d6c(N + 2, get_s<S, 102>('c'));
while (!d6c.empty()) {
d6c.pop_back();
assert(is_contiguous_container_asan_correct(d6c));
}
}
{
C d7(9 * N + 2);
d7.insert(d7.begin() + 1, S());
verify_inside(d7);
d7.insert(d7.end() - 3, S());
verify_inside(d7);
d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
verify_inside(d7);
d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
verify_inside(d7);
d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
verify_inside(d7);
// It may not be short for big element types, but it will be checked correctly:
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
verify_inside(d7);
d7.erase(d7.begin() + 2);
verify_inside(d7);
d7.erase(d7.end() - 2);
verify_inside(d7);
}
}
template <class S>
void test_container() {
test_string<std::vector<S, std::allocator<S>>, S>();
test_string<std::vector<S, min_allocator<S>>, S>();
test_string<std::vector<S, safe_allocator<S>>, S>();
}
int main(int, char**) {
// Those tests support only types based on std::basic_string.
test_container<std::string>();
test_container<std::wstring>();
#if TEST_STD_VER >= 11
test_container<std::u16string>();
test_container<std::u32string>();
#endif
#if TEST_STD_VER >= 20
test_container<std::u8string>();
#endif
return 0;
}

View File

@ -56,35 +56,16 @@ TEST_CONSTEXPR bool is_double_ended_contiguous_container_asan_correct(const std:
#endif
#if TEST_HAS_FEATURE(address_sanitizer)
template <typename S>
bool is_string_short(S const& s) {
// We do not have access to __is_long(), but we can check if strings
// buffer is inside strings memory. If strings memory contains its content,
// SSO is in use. To check it, we can just confirm that the beginning is in
// the string object memory block.
// &s - beginning of objects memory
// &s[0] - beginning of the buffer
// (&s+1) - end of objects memory
return (void*)std::addressof(s) <= (void*)std::addressof(s[0]) &&
(void*)std::addressof(s[0]) < (void*)(std::addressof(s) + 1);
}
template <typename ChrT, typename TraitsT, typename Alloc>
TEST_CONSTEXPR bool is_string_asan_correct(const std::basic_string<ChrT, TraitsT, Alloc>& c) {
if (TEST_IS_CONSTANT_EVALUATED)
return true;
if (!is_string_short(c) || _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED) {
if (std::__asan_annotate_container_with_allocator<Alloc>::value)
return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
0;
else
return __sanitizer_verify_contiguous_container(
c.data(), c.data() + c.capacity() + 1, c.data() + c.capacity() + 1) != 0;
} else {
return __sanitizer_verify_contiguous_container(std::addressof(c), std::addressof(c) + 1, std::addressof(c) + 1) !=
0;
}
if (std::__asan_annotate_container_with_allocator<Alloc>::value)
return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) != 0;
else
return __sanitizer_verify_contiguous_container(
c.data(), c.data() + c.capacity() + 1, c.data() + c.capacity() + 1) != 0;
}
#else
# include <string>