[libc++abi] Improve performance of __dynamic_cast

The original `__dynamic_cast` implementation ignores the ABI-provided `src2dst_offset` parameter, a hint that can be used to speed up the hot paths. This patch uses the hint to improve the performance of the hot paths in `__dynamic_cast`, and also adds a performance benchmark suite for the `__dynamic_cast` implementation.

Reviewed By: philnik, ldionne, #libc, #libc_abi, avogelsgesang

Spies: mikhail.ramalho, avogelsgesang, xingxue, libcxx-commits

Differential Revision: https://reviews.llvm.org/D138005
This commit is contained in:
Sirui Mu 2023-03-19 10:03:38 +01:00 committed by Nikolas Klauser
parent 8510cf9fc1
commit c9d475c937
4 changed files with 300 additions and 62 deletions

View File

@ -176,6 +176,7 @@ set(BENCHMARK_TESTS
algorithms/sort.bench.cpp
algorithms/sort_heap.bench.cpp
algorithms/stable_sort.bench.cpp
libcxxabi/dynamic_cast.bench.cpp
allocation.bench.cpp
deque.bench.cpp
deque_iterator.bench.cpp

View File

@ -0,0 +1,172 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <cstddef>
#include "benchmark/benchmark.h"
// Linear single-inheritance hierarchy: Chain<N> derives publicly and
// non-virtually from Chain<N - 1>, bottoming out at the root Chain<0>.
template <std::size_t Level>
struct Chain : public Chain<Level - 1> {};

// Root of the chain. Its virtual destructor makes every Chain<N> polymorphic,
// which is what allows dynamic_cast to be applied to the hierarchy.
template <>
struct Chain<0> {
  virtual ~Chain() = default;
};
// Non-virtual multiple-inheritance lattice: Dag<I, D> derives publicly from
// Dag<I, D - 1> (listed first) and Dag<I + 1, D - 1> (listed second). The
// leaves of the lattice are the Dag<I, 0> specializations below.
template <std::size_t Column, std::size_t Height>
struct Dag : public Dag<Column, Height - 1>, public Dag<Column + 1, Height - 1> {};

// Leaf of the lattice. The virtual destructor makes every Dag<I, D>
// polymorphic so that dynamic_cast can be applied to it.
template <std::size_t Column>
struct Dag<Column, 0> {
  virtual ~Dag() = default;
};
// Linear hierarchy built with virtual inheritance: VChain<N> derives publicly
// and virtually from VChain<N - 1>, bottoming out at the root VChain<0>.
template <std::size_t Level>
struct VChain : public virtual VChain<Level - 1> {};

// Root of the virtual chain. Its virtual destructor makes every VChain<N>
// polymorphic so that dynamic_cast can be applied to it.
template <>
struct VChain<0> {
  virtual ~VChain() = default;
};
// Multiple-inheritance lattice built with virtual inheritance: VDag<I, D>
// derives publicly and virtually from VDag<I, D - 1> (listed first) and
// VDag<I + 1, D - 1> (listed second). The leaves are the VDag<I, 0>
// specializations below.
template <std::size_t Column, std::size_t Height>
struct VDag : public virtual VDag<Column, Height - 1>, public virtual VDag<Column + 1, Height - 1> {};

// Leaf of the virtual lattice. The virtual destructor makes every VDag<I, D>
// polymorphic so that dynamic_cast can be applied to it.
template <std::size_t Column>
struct VDag<Column, 0> {
  virtual ~VDag() = default;
};
// Benchmark body measuring one dynamic_cast per iteration.
//
//   Dyn  - type of the object that is created (its dynamic type).
//   From - static type of the pointer the cast starts from; a pointer to the
//          Dyn object must be implicitly convertible to From*.
//   To   - type the pointer is cast to; defaults to Dyn. May be void, in which
//          case the cast is dynamic_cast<void*>.
//
// The cast result is handed to benchmark::DoNotOptimize in every iteration.
template <typename Dyn, typename From, typename To = Dyn>
static void DynCast(benchmark::State& state) {
  Dyn object;
  From* source = &object;
  for (auto _ : state) {
    auto* result = dynamic_cast<To*>(source);
    benchmark::DoNotOptimize(result);
  }
}
// Benchmark body that performs one static_cast per iteration, from the root
// of a nine-level Chain to its most derived type. The cast result is handed
// to benchmark::DoNotOptimize in every iteration.
static void StaticCast(benchmark::State& state) {
  using Derived = Chain<9>;
  using Root    = Chain<0>;

  Derived object;
  Root* source = &object;
  for (auto _ : state) {
    auto* result = static_cast<Derived*>(source);
    benchmark::DoNotOptimize(result);
  }
}
// Benchmark registrations.
//
// Every DynCast<Dyn, From, To> instantiation creates an object of type Dyn,
// takes a From* to it and casts that pointer to To* (To defaults to Dyn).
// In the DAG hierarchies, Dag<I, D> / VDag<I, D> derive from <I, D - 1> (first
// base, "left") and <I + 1, D - 1> (second base, "right").

// Downcast along a chain from base to the most derived type.
// The object is Chain<N>; the cast goes from Chain<0>* to Chain<N>*.
BENCHMARK(DynCast<Chain<1>, Chain<0>>)->Name("Chain, 1 level");
BENCHMARK(DynCast<Chain<2>, Chain<0>>)->Name("Chain, 2 levels");
BENCHMARK(DynCast<Chain<3>, Chain<0>>)->Name("Chain, 3 levels");
BENCHMARK(DynCast<Chain<4>, Chain<0>>)->Name("Chain, 4 levels");
BENCHMARK(DynCast<Chain<5>, Chain<0>>)->Name("Chain, 5 levels");
BENCHMARK(DynCast<Chain<6>, Chain<0>>)->Name("Chain, 6 levels");
BENCHMARK(DynCast<Chain<7>, Chain<0>>)->Name("Chain, 7 levels");
BENCHMARK(DynCast<Chain<8>, Chain<0>>)->Name("Chain, 8 levels");
BENCHMARK(DynCast<Chain<9>, Chain<0>>)->Name("Chain, 9 levels");

// Downcast along a chain from base to the middle of the chain.
// The object is Chain<2N>; the cast goes from Chain<0>* to Chain<N>*.
BENCHMARK(DynCast<Chain<2>, Chain<0>, Chain<1>>)->Name("Chain middle, 1 level");
BENCHMARK(DynCast<Chain<4>, Chain<0>, Chain<2>>)->Name("Chain middle, 2 levels");
BENCHMARK(DynCast<Chain<6>, Chain<0>, Chain<3>>)->Name("Chain middle, 3 levels");
BENCHMARK(DynCast<Chain<8>, Chain<0>, Chain<4>>)->Name("Chain middle, 4 levels");

// Downcast along a chain that fails.
// The target Chain<9> is more derived than the object's type Chain<N>
// (N <= 8), so it is not part of the object.
BENCHMARK(DynCast<Chain<1>, Chain<0>, Chain<9>>)->Name("Chain fail, 1 level");
BENCHMARK(DynCast<Chain<2>, Chain<0>, Chain<9>>)->Name("Chain fail, 2 levels");
BENCHMARK(DynCast<Chain<3>, Chain<0>, Chain<9>>)->Name("Chain fail, 3 levels");
BENCHMARK(DynCast<Chain<4>, Chain<0>, Chain<9>>)->Name("Chain fail, 4 levels");
BENCHMARK(DynCast<Chain<5>, Chain<0>, Chain<9>>)->Name("Chain fail, 5 levels");
BENCHMARK(DynCast<Chain<6>, Chain<0>, Chain<9>>)->Name("Chain fail, 6 levels");
BENCHMARK(DynCast<Chain<7>, Chain<0>, Chain<9>>)->Name("Chain fail, 7 levels");
BENCHMARK(DynCast<Chain<8>, Chain<0>, Chain<9>>)->Name("Chain fail, 8 levels");

// Downcast along a virtual inheritance chain from base to the most derived type.
// Same shape as the "Chain" group, using VChain.
BENCHMARK(DynCast<VChain<1>, VChain<0>>)->Name("VChain, 1 level");
BENCHMARK(DynCast<VChain<2>, VChain<0>>)->Name("VChain, 2 levels");
BENCHMARK(DynCast<VChain<3>, VChain<0>>)->Name("VChain, 3 levels");
BENCHMARK(DynCast<VChain<4>, VChain<0>>)->Name("VChain, 4 levels");
BENCHMARK(DynCast<VChain<5>, VChain<0>>)->Name("VChain, 5 levels");

// Downcast along a virtual inheritance chain from base to the middle of the chain.
// The object is VChain<2N>; the cast goes from VChain<0>* to VChain<N>*.
BENCHMARK(DynCast<VChain<2>, VChain<0>, VChain<1>>)->Name("VChain middle, 1 level");
BENCHMARK(DynCast<VChain<4>, VChain<0>, VChain<2>>)->Name("VChain middle, 2 levels");
BENCHMARK(DynCast<VChain<6>, VChain<0>, VChain<3>>)->Name("VChain middle, 3 levels");
BENCHMARK(DynCast<VChain<8>, VChain<0>, VChain<4>>)->Name("VChain middle, 4 levels");

// Downcast along a virtual chain that fails.
// The target VChain<8> is more derived than the object's type VChain<N>
// (N <= 5), so it is not part of the object.
BENCHMARK(DynCast<VChain<1>, VChain<0>, VChain<8>>)->Name("VChain fail, 1 level");
BENCHMARK(DynCast<VChain<2>, VChain<0>, VChain<8>>)->Name("VChain fail, 2 levels");
BENCHMARK(DynCast<VChain<3>, VChain<0>, VChain<8>>)->Name("VChain fail, 3 levels");
BENCHMARK(DynCast<VChain<4>, VChain<0>, VChain<8>>)->Name("VChain fail, 4 levels");
BENCHMARK(DynCast<VChain<5>, VChain<0>, VChain<8>>)->Name("VChain fail, 5 levels");

// Downcast along a DAG from base to the most derived type.
// "rightmost" starts from the leaf Dag<N, 0> (reached by always following the
// second base); "leftmost" starts from the leaf Dag<0, 0> (reached by always
// following the first base).
BENCHMARK(DynCast<Dag<0, 3>, Dag<3, 0>>)->Name("DAG rightmost, 3 levels");
BENCHMARK(DynCast<Dag<0, 4>, Dag<4, 0>>)->Name("DAG rightmost, 4 levels");
BENCHMARK(DynCast<Dag<0, 5>, Dag<5, 0>>)->Name("DAG rightmost, 5 levels");
BENCHMARK(DynCast<Dag<0, 3>, Dag<0, 0>>)->Name("DAG leftmost, 3 levels");
BENCHMARK(DynCast<Dag<0, 4>, Dag<0, 0>>)->Name("DAG leftmost, 4 levels");
BENCHMARK(DynCast<Dag<0, 5>, Dag<0, 0>>)->Name("DAG leftmost, 5 levels");

// Downcast along a DAG from base to the middle of the DAG.
// The object is always Dag<0, 4>; the target is an intermediate class that
// sits N levels above the leaf the cast starts from.
BENCHMARK(DynCast<Dag<0, 4>, Dag<4, 0>, Dag<3, 1>>)->Name("DAG rightmost middle, 1 level");
BENCHMARK(DynCast<Dag<0, 4>, Dag<4, 0>, Dag<2, 2>>)->Name("DAG rightmost middle, 2 levels");
BENCHMARK(DynCast<Dag<0, 4>, Dag<4, 0>, Dag<1, 3>>)->Name("DAG rightmost middle, 3 levels");
BENCHMARK(DynCast<Dag<0, 4>, Dag<0, 0>, Dag<0, 1>>)->Name("DAG leftmost middle, 1 level");
BENCHMARK(DynCast<Dag<0, 4>, Dag<0, 0>, Dag<0, 2>>)->Name("DAG leftmost middle, 2 levels");
BENCHMARK(DynCast<Dag<0, 4>, Dag<0, 0>, Dag<0, 3>>)->Name("DAG leftmost middle, 3 levels");

// Sidecast along a DAG.
// The object is Dag<0, 3>; source and target are different bases of it and
// the target is not derived from the source.
BENCHMARK(DynCast<Dag<0, 3>, Dag<3, 0>, Dag<0, 0>>)->Name("DAG sidecast, 3 levels");
BENCHMARK(DynCast<Dag<0, 3>, Dag<2, 1>, Dag<0, 1>>)->Name("DAG sidecast, 2 levels");
BENCHMARK(DynCast<Dag<0, 3>, Dag<1, 2>, Dag<0, 2>>)->Name("DAG sidecast, 1 level");

// Sidecast along a DAG that fails.
// The target Dag<0, 4> is more derived than the object's type Dag<0, 3>, so
// it is not part of the object.
BENCHMARK(DynCast<Dag<0, 3>, Dag<3, 0>, Dag<0, 4>>)->Name("DAG sidecast fail, 3 levels");
BENCHMARK(DynCast<Dag<0, 3>, Dag<2, 1>, Dag<0, 4>>)->Name("DAG sidecast fail, 2 levels");
BENCHMARK(DynCast<Dag<0, 3>, Dag<1, 2>, Dag<0, 4>>)->Name("DAG sidecast fail, 1 level");

// Downcast along a virtual inheritance DAG from base to the most derived type.
// Same shape as the "DAG rightmost/leftmost" group, using VDag.
BENCHMARK(DynCast<VDag<0, 3>, VDag<3, 0>>)->Name("VDAG rightmost, 3 levels");
BENCHMARK(DynCast<VDag<0, 4>, VDag<4, 0>>)->Name("VDAG rightmost, 4 levels");
BENCHMARK(DynCast<VDag<0, 5>, VDag<5, 0>>)->Name("VDAG rightmost, 5 levels");
BENCHMARK(DynCast<VDag<0, 3>, VDag<0, 0>>)->Name("VDAG leftmost, 3 levels");
BENCHMARK(DynCast<VDag<0, 4>, VDag<0, 0>>)->Name("VDAG leftmost, 4 levels");
BENCHMARK(DynCast<VDag<0, 5>, VDag<0, 0>>)->Name("VDAG leftmost, 5 levels");

// Downcast along a virtual inheritance DAG from base to the middle of the DAG.
// Unlike the non-virtual "DAG ... middle" group, the object type grows with
// the level count here (VDag<0, 3>, VDag<0, 4>, VDag<0, 5>).
BENCHMARK(DynCast<VDag<0, 3>, VDag<3, 0>, VDag<2, 1>>)->Name("VDAG rightmost middle, 1 level");
BENCHMARK(DynCast<VDag<0, 4>, VDag<4, 0>, VDag<2, 2>>)->Name("VDAG rightmost middle, 2 levels");
BENCHMARK(DynCast<VDag<0, 5>, VDag<5, 0>, VDag<2, 3>>)->Name("VDAG rightmost middle, 3 levels");
BENCHMARK(DynCast<VDag<0, 3>, VDag<0, 0>, VDag<0, 1>>)->Name("VDAG leftmost middle, 1 level");
BENCHMARK(DynCast<VDag<0, 4>, VDag<0, 0>, VDag<0, 2>>)->Name("VDAG leftmost middle, 2 levels");
BENCHMARK(DynCast<VDag<0, 5>, VDag<0, 0>, VDag<0, 3>>)->Name("VDAG leftmost middle, 3 levels");

// Sidecast along a virtual inheritance DAG.
// Same shape as the "DAG sidecast" group, using VDag.
BENCHMARK(DynCast<VDag<0, 3>, VDag<3, 0>, VDag<0, 0>>)->Name("VDAG sidecast, 3 levels");
BENCHMARK(DynCast<VDag<0, 3>, VDag<2, 1>, VDag<0, 1>>)->Name("VDAG sidecast, 2 levels");
BENCHMARK(DynCast<VDag<0, 3>, VDag<1, 2>, VDag<0, 2>>)->Name("VDAG sidecast, 1 level");

// Sidecast along a virtual inheritance DAG that fails.
// The target VDag<0, 4> is more derived than the object's type VDag<0, 3>, so
// it is not part of the object.
BENCHMARK(DynCast<VDag<0, 3>, VDag<3, 0>, VDag<0, 4>>)->Name("VDAG sidecast fail, 3 levels");
BENCHMARK(DynCast<VDag<0, 3>, VDag<2, 1>, VDag<0, 4>>)->Name("VDAG sidecast fail, 2 levels");
BENCHMARK(DynCast<VDag<0, 3>, VDag<1, 2>, VDag<0, 4>>)->Name("VDAG sidecast fail, 1 level");

// Cast to complete object pointer.
// To = void, so DynCast performs dynamic_cast<void*>.
BENCHMARK(DynCast<Chain<8>, Chain<0>, void>)->Name("Chain to complete");
BENCHMARK(DynCast<VChain<5>, VChain<0>, void>)->Name("VChain to complete");
BENCHMARK(DynCast<Dag<0, 3>, Dag<3, 0>, void>)->Name("DAG to complete");
BENCHMARK(DynCast<VDag<0, 3>, VDag<3, 0>, void>)->Name("VDAG to complete");

// Static cast as the baseline.
BENCHMARK(StaticCast)->Name("Static");

// Expands to the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();

View File

@ -48,6 +48,9 @@ Improvements and New Features
- ``std::string_view`` now provides iterators that check for out-of-bounds accesses when the safe
libc++ mode is enabled.
- The performance of ``dynamic_cast`` on its hot paths is greatly improved and is as efficient as the
``libsupc++`` implementation. Note that the performance improvements are shipped in ``libcxxabi``.
Deprecations and Removals
-------------------------

View File

@ -41,6 +41,7 @@
// Defining _LIBCXXABI_FORGIVING_DYNAMIC_CAST does not help since can_catch() calls
// is_equal() with use_strcmp=false so the string names are not compared.
#include <cstdint>
#include <string.h>
#ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
@ -656,13 +657,47 @@ __dynamic_cast(const void *static_ptr, const __class_type_info *static_type,
// Find out if we can use a giant short cut in the search
if (is_equal(dynamic_type, dst_type, false))
{
// We're downcasting from src_type to the complete object's dynamic
// type. This is a really hot path that can be further optimized
// with the `src2dst_offset` hint.
// In such a case, dynamic_ptr already gives the casting result if the
// casting ever succeeds. All we have to do now is to check
// static_ptr points to a public base sub-object of dynamic_ptr.
if (src2dst_offset >= 0)
{
// The static type is a unique public non-virtual base type of
// dst_type at offset `src2dst_offset` from the origin of dst.
// Note that there might be other non-public static_type bases. The
// hint only guarantees that the public base is non-virtual and
// unique. So we have to check whether static_ptr points to that
// unique public base sub-object.
if (offset_to_derived == -src2dst_offset)
dst_ptr = dynamic_ptr;
}
else if (src2dst_offset == -2)
{
// static_type is not a public base of dst_type.
dst_ptr = nullptr;
}
else
{
// If src2dst_offset == -3, then:
// src_type is a multiple public base type but never a virtual
// base type. We can't conclude that static_ptr points to those
// public base sub-objects because there might be other non-
// public static_type bases. The search is inevitable.
// Fallback to the slow path to check that static_type is a public
// base type of dynamic_type.
// Using giant short cut. Add that information to info.
info.number_of_dst_type = 1;
// Do the search
dynamic_type->search_above_dst(&info, dynamic_ptr, dynamic_ptr, public_path, false);
#ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
// The following if should always be false because we should definitely
// find (static_ptr, static_type), either on a public or private path
// The following if should always be false because we should
// definitely find (static_ptr, static_type), either on a public
// or private path
if (info.path_dst_ptr_to_static_ptr == unknown)
{
// We get here only if there is some kind of visibility problem
@ -684,13 +719,39 @@ __dynamic_cast(const void *static_ptr, const __class_type_info *static_type,
if (info.path_dst_ptr_to_static_ptr == public_path)
dst_ptr = dynamic_ptr;
}
}
else
{
if (src2dst_offset >= 0)
{
// Optimize toward downcasting: dst_type has exactly one public
// static_type base. Let's first try to do a downcast before
// falling back to the slow path. The downcast succeeds if there
// is at least one path regardless of visibility from
// dynamic_type to dst_type.
const void* dst_ptr_to_static = reinterpret_cast<const char*>(static_ptr) - src2dst_offset;
if (reinterpret_cast<std::intptr_t>(dst_ptr_to_static) >= reinterpret_cast<std::intptr_t>(dynamic_ptr))
{
// Try to search a path from dynamic_type to dst_type.
__dynamic_cast_info dynamic_to_dst_info = {dynamic_type, dst_ptr_to_static, dst_type, src2dst_offset};
dynamic_to_dst_info.number_of_dst_type = 1;
dynamic_type->search_above_dst(&dynamic_to_dst_info, dynamic_ptr, dynamic_ptr, public_path, false);
if (dynamic_to_dst_info.path_dst_ptr_to_static_ptr != unknown) {
// We have found at least one path from dynamic_ptr to
// dst_ptr. The downcast can succeed.
dst_ptr = dst_ptr_to_static;
}
}
}
if (!dst_ptr)
{
// Not using giant short cut. Do the search
dynamic_type->search_below_dst(&info, dynamic_ptr, public_path, false);
#ifdef _LIBCXXABI_FORGIVING_DYNAMIC_CAST
// The following if should always be false because we should definitely
// find (static_ptr, static_type), either on a public or private path
// The following if should always be false because we should
// definitely find (static_ptr, static_type), either on a public
// or private path
if (info.path_dst_ptr_to_static_ptr == unknown &&
info.path_dynamic_ptr_to_static_ptr == unknown)
{
@ -729,6 +790,7 @@ __dynamic_cast(const void *static_ptr, const __class_type_info *static_type,
break;
}
}
}
return const_cast<void*>(dst_ptr);
}