mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-27 07:31:28 +00:00
6fe4e033f0
Credits: this change is based on analysis and a proof of concept by gerbens@google.com. Before, the compiler loses track of end as 'this' and other references possibly escape beyond the compiler's scope. This can be seen in the generated assembly: 16.28 │200c80: mov %r15d,(%rax) 60.87 │200c83: add $0x4,%rax │200c87: mov %rax,-0x38(%rbp) 0.03 │200c8b: → jmpq 200d4e ... ... 1.69 │200d4e: cmp %r15d,%r12d │200d51: → je 200c40 16.34 │200d57: inc %r15d 0.05 │200d5a: mov -0x38(%rbp),%rax 3.27 │200d5e: mov -0x30(%rbp),%r13 1.47 │200d62: cmp %r13,%rax │200d65: → jne 200c80 We fix this by always explicitly storing the loaded local and pointer back at the end of push_back. This generates some slight source 'noise', but creates nice and compact fast path code, i.e.: 32.64 │200760: mov %r14d,(%r12) 9.97 │200764: add $0x4,%r12 6.97 │200768: mov %r12,-0x38(%rbp) 32.17 │20076c: add $0x1,%r14d 2.36 │200770: cmp %r14d,%ebx │200773: → je 200730 8.98 │200775: mov -0x30(%rbp),%r13 6.75 │200779: cmp %r13,%r12 │20077c: → jne 200760 Now there is a single store for the push_back value (as before), and a single store for the end without a reload (dependency). For fully local vectors (i.e., not referenced elsewhere), the capacity load and store inside the loop could also be removed, but this requires more substantial refactoring inside vector. Differential Revision: https://reviews.llvm.org/D80588
229 lines
6.3 KiB
C++
229 lines
6.3 KiB
C++
// -*- C++ -*-
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef BENCHMARK_CONTAINER_BENCHMARKS_H
|
|
#define BENCHMARK_CONTAINER_BENCHMARKS_H
|
|
|
|
#include <cassert>
|
|
|
|
#include "Utilities.h"
|
|
#include "benchmark/benchmark.h"
|
|
|
|
namespace ContainerBenchmarks {
|
|
|
|
template <class Container>
|
|
void BM_ConstructSize(benchmark::State& st, Container) {
|
|
auto size = st.range(0);
|
|
for (auto _ : st) {
|
|
Container c(size);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_CopyConstruct(benchmark::State& st, Container) {
|
|
auto size = st.range(0);
|
|
Container c(size);
|
|
for (auto _ : st) {
|
|
auto v = c;
|
|
DoNotOptimizeData(v);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_Assignment(benchmark::State& st, Container) {
|
|
auto size = st.range(0);
|
|
Container c1;
|
|
Container c2(size);
|
|
for (auto _ : st) {
|
|
c1 = c2;
|
|
DoNotOptimizeData(c1);
|
|
DoNotOptimizeData(c2);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_ConstructSizeValue(benchmark::State& st, Container, typename Container::value_type const& val) {
|
|
const auto size = st.range(0);
|
|
for (auto _ : st) {
|
|
Container c(size, val);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto begin = in.begin();
|
|
const auto end = in.end();
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
Container c(begin, end);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_ConstructFromRange(benchmark::State& st, Container, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
Container c(std::from_range, in);
|
|
DoNotOptimizeData(c);
|
|
}
|
|
}
|
|
|
|
template <class Container>
|
|
void BM_Pushback(benchmark::State& state, Container c) {
|
|
int count = state.range(0);
|
|
c.reserve(count);
|
|
while (state.KeepRunningBatch(count)) {
|
|
c.clear();
|
|
for (int i = 0; i != count; ++i) {
|
|
c.push_back(i);
|
|
}
|
|
benchmark::DoNotOptimize(c.data());
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_InsertValue(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
while (st.KeepRunning()) {
|
|
c.clear();
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.insert(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_InsertValueRehash(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
while (st.KeepRunning()) {
|
|
c.clear();
|
|
c.rehash(16);
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.insert(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_InsertDuplicate(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&c);
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.insert(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
void BM_EmplaceDuplicate(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
const auto end = in.end();
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&c);
|
|
benchmark::DoNotOptimize(&in);
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.begin(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.emplace(*it).first));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_Find(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&(*c.begin()));
|
|
const auto end = in.data() + in.size();
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.data(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.find(*it)));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_FindRehash(benchmark::State& st, Container c, GenInputs gen) {
|
|
c.rehash(8);
|
|
auto in = gen(st.range(0));
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(&(*c.begin()));
|
|
const auto end = in.data() + in.size();
|
|
while (st.KeepRunning()) {
|
|
for (auto it = in.data(); it != end; ++it) {
|
|
benchmark::DoNotOptimize(&(*c.find(*it)));
|
|
}
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_Rehash(benchmark::State& st, Container c, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
c.max_load_factor(3.0);
|
|
c.insert(in.begin(), in.end());
|
|
benchmark::DoNotOptimize(c);
|
|
const auto bucket_count = c.bucket_count();
|
|
while (st.KeepRunning()) {
|
|
c.rehash(bucket_count + 1);
|
|
c.rehash(bucket_count);
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_Compare_same_container(benchmark::State& st, Container, GenInputs gen) {
|
|
auto in = gen(st.range(0));
|
|
Container c1(in.begin(), in.end());
|
|
Container c2 = c1;
|
|
|
|
benchmark::DoNotOptimize(&(*c1.begin()));
|
|
benchmark::DoNotOptimize(&(*c2.begin()));
|
|
while (st.KeepRunning()) {
|
|
bool res = c1 == c2;
|
|
benchmark::DoNotOptimize(&res);
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
template <class Container, class GenInputs>
|
|
static void BM_Compare_different_containers(benchmark::State& st, Container, GenInputs gen) {
|
|
auto in1 = gen(st.range(0));
|
|
auto in2 = gen(st.range(0));
|
|
Container c1(in1.begin(), in1.end());
|
|
Container c2(in2.begin(), in2.end());
|
|
|
|
benchmark::DoNotOptimize(&(*c1.begin()));
|
|
benchmark::DoNotOptimize(&(*c2.begin()));
|
|
while (st.KeepRunning()) {
|
|
bool res = c1 == c2;
|
|
benchmark::DoNotOptimize(&res);
|
|
benchmark::ClobberMemory();
|
|
}
|
|
}
|
|
|
|
} // end namespace ContainerBenchmarks
|
|
|
|
#endif // BENCHMARK_CONTAINER_BENCHMARKS_H
|