Optimize StringTableBuilder.

This is a patch to improve StringTableBuilder's performance. That class'
finalize function is very hot particularly in LLD because the function
does tail-merge strings in string tables or SHF_MERGE sections.

Generic std::sort-style sorter is not efficient for sorting strings.
The function implemented in this patch seems to be more efficient.

Here's a benchmark of LLD to link Clang with or without this patch.
The numbers are medians of 50 runs.

-O0
real 0m0.455s
real 0m0.430s (5.5% faster)

-O3
real 0m0.487s
real 0m0.452s (7.2% faster)

Since that is a benchmark of the whole linker, the speedup of
StringTableBuilder itself is much more than that.

http://reviews.llvm.org/D14053

llvm-svn: 251337
This commit is contained in:
Rui Ueyama 2015-10-26 19:58:29 +00:00
parent fd8a0e49e0
commit e3ea91c6f5

View File

@ -18,20 +18,47 @@ using namespace llvm;
StringTableBuilder::StringTableBuilder(Kind K) : K(K) {}
static int compareBySuffix(std::pair<StringRef, size_t> *const *AP,
std::pair<StringRef, size_t> *const *BP) {
StringRef A = (*AP)->first;
StringRef B = (*BP)->first;
size_t SizeA = A.size();
size_t SizeB = B.size();
size_t Len = std::min(SizeA, SizeB);
for (size_t I = 0; I < Len; ++I) {
char CA = A[SizeA - I - 1];
char CB = B[SizeB - I - 1];
if (CA != CB)
return CB - CA;
typedef std::pair<StringRef, size_t> StringPair;
// Returns the character at Pos from end of a string.
static int charTailAt(StringPair *P, size_t Pos) {
StringRef S = P->first;
if (Pos >= S.size())
return -1;
return (unsigned char)S[S.size() - Pos - 1];
}
// Three-way radix quicksort. This is much faster than std::sort with strcmp
// because it does not compare characters that we already know the same.
static void qsort(StringPair **Begin, StringPair **End, int Pos) {
tailcall:
if (End - Begin <= 1)
return;
// Partition items. Items in [Begin, P) are greater than the pivot,
// [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
int Pivot = charTailAt(*Begin, Pos);
StringPair **P = Begin;
StringPair **Q = End;
for (StringPair **R = Begin + 1; R < Q;) {
int C = charTailAt(*R, Pos);
if (C > Pivot)
std::swap(*P++, *R++);
else if (C < Pivot)
std::swap(*--Q, *R);
else
R++;
}
qsort(Begin, P, Pos);
qsort(Q, End, Pos);
if (Pivot != -1) {
// qsort(P, Q, Pos + 1), but with tail call optimization.
Begin = P;
End = Q;
++Pos;
goto tailcall;
}
return SizeB - SizeA;
}
void StringTableBuilder::finalize() {
@ -40,7 +67,8 @@ void StringTableBuilder::finalize() {
for (std::pair<StringRef, size_t> &P : StringIndexMap)
Strings.push_back(&P);
array_pod_sort(Strings.begin(), Strings.end(), compareBySuffix);
if (!Strings.empty())
qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
switch (K) {
case RAW: