[libc++] Improve std::to_chars for base != 10.

This improves the speed of `to_chars` for bases 2, 8, and 16.
These bases are common and used in `<format>`. This change
uses a lookup table, like done in base 10 and causes an increase
in code size. The change has a small overhead for the other bases.

```
Benchmark                             Time             CPU      Time Old      Time New       CPU Old       CPU New
------------------------------------------------------------------------------------------------------------------
BM_to_chars_good/2                 -0.9476         -0.9476           252            13           252            13
BM_to_chars_good/3                 +0.0018         +0.0018           145           145           145           145
BM_to_chars_good/4                 +0.0108         +0.0108           104           105           104           105
BM_to_chars_good/5                 +0.0159         +0.0160            89            91            89            91
BM_to_chars_good/6                 +0.0162         +0.0162            80            81            80            81
BM_to_chars_good/7                 +0.0117         +0.0117            72            73            72            73
BM_to_chars_good/8                 -0.8643         -0.8643            64             9            64             9
BM_to_chars_good/9                 +0.0095         +0.0095            60            60            60            60
BM_to_chars_good/10                +0.0540         +0.0540             6             6             6             6
BM_to_chars_good/11                +0.0299         +0.0299            55            57            55            57
BM_to_chars_good/12                +0.0060         +0.0060            48            49            49            49
BM_to_chars_good/13                +0.0102         +0.0102            48            48            48            48
BM_to_chars_good/14                +0.0184         +0.0185            47            48            47            48
BM_to_chars_good/15                +0.0269         +0.0269            44            45            44            45
BM_to_chars_good/16                -0.8207         -0.8207            37             7            37             7
BM_to_chars_good/17                +0.0241         +0.0241            37            38            37            38
BM_to_chars_good/18                +0.0221         +0.0221            37            38            37            38
BM_to_chars_good/19                +0.0222         +0.0223            37            38            37            38
BM_to_chars_good/20                +0.0317         +0.0317            38            39            38            39
BM_to_chars_good/21                +0.0342         +0.0341            38            39            38            39
BM_to_chars_good/22                +0.0336         +0.0336            36            38            36            38
BM_to_chars_good/23                +0.0222         +0.0222            34            35            34            35
BM_to_chars_good/24                +0.0185         +0.0185            31            32            31            32
BM_to_chars_good/25                +0.0157         +0.0157            32            32            32            32
BM_to_chars_good/26                +0.0181         +0.0181            32            32            32            32
BM_to_chars_good/27                +0.0153         +0.0153            32            32            32            32
BM_to_chars_good/28                +0.0179         +0.0179            32            32            32            32
BM_to_chars_good/29                +0.0189         +0.0189            32            33            32            33
BM_to_chars_good/30                +0.0212         +0.0212            32            33            32            33
BM_to_chars_good/31                +0.0221         +0.0221            32            33            32            33
BM_to_chars_good/32                +0.0292         +0.0292            32            33            32            33
BM_to_chars_good/33                +0.0319         +0.0319            32            33            32            33
BM_to_chars_good/34                +0.0411         +0.0410            33            34            33            34
BM_to_chars_good/35                +0.0515         +0.0515            33            34            33            34
BM_to_chars_good/36                +0.0502         +0.0502            32            34            32            34
BM_to_chars_bad/2                  -0.8752         -0.8752            40             5            40             5
BM_to_chars_bad/3                  +0.1952         +0.1952            21            26            21            26
BM_to_chars_bad/4                  +0.3626         +0.3626            16            22            16            22
BM_to_chars_bad/5                  +0.2267         +0.2268            17            21            17            21
BM_to_chars_bad/6                  +0.3560         +0.3559            14            19            14            19
BM_to_chars_bad/7                  +0.4599         +0.4600            12            18            12            18
BM_to_chars_bad/8                  -0.5074         -0.5074            11             5            11             5
BM_to_chars_bad/9                  +0.4814         +0.4814            10            15            10            15
BM_to_chars_bad/10                 +0.7761         +0.7761             2             4             2             4
BM_to_chars_bad/11                 +0.3948         +0.3948            12            16            12            16
BM_to_chars_bad/12                 +0.3203         +0.3203            10            13            10            13
BM_to_chars_bad/13                 +0.3067         +0.3067            11            14            11            14
BM_to_chars_bad/14                 +0.2235         +0.2235            12            14            12            14
BM_to_chars_bad/15                 +0.2675         +0.2675            11            14            11            14
BM_to_chars_bad/16                 -0.1801         -0.1801             7             5             7             5
BM_to_chars_bad/17                 +0.5651         +0.5651             7            11             7            11
BM_to_chars_bad/18                 +0.5407         +0.5406             7            11             7            11
BM_to_chars_bad/19                 +0.5593         +0.5593             8            12             8            12
BM_to_chars_bad/20                 +0.5823         +0.5823             8            13             8            13
BM_to_chars_bad/21                 +0.6032         +0.6032             9            15             9            15
BM_to_chars_bad/22                 +0.6407         +0.6408             9            14             9            14
BM_to_chars_bad/23                 +0.6292         +0.6292             7            12             7            12
BM_to_chars_bad/24                 +0.5784         +0.5784             6            10             6            10
BM_to_chars_bad/25                 +0.5784         +0.5784             6            10             6            10
BM_to_chars_bad/26                 +0.5713         +0.5713             7            10             7            10
BM_to_chars_bad/27                 +0.5969         +0.5969             7            11             7            11
BM_to_chars_bad/28                 +0.6131         +0.6131             7            11             7            11
BM_to_chars_bad/29                 +0.6937         +0.6937             7            11             7            11
BM_to_chars_bad/30                 +0.7655         +0.7656             7            12             7            12
BM_to_chars_bad/31                 +0.8939         +0.8939             6            12             6            12
BM_to_chars_bad/32                 +1.0157         +1.0157             6            13             6            13
BM_to_chars_bad/33                 +1.0279         +1.0279             7            14             7            14
BM_to_chars_bad/34                 +1.0388         +1.0388             7            14             7            14
BM_to_chars_bad/35                 +1.0990         +1.0990             7            15             7            15
BM_to_chars_bad/36                 +1.1503         +1.1503             7            15             7            15
```

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D97705
This commit is contained in:
Mark de Wever 2021-02-27 16:52:39 +01:00
parent 92eea11cca
commit 6c11aebd30

View File

@ -319,6 +319,197 @@ __to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
return __to_chars_integral(__first, __last, __x, __base, false_type());
}
namespace __itoa {
static constexpr char __base_2_lut[64] = {
'0', '0', '0', '0',
'0', '0', '0', '1',
'0', '0', '1', '0',
'0', '0', '1', '1',
'0', '1', '0', '0',
'0', '1', '0', '1',
'0', '1', '1', '0',
'0', '1', '1', '1',
'1', '0', '0', '0',
'1', '0', '0', '1',
'1', '0', '1', '0',
'1', '0', '1', '1',
'1', '1', '0', '0',
'1', '1', '0', '1',
'1', '1', '1', '0',
'1', '1', '1', '1'
};
static constexpr char __base_8_lut[128] = {
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7',
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7',
'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7',
'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7',
'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7',
'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7',
'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7',
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7'
};
static constexpr char __base_16_lut[512] = {
'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '0', 'a', '0', 'b', '0', 'c', '0', 'd', '0', 'e', '0', 'f',
'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '1', 'a', '1', 'b', '1', 'c', '1', 'd', '1', 'e', '1', 'f',
'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '2', 'a', '2', 'b', '2', 'c', '2', 'd', '2', 'e', '2', 'f',
'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '3', 'a', '3', 'b', '3', 'c', '3', 'd', '3', 'e', '3', 'f',
'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '4', 'a', '4', 'b', '4', 'c', '4', 'd', '4', 'e', '4', 'f',
'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', '5', 'a', '5', 'b', '5', 'c', '5', 'd', '5', 'e', '5', 'f',
'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', '6', 'a', '6', 'b', '6', 'c', '6', 'd', '6', 'e', '6', 'f',
'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '7', 'a', '7', 'b', '7', 'c', '7', 'd', '7', 'e', '7', 'f',
'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', '8', 'a', '8', 'b', '8', 'c', '8', 'd', '8', 'e', '8', 'f',
'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9', '9', 'a', '9', 'b', '9', 'c', '9', 'd', '9', 'e', '9', 'f',
'a', '0', 'a', '1', 'a', '2', 'a', '3', 'a', '4', 'a', '5', 'a', '6', 'a', '7', 'a', '8', 'a', '9', 'a', 'a', 'a', 'b', 'a', 'c', 'a', 'd', 'a', 'e', 'a', 'f',
'b', '0', 'b', '1', 'b', '2', 'b', '3', 'b', '4', 'b', '5', 'b', '6', 'b', '7', 'b', '8', 'b', '9', 'b', 'a', 'b', 'b', 'b', 'c', 'b', 'd', 'b', 'e', 'b', 'f',
'c', '0', 'c', '1', 'c', '2', 'c', '3', 'c', '4', 'c', '5', 'c', '6', 'c', '7', 'c', '8', 'c', '9', 'c', 'a', 'c', 'b', 'c', 'c', 'c', 'd', 'c', 'e', 'c', 'f',
'd', '0', 'd', '1', 'd', '2', 'd', '3', 'd', '4', 'd', '5', 'd', '6', 'd', '7', 'd', '8', 'd', '9', 'd', 'a', 'd', 'b', 'd', 'c', 'd', 'd', 'd', 'e', 'd', 'f',
'e', '0', 'e', '1', 'e', '2', 'e', '3', 'e', '4', 'e', '5', 'e', '6', 'e', '7', 'e', '8', 'e', '9', 'e', 'a', 'e', 'b', 'e', 'c', 'e', 'd', 'e', 'e', 'e', 'f',
'f', '0', 'f', '1', 'f', '2', 'f', '3', 'f', '4', 'f', '5', 'f', '6', 'f', '7', 'f', '8', 'f', '9', 'f', 'a', 'f', 'b', 'f', 'c', 'f', 'd', 'f', 'e', 'f', 'f'
};
template <unsigned _Base>
struct _LIBCPP_HIDDEN __integral;
template <>
struct _LIBCPP_HIDDEN __integral<2> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
// effect since the code scans for the most significant bit set. (Note
// that __libcpp_clz doesn't work for 0.)
return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
}
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static to_chars_result __to_chars(char* __first, char* __last, _Tp __value) {
ptrdiff_t __cap = __last - __first;
int __n = __width(__value);
if (__n > __cap)
return {__last, errc::value_too_large};
__last = __first + __n;
char* __p = __last;
const unsigned __divisor = 16;
while (__value > __divisor) {
unsigned __c = __value % __divisor;
__value /= __divisor;
__p -= 4;
std::memcpy(__p, &__base_2_lut[4 * __c], 4);
}
do {
unsigned __c = __value % 2;
__value /= 2;
*--__p = "01"[__c];
} while (__value != 0);
return {__last, errc(0)};
}
};
template <>
struct _LIBCPP_HIDDEN __integral<8> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
// effect since the code scans for the most significat bit set. (Note
// that __libcpp_clz doesn't work for 0.)
return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
}
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static to_chars_result __to_chars(char* __first, char* __last, _Tp __value) {
ptrdiff_t __cap = __last - __first;
int __n = __width(__value);
if (__n > __cap)
return {__last, errc::value_too_large};
__last = __first + __n;
char* __p = __last;
unsigned __divisor = 64;
while (__value > __divisor) {
unsigned __c = __value % __divisor;
__value /= __divisor;
__p -= 2;
std::memcpy(__p, &__base_8_lut[2 * __c], 2);
}
do {
unsigned __c = __value % 8;
__value /= 8;
*--__p = "01234567"[__c];
} while (__value != 0);
return {__last, errc(0)};
}
};
template <>
struct _LIBCPP_HIDDEN __integral<16> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
// effect since the code scans for the most significat bit set. (Note
// that __libcpp_clz doesn't work for 0.)
return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
}
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static to_chars_result __to_chars(char* __first, char* __last, _Tp __value) {
ptrdiff_t __cap = __last - __first;
int __n = __width(__value);
if (__n > __cap)
return {__last, errc::value_too_large};
__last = __first + __n;
char* __p = __last;
unsigned __divisor = 256;
while (__value > __divisor) {
unsigned __c = __value % __divisor;
__value /= __divisor;
__p -= 2;
std::memcpy(__p, &__base_16_lut[2 * __c], 2);
}
if (__first != __last)
do {
unsigned __c = __value % 16;
__value /= 16;
*--__p = "0123456789abcdef"[__c];
} while (__value != 0);
return {__last, errc(0)};
}
};
} // namespace __itoa
template <unsigned _Base, typename _Tp,
typename enable_if<(sizeof(_Tp) >= sizeof(unsigned)), int>::type = 0>
_LIBCPP_HIDE_FROM_ABI int
__to_chars_integral_width(_Tp __value) {
return __itoa::__integral<_Base>::__width(__value);
}
template <unsigned _Base, typename _Tp,
typename enable_if<(sizeof(_Tp) < sizeof(unsigned)), int>::type = 0>
_LIBCPP_HIDE_FROM_ABI int
__to_chars_integral_width(_Tp __value) {
return std::__to_chars_integral_width<_Base>(static_cast<unsigned>(__value));
}
template <unsigned _Base, typename _Tp,
typename enable_if<(sizeof(_Tp) >= sizeof(unsigned)), int>::type = 0>
_LIBCPP_HIDE_FROM_ABI to_chars_result
__to_chars_integral(char* __first, char* __last, _Tp __value) {
return __itoa::__integral<_Base>::__to_chars(__first, __last, __value);
}
template <unsigned _Base, typename _Tp,
typename enable_if<(sizeof(_Tp) < sizeof(unsigned)), int>::type = 0>
_LIBCPP_HIDE_FROM_ABI to_chars_result
__to_chars_integral(char* __first, char* __last, _Tp __value) {
return std::__to_chars_integral<_Base>(__first, __last, static_cast<unsigned>(__value));
}
template <typename _Tp>
_LIBCPP_AVAILABILITY_TO_CHARS _LIBCPP_HIDE_FROM_ABI int
__to_chars_integral_width(_Tp __value, unsigned __base) {
@ -352,9 +543,18 @@ inline _LIBCPP_HIDE_FROM_ABI to_chars_result
__to_chars_integral(char* __first, char* __last, _Tp __value, int __base,
false_type)
{
if (__base == 10)
if (__base == 10) [[likely]]
return __to_chars_itoa(__first, __last, __value, false_type());
switch (__base) {
case 2:
return __to_chars_integral<2>(__first, __last, __value);
case 8:
return __to_chars_integral<8>(__first, __last, __value);
case 16:
return __to_chars_integral<16>(__first, __last, __value);
}
ptrdiff_t __cap = __last - __first;
int __n = __to_chars_integral_width(__value, __base);
if (__n > __cap)