From 6bc4a7685ee4321a74ad987032e678c6273775ff Mon Sep 17 00:00:00 2001 From: Zhihao Yuan Date: Mon, 10 Jun 2019 17:11:46 +0000 Subject: [PATCH] [libc++] Fix leading zeros in std::to_chars Summary: It is a bugfix proposal for https://bugs.llvm.org/show_bug.cgi?id=42166. `std::to_chars` appends leading zeros if input 64-bit value has 9, 10 or 11 digits. According to documentation `std::to_chars` must not append leading zeros: https://en.cppreference.com/w/cpp/utility/to_chars Changeset should not affect `std::to_chars` performance: http://quick-bench.com/CEpRs14xxA9WLvkXFtaJ3TWOVAg Unit test that `std::from_chars` supports compatibility for both `std::to_chars` outputs (previous and fixed one) already exists: https://github.com/llvm-mirror/libcxx/blob/1f60111b597e5cb80a4513ec86f79b7e137f7793/test/std/utilities/charconv/charconv.from.chars/integral.pass.cpp#L63 Reviewers: lichray, mclow.lists, ldionne, EricWF Reviewed By: lichray, mclow.lists Subscribers: zoecarver, christof, dexonsmith, libcxx-commits Differential Revision: https://reviews.llvm.org/D63047 llvm-svn: 362967 --- libcxx/src/charconv.cpp | 160 +++++------------- .../charconv.to.chars/integral.pass.cpp | 97 +++++++++++ 2 files changed, 141 insertions(+), 116 deletions(-) diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp index 6ea473b25f8f..d3035883a78f 100644 --- a/libcxx/src/charconv.cpp +++ b/libcxx/src/charconv.cpp @@ -60,48 +60,50 @@ append4(char* buffer, T i) return append2(append2(buffer, (i) / 100), (i) % 100); } +template +inline _LIBCPP_INLINE_VISIBILITY char* +append2_no_zeros(char* buffer, T v) +{ + if (v < 10) + return append1(buffer, v); + else + return append2(buffer, v); +} + +template +inline _LIBCPP_INLINE_VISIBILITY char* +append4_no_zeros(char* buffer, T v) +{ + if (v < 100) + return append2_no_zeros(buffer, v); + else if (v < 1000) + return append3(buffer, v); + else + return append4(buffer, v); +} + +template +inline _LIBCPP_INLINE_VISIBILITY char* +append8_no_zeros(char* buffer, T v) +{ + if (v < 10000) + { + buffer = append4_no_zeros(buffer, v); + } + else + { + buffer = append4_no_zeros(buffer, v / 10000); + buffer = append4(buffer, v % 10000); + } + return buffer; +} + char* __u32toa(uint32_t value, char* buffer) { - if (value < 10000) + if (value < 100000000) { - if (value < 100) - { - if (value < 10) - buffer = append1(buffer, value); - else - buffer = append2(buffer, value); - } - else - { - if (value < 1000) - buffer = append3(buffer, value); - else - buffer = append4(buffer, value); - } - } - else if (value < 100000000) - { - // value = bbbbcccc - const uint32_t b = value / 10000; - const uint32_t c = value % 10000; - - if (value < 1000000) - { - if (value < 100000) - buffer = append1(buffer, b); - else - buffer = append2(buffer, b); - } - else - { - if (value < 10000000) - buffer = append3(buffer, b); - else - buffer = append4(buffer, b); - } - - buffer = append4(buffer, c); + buffer = append8_no_zeros(buffer, value); } else { @@ -109,11 +111,7 @@ __u32toa(uint32_t value, char* buffer) const uint32_t a = value / 100000000; // 1 to 42 value %= 100000000; - if (a < 10) - buffer = append1(buffer, a); - else - buffer = append2(buffer, a); - + buffer = append2_no_zeros(buffer, a); buffer = append4(buffer, value / 10000); buffer = append4(buffer, value % 10000); } @@ -127,71 +125,14 @@ __u64toa(uint64_t value, char* buffer) if (value < 100000000) { uint32_t v = static_cast(value); - if (v < 10000) - { - if (v < 100) - { - if (v < 10) - buffer = append1(buffer, v); - else - buffer = append2(buffer, v); - } - else - { - if (v < 1000) - buffer = append3(buffer, v); - else - buffer = append4(buffer, v); - } - } - else - { - // value = bbbbcccc - const uint32_t b = v / 10000; - const uint32_t c = v % 10000; - - if (v < 1000000) - { - if (v < 100000) - buffer = append1(buffer, b); - else - buffer = append2(buffer, b); - } - else - { - if (v < 10000000) - buffer = append3(buffer, b); - else - buffer = append4(buffer, b); - } - - buffer = append4(buffer, c); - } + buffer = append8_no_zeros(buffer, v); } else if (value < 10000000000000000) { const uint32_t v0 = static_cast(value / 100000000); const uint32_t v1 = static_cast(value % 100000000); - const uint32_t b0 = v0 / 10000; - const uint32_t c0 = v0 % 10000; - - if (v0 < 1000000) - { - if (v0 < 100000) - buffer = append1(buffer, b0); - else - buffer = append2(buffer, b0); - } - else - { - if (v0 < 10000000) - buffer = append3(buffer, b0); - else - buffer = append4(buffer, b0); - } - - buffer = append4(buffer, c0); + buffer = append8_no_zeros(buffer, v0); buffer = append4(buffer, v1 / 10000); buffer = append4(buffer, v1 % 10000); } @@ -201,20 +142,7 @@ __u64toa(uint64_t value, char* buffer) static_cast(value / 10000000000000000); // 1 to 1844 value %= 10000000000000000; - if (a < 100) - { - if (a < 10) - buffer = append1(buffer, a); - else - buffer = append2(buffer, a); - } - else - { - if (a < 1000) - buffer = append3(buffer, a); - else - buffer = append4(buffer, a); - } + buffer = append4_no_zeros(buffer, a); const uint32_t v0 = static_cast(value / 100000000); const uint32_t v1 = static_cast(value % 100000000); diff --git a/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp index 4bfe27d6dac7..e30d3c849091 100644 --- a/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp +++ b/libcxx/test/std/utilities/charconv/charconv.to.chars/integral.pass.cpp @@ -45,6 +45,56 @@ struct test_basics : to_chars_test_base test(0xdeadbeaf, "deadbeaf", 16); test(0755, "755", 8); + // Test each len till len of UINT64_MAX = 20 because to_chars algorithm + // makes branches based on decimal digits count in the value string + // representation. + // Test driver automatically skips values not fitting into source type. + test(1UL, "1"); + test(12UL, "12"); + test(123UL, "123"); + test(1234UL, "1234"); + test(12345UL, "12345"); + test(123456UL, "123456"); + test(1234567UL, "1234567"); + test(12345678UL, "12345678"); + test(123456789UL, "123456789"); + test(1234567890UL, "1234567890"); + test(12345678901UL, "12345678901"); + test(123456789012UL, "123456789012"); + test(1234567890123UL, "1234567890123"); + test(12345678901234UL, "12345678901234"); + test(123456789012345UL, "123456789012345"); + test(1234567890123456UL, "1234567890123456"); + test(12345678901234567UL, "12345678901234567"); + test(123456789012345678UL, "123456789012345678"); + test(1234567890123456789UL, "1234567890123456789"); + test(12345678901234567890UL, "12345678901234567890"); + + // Test special cases with zeros inside a value string representation, + // to_chars algorithm processes them in a special way and should not + // skip trailing zeros + // Test driver automatically skips values not fitting into source type. + test(0UL, "0"); + test(10UL, "10"); + test(100UL, "100"); + test(1000UL, "1000"); + test(10000UL, "10000"); + test(100000UL, "100000"); + test(1000000UL, "1000000"); + test(10000000UL, "10000000"); + test(100000000UL, "100000000"); + test(1000000000UL, "1000000000"); + test(10000000000UL, "10000000000"); + test(100000000000UL, "100000000000"); + test(1000000000000UL, "1000000000000"); + test(10000000000000UL, "10000000000000"); + test(100000000000000UL, "100000000000000"); + test(1000000000000000UL, "1000000000000000"); + test(10000000000000000UL, "10000000000000000"); + test(100000000000000000UL, "100000000000000000"); + test(1000000000000000000UL, "1000000000000000000"); + test(10000000000000000000UL, "10000000000000000000"); + for (int b = 2; b < 37; ++b) { using xl = std::numeric_limits; @@ -73,6 +123,53 @@ struct test_signed : to_chars_test_base test(-2647, "-101001010111", 2); test(-0xcc1, "-cc1", 16); + // Test each len till len of INT64_MAX = 19 because to_chars algorithm + // makes branches based on decimal digits count in the value string + // representation. + // Test driver automatically skips values not fitting into source type. + test(-1L, "-1"); + test(-12L, "-12"); + test(-123L, "-123"); + test(-1234L, "-1234"); + test(-12345L, "-12345"); + test(-123456L, "-123456"); + test(-1234567L, "-1234567"); + test(-12345678L, "-12345678"); + test(-123456789L, "-123456789"); + test(-1234567890L, "-1234567890"); + test(-12345678901L, "-12345678901"); + test(-123456789012L, "-123456789012"); + test(-1234567890123L, "-1234567890123"); + test(-12345678901234L, "-12345678901234"); + test(-123456789012345L, "-123456789012345"); + test(-1234567890123456L, "-1234567890123456"); + test(-12345678901234567L, "-12345678901234567"); + test(-123456789012345678L, "-123456789012345678"); + test(-1234567890123456789L, "-1234567890123456789"); + + // Test special cases with zeros inside a value string representation, + // to_chars algorithm processes them in a special way and should not + // skip trailing zeros + // Test driver automatically skips values not fitting into source type. + test(-10L, "-10"); + test(-100L, "-100"); + test(-1000L, "-1000"); + test(-10000L, "-10000"); + test(-100000L, "-100000"); + test(-1000000L, "-1000000"); + test(-10000000L, "-10000000"); + test(-100000000L, "-100000000"); + test(-1000000000L, "-1000000000"); + test(-10000000000L, "-10000000000"); + test(-100000000000L, "-100000000000"); + test(-1000000000000L, "-1000000000000"); + test(-10000000000000L, "-10000000000000"); + test(-100000000000000L, "-100000000000000"); + test(-1000000000000000L, "-1000000000000000"); + test(-10000000000000000L, "-10000000000000000"); + test(-100000000000000000L, "-100000000000000000"); + test(-1000000000000000000L, "-1000000000000000000"); + for (int b = 2; b < 37; ++b) { using xl = std::numeric_limits;