Bug 1343005 - Optimize Quote in json.cpp. r=jorendorff

Implements the following optimizations:

- Resize the destination buffer to the maximum size possible, write to it
  directly, and then shrink it back to the actual size written.
- Avoid iterating over non-special-character runs multiple times.
- Use a lookup table to determine both if we should escape the character
  what the character after the escape sequence should be if so.
- Converts the destination buffer to the correct character type in advance of
  stringifying, instead of checking every character in the inner loop.

MozReview-Commit-ID: 7iWRqm3EopX

--HG--
extra : rebase_source : 1b18faee586718ec54b922222a28a31e3899d771
This commit is contained in:
Thom Chiovoloni 2017-08-23 20:16:20 -04:00
parent 52deded719
commit a7d756b8ff
3 changed files with 88 additions and 66 deletions

View File

@ -41,78 +41,81 @@ const Class js::JSONClass = {
JSCLASS_HAS_CACHED_PROTO(JSProto_JSON)
};
static inline bool
IsQuoteSpecialCharacter(char16_t c)
/* ES5 15.12.3 Quote.
* Requires that the destination has enough space allocated for src after escaping
* (that is, `2 + 6 * (srcEnd - srcBegin)` characters).
*/
template <typename SrcCharT, typename DstCharT>
static MOZ_ALWAYS_INLINE RangedPtr<DstCharT>
InfallibleQuote(RangedPtr<const SrcCharT> srcBegin, RangedPtr<const SrcCharT> srcEnd, RangedPtr<DstCharT> dstPtr)
{
static_assert('\b' < ' ', "'\\b' must be treated as special below");
static_assert('\f' < ' ', "'\\f' must be treated as special below");
static_assert('\n' < ' ', "'\\n' must be treated as special below");
static_assert('\r' < ' ', "'\\r' must be treated as special below");
static_assert('\t' < ' ', "'\\t' must be treated as special below");
return c == '"' || c == '\\' || c < ' ';
}
/* ES5 15.12.3 Quote. */
template <typename CharT>
static bool
Quote(StringBuffer& sb, JSLinearString* str)
{
size_t len = str->length();
// Maps characters < 256 to the value that must follow the '\\' in the quoted string.
// Entries with 'u' are handled as \\u00xy, and entries with 0 are not escaped in any way.
// Characters >= 256 are all assumed to be unescaped.
static const Latin1Char escapeLookup[256] = {
'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't',
'n', 'u', 'f', 'r', 'u', 'u', 'u', 'u', 'u', 'u',
'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u',
'u', 'u', 0, 0, '\"', 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, '\\', // rest are all zeros
};
/* Step 1. */
if (!sb.append('"'))
return false;
*dstPtr++ = '"';
/* Step 2. */
JS::AutoCheckCannotGC nogc;
const RangedPtr<const CharT> buf(str->chars<CharT>(nogc), len);
for (size_t i = 0; i < len; ++i) {
/* Batch-append maximal character sequences containing no escapes. */
size_t mark = i;
do {
if (IsQuoteSpecialCharacter(buf[i]))
break;
} while (++i < len);
if (i > mark) {
if (!sb.appendSubstring(str, mark, i - mark))
return false;
if (i == len)
break;
while (srcBegin != srcEnd) {
SrcCharT c = *srcBegin++;
size_t escapeIndex = c % sizeof(escapeLookup);
Latin1Char escaped = escapeLookup[escapeIndex];
if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) {
*dstPtr++ = c;
continue;
}
char16_t c = buf[i];
if (c == '"' || c == '\\') {
if (!sb.append('\\') || !sb.append(c))
return false;
} else if (c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') {
char16_t abbrev = (c == '\b')
? 'b'
: (c == '\f')
? 'f'
: (c == '\n')
? 'n'
: (c == '\r')
? 'r'
: 't';
if (!sb.append('\\') || !sb.append(abbrev))
return false;
} else {
*dstPtr++ = '\\';
*dstPtr++ = escaped;
if (escaped == 'u') {
MOZ_ASSERT(c < ' ');
if (!sb.append("\\u00"))
return false;
MOZ_ASSERT((c >> 4) < 10);
uint8_t x = c >> 4, y = c % 16;
if (!sb.append(Latin1Char('0' + x)) ||
!sb.append(Latin1Char(y < 10 ? '0' + y : 'a' + (y - 10))))
{
return false;
}
*dstPtr++ = '0';
*dstPtr++ = '0';
*dstPtr++ = '0' + x;
*dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10);
}
}
/* Steps 3-4. */
return sb.append('"');
*dstPtr++ = '"';
return dstPtr;
}
template <typename SrcCharT, typename CharVectorT>
static bool
Quote(CharVectorT& sb, JSLinearString* str)
{
// We resize the backing buffer to the maximum size we could possibly need,
// write the escaped string into it, and shrink it back to the size we ended
// up needing.
size_t len = str->length();
size_t sbInitialLen = sb.length();
if (!sb.growByUninitialized(len * 6 + 2))
return false;
typedef typename CharVectorT::ElementType DstCharT;
JS::AutoCheckCannotGC nogc;
RangedPtr<const SrcCharT> srcBegin{str->chars<SrcCharT>(nogc), len};
RangedPtr<DstCharT> dstBegin{sb.begin(), sb.begin(), sb.end()};
RangedPtr<DstCharT> dstEnd = InfallibleQuote(srcBegin, srcBegin + len, dstBegin + sbInitialLen);
size_t newSize = dstEnd - dstBegin;
sb.shrinkTo(newSize);
return true;
}
static bool
@ -122,9 +125,18 @@ Quote(JSContext* cx, StringBuffer& sb, JSString* str)
if (!linear)
return false;
return linear->hasLatin1Chars()
? Quote<Latin1Char>(sb, linear)
: Quote<char16_t>(sb, linear);
// Check if either has non-latin1 before calling ensure, so that the buffer's
// hasEnsured flag is set if the converstion to twoByte was automatic.
if (!sb.isUnderlyingBufferLatin1() || linear->hasTwoByteChars()) {
if (!sb.ensureTwoByteChars())
return false;
}
if (linear->hasTwoByteChars())
return Quote<char16_t>(sb.rawTwoByteBuffer(), linear);
return sb.isUnderlyingBufferLatin1()
? Quote<Latin1Char>(sb.latin1Chars(), linear)
: Quote<Latin1Char>(sb.rawTwoByteBuffer(), linear);
}
namespace {

View File

@ -34,6 +34,9 @@ assertStringify({'mmm\\mmm':"hmm"}, '{"mmm\\\\mmm":"hmm"}');
assertStringify({'mmm\\mmm\\mmm':"hmm"}, '{"mmm\\\\mmm\\\\mmm":"hmm"}');
assertStringify({"mm\u000bmm":"hmm"}, '{"mm\\u000bmm":"hmm"}');
assertStringify({"mm\u0000mm":"hmm"}, '{"mm\\u0000mm":"hmm"}');
assertStringify({"\u0000\u000b":""}, '{"\\u0000\\u000b":""}');
assertStringify({"\u000b\ufdfd":"hmm"}, '{"\\u000b\ufdfd":"hmm"}');
assertStringify({"\u000b\ufdfd":"h\xfc\ufdfdm"}, '{"\\u000b\ufdfd":"h\xfc\ufdfdm"}');
var x = {"free":"variable"};
assertStringify(x, '{"free":"variable"}');

View File

@ -62,12 +62,8 @@ class StringBuffer
MOZ_ALWAYS_INLINE bool isLatin1() const { return cb.constructed<Latin1CharBuffer>(); }
MOZ_ALWAYS_INLINE bool isTwoByte() const { return !isLatin1(); }
MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { return cb.ref<Latin1CharBuffer>(); }
MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() { return cb.ref<TwoByteCharBuffer>(); }
MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
return cb.ref<Latin1CharBuffer>();
}
MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const {
return cb.ref<TwoByteCharBuffer>();
}
@ -85,6 +81,12 @@ class StringBuffer
cb.construct<Latin1CharBuffer>(cx);
}
MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { return cb.ref<Latin1CharBuffer>(); }
MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
return cb.ref<Latin1CharBuffer>();
}
void clear() {
if (isLatin1())
latin1Chars().clear();
@ -135,6 +137,11 @@ class StringBuffer
return append(Latin1Char(c));
}
TwoByteCharBuffer& rawTwoByteBuffer() {
MOZ_ASSERT(hasEnsuredTwoByteChars_);
return twoByteChars();
}
inline MOZ_MUST_USE bool append(const char16_t* begin, const char16_t* end);
MOZ_MUST_USE bool append(const char16_t* chars, size_t len) {