mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-29 07:42:04 +00:00
Bug 1343005 - Optimize Quote in json.cpp. r=jorendorff
Implements the following optimizations: - Resize the destination buffer to the maximum size that could possibly be needed, write to it directly, and then shrink it back to the size actually written. - Avoid iterating over runs of non-special characters multiple times. - Use a lookup table to determine both whether a character must be escaped and, if so, which character should follow the backslash in its escape sequence. - Convert the destination buffer to the correct character type in advance of stringifying, instead of checking every character in the inner loop. MozReview-Commit-ID: 7iWRqm3EopX --HG-- extra : rebase_source : 1b18faee586718ec54b922222a28a31e3899d771
This commit is contained in:
parent
52deded719
commit
a7d756b8ff
136
js/src/json.cpp
136
js/src/json.cpp
@ -41,78 +41,81 @@ const Class js::JSONClass = {
|
||||
JSCLASS_HAS_CACHED_PROTO(JSProto_JSON)
|
||||
};
|
||||
|
||||
static inline bool
|
||||
IsQuoteSpecialCharacter(char16_t c)
|
||||
/* ES5 15.12.3 Quote.
|
||||
* Requires that the destination has enough space allocated for src after escaping
|
||||
* (that is, `2 + 6 * (srcEnd - srcBegin)` characters).
|
||||
*/
|
||||
template <typename SrcCharT, typename DstCharT>
|
||||
static MOZ_ALWAYS_INLINE RangedPtr<DstCharT>
|
||||
InfallibleQuote(RangedPtr<const SrcCharT> srcBegin, RangedPtr<const SrcCharT> srcEnd, RangedPtr<DstCharT> dstPtr)
|
||||
{
|
||||
static_assert('\b' < ' ', "'\\b' must be treated as special below");
|
||||
static_assert('\f' < ' ', "'\\f' must be treated as special below");
|
||||
static_assert('\n' < ' ', "'\\n' must be treated as special below");
|
||||
static_assert('\r' < ' ', "'\\r' must be treated as special below");
|
||||
static_assert('\t' < ' ', "'\\t' must be treated as special below");
|
||||
|
||||
return c == '"' || c == '\\' || c < ' ';
|
||||
}
|
||||
|
||||
/* ES5 15.12.3 Quote. */
|
||||
template <typename CharT>
|
||||
static bool
|
||||
Quote(StringBuffer& sb, JSLinearString* str)
|
||||
{
|
||||
size_t len = str->length();
|
||||
// Maps characters < 256 to the value that must follow the '\\' in the quoted string.
|
||||
// Entries with 'u' are handled as \\u00xy, and entries with 0 are not escaped in any way.
|
||||
// Characters >= 256 are all assumed to be unescaped.
|
||||
static const Latin1Char escapeLookup[256] = {
|
||||
'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't',
|
||||
'n', 'u', 'f', 'r', 'u', 'u', 'u', 'u', 'u', 'u',
|
||||
'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u',
|
||||
'u', 'u', 0, 0, '\"', 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, '\\', // rest are all zeros
|
||||
};
|
||||
|
||||
/* Step 1. */
|
||||
if (!sb.append('"'))
|
||||
return false;
|
||||
*dstPtr++ = '"';
|
||||
|
||||
/* Step 2. */
|
||||
JS::AutoCheckCannotGC nogc;
|
||||
const RangedPtr<const CharT> buf(str->chars<CharT>(nogc), len);
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
/* Batch-append maximal character sequences containing no escapes. */
|
||||
size_t mark = i;
|
||||
do {
|
||||
if (IsQuoteSpecialCharacter(buf[i]))
|
||||
break;
|
||||
} while (++i < len);
|
||||
if (i > mark) {
|
||||
if (!sb.appendSubstring(str, mark, i - mark))
|
||||
return false;
|
||||
if (i == len)
|
||||
break;
|
||||
while (srcBegin != srcEnd) {
|
||||
SrcCharT c = *srcBegin++;
|
||||
size_t escapeIndex = c % sizeof(escapeLookup);
|
||||
Latin1Char escaped = escapeLookup[escapeIndex];
|
||||
if (MOZ_LIKELY((escapeIndex != size_t(c)) || !escaped)) {
|
||||
*dstPtr++ = c;
|
||||
continue;
|
||||
}
|
||||
|
||||
char16_t c = buf[i];
|
||||
if (c == '"' || c == '\\') {
|
||||
if (!sb.append('\\') || !sb.append(c))
|
||||
return false;
|
||||
} else if (c == '\b' || c == '\f' || c == '\n' || c == '\r' || c == '\t') {
|
||||
char16_t abbrev = (c == '\b')
|
||||
? 'b'
|
||||
: (c == '\f')
|
||||
? 'f'
|
||||
: (c == '\n')
|
||||
? 'n'
|
||||
: (c == '\r')
|
||||
? 'r'
|
||||
: 't';
|
||||
if (!sb.append('\\') || !sb.append(abbrev))
|
||||
return false;
|
||||
} else {
|
||||
*dstPtr++ = '\\';
|
||||
*dstPtr++ = escaped;
|
||||
if (escaped == 'u') {
|
||||
MOZ_ASSERT(c < ' ');
|
||||
if (!sb.append("\\u00"))
|
||||
return false;
|
||||
MOZ_ASSERT((c >> 4) < 10);
|
||||
uint8_t x = c >> 4, y = c % 16;
|
||||
if (!sb.append(Latin1Char('0' + x)) ||
|
||||
!sb.append(Latin1Char(y < 10 ? '0' + y : 'a' + (y - 10))))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
*dstPtr++ = '0';
|
||||
*dstPtr++ = '0';
|
||||
*dstPtr++ = '0' + x;
|
||||
*dstPtr++ = y < 10 ? '0' + y : 'a' + (y - 10);
|
||||
}
|
||||
}
|
||||
|
||||
/* Steps 3-4. */
|
||||
return sb.append('"');
|
||||
*dstPtr++ = '"';
|
||||
return dstPtr;
|
||||
}
|
||||
|
||||
/*
 * Appends the quoted form of |str| (as produced by InfallibleQuote) to the
 * end of the character vector |sb|.
 *
 * SrcCharT is the character type read from |str|; the destination character
 * type is taken from CharVectorT::ElementType.
 *
 * Returns false if |sb| could not be grown, true otherwise.
 */
template <typename SrcCharT, typename CharVectorT>
|
||||
static bool
|
||||
Quote(CharVectorT& sb, JSLinearString* str)
|
||||
{
|
||||
// We resize the backing buffer to the maximum size we could possibly need,
|
||||
// write the escaped string into it, and shrink it back to the size we ended
|
||||
// up needing.
|
||||
size_t len = str->length();
|
||||
size_t sbInitialLen = sb.length();  // existing contents are kept; quoting starts after them
|
||||
if (!sb.growByUninitialized(len * 6 + 2))  // InfallibleQuote's documented space requirement: 2 + 6 * length
|
||||
return false;
|
||||
|
|
||||
typedef typename CharVectorT::ElementType DstCharT;
|
||||
|
|
||||
JS::AutoCheckCannotGC nogc;  // NOTE(review): presumably keeps the raw chars pointer valid while it is in use -- confirm
|
||||
RangedPtr<const SrcCharT> srcBegin{str->chars<SrcCharT>(nogc), len};
|
||||
RangedPtr<DstCharT> dstBegin{sb.begin(), sb.begin(), sb.end()};  // range spans the whole, already grown buffer
|
||||
RangedPtr<DstCharT> dstEnd = InfallibleQuote(srcBegin, srcBegin + len, dstBegin + sbInitialLen);
|
||||
size_t newSize = dstEnd - dstBegin;  // measured from the buffer start, so it includes the initial contents
|
||||
sb.shrinkTo(newSize);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
@ -122,9 +125,18 @@ Quote(JSContext* cx, StringBuffer& sb, JSString* str)
|
||||
if (!linear)
|
||||
return false;
|
||||
|
||||
return linear->hasLatin1Chars()
|
||||
? Quote<Latin1Char>(sb, linear)
|
||||
: Quote<char16_t>(sb, linear);
|
||||
// Check if either has non-latin1 before calling ensure, so that the buffer's
|
||||
// hasEnsured flag is set if the conversion to twoByte was automatic.
|
||||
if (!sb.isUnderlyingBufferLatin1() || linear->hasTwoByteChars()) {
|
||||
if (!sb.ensureTwoByteChars())
|
||||
return false;
|
||||
}
|
||||
if (linear->hasTwoByteChars())
|
||||
return Quote<char16_t>(sb.rawTwoByteBuffer(), linear);
|
||||
|
||||
return sb.isUnderlyingBufferLatin1()
|
||||
? Quote<Latin1Char>(sb.latin1Chars(), linear)
|
||||
: Quote<Latin1Char>(sb.rawTwoByteBuffer(), linear);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -34,6 +34,9 @@ assertStringify({'mmm\\mmm':"hmm"}, '{"mmm\\\\mmm":"hmm"}');
|
||||
assertStringify({'mmm\\mmm\\mmm':"hmm"}, '{"mmm\\\\mmm\\\\mmm":"hmm"}');
|
||||
assertStringify({"mm\u000bmm":"hmm"}, '{"mm\\u000bmm":"hmm"}');
|
||||
assertStringify({"mm\u0000mm":"hmm"}, '{"mm\\u0000mm":"hmm"}');
|
||||
assertStringify({"\u0000\u000b":""}, '{"\\u0000\\u000b":""}');
|
||||
assertStringify({"\u000b\ufdfd":"hmm"}, '{"\\u000b\ufdfd":"hmm"}');
|
||||
assertStringify({"\u000b\ufdfd":"h\xfc\ufdfdm"}, '{"\\u000b\ufdfd":"h\xfc\ufdfdm"}');
|
||||
|
||||
var x = {"free":"variable"};
|
||||
assertStringify(x, '{"free":"variable"}');
|
||||
|
@ -62,12 +62,8 @@ class StringBuffer
|
||||
MOZ_ALWAYS_INLINE bool isLatin1() const { return cb.constructed<Latin1CharBuffer>(); }
|
||||
MOZ_ALWAYS_INLINE bool isTwoByte() const { return !isLatin1(); }
|
||||
|
||||
MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { return cb.ref<Latin1CharBuffer>(); }
|
||||
MOZ_ALWAYS_INLINE TwoByteCharBuffer& twoByteChars() { return cb.ref<TwoByteCharBuffer>(); }
|
||||
|
||||
MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
|
||||
return cb.ref<Latin1CharBuffer>();
|
||||
}
|
||||
MOZ_ALWAYS_INLINE const TwoByteCharBuffer& twoByteChars() const {
|
||||
return cb.ref<TwoByteCharBuffer>();
|
||||
}
|
||||
@ -85,6 +81,12 @@ class StringBuffer
|
||||
cb.construct<Latin1CharBuffer>(cx);
|
||||
}
|
||||
|
||||
MOZ_ALWAYS_INLINE Latin1CharBuffer& latin1Chars() { return cb.ref<Latin1CharBuffer>(); }
|
||||
|
||||
MOZ_ALWAYS_INLINE const Latin1CharBuffer& latin1Chars() const {
|
||||
return cb.ref<Latin1CharBuffer>();
|
||||
}
|
||||
|
||||
void clear() {
|
||||
if (isLatin1())
|
||||
latin1Chars().clear();
|
||||
@ -135,6 +137,11 @@ class StringBuffer
|
||||
return append(Latin1Char(c));
|
||||
}
|
||||
|
||||
// Returns a mutable reference to the two-byte character buffer.
// Debug-asserts hasEnsuredTwoByteChars_; NOTE(review): presumably callers
// must have called ensureTwoByteChars() first -- confirm.
TwoByteCharBuffer& rawTwoByteBuffer() {
|
||||
MOZ_ASSERT(hasEnsuredTwoByteChars_);
|
||||
return twoByteChars();
|
||||
}
|
||||
|
||||
inline MOZ_MUST_USE bool append(const char16_t* begin, const char16_t* end);
|
||||
|
||||
MOZ_MUST_USE bool append(const char16_t* chars, size_t len) {
|
||||
|
Loading…
Reference in New Issue
Block a user