diff --git a/js/public/CharacterEncoding.h b/js/public/CharacterEncoding.h index 7861e9f63fc1..f247f7189848 100644 --- a/js/public/CharacterEncoding.h +++ b/js/public/CharacterEncoding.h @@ -82,6 +82,12 @@ class UTF8Chars : public mozilla::Range UTF8Chars(const char* aBytes, size_t aLength) : Base(reinterpret_cast(const_cast(aBytes)), aLength) {} + UTF8Chars(mozilla::Utf8Unit* aUnits, size_t aLength) + : UTF8Chars(reinterpret_cast(aUnits), aLength) + {} + UTF8Chars(const mozilla::Utf8Unit* aUnits, size_t aLength) + : UTF8Chars(reinterpret_cast(aUnits), aLength) + {} }; /* @@ -108,6 +114,10 @@ class UTF8CharsZ : public mozilla::RangedPtr MOZ_ASSERT(aBytes[aLength] == '\0'); } + UTF8CharsZ(mozilla::Utf8Unit* aUnits, size_t aLength) + : UTF8CharsZ(reinterpret_cast(aUnits), aLength) + {} + using Base::operator=; char* c_str() { return reinterpret_cast(get()); } diff --git a/js/src/util/StringBuffer.h b/js/src/util/StringBuffer.h index a9caed566fe9..410291cc5e62 100644 --- a/js/src/util/StringBuffer.h +++ b/js/src/util/StringBuffer.h @@ -9,6 +9,7 @@ #include "mozilla/DebugOnly.h" #include "mozilla/MaybeOneOf.h" +#include "mozilla/Utf8.h" #include "js/Vector.h" #include "vm/JSContext.h" @@ -159,6 +160,14 @@ class StringBuffer return append(chars, chars + len); } + /** + * Interpret the provided count of UTF-8 code units as UTF-8, and append + * the represented code points to this. If the code units contain invalid + * UTF-8, leave the internal buffer in a consistent but unspecified state, + * report an error, and return false. Also return false if appending to, inflating, or growing the internal buffer fails.
+ */ + MOZ_MUST_USE bool append(const mozilla::Utf8Unit* units, size_t len); + MOZ_MUST_USE bool append(const JS::ConstCharPtr chars, size_t len) { return append(chars.get(), chars.get() + len); } diff --git a/js/src/vm/CharacterEncoding.cpp b/js/src/vm/CharacterEncoding.cpp index 2c2b5ba22e2a..cd55d0f4d416 100644 --- a/js/src/vm/CharacterEncoding.cpp +++ b/js/src/vm/CharacterEncoding.cpp @@ -8,13 +8,19 @@ #include "mozilla/Range.h" #include "mozilla/Sprintf.h" +#include "mozilla/TextUtils.h" +#include "mozilla/Utf8.h" #include #include +#include "util/StringBuffer.h" #include "util/Unicode.h" // unicode::REPLACEMENT_CHARACTER #include "vm/JSContext.h" +using mozilla::IsAscii; +using mozilla::Utf8Unit; + using namespace js; Latin1CharsZ @@ -607,3 +613,68 @@ JS::StringIsASCII(const char* s) } return true; } + +bool +StringBuffer::append(const Utf8Unit* units, size_t len) +{ + if (isLatin1()) { + Latin1CharBuffer& latin1 = latin1Chars(); + + while (len > 0) { + if (!IsAscii(*units)) { + break; + } + + if (!latin1.append(units->toUnsignedChar())) { + return false; + } + + ++units; + --len; + } + if (len == 0) { + return true; + } + + // Non-ASCII doesn't *necessarily* mean we couldn't keep appending to + // |latin1|, but it's only possible for [U+0080, U+0100) code points, + // and handling the full complexity of UTF-8 only for that very small + // additional range isn't worth it. Inflate to two-byte storage before + // appending the remaining code points. + if (!inflateChars()) { + return false; + } + } + + UTF8Chars remainingUtf8(units, len); + + // Determine how many UTF-16 code units are required to represent the + // remaining units. A failure of this counting pass is returned to the caller before the two-byte buffer is grown.
+ size_t utf16Len = 0; + auto countInflated = [&utf16Len](char16_t c) -> LoopDisposition { + utf16Len++; + return LoopDisposition::Continue; + }; + if (!InflateUTF8ToUTF16(cx, remainingUtf8, countInflated)) { + return false; + } + + TwoByteCharBuffer& buf = twoByteChars(); + + size_t i = buf.length(); + if (!buf.growByUninitialized(utf16Len)) { + return false; + } + MOZ_ASSERT(i + utf16Len == buf.length(), + "growByUninitialized assumed to increase length immediately"); + + char16_t* toFill = &buf[i]; /* NOTE(review): if utf16Len is 0 then i == buf.length() here; confirm that indexing one past the last element is permitted by this buffer type. */ + auto appendUtf16 = [&toFill](char16_t unit) { + *toFill++ = unit; + return LoopDisposition::Continue; + }; + + MOZ_ALWAYS_TRUE(InflateUTF8ToUTF16(cx, remainingUtf8, appendUtf16)); /* Second pass over the same units, writing into the space reserved above; presumably cannot fail because the counting pass already succeeded -- confirm. */ + MOZ_ASSERT(toFill == buf.end()); + return true; +} diff --git a/js/src/vm/JSScript.cpp b/js/src/vm/JSScript.cpp index 0f26e8dbfc1c..db8fb8bce2eb 100644 --- a/js/src/vm/JSScript.cpp +++ b/js/src/vm/JSScript.cpp @@ -1832,17 +1832,27 @@ ScriptSource::appendSubstring(JSContext* cx, StringBuffer& buf, size_t start, si UncompressedSourceCache::AutoHoldEntry holder; if (hasSourceType()) { - MOZ_CRASH("for now"); - return false; - } else { - PinnedUnits units(cx, this, holder, start, len); - if (!units.asChars()) { + PinnedUnits pinned(cx, this, holder, start, len); + if (!pinned.get()) { return false; } if (len > SourceDeflateLimit && !buf.ensureTwoByteChars()) { return false; } - return buf.append(units.asChars(), len); + + const Utf8Unit* units = pinned.get(); + return buf.append(units, len); + } else { + PinnedUnits pinned(cx, this, holder, start, len); + if (!pinned.get()) { + return false; + } + if (len > SourceDeflateLimit && !buf.ensureTwoByteChars()) { + return false; + } + + const char16_t* units = pinned.get(); + return buf.append(units, len); } }