mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-21 09:15:35 +00:00
Bug 1498320 - Implement ScriptSource::appendSubstring for UTF-8 source text, using a newly-implemented StringBuffer::append(const Utf8Unit* units, size_t len). r=tcampbell
--HG-- extra : rebase_source : 8fbb71a4ca8c424c33af470fc0ff77760f33542e
This commit is contained in:
parent
804bbcfed2
commit
3452c23147
@ -82,6 +82,12 @@ class UTF8Chars : public mozilla::Range<unsigned char>
|
||||
// Construct from |aLength| bytes starting at |aBytes|. The pointer's constness
// is cast away and it is reinterpreted as unsigned char* before being handed
// to Base (presumably the mozilla::Range<unsigned char> base class -- confirm
// against the class's typedefs, which are outside this hunk).
UTF8Chars(const char* aBytes, size_t aLength)
|
||||
: Base(reinterpret_cast<unsigned char*>(const_cast<char*>(aBytes)), aLength)
|
||||
{}
|
||||
// Construct from |aLength| mutable UTF-8 code units starting at |aUnits|.
// Delegates to the char* overload (not visible in this hunk) after
// reinterpreting the Utf8Unit pointer as a char pointer.
UTF8Chars(mozilla::Utf8Unit* aUnits, size_t aLength)
|
||||
: UTF8Chars(reinterpret_cast<char*>(aUnits), aLength)
|
||||
{}
|
||||
// Construct from |aLength| immutable UTF-8 code units starting at |aUnits|.
// Delegates to the const char* overload after reinterpreting the Utf8Unit
// pointer as a const char pointer.
UTF8Chars(const mozilla::Utf8Unit* aUnits, size_t aLength)
|
||||
: UTF8Chars(reinterpret_cast<const char*>(aUnits), aLength)
|
||||
{}
|
||||
};
|
||||
|
||||
/*
|
||||
@ -108,6 +114,10 @@ class UTF8CharsZ : public mozilla::RangedPtr<unsigned char>
|
||||
MOZ_ASSERT(aBytes[aLength] == '\0');
|
||||
}
|
||||
|
||||
// Construct from |aLength| UTF-8 code units starting at |aUnits|. Delegates to
// a char* overload after reinterpreting the Utf8Unit pointer as a char pointer.
// NOTE(review): the delegated constructor presumably asserts that
// aUnits[aLength] is a null terminator, as the preceding constructor body
// does -- confirm which overload is selected.
UTF8CharsZ(mozilla::Utf8Unit* aUnits, size_t aLength)
|
||||
: UTF8CharsZ(reinterpret_cast<char*>(aUnits), aLength)
|
||||
{}
|
||||
|
||||
using Base::operator=;
|
||||
|
||||
// Return the pointer held by the base class, reinterpreted as char*.
char* c_str() {
    auto* bytes = get();
    return reinterpret_cast<char*>(bytes);
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "mozilla/DebugOnly.h"
|
||||
#include "mozilla/MaybeOneOf.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
#include "js/Vector.h"
|
||||
#include "vm/JSContext.h"
|
||||
@ -159,6 +160,14 @@ class StringBuffer
|
||||
return append(chars, chars + len);
|
||||
}
|
||||
|
||||
/**
|
||||
* Interpret the provided count of UTF-8 code units as UTF-8, and append
|
||||
* the represented code points to this. If the code units contain invalid
|
||||
* UTF-8, leave the internal buffer in a consistent but unspecified state,
|
||||
* report an error, and return false.
|
||||
*/
|
||||
MOZ_MUST_USE bool append(const mozilla::Utf8Unit* units, size_t len);
|
||||
|
||||
// Append |len| characters starting at the pointer wrapped by |chars|, by
// forwarding the [first, first + len) range to the two-pointer append overload.
MOZ_MUST_USE bool append(const JS::ConstCharPtr chars, size_t len) {
    auto first = chars.get();
    return append(first, first + len);
}
|
||||
|
@ -8,13 +8,19 @@
|
||||
|
||||
#include "mozilla/Range.h"
|
||||
#include "mozilla/Sprintf.h"
|
||||
#include "mozilla/TextUtils.h"
|
||||
#include "mozilla/Utf8.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
#include "util/StringBuffer.h"
|
||||
#include "util/Unicode.h" // unicode::REPLACEMENT_CHARACTER
|
||||
#include "vm/JSContext.h"
|
||||
|
||||
using mozilla::IsAscii;
|
||||
using mozilla::Utf8Unit;
|
||||
|
||||
using namespace js;
|
||||
|
||||
Latin1CharsZ
|
||||
@ -607,3 +613,68 @@ JS::StringIsASCII(const char* s)
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
StringBuffer::append(const Utf8Unit* units, size_t len)
|
||||
{
|
||||
if (isLatin1()) {
|
||||
Latin1CharBuffer& latin1 = latin1Chars();
|
||||
|
||||
while (len > 0) {
|
||||
if (!IsAscii(*units)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (!latin1.append(units->toUnsignedChar())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
++units;
|
||||
--len;
|
||||
}
|
||||
if (len == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Non-ASCII doesn't *necessarily* mean we couldn't keep appending to
|
||||
// |latin1|, but it's only possible for [U+0080, U+0100) code points,
|
||||
// and handling the full complexity of UTF-8 only for that very small
|
||||
// additional range isn't worth it. Inflate to two-byte storage before
|
||||
// appending the remaining code points.
|
||||
if (!inflateChars()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
UTF8Chars remainingUtf8(units, len);
|
||||
|
||||
// Determine how many UTF-16 code units are required to represent the
|
||||
// remaining units.
|
||||
size_t utf16Len = 0;
|
||||
auto countInflated = [&utf16Len](char16_t c) -> LoopDisposition {
|
||||
utf16Len++;
|
||||
return LoopDisposition::Continue;
|
||||
};
|
||||
if (!InflateUTF8ToUTF16<OnUTF8Error::Throw>(cx, remainingUtf8, countInflated)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
TwoByteCharBuffer& buf = twoByteChars();
|
||||
|
||||
size_t i = buf.length();
|
||||
if (!buf.growByUninitialized(utf16Len)) {
|
||||
return false;
|
||||
}
|
||||
MOZ_ASSERT(i + utf16Len == buf.length(),
|
||||
"growByUninitialized assumed to increase length immediately");
|
||||
|
||||
char16_t* toFill = &buf[i];
|
||||
auto appendUtf16 = [&toFill](char16_t unit) {
|
||||
*toFill++ = unit;
|
||||
return LoopDisposition::Continue;
|
||||
};
|
||||
|
||||
MOZ_ALWAYS_TRUE(InflateUTF8ToUTF16<OnUTF8Error::Throw>(cx, remainingUtf8, appendUtf16));
|
||||
MOZ_ASSERT(toFill == buf.end());
|
||||
return true;
|
||||
}
|
||||
|
@ -1832,17 +1832,27 @@ ScriptSource::appendSubstring(JSContext* cx, StringBuffer& buf, size_t start, si
|
||||
UncompressedSourceCache::AutoHoldEntry holder;
|
||||
|
||||
if (hasSourceType<Utf8Unit>()) {
|
||||
MOZ_CRASH("for now");
|
||||
return false;
|
||||
} else {
|
||||
PinnedUnits<char16_t> units(cx, this, holder, start, len);
|
||||
if (!units.asChars()) {
|
||||
PinnedUnits<Utf8Unit> pinned(cx, this, holder, start, len);
|
||||
if (!pinned.get()) {
|
||||
return false;
|
||||
}
|
||||
if (len > SourceDeflateLimit && !buf.ensureTwoByteChars()) {
|
||||
return false;
|
||||
}
|
||||
return buf.append(units.asChars(), len);
|
||||
|
||||
const Utf8Unit* units = pinned.get();
|
||||
return buf.append(units, len);
|
||||
} else {
|
||||
PinnedUnits<char16_t> pinned(cx, this, holder, start, len);
|
||||
if (!pinned.get()) {
|
||||
return false;
|
||||
}
|
||||
if (len > SourceDeflateLimit && !buf.ensureTwoByteChars()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const char16_t* units = pinned.get();
|
||||
return buf.append(units, len);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user