Bug 1881995 - Implement ForwardedAtoms and create them during atomization r=iain,sfink

Differential Revision: https://phabricator.services.mozilla.com/D202690
This commit is contained in:
Doug Thayer 2024-04-10 16:32:23 +00:00
parent a0ccb95355
commit 9ce4934a94
20 changed files with 429 additions and 72 deletions

View File

@ -34,9 +34,9 @@ struct String {
static constexpr uint32_t ATOM_BIT = js::Bit(3);
static constexpr uint32_t LINEAR_BIT = js::Bit(4);
static constexpr uint32_t INLINE_CHARS_BIT = js::Bit(6);
static constexpr uint32_t LATIN1_CHARS_BIT = js::Bit(9);
static constexpr uint32_t LATIN1_CHARS_BIT = js::Bit(10);
static constexpr uint32_t EXTERNAL_FLAGS = LINEAR_BIT | js::Bit(8);
static constexpr uint32_t TYPE_FLAGS_MASK = js::BitMask(9) - js::BitMask(3);
static constexpr uint32_t TYPE_FLAGS_MASK = js::BitMask(10) - js::BitMask(3);
static constexpr uint32_t PERMANENT_ATOM_MASK = ATOM_BIT | js::Bit(8);
uintptr_t flags_;

View File

@ -1138,7 +1138,7 @@ void js::gc::TenuringTracer::collectToStringFixedPoint() {
bool rootBaseNotYetForwarded = false;
JSLinearString* rootBase = nullptr;
if (str->isDependent()) {
if (str->isDependent() && !str->isAtomRef()) {
if (str->hasTwoByteChars()) {
relocateDependentStringChars<char16_t>(
&str->asDependent(), p->savedNurseryBaseOrRelocOverlay(), &offset,

View File

@ -111,7 +111,6 @@ void js::GCMarker::eagerlyMarkChildren(JSLinearString* linearStr) {
gc::AssertShouldMarkInZone(this, linearStr);
MOZ_ASSERT(linearStr->isMarkedAny());
MOZ_ASSERT(linearStr->JSString::isLinear());
// Use iterative marking to avoid blowing out the stack.
while (linearStr->hasBase()) {
linearStr = linearStr->base();

View File

@ -0,0 +1,52 @@
// Regression test: substrings taken from long strings must still read their
// original characters after those long strings have been used as property
// keys (which, per the comments below, is expected to turn them into
// JSAtomRefStrings).
// Long source strings; the substrings below are cut out of the first 1000.
var dependedOnStrings = [];
// NOTE(review): `length` is not read anywhere else in this script; getSubstr
// hard-codes the same value (50).
var length = 50;
// Returns the 50-character slice of |src| starting at index |i|.
function getSubstr(src, i) {
return src.substring(i, i + 50);
}
// Performs a property lookup on |o| using |prop| as the key.
function checkProp(o, prop) {
return o[prop];
}
var substrs = [];
var objs = [];
// Build 1000 source strings of 99 "a"+digit pairs each (198 characters).
// The `with({})` statement takes the first loop as its body.
// NOTE(review): presumably `with({})` is the usual jit-test idiom to keep this
// code out of the optimizing JIT -- confirm.
with({})
for (var i = 0; i < 1000; i++) {
var pieces = [];
for (var j = 0; j < 99; j++) {
pieces.push("a");
pieces.push(Math.floor(Math.random() * 10));
}
dependedOnStrings.push(pieces.join(""));
}
for (var i = 0; i < 1000; i++) {
// Create a bunch of substrings depending on strings in dependedOnStrings.
// The start offset (i * 2) % 50 is always even, so every substring begins
// on an "a".
substrs.push(getSubstr(dependedOnStrings[i], (i * 2) % 50));
objs.push({});
}
for (var i = 0; i < 1000; i++) {
// Use the depended on strings as keys to get them replaced with
// JSAtomRefStrings
checkProp(objs[i], dependedOnStrings[i]);
}
// Use a bunch of memory to try to ensure that we overwrite the buffers
// that could have erroneously been freed. These filler strings use "b"
// instead of "a", so a reused buffer would no longer start with "a".
for (var i = 0; i < 1000; i++) {
var pieces = [];
for (var j = 0; j < 99; j++) {
pieces.push("b");
pieces.push(Math.floor(Math.random() * 10));
}
dependedOnStrings.push(pieces.join(""));
}
// Ensure the buffers were not in fact freed: every substring must still
// start with the "a" it was created with.
for (var i = 0; i < 1000; i++) {
assertEq(substrs[i].startsWith("a"), true);
}

View File

@ -0,0 +1,56 @@
// Regression test, regexp variant: strings produced by RegExp matching against
// long strings must still read their original characters after those long
// strings have been used as property keys (which, per the comments below, is
// expected to turn them into JSAtomRefStrings).
// Long source strings; the matches below are taken from the first 1000.
var dependedOnStrings = [];
// NOTE(review): `length` is not read anywhere else in this script; the regular
// expression hard-codes the same value (50).
var length = 50;
// Matches a "q" marker followed by 50 characters drawn from "a" and digits.
var reg = /q[a0-9]{50}/;
// Returns the text of the first match of |reg| in |src|. The second parameter
// is accepted but not used.
// NOTE(review): presumably the engine hands the match back as a dependent
// string of |src| -- confirm.
function getSubstr(src, i) {
return reg.exec(src)[0];
}
// Performs a property lookup on |o| using |prop| as the key.
function checkProp(o, prop) {
return o[prop];
}
var substrs = [];
var objs = [];
// Build 1000 source strings of 99 "a"+digit pairs each, with a single "q"
// inserted in front of pair number (i * 2) % 50 so that |reg| has exactly one
// marker to match. The `with({})` statement takes the first loop as its body.
// NOTE(review): presumably `with({})` is the usual jit-test idiom to keep this
// code out of the optimizing JIT -- confirm.
with({})
for (var i = 0; i < 1000; i++) {
var pieces = [];
for (var j = 0; j < 99; j++) {
if (j == (i * 2) % 50) {
pieces.push("q");
}
pieces.push("a");
pieces.push(Math.floor(Math.random() * 10));
}
dependedOnStrings.push(pieces.join(""));
}
for (var i = 0; i < 1000; i++) {
// Create a bunch of substrings depending on strings in dependedOnStrings
substrs.push(getSubstr(dependedOnStrings[i], (i * 2) % 50));
objs.push({});
}
for (var i = 0; i < 1000; i++) {
// Use the depended on strings as keys to get them replaced with
// JSAtomRefStrings
checkProp(objs[i], dependedOnStrings[i]);
}
// Use a bunch of memory to try to ensure that we overwrite the buffers
// that could have erroneously been freed. These filler strings contain no
// "q" and use "b" instead of "a".
for (var i = 0; i < 1000; i++) {
var pieces = [];
for (var j = 0; j < 99; j++) {
pieces.push("b");
pieces.push(Math.floor(Math.random() * 10));
}
dependedOnStrings.push(pieces.join(""));
}
// Ensure the buffers were not in fact freed: every match must still start
// with the "q" marker followed by "a".
for (var i = 0; i < 1000; i++) {
assertEq(substrs[i].startsWith("qa"), true);
}

View File

@ -427,7 +427,7 @@ bool BaselineCacheIRCompiler::emitGuardSpecificAtom(StringOperandId strId,
Address atomAddr(stubAddress(expectedOffset));
Label done;
Label done, notCachedAtom;
masm.branchPtr(Assembler::Equal, atomAddr, str, &done);
// The pointers are not equal, so if the input string is also an atom it
@ -435,6 +435,11 @@ bool BaselineCacheIRCompiler::emitGuardSpecificAtom(StringOperandId strId,
masm.branchTest32(Assembler::NonZero, Address(str, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), failure->label());
masm.tryFastAtomize(str, scratch, scratch, &notCachedAtom);
masm.branchPtr(Assembler::Equal, atomAddr, scratch, &done);
masm.jump(failure->label());
masm.bind(&notCachedAtom);
// Check the length.
masm.loadPtr(atomAddr, scratch);
masm.loadStringLength(scratch, scratch);
@ -1464,9 +1469,13 @@ bool BaselineCacheIRCompiler::emitHasClassResult(ObjOperandId objId,
void BaselineCacheIRCompiler::emitAtomizeString(Register str, Register temp,
Label* failure) {
Label isAtom;
Label isAtom, notCachedAtom;
masm.branchTest32(Assembler::NonZero, Address(str, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), &isAtom);
masm.tryFastAtomize(str, temp, str, &notCachedAtom);
masm.jump(&isAtom);
masm.bind(&notCachedAtom);
{
LiveRegisterSet save(GeneralRegisterSet::Volatile(),
liveVolatileFloatRegs());

View File

@ -1055,6 +1055,15 @@ void CacheIRStubInfo::replaceStubRawWord(uint8_t* stubData, uint32_t offset,
*addr = newWord;
}
// Overwrites the 64-bit word stored |offset| bytes into |stubData| with
// |newBits|. The target address must be 8-byte aligned and must currently
// hold |oldBits|; both conditions are asserted.
void CacheIRStubInfo::replaceStubRawValueBits(uint8_t* stubData,
                                              uint32_t offset, uint64_t oldBits,
                                              uint64_t newBits) const {
  uint8_t* fieldStart = stubData + offset;
  MOZ_ASSERT(reinterpret_cast<uintptr_t>(fieldStart) % sizeof(uint64_t) == 0);

  auto* slot = reinterpret_cast<uint64_t*>(fieldStart);
  MOZ_ASSERT(*slot == oldBits);
  *slot = newBits;
}
template <class Stub, StubField::Type type>
typename MapStubFieldToType<type>::WrappedType& CacheIRStubInfo::getStubField(
Stub* stub, uint32_t offset) const {
@ -2838,7 +2847,7 @@ bool CacheIRCompiler::emitStringToAtom(StringOperandId stringId) {
masm.branchTest32(Assembler::NonZero, Address(str, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), &done);
masm.lookupStringInAtomCacheLastLookups(str, scratch, str, &vmCall);
masm.tryFastAtomize(str, scratch, str, &vmCall);
masm.jump(&done);
masm.bind(&vmCall);

View File

@ -1420,6 +1420,9 @@ class CacheIRStubInfo {
void replaceStubRawWord(uint8_t* stubData, uint32_t offset, uintptr_t oldWord,
uintptr_t newWord) const;
void replaceStubRawValueBits(uint8_t* stubData, uint32_t offset,
uint64_t oldBits, uint64_t newBits) const;
};
template <typename T>

View File

@ -2328,13 +2328,18 @@ void CreateDependentString::generate(MacroAssembler& masm,
masm.addToCharPtr(temp1_, temp2_, encoding_);
masm.storeNonInlineStringChars(temp1_, string_);
masm.storeDependentStringBase(base, string_);
masm.movePtr(base, temp1_);
// Ensure that the depended-on string is flagged as such, so we don't
// convert it into a forwarded atom
masm.load32(Address(base, JSString::offsetOfFlags()), temp2_);
masm.or32(Imm32(JSString::DEPENDED_ON_BIT), temp2_);
masm.store32(temp2_, Address(base, JSString::offsetOfFlags()));
// Follow any base pointer if the input is itself a dependent string.
// Watch for undepended strings, which have a base pointer but don't
// actually share their characters with it.
Label noBase;
masm.load32(Address(base, JSString::offsetOfFlags()), temp2_);
masm.movePtr(base, temp1_);
masm.and32(Imm32(JSString::TYPE_FLAGS_MASK), temp2_);
masm.branchTest32(Assembler::Zero, temp2_, Imm32(JSString::DEPENDENT_BIT),
&noBase);
@ -11319,6 +11324,7 @@ void CodeGenerator::visitCompareSInline(LCompareSInline* lir) {
masm.bind(&notPointerEqual);
Label setNotEqualResult;
if (str->isAtom()) {
// Atoms cannot be equal to each other if they point to different strings.
Imm32 atomBit(JSString::ATOM_BIT);
@ -11336,8 +11342,27 @@ void CodeGenerator::visitCompareSInline(LCompareSInline* lir) {
}
// Strings of different length can never be equal.
masm.branch32(Assembler::Equal, Address(input, JSString::offsetOfLength()),
Imm32(str->length()), &compareChars);
masm.branch32(Assembler::NotEqual,
Address(input, JSString::offsetOfLength()),
Imm32(str->length()), &setNotEqualResult);
if (str->isAtom()) {
Label forwardedPtrEqual;
masm.tryFastAtomize(input, output, output, &compareChars);
// We now have two atoms. Just check pointer equality.
masm.branchPtr(Assembler::Equal, output, ImmGCPtr(str),
&forwardedPtrEqual);
masm.move32(Imm32(op == JSOp::Ne || op == JSOp::StrictNe), output);
masm.jump(ool->rejoin());
masm.bind(&forwardedPtrEqual);
masm.move32(Imm32(op == JSOp::Eq || op == JSOp::StrictEq), output);
masm.jump(ool->rejoin());
} else {
masm.jump(&compareChars);
}
masm.bind(&setNotEqualResult);
masm.move32(Imm32(op == JSOp::Ne || op == JSOp::StrictNe), output);
@ -12506,6 +12531,9 @@ void CodeGenerator::visitSubstr(LSubstr* lir) {
masm.storeDependentStringBase(string, output);
auto initializeDependentString = [&](CharEncoding encoding) {
masm.or32(Imm32(JSString::DEPENDED_ON_BIT),
Address(string, JSString::offsetOfFlags()));
uint32_t flags = JSString::INIT_DEPENDENT_FLAGS;
if (encoding == CharEncoding::Latin1) {
flags |= JSString::LATIN1_CHARS_BIT;
@ -16317,9 +16345,26 @@ void CodeGenerator::emitMaybeAtomizeSlot(LInstruction* ins, Register stringReg,
OutOfLineAtomizeSlot* ool =
new (alloc()) OutOfLineAtomizeSlot(ins, stringReg, slotAddr, dest);
addOutOfLineCode(ool, ins->mirRaw()->toInstruction());
masm.branchTest32(Assembler::NonZero,
Address(stringReg, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), ool->rejoin());
masm.branchTest32(Assembler::Zero,
Address(stringReg, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), ool->entry());
Imm32(JSString::ATOM_REF_BIT), ool->entry());
masm.loadPtr(Address(stringReg, JSAtomRefString::offsetOfAtom()), stringReg);
if (dest.hasValue()) {
masm.moveValue(
TypedOrValueRegister(MIRType::String, AnyRegister(stringReg)),
dest.valueReg());
} else {
MOZ_ASSERT(dest.typedReg().gpr() == stringReg);
}
emitPreBarrier(slotAddr);
masm.storeTypedOrValue(dest, slotAddr);
masm.bind(ool->rejoin());
}
@ -20474,7 +20519,7 @@ void CodeGenerator::visitToHashableString(LToHashableString* ins) {
Address(input, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), &isAtom);
masm.lookupStringInAtomCacheLastLookups(input, output, output, ool->entry());
masm.tryFastAtomize(input, output, output, ool->entry());
masm.jump(ool->rejoin());
masm.bind(&isAtom);
masm.movePtr(input, output);

View File

@ -1122,11 +1122,16 @@ MConstant::MConstant(TempAllocator& alloc, const js::Value& vp)
case MIRType::Double:
payload_.d = vp.toDouble();
break;
case MIRType::String:
MOZ_ASSERT(!IsInsideNursery(vp.toString()));
MOZ_ASSERT(vp.toString()->isLinear());
case MIRType::String: {
JSString* str = vp.toString();
if (str->isAtomRef()) {
str = str->atom();
}
MOZ_ASSERT(!IsInsideNursery(str));
MOZ_ASSERT(str->isAtom());
payload_.str = vp.toString();
break;
}
case MIRType::Symbol:
payload_.sym = vp.toSymbol();
break;

View File

@ -1310,7 +1310,7 @@ void MacroAssembler::loadStringChars(Register str, Register dest,
MOZ_ASSERT(encoding == CharEncoding::TwoByte);
static constexpr uint32_t Mask =
JSString::LINEAR_BIT | JSString::LATIN1_CHARS_BIT;
static_assert(Mask < 1024,
static_assert(Mask < 2048,
"Mask should be a small, near-null value to ensure we "
"block speculative execution when it's used as string "
"pointer");
@ -1344,7 +1344,7 @@ void MacroAssembler::loadNonInlineStringChars(Register str, Register dest,
static constexpr uint32_t Mask = JSString::LINEAR_BIT |
JSString::INLINE_CHARS_BIT |
JSString::LATIN1_CHARS_BIT;
static_assert(Mask < 1024,
static_assert(Mask < 2048,
"Mask should be a small, near-null value to ensure we "
"block speculative execution when it's used as string "
"pointer");
@ -2659,11 +2659,15 @@ void MacroAssembler::loadMegamorphicSetPropCache(Register dest) {
movePtr(ImmPtr(runtime()->addressOfMegamorphicSetPropCache()), dest);
}
void MacroAssembler::lookupStringInAtomCacheLastLookups(Register str,
Register scratch,
Register output,
Label* fail) {
Label found;
void MacroAssembler::tryFastAtomize(Register str, Register scratch,
Register output, Label* fail) {
Label found, done, notAtomRef;
branchTest32(Assembler::Zero, Address(str, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_REF_BIT), &notAtomRef);
loadPtr(Address(str, JSAtomRefString::offsetOfAtom()), output);
jump(&done);
bind(&notAtomRef);
uintptr_t cachePtr = uintptr_t(runtime()->addressOfStringToAtomCache());
void* offset = (void*)(cachePtr + StringToAtomCache::offsetOfLastLookups());
@ -2682,6 +2686,7 @@ void MacroAssembler::lookupStringInAtomCacheLastLookups(Register str,
bind(&found);
size_t atomOffset = StringToAtomCache::LastLookup::offsetOfAtom();
loadPtr(Address(scratch, atomOffset), output);
bind(&done);
}
void MacroAssembler::loadAtomHash(Register id, Register outHash, Label* done) {
@ -2741,7 +2746,7 @@ void MacroAssembler::loadAtomOrSymbolAndHash(ValueOperand value, Register outId,
loadAtomHash(outId, outHash, &done);
bind(&nonAtom);
lookupStringInAtomCacheLastLookups(outId, outHash, outId, cacheMiss);
tryFastAtomize(outId, outHash, outId, cacheMiss);
jump(&atom);
bind(&done);
@ -3382,7 +3387,7 @@ void MacroAssembler::guardSpecificAtom(Register str, JSAtom* atom,
Register scratch,
const LiveRegisterSet& volatileRegs,
Label* fail) {
Label done;
Label done, notCachedAtom;
branchPtr(Assembler::Equal, str, ImmGCPtr(atom), &done);
// The pointers are not equal, so if the input string is also an atom it
@ -3390,6 +3395,12 @@ void MacroAssembler::guardSpecificAtom(Register str, JSAtom* atom,
branchTest32(Assembler::NonZero, Address(str, JSString::offsetOfFlags()),
Imm32(JSString::ATOM_BIT), fail);
// Try to do a cheap atomize on the string and repeat the above test
tryFastAtomize(str, scratch, scratch, &notCachedAtom);
branchPtr(Assembler::Equal, scratch, ImmGCPtr(atom), &done);
jump(fail);
bind(&notCachedAtom);
// Check the length.
branch32(Assembler::NotEqual, Address(str, JSString::offsetOfLength()),
Imm32(atom->length()), fail);

View File

@ -5602,8 +5602,8 @@ class MacroAssembler : public MacroAssemblerSpecific {
void setIsDefinitelyTypedArrayConstructor(Register obj, Register output);
void loadMegamorphicCache(Register dest);
void lookupStringInAtomCacheLastLookups(Register str, Register scratch,
Register output, Label* fail);
void tryFastAtomize(Register str, Register scratch, Register output,
Label* fail);
void loadMegamorphicSetPropCache(Register dest);
void loadAtomOrSymbolAndHash(ValueOperand value, Register outId,

View File

@ -510,6 +510,11 @@ AbortReasonOr<WarpScriptSnapshot*> WarpScriptOracle::createScriptSnapshot() {
break;
}
case JSOp::String:
if (!loc.atomizeString(cx_, script_)) {
return abort(AbortReason::Alloc);
}
break;
case JSOp::GetName:
case JSOp::GetGName:
case JSOp::GetProp:
@ -613,7 +618,6 @@ AbortReasonOr<WarpScriptSnapshot*> WarpScriptOracle::createScriptSnapshot() {
case JSOp::Int32:
case JSOp::Double:
case JSOp::BigInt:
case JSOp::String:
case JSOp::Symbol:
case JSOp::Pop:
case JSOp::PopN:
@ -1209,6 +1213,10 @@ bool WarpScriptOracle::replaceNurseryAndAllocSitePointers(
// If the stub data contains weak pointers then trigger a read barrier. This
// is necessary as these will now be strong references in the snapshot.
//
// If the stub data contains strings then atomize them. This ensures we don't
// try to access potentially unstable characters from a background thread and
// also facilitates certain optimizations.
//
// Also asserts non-object fields don't contain nursery pointers.
uint32_t field = 0;
@ -1270,11 +1278,17 @@ bool WarpScriptOracle::replaceNurseryAndAllocSitePointers(
break;
}
case StubField::Type::String: {
#ifdef DEBUG
JSString* str =
stubInfo->getStubField<StubField::Type::String>(stub, offset);
uintptr_t oldWord = stubInfo->getStubRawWord(stub, offset);
JSString* str = reinterpret_cast<JSString*>(oldWord);
MOZ_ASSERT(!IsInsideNursery(str));
#endif
JSAtom* atom = AtomizeString(cx_, str);
if (!atom) {
return false;
}
if (atom != str) {
uintptr_t newWord = reinterpret_cast<uintptr_t>(atom);
stubInfo->replaceStubRawWord(stubDataCopy, offset, oldWord, newWord);
}
break;
}
case StubField::Type::Id: {
@ -1287,10 +1301,19 @@ bool WarpScriptOracle::replaceNurseryAndAllocSitePointers(
break;
}
case StubField::Type::Value: {
#ifdef DEBUG
Value v = stubInfo->getStubField<StubField::Type::Value>(stub, offset);
Value v =
stubInfo->getStubField<StubField::Type::Value>(stub, offset).get();
MOZ_ASSERT_IF(v.isGCThing(), !IsInsideNursery(v.toGCThing()));
#endif
if (v.isString()) {
Value newVal;
JSAtom* atom = AtomizeString(cx_, v.toString());
if (!atom) {
return false;
}
newVal.setString(atom);
stubInfo->replaceStubRawValueBits(stubDataCopy, offset, v.asRawBits(),
newVal.asRawBits());
}
break;
}
case StubField::Type::AllocSite: {

View File

@ -32,6 +32,11 @@ inline JSString* BytecodeLocation::getString(const JSScript* script) const {
return script->getString(this->rawBytecode_);
}
// Forwards to script->atomizeString() with this location's raw bytecode
// pointer and returns its result. The location must be valid (asserted).
inline bool BytecodeLocation::atomizeString(JSContext* cx, JSScript* script) {
MOZ_ASSERT(this->isValid());
return script->atomizeString(cx, this->rawBytecode_);
}
inline PropertyName* BytecodeLocation::getPropertyName(
const JSScript* script) const {
MOZ_ASSERT(this->isValid());

View File

@ -103,6 +103,7 @@ class BytecodeLocation {
inline JSAtom* getAtom(const JSScript* script) const;
inline JSString* getString(const JSScript* script) const;
inline bool atomizeString(JSContext* cx, JSScript* script);
inline PropertyName* getPropertyName(const JSScript* script) const;
inline JS::BigInt* getBigInt(const JSScript* script) const;
inline JSObject* getObject(const JSScript* script) const;

View File

@ -666,6 +666,10 @@ JSAtom* js::AtomizeString(JSContext* cx, JSString* str) {
return &str->asAtom();
}
if (str->isAtomRef()) {
return str->atom();
}
if (JSAtom* atom = cx->caches().stringToAtomCache.lookup(str)) {
return atom;
}
@ -691,6 +695,7 @@ JSAtom* js::AtomizeString(JSContext* cx, JSString* str) {
// not done in lookup() itself, because #including JSContext.h there
// causes some non-trivial #include ordering issues.
cx->markAtom(atom);
str->tryReplaceWithAtomRef(atom);
return atom;
}
}
@ -723,7 +728,9 @@ JSAtom* js::AtomizeString(JSContext* cx, JSString* str) {
return nullptr;
}
cx->caches().stringToAtomCache.maybePut(str, atom, key);
if (!str->tryReplaceWithAtomRef(atom)) {
cx->caches().stringToAtomCache.maybePut(str, atom, key);
}
return atom;
}

View File

@ -2040,6 +2040,23 @@ class JSScript : public js::BaseScript {
return getString(GET_GCTHING_INDEX(pc));
}
// Makes sure the string referenced by the JOF_STRING op at |pc| is an atom.
// If it is not, the string is atomized and the script's gcthing entry is
// replaced with the resulting atom. Returns false if atomization fails and
// true otherwise (including when the entry already was an atom).
bool atomizeString(JSContext* cx, jsbytecode* pc) {
  MOZ_ASSERT(containsPC<js::GCThingIndex>(pc));
  MOZ_ASSERT(js::JOF_OPTYPE((JSOp)*pc) == JOF_STRING);

  js::GCThingIndex thingIndex = GET_GCTHING_INDEX(pc);
  JSString* current = getString(thingIndex);
  if (!current->isAtom()) {
    JSAtom* replacement = js::AtomizeString(cx, current);
    if (!replacement) {
      return false;
    }
    // Run the pre-write barrier on the existing entry before overwriting it.
    js::gc::CellPtrPreWriteBarrier(data_->gcthings()[thingIndex]);
    data_->gcthings()[thingIndex] = JS::GCCellPtr(replacement);
  }
  return true;
}
JSAtom* getAtom(js::GCThingIndex index) const {
return &gcthings()[index].as<JSString>().asAtom();
}

View File

@ -376,6 +376,7 @@ inline JSDependentString::JSDependentString(JSLinearString* base, size_t start,
setLengthAndFlags(length, INIT_DEPENDENT_FLAGS);
d.s.u2.nonInlineCharsTwoByte = base->twoByteChars(nogc) + start;
}
base->setDependedOn();
d.s.u3.base = base;
if (isTenured() && !base->isTenured()) {
base->storeBuffer()->putWholeCell(this);

View File

@ -359,7 +359,7 @@ const char* RepresentationToString(const JSString* s) {
template <typename KnownF, typename UnknownF>
void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
UnknownF unknown) {
for (uint32_t i = js::Bit(3); i < js::Bit(16); i = i << 1) {
for (uint32_t i = js::Bit(3); i < js::Bit(17); i = i << 1) {
if (!(flags & i)) {
continue;
}
@ -406,7 +406,11 @@ void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
known("LATIN1_CHARS_BIT");
break;
case JSString::ATOM_IS_INDEX_BIT:
known("ATOM_IS_INDEX_BIT");
if (str->isAtom()) {
known("ATOM_IS_INDEX_BIT");
} else {
known("ATOM_REF_BIT");
}
break;
case JSString::INDEX_VALUE_BIT:
known("INDEX_VALUE_BIT");
@ -418,7 +422,7 @@ void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
if (str->isRope()) {
known("FLATTEN_VISIT_RIGHT");
} else {
known("NON_DEDUP_BIT");
known("DEPENDED_ON_BIT");
}
break;
case JSString::FLATTEN_FINISH_NODE:
@ -429,7 +433,7 @@ void ForEachStringFlag(const JSString* str, uint32_t flags, KnownF known,
} else if (str->isAtom()) {
known("PINNED_ATOM_BIT");
} else {
unknown(i);
known("NON_DEDUP_BIT");
}
break;
default:
@ -936,6 +940,7 @@ JSLinearString* JSRope::flattenInternal(JSRope* root) {
const size_t wholeLength = root->length();
size_t wholeCapacity;
CharT* wholeChars;
bool setRootDependedOn = false;
AutoCheckCannotGC nogc;
@ -1041,6 +1046,7 @@ finish_node: {
StringFlagsForCharType<CharT>(INIT_DEPENDENT_FLAGS));
str->d.s.u3.base =
reinterpret_cast<JSLinearString*>(root); /* will be true on exit */
setRootDependedOn = true;
// Every interior (rope) node in the rope's tree will be visited during
// the traversal and post-barriered here, so earlier additions of
@ -1093,6 +1099,11 @@ finish_root:
root->storeBuffer()->putWholeCell(&left);
root->setNonDeduplicatable();
}
setRootDependedOn = true;
}
if (setRootDependedOn) {
root->setDependedOn();
}
return &root->asLinear();
@ -2501,6 +2512,45 @@ bool JSString::fillWithRepresentatives(JSContext* cx,
return true;
}
// Tries to rewrite this string in place so that it refers to |atom| and
// borrows the atom's characters (INIT_ATOM_REF_FLAGS). Returns false, leaving
// the string untouched, when the string is depended on, inline, or external;
// returns true once the string has been rewritten. Must not be called on a
// string that already is an atom ref (asserted).
bool JSString::tryReplaceWithAtomRef(JSAtom* atom) {
  MOZ_ASSERT(!isAtomRef());

  // Strings whose characters other strings depend on, strings with inline
  // storage, and external strings are never converted.
  if (isDependedOn()) {
    return false;
  }
  if (isInline()) {
    return false;
  }
  if (isExternal()) {
    return false;
  }

  AutoCheckCannotGC nogc;

  if (hasOutOfLineChars()) {
    void* ownedChars = asLinear().nonInlineCharsRaw();
    // This is a little cheeky and so deserves a comment. Only a tenured
    // string releases its buffer here. If the string is not tenured, its
    // buffer either lives purely in the nursery, in which case it will just
    // be forgotten and blown away in the next minor GC, or it is tracked in
    // the nursery's mallocedBuffers hashtable, in which case it will be freed
    // for us in the next minor GC. We let the GC take care of it since
    // there's a chance it will run during idle time.
    if (isTenured()) {
      RemoveCellMemory(this, allocSize(), MemoryUse::StringContents);
      js_free(ownedChars);
    }
  }

  // Install the atom pointer and the new flags before the chars pointer, in
  // the same order for both character encodings.
  d.s.u3.atom = atom;
  const bool latin1 = atom->hasLatin1Chars();
  uint32_t atomRefFlags = INIT_ATOM_REF_FLAGS;
  if (latin1) {
    atomRefFlags |= LATIN1_CHARS_BIT;
  }
  setLengthAndFlags(length(), atomRefFlags);
  if (latin1) {
    setNonInlineChars(atom->chars<Latin1Char>(nogc));
  } else {
    setNonInlineChars(atom->chars<char16_t>(nogc));
  }

  // Redundant, but just a reminder that this needs to be true or else we need
  // to check and conditionally put ourselves in the store buffer.
  MOZ_ASSERT(atom->isTenured());
  return true;
}
/*** Conversions ************************************************************/
UniqueChars js::EncodeLatin1(JSContext* cx, JSString* str) {

View File

@ -147,6 +147,8 @@ bool CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp);
* JSLinearString latin1Chars, twoByteChars / -
* |
* +-- JSDependentString base / -
* | |
* | +-- JSAtomRefString - / base points to an atom
* |
* +-- JSExternalString - / char array memory managed by embedding
* |
@ -275,6 +277,7 @@ class JSString : public js::gc::CellWithLengthAndFlags {
} u2;
union {
JSLinearString* base; /* JSDependentString */
JSAtom* atom; /* JSAtomRefString */
JSString* right; /* JSRope */
size_t capacity; /* JSLinearString (extensible) */
const JSExternalStringCallbacks*
@ -317,28 +320,30 @@ class JSString : public js::gc::CellWithLengthAndFlags {
* String Instance Subtype
* type encoding predicate
* -----------------------------------------
* Rope 000000 000 xxxx0x xxx
* Linear 000010 000 xxxx1x xxx
* Dependent 000110 000 xxx1xx xxx
* External 100010 000 100010 xxx
* Extensible 010010 000 010010 xxx
* Inline 001010 000 xx1xxx xxx
* FatInline 011010 000 x11xxx xxx
* JSAtom - xxxxx1 xxx
* NormalAtom 000011 000 xx0xx1 xxx
* PermanentAtom 100011 000 1xxxx1 xxx
* ThinInlineAtom 001011 000 x01xx1 xxx
* FatInlineAtom 011011 000 x11xx1 xxx
* |||||| |||
* |||||| ||\- [0] reserved (FORWARD_BIT)
* |||||| |\-- [1] reserved
* |||||| \--- [2] reserved
* |||||\----- [3] IsAtom
* ||||\------ [4] IsLinear
* |||\------- [5] IsDependent
* ||\-------- [6] IsInline
* |\--------- [7] FatInlineAtom/Extensible
* \---------- [8] External/Permanent
* Rope 0000000 000 xxxxx0x xxx
* Linear 0000010 000 xxxxx1x xxx
* Dependent 0000110 000 xxxx1xx xxx
* AtomRef 1000110 000 1xxxxxx xxx
* External 0100010 000 x100010 xxx
* Extensible 0010010 000 x010010 xxx
* Inline 0001010 000 xxx1xxx xxx
* FatInline 0011010 000 xx11xxx xxx
* JSAtom - xxxxxx1 xxx
* NormalAtom 0000011 000 xxx0xx1 xxx
* PermanentAtom 0100011 000 x1xxxx1 xxx
* ThinInlineAtom 0001011 000 xx01xx1 xxx
* FatInlineAtom 0011011 000 xx11xx1 xxx
* ||||||| |||
* ||||||| ||\- [0] reserved (FORWARD_BIT)
* ||||||| |\-- [1] reserved
* ||||||| \--- [2] reserved
* ||||||\----- [3] IsAtom
* |||||\------ [4] IsLinear
* ||||\------- [5] IsDependent
* |||\-------- [6] IsInline
* ||\--------- [7] FatInlineAtom/Extensible
* |\---------- [8] External/Permanent
* \----------- [9] AtomRef
*
* Bits 0..2 are reserved for use by the GC (see
* gc::CellFlagBitsReservedForGC). In particular, bit 0 is currently used for
@ -364,6 +369,8 @@ class JSString : public js::gc::CellWithLengthAndFlags {
static const uint32_t LINEAR_BIT = js::Bit(4);
static const uint32_t DEPENDENT_BIT = js::Bit(5);
static const uint32_t INLINE_CHARS_BIT = js::Bit(6);
// Indicates a dependent string pointing to an atom
static const uint32_t ATOM_REF_BIT = js::Bit(9);
static const uint32_t LINEAR_IS_EXTENSIBLE_BIT = js::Bit(7);
static const uint32_t INLINE_IS_FAT_BIT = js::Bit(7);
@ -383,11 +390,19 @@ class JSString : public js::gc::CellWithLengthAndFlags {
static const uint32_t INIT_ROPE_FLAGS = 0;
static const uint32_t INIT_LINEAR_FLAGS = LINEAR_BIT;
static const uint32_t INIT_DEPENDENT_FLAGS = LINEAR_BIT | DEPENDENT_BIT;
static const uint32_t INIT_ATOM_REF_FLAGS =
INIT_DEPENDENT_FLAGS | ATOM_REF_BIT;
static const uint32_t TYPE_FLAGS_MASK = js::BitMask(9) - js::BitMask(3);
static const uint32_t TYPE_FLAGS_MASK = js::BitMask(10) - js::BitMask(3);
static_assert((TYPE_FLAGS_MASK & js::gc::HeaderWord::RESERVED_MASK) == 0,
"GC reserved bits must not be used for Strings");
// Whether this atom's characters store a uint32 index value less than or
// equal to MAX_ARRAY_INDEX. This bit means something different if the
// string is not an atom (see ATOM_REF_BIT).
// See JSLinearString::isIndex.
static const uint32_t ATOM_IS_INDEX_BIT = js::Bit(9);
// Linear strings:
// - Content and representation are Latin-1 characters.
// - Unmodifiable after construction.
@ -397,12 +412,7 @@ class JSString : public js::gc::CellWithLengthAndFlags {
// - Flag may be cleared when the rope is changed into a dependent string.
//
// Also see LATIN1_CHARS_BIT description under "Flag Encoding".
static const uint32_t LATIN1_CHARS_BIT = js::Bit(9);
// Whether this atom's characters store an uint32 index value less than or
// equal to MAX_ARRAY_INDEX. Not used for non-atomized strings.
// See JSLinearString::isIndex.
static const uint32_t ATOM_IS_INDEX_BIT = js::Bit(10);
static const uint32_t LATIN1_CHARS_BIT = js::Bit(10);
static const uint32_t INDEX_VALUE_BIT = js::Bit(11);
static const uint32_t INDEX_VALUE_SHIFT = 16;
@ -424,6 +434,11 @@ class JSString : public js::gc::CellWithLengthAndFlags {
static const uint32_t FLATTEN_MASK =
FLATTEN_VISIT_RIGHT | FLATTEN_FINISH_NODE;
// Indicates that this string is depended on by another string. A rope should
// never be depended on, and this should never be set during flattening, so
// we can reuse the FLATTEN_VISIT_RIGHT bit.
static const uint32_t DEPENDED_ON_BIT = FLATTEN_VISIT_RIGHT;
static const uint32_t PINNED_ATOM_BIT = js::Bit(15);
static const uint32_t PERMANENT_ATOM_MASK =
ATOM_BIT | PINNED_ATOM_BIT | ATOM_IS_PERMANENT_BIT;
@ -543,6 +558,20 @@ class JSString : public js::gc::CellWithLengthAndFlags {
return flags() >> INDEX_VALUE_SHIFT;
}
/* Reports whether DEPENDED_ON_BIT is set, i.e. whether some dependent string
   points at this string's chars. Callers use this to avoid replacing the
   string with a forwarded atom and freeing its buffer. A string with the bit
   set must not be a rope (asserted). */
bool isDependedOn() const {
  const bool dependedOn = (flags() & DEPENDED_ON_BIT) != 0;
  MOZ_ASSERT_IF(dependedOn, !isRope());
  return dependedOn;
}
/* Marks this string as depended on by setting DEPENDED_ON_BIT. Must not be
   called on a rope (asserted). */
void setDependedOn() {
MOZ_ASSERT(!isRope());
setFlagBit(DEPENDED_ON_BIT);
}
inline size_t allocSize() const;
/* Fallible conversions to more-derived string types. */
@ -572,6 +601,11 @@ class JSString : public js::gc::CellWithLengthAndFlags {
MOZ_ALWAYS_INLINE
bool isDependent() const { return flags() & DEPENDENT_BIT; }
// True when ATOM_REF_BIT is set and ATOM_BIT is clear. Both bits are tested
// together because the atom-ref bit only carries this meaning on non-atoms.
MOZ_ALWAYS_INLINE
bool isAtomRef() const {
  const auto bits = flags();
  return (bits & (ATOM_REF_BIT | ATOM_BIT)) == ATOM_REF_BIT;
}
MOZ_ALWAYS_INLINE
JSDependentString& asDependent() const {
MOZ_ASSERT(isDependent());
@ -668,6 +702,8 @@ class JSString : public js::gc::CellWithLengthAndFlags {
inline JSLinearString* base() const;
inline JSAtom* atom() const;
// The base may be forwarded and becomes a relocation overlay.
// The return value can be a relocation overlay when the base is forwarded,
// or the return value can be the actual base when it is not forwarded.
@ -678,6 +714,8 @@ class JSString : public js::gc::CellWithLengthAndFlags {
// Only called by the GC during nursery collection.
inline void setBase(JSLinearString* newBase);
bool tryReplaceWithAtomRef(JSAtom* atom);
void traceBase(JSTracer* trc);
/* Only called by the GC for strings with the AllocKind::STRING kind. */
@ -1182,6 +1220,20 @@ class JSDependentString : public JSLinearString {
static_assert(sizeof(JSDependentString) == sizeof(JSString),
"string subclasses must be binary-compatible with JSString");
// A dependent string whose d.s.u3 slot holds a JSAtom* (d.s.u3.atom). It adds
// no data members of its own; the static_assert below checks that it stays the
// same size as JSString.
class JSAtomRefString : public JSDependentString {
friend class JSString;
friend class js::gc::CellAllocator;
friend class js::jit::MacroAssembler;
public:
// Byte offset of the atom pointer within the string, for code that addresses
// the field directly.
inline static size_t offsetOfAtom() {
return offsetof(JSAtomRefString, d.s.u3.atom);
}
};
static_assert(sizeof(JSAtomRefString) == sizeof(JSString),
"string subclasses must be binary-compatible with JSString");
class JSExtensibleString : public JSLinearString {
/* Vacuous and therefore unimplemented. */
bool isExtensible() const = delete;
@ -2030,10 +2082,18 @@ MOZ_ALWAYS_INLINE JSLinearString* JSString::ensureLinear(JSContext* cx) {
inline JSLinearString* JSString::base() const {
MOZ_ASSERT(hasBase());
MOZ_ASSERT(!d.s.u3.base->isInline());
MOZ_ASSERT_IF(!isAtomRef(), !d.s.u3.base->isInline());
if (isAtomRef()) {
return static_cast<JSLinearString*>(d.s.u3.atom);
}
return d.s.u3.base;
}
// Returns the atom stored in d.s.u3.atom. Only valid on strings for which
// isAtomRef() holds (asserted).
inline JSAtom* JSString::atom() const {
MOZ_ASSERT(isAtomRef());
return d.s.u3.atom;
}
inline JSLinearString* JSString::nurseryBaseOrRelocOverlay() const {
MOZ_ASSERT(hasBase());
return d.s.u3.base;
@ -2184,7 +2244,9 @@ MOZ_ALWAYS_INLINE bool JSAtom::lengthFitsInline<char16_t>(size_t length) {
template <>
MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(const char16_t* chars) {
// Check that the new buffer is located in the StringBufferArena
checkStringCharsArena(chars);
if (!(isAtomRef() && atom()->isInline())) {
checkStringCharsArena(chars);
}
d.s.u2.nonInlineCharsTwoByte = chars;
}
@ -2192,7 +2254,9 @@ template <>
MOZ_ALWAYS_INLINE void JSString::setNonInlineChars(
const JS::Latin1Char* chars) {
// Check that the new buffer is located in the StringBufferArena
checkStringCharsArena(chars);
if (!(isAtomRef() && atom()->isInline())) {
checkStringCharsArena(chars);
}
d.s.u2.nonInlineCharsLatin1 = chars;
}