Bug 1928407: Store segmenter string chars in a tagged pointer. r=sfink

Differential Revision: https://phabricator.services.mozilla.com/D228164
This commit is contained in:
André Bargull 2024-11-11 08:29:06 +00:00
parent 07ad6a19e1
commit b73b36214d
3 changed files with 97 additions and 30 deletions

View File

@ -611,16 +611,16 @@ void SegmentsObject::finalize(JS::GCContext* gcx, JSObject* obj) {
MOZ_ASSERT(gcx->onMainThread());
auto* segments = &obj->as<SegmentsObject>();
bool isLatin1 = segments->getString()->hasLatin1Chars();
if (void* chars = segments->getStringChars()) {
if (auto chars = segments->getStringChars()) {
size_t length = segments->getString()->length();
if (isLatin1) {
if (chars.has<JS::Latin1Char>()) {
intl::RemoveICUCellMemory(gcx, segments, length * sizeof(JS::Latin1Char));
js_free(chars.data<JS::Latin1Char>());
} else {
intl::RemoveICUCellMemory(gcx, segments, length * sizeof(char16_t));
js_free(chars.data<char16_t>());
}
js_free(chars);
}
if (segments->getBreakIterator()) {
@ -632,16 +632,16 @@ void SegmentIteratorObject::finalize(JS::GCContext* gcx, JSObject* obj) {
MOZ_ASSERT(gcx->onMainThread());
auto* iterator = &obj->as<SegmentIteratorObject>();
bool isLatin1 = iterator->getString()->hasLatin1Chars();
if (void* chars = iterator->getStringChars()) {
if (auto chars = iterator->getStringChars()) {
size_t length = iterator->getString()->length();
if (isLatin1) {
if (chars.has<JS::Latin1Char>()) {
intl::RemoveICUCellMemory(gcx, iterator, length * sizeof(JS::Latin1Char));
js_free(chars.data<JS::Latin1Char>());
} else {
intl::RemoveICUCellMemory(gcx, iterator, length * sizeof(char16_t));
js_free(chars.data<char16_t>());
}
js_free(chars);
}
if (iterator->getBreakIterator()) {
@ -721,7 +721,7 @@ static bool EnsureStringChars(JSContext* cx, Handle<T*> segments) {
if (!chars) {
return false;
}
segments->setLatin1Chars(chars.release());
segments->setStringChars(SegmentsStringChars{chars.release()});
intl::AddICUCellMemory(segments, length * sizeof(JS::Latin1Char));
} else {
@ -729,7 +729,7 @@ static bool EnsureStringChars(JSContext* cx, Handle<T*> segments) {
if (!chars) {
return false;
}
segments->setTwoByteChars(chars.release());
segments->setStringChars(SegmentsStringChars{chars.release()});
intl::AddICUCellMemory(segments, length * sizeof(char16_t));
}
@ -744,13 +744,13 @@ static auto* CreateBreakIterator(Handle<T*> segments) {
void* segmenter = segments->getSegmenter()->getSegmenter();
MOZ_ASSERT(segmenter);
void* chars = segments->getStringChars();
auto chars = segments->getStringChars();
MOZ_ASSERT(chars);
size_t length = segments->getString()->length();
auto* seg = static_cast<const typename Interface::Segmenter*>(segmenter);
auto* ch = static_cast<const typename Interface::Char*>(chars);
auto* ch = chars.template data<typename Interface::Char>();
return Interface::create(seg, ch, length);
}

View File

@ -8,6 +8,7 @@
#define builtin_intl_Segmenter_h
#include <stdint.h>
#include <type_traits>
#include "builtin/SelfHostingDefines.h"
#include "js/Class.h"
@ -84,6 +85,66 @@ class SegmenterObject : public NativeObject {
static void finalize(JS::GCContext* gcx, JSObject* obj);
};
class SegmentsStringChars final {
uintptr_t tagged_ = 0;
enum Tag {
Latin1 = 0,
TwoByte = 1,
TagMask = TwoByte,
};
static uintptr_t toTagged(const void* chars, Tag tag) {
MOZ_ASSERT(chars != nullptr, "can't tag nullptr");
auto ptr = reinterpret_cast<uintptr_t>(chars);
MOZ_ASSERT((ptr & TagMask) == 0, "pointer already tagged");
return ptr | tag;
}
Tag tag() const { return static_cast<Tag>(tagged_ & TagMask); }
uintptr_t untagged() const { return tagged_ & ~TagMask; }
explicit SegmentsStringChars(const void* taggedChars)
: tagged_(reinterpret_cast<uintptr_t>(taggedChars)) {}
public:
SegmentsStringChars() = default;
explicit SegmentsStringChars(const JS::Latin1Char* chars)
: tagged_(toTagged(chars, Latin1)) {}
explicit SegmentsStringChars(const char16_t* chars)
: tagged_(toTagged(chars, TwoByte)) {}
static auto fromTagged(const void* taggedChars) {
return SegmentsStringChars{taggedChars};
}
explicit operator bool() const { return tagged_ != 0; }
template <typename CharT>
bool has() const {
if constexpr (std::is_same_v<CharT, JS::Latin1Char>) {
return tag() == Latin1;
} else {
static_assert(std::is_same_v<CharT, char16_t>);
return tag() == TwoByte;
}
}
template <typename CharT>
CharT* data() const {
MOZ_ASSERT(has<CharT>());
return reinterpret_cast<CharT*>(untagged());
}
uintptr_t tagged() const { return tagged_; }
};
class SegmentsObject : public NativeObject {
public:
static const JSClass class_;
@ -125,20 +186,16 @@ class SegmentsObject : public NativeObject {
return !getFixedSlot(STRING_CHARS_SLOT).isUndefined();
}
void* getStringChars() const {
SegmentsStringChars getStringChars() const {
const auto& slot = getFixedSlot(STRING_CHARS_SLOT);
if (slot.isUndefined()) {
return nullptr;
return SegmentsStringChars{};
}
return slot.toPrivate();
return SegmentsStringChars::fromTagged(slot.toPrivate());
}
void setLatin1Chars(JS::Latin1Char* chars) {
setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars));
}
void setTwoByteChars(char16_t* chars) {
setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars));
void setStringChars(SegmentsStringChars chars) {
setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars.tagged()));
}
int32_t getIndex() const {
@ -226,20 +283,16 @@ class SegmentIteratorObject : public NativeObject {
return !getFixedSlot(STRING_CHARS_SLOT).isUndefined();
}
void* getStringChars() const {
SegmentsStringChars getStringChars() const {
const auto& slot = getFixedSlot(STRING_CHARS_SLOT);
if (slot.isUndefined()) {
return nullptr;
return SegmentsStringChars{};
}
return slot.toPrivate();
return SegmentsStringChars::fromTagged(slot.toPrivate());
}
void setLatin1Chars(JS::Latin1Char* chars) {
setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars));
}
void setTwoByteChars(char16_t* chars) {
setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars));
void setStringChars(SegmentsStringChars chars) {
setFixedSlot(STRING_CHARS_SLOT, PrivateValue(chars.tagged()));
}
int32_t getIndex() const {

View File

@ -0,0 +1,14 @@
// Regression test for bug 1928407: the internal representation of a string
// changes after it has been handed to Intl.Segmenter. The statement order
// matters: the segments object is created and queried first, and only then is
// the string atomized.

// Create a two-byte string which has only Latin-1 characters.
var str = newString("12345678901234567890", {twoByte: true});
// Create a segmenter for |str|.
var segmenter = new Intl.Segmenter();
var segments = segmenter.segment(str);
// Query a segment while |str| is still a two-byte string.
// NOTE(review): presumably this call is what makes the engine copy the
// string's characters for the segments object -- confirm against the
// implementation.
var segment = segments.containing(0);
// Empty object, used only as the target of the property lookup below.
var obj = {};
// `obj[str]` to atomize the string. This will change |str| to a dependent
// string of the newly created atom. The atom string is allocated as Latin-1,
// because all characters are Latin-1.
// The lookup result is incidental: |obj| has no properties, so it is
// undefined.
assertEq(obj[str], undefined);