gecko-dev/xpcom/string/nsSubstring.cpp
Henri Sivonen 3edc601325 Bug 1402247 - Use encoding_rs for XPCOM string encoding conversions. r=Nika,erahm,froydnj.
Correctness improvements:

 * UTF errors are handled safely per spec instead of dangerously truncating
   strings.

 * There are fewer converter implementations.

Performance improvements:

 * The old code did exact buffer length math, which meant doing UTF math twice
   on each input string (once for length calculation and another time for
   conversion). Exact length math is more complicated when handling errors
   properly, which the old code didn't do. The new code does UTF math on the
   string content only once (when converting) but risks allocating more than
   once. There are heuristics in place to lower the probability of
   reallocation in cases where the double math avoidance isn't enough of a
   saving to absorb an allocation and memcpy.

 * Previously, in UTF-16 <-> UTF-8 conversions, an ASCII prefix was optimized
   but a single non-ASCII code point pessimized the rest of the string. The
   new code tries to get back on the fast ASCII path.

 * UTF-16 to Latin1 conversion guarantees less about handling of out-of-range
   input to eliminate an operation from the inner loop on x86/x86_64.

 * When assigning to a pre-existing string, the new code tries to reuse the
   old buffer instead of first releasing the old buffer and then allocating a
   new one.

 * When reallocating from the new code, the memcpy covers only the data that
   is part of the logical length of the old string instead of memcpying the
   whole capacity. (For old callers old excess memcpy behavior is preserved
   due to bogus callers. See bug 1472113.)

 * UTF-8 strings in XPConnect that are in the Latin1 range are passed to
   SpiderMonkey as Latin1.

New features:

 * Conversion between UTF-8 and Latin1 is added in order to enable faster
   future interop between Rust code (or otherwise UTF-8-using code) and text
   node and SpiderMonkey code that uses Latin1.

MozReview-Commit-ID: JaJuExfILM9
2018-08-14 14:43:42 +03:00

536 lines
14 KiB
C++

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifdef DEBUG
#define ENABLE_STRING_STATS
#endif
#include "mozilla/Atomics.h"
#include "mozilla/MemoryReporting.h"
#ifdef ENABLE_STRING_STATS
#include <stdio.h>
#endif
#include <stdlib.h>
#include "nsAString.h"
#include "nsString.h"
#include "nsStringBuffer.h"
#include "nsDependentString.h"
#include "nsPrintfCString.h"
#include "nsMemory.h"
#include "prprf.h"
#include "nsCOMPtr.h"
#include "mozilla/IntegerPrintfMacros.h"
#ifdef XP_WIN
#include <windows.h>
#include <process.h>
#define getpid() _getpid()
#define pthread_self() GetCurrentThreadId()
#else
#include <pthread.h>
#include <unistd.h>
#endif
// CHECK_STRING_BUFFER_CANARY(c): when STRING_BUFFER_CANARY is defined, crash
// with a message containing the value if the canary |c| is not CANARY_OK.
// When STRING_BUFFER_CANARY is not defined the macro expands to an empty
// do/while statement, so call sites need no conditional compilation.
#ifdef STRING_BUFFER_CANARY
#define CHECK_STRING_BUFFER_CANARY(c) \
do { \
if ((c) != CANARY_OK) { \
MOZ_CRASH_UNSAFE_PRINTF("Bad canary value 0x%x", c); \
} \
} while(0)
#else
#define CHECK_STRING_BUFFER_CANARY(c) \
do { \
} while(0)
#endif
using mozilla::Atomic;
// ---------------------------------------------------------------------------
// A single zero code unit shared by both string flavours. It is declared as
// char16_t so the same object can back the narrow and the wide empty buffer.
static const char16_t gNullChar = 0;

// Narrow (char) view of the shared zero code unit.
char* const nsCharTraits<char>::sEmptyBuffer =
  reinterpret_cast<char*>(const_cast<char16_t*>(&gNullChar));

// Wide (char16_t) view of the shared zero code unit.
char16_t* const nsCharTraits<char16_t>::sEmptyBuffer =
  const_cast<char16_t*>(&gNullChar);
// ---------------------------------------------------------------------------
#ifdef ENABLE_STRING_STATS
/**
 * Counters for string buffer activity in this process. A single static
 * instance (gStringStats) is updated through STRING_STAT_INCREMENT and the
 * totals are printed to stdout when that instance is destroyed.
 */
class nsStringStats
{
public:
  // Every counter is explicitly started at zero. The destructor reads
  // mAdoptCount and mAdoptFreeCount, so they must be initialised here just
  // like the other four rather than relying on how the object is stored.
  nsStringStats()
    : mAllocCount(0)
    , mReallocCount(0)
    , mFreeCount(0)
    , mShareCount(0)
    , mAdoptCount(0)
    , mAdoptFreeCount(0)
  {
  }

  // Prints the collected totals, flagging allocations or adoptions that were
  // never matched by a free.
  ~nsStringStats()
  {
    // this is a hack to suppress duplicate string stats printing
    // in seamonkey as a result of the string code being linked
    // into seamonkey and libxpcom! :-(
    if (!mAllocCount && !mAdoptCount) {
      return;
    }

    printf("nsStringStats\n");
    printf(" => mAllocCount: % 10d\n", int(mAllocCount));
    printf(" => mReallocCount: % 10d\n", int(mReallocCount));
    printf(" => mFreeCount: % 10d", int(mFreeCount));
    if (mAllocCount > mFreeCount) {
      printf(" -- LEAKED %d !!!\n", mAllocCount - mFreeCount);
    } else {
      printf("\n");
    }
    printf(" => mShareCount: % 10d\n", int(mShareCount));
    printf(" => mAdoptCount: % 10d\n", int(mAdoptCount));
    printf(" => mAdoptFreeCount: % 10d", int(mAdoptFreeCount));
    if (mAdoptCount > mAdoptFreeCount) {
      printf(" -- LEAKED %d !!!\n", mAdoptCount - mAdoptFreeCount);
    } else {
      printf("\n");
    }
    printf(" => Process ID: %" PRIuPTR ", Thread ID: %" PRIuPTR "\n",
           uintptr_t(getpid()), uintptr_t(pthread_self()));
  }

  // Counter type: a sequentially consistent atomic 32-bit integer that is
  // marked DontPreserve for record/replay.
  typedef Atomic<int32_t,
                 mozilla::SequentiallyConsistent,
                 mozilla::recordreplay::Behavior::DontPreserve> AtomicInt;

  AtomicInt mAllocCount;      // bumped in nsStringBuffer::Alloc
  AtomicInt mReallocCount;    // bumped in nsStringBuffer::Realloc
  AtomicInt mFreeCount;       // bumped when nsStringBuffer::Release frees
  AtomicInt mShareCount;      // bumped in nsStringBuffer::AddRef
  AtomicInt mAdoptCount;      // not incremented in the visible part of this file
  AtomicInt mAdoptFreeCount;  // bumped when ReleaseData frees OWNED data
};
static nsStringStats gStringStats;
// Increments the counter named m<_s>Count on the global stats object.
#define STRING_STAT_INCREMENT(_s) (gStringStats.m ## _s ## Count)++
#else
// Statistics are compiled out: the macro expands to nothing.
#define STRING_STAT_INCREMENT(_s)
#endif
// ---------------------------------------------------------------------------
/**
 * Gives up a string's hold on its character storage according to how that
 * storage is owned.
 *
 * @param aData   Pointer to the character data being released.
 * @param aFlags  Ownership flags describing |aData|.
 *
 * REFCOUNTED data drops one reference on its nsStringBuffer. OWNED data is
 * freed directly. Any other kind of data is left untouched.
 */
void
ReleaseData(void* aData, nsAString::DataFlags aFlags)
{
  if (aFlags & nsAString::DataFlags::REFCOUNTED) {
    nsStringBuffer::FromData(aData)->Release();
    return;
  }

  if (!(aFlags & nsAString::DataFlags::OWNED)) {
    // Neither refcounted nor owned: there is nothing to release.
    return;
  }

  free(aData);
  STRING_STAT_INCREMENT(AdoptFree);
  // For leak tracking, freeing adopted data counts as the destruction of a
  // "StringAdopt" object.
  MOZ_LOG_DTOR(aData, "StringAdopt", 1);
}
// ---------------------------------------------------------------------------
// XXX or we could make nsStringBuffer be a friend of nsTAString
class nsAStringAccessor : public nsAString
{
private:
nsAStringAccessor(); // NOT IMPLEMENTED
public:
char_type* data() const
{
return mData;
}
size_type length() const
{
return mLength;
}
DataFlags flags() const
{
return mDataFlags;
}
void set(char_type* aData, size_type aLen, DataFlags aDataFlags)
{
ReleaseData(mData, mDataFlags);
SetData(aData, aLen, aDataFlags);
}
};
class nsACStringAccessor : public nsACString
{
private:
nsACStringAccessor(); // NOT IMPLEMENTED
public:
char_type* data() const
{
return mData;
}
size_type length() const
{
return mLength;
}
DataFlags flags() const
{
return mDataFlags;
}
void set(char_type* aData, size_type aLen, DataFlags aDataFlags)
{
ReleaseData(mData, mDataFlags);
SetData(aData, aLen, aDataFlags);
}
};
// ---------------------------------------------------------------------------
/**
 * Takes one additional reference on this buffer.
 */
void
nsStringBuffer::AddRef()
{
  // A relaxed increment is enough here. Whatever mechanism handed the
  // pointer to the current thread is responsible for the memory
  // synchronization; the first increment on a thread may even happen after
  // the first use of the object on that thread, so ordering the increment
  // itself buys nothing.
#ifdef NS_BUILD_REFCNT_LOGGING
  // fetch_add returns the previous value; the logger wants the new count.
  uint32_t count = mRefCount.fetch_add(1, std::memory_order_relaxed) + 1;
#else
  mRefCount.fetch_add(1, std::memory_order_relaxed);
#endif

  STRING_STAT_INCREMENT(Share);
  NS_LOG_ADDREF(this, count, "nsStringBuffer", sizeof(*this));
}
/**
 * Drops one reference on this buffer and frees it when that was the last one.
 */
void
nsStringBuffer::Release()
{
  CHECK_STRING_BUFFER_CANARY(mCanary);

  // This may be the final release made by this thread. Release ordering
  // makes this thread's earlier writes visible to whichever thread ends up
  // destroying the object after reading mRefCount with acquire semantics.
  const uint32_t before = mRefCount.fetch_sub(1, std::memory_order_release);
  uint32_t count = before - 1;
  NS_LOG_RELEASE(this, count, "nsStringBuffer");

  if (count != 0) {
    // Other references remain; nothing more to do.
    return;
  }

  // This thread destroys the object. The acquire load pairs with the release
  // decrements performed by other threads so that everything they wrote
  // before releasing is visible here before the memory is freed.
  count = mRefCount.load(std::memory_order_acquire);

#ifdef STRING_BUFFER_CANARY
  mCanary = CANARY_POISON;
#endif

  STRING_STAT_INCREMENT(Free);
  free(this); // we were allocated with |malloc|
}
/**
 * Alloc returns a pointer to a new string header with set capacity.
 *
 * @param aSize  Number of storage bytes to allocate after the header. Zero
 *               is not allowed (asserted only).
 * @return A buffer whose reference count is 1, or null if malloc fails or if
 *         the header size plus |aSize| overflows size_t or exceeds the
 *         32-bit limit checked by the "mStorageSize will truncate" assertion.
 */
already_AddRefed<nsStringBuffer>
nsStringBuffer::Alloc(size_t aSize)
{
  NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed");
  NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) &&
               sizeof(nsStringBuffer) + aSize > aSize,
               "mStorageSize will truncate");

  // The assertion above only reports the problem; it does not stop the
  // allocation. Refuse such requests outright so that a wrapped-around size
  // can never produce a buffer smaller than the capacity recorded for it.
  const size_t allocSize = sizeof(nsStringBuffer) + aSize;
  if (allocSize <= aSize || allocSize > size_t(uint32_t(-1))) {
    return nullptr;
  }

  nsStringBuffer* hdr = static_cast<nsStringBuffer*>(malloc(allocSize));
  if (hdr) {
    STRING_STAT_INCREMENT(Alloc);

    hdr->mRefCount = 1;
    hdr->mStorageSize = aSize;
#ifdef STRING_BUFFER_CANARY
    hdr->mCanary = CANARY_OK;
#endif
    NS_LOG_ADDREF(hdr, 1, "nsStringBuffer", sizeof(*hdr));
  }
  // On malloc failure |hdr| is null and is returned as such.
  return dont_AddRef(hdr);
}
/**
 * Resizes the storage that follows the header |aHdr| to |aSize| bytes.
 *
 * @param aHdr   Buffer to resize; asserted not to be read-only.
 * @param aSize  New storage size in bytes. Zero is not allowed (asserted
 *               only).
 * @return The possibly moved header on success. Null if realloc fails or if
 *         the header size plus |aSize| overflows size_t or exceeds the
 *         32-bit limit checked by the "mStorageSize will truncate"
 *         assertion; in those cases |aHdr| is left allocated and unchanged.
 */
nsStringBuffer*
nsStringBuffer::Realloc(nsStringBuffer* aHdr, size_t aSize)
{
  STRING_STAT_INCREMENT(Realloc);

  CHECK_STRING_BUFFER_CANARY(aHdr->mCanary);
  NS_ASSERTION(aSize != 0, "zero capacity allocation not allowed");
  NS_ASSERTION(sizeof(nsStringBuffer) + aSize <= size_t(uint32_t(-1)) &&
               sizeof(nsStringBuffer) + aSize > aSize,
               "mStorageSize will truncate");

  // no point in trying to save ourselves if we hit this assertion
  NS_ASSERTION(!aHdr->IsReadonly(), "|Realloc| attempted on readonly string");

  // The size assertion above only reports the problem. Reject such requests
  // before touching the buffer so that a wrapped-around size can never
  // shrink the allocation below the capacity recorded for it.
  const size_t allocSize = sizeof(nsStringBuffer) + aSize;
  if (allocSize <= aSize || allocSize > size_t(uint32_t(-1))) {
    return nullptr;
  }

  // Treat this as a release and addref for refcounting purposes, since we
  // just asserted that the refcount is 1. If we don't do that, refcount
  // logging will claim we've leaked all sorts of stuff.
  NS_LOG_RELEASE(aHdr, 0, "nsStringBuffer");

  nsStringBuffer* resized =
    static_cast<nsStringBuffer*>(realloc(aHdr, allocSize));
  if (!resized) {
    // A failed realloc leaves the original block allocated, so the buffer
    // is still alive. Re-log the addref to balance the release logged above.
    NS_LOG_ADDREF(aHdr, 1, "nsStringBuffer", sizeof(*aHdr));
    return nullptr;
  }

  NS_LOG_ADDREF(resized, 1, "nsStringBuffer", sizeof(*resized));
  resized->mStorageSize = aSize;
  return resized;
}
/**
 * Returns the nsStringBuffer backing |aStr|, or null when the string's data
 * is not flagged REFCOUNTED. No reference is added.
 */
nsStringBuffer*
nsStringBuffer::FromString(const nsAString& aStr)
{
  const nsAStringAccessor& view = static_cast<const nsAStringAccessor&>(aStr);

  if (view.flags() & nsAString::DataFlags::REFCOUNTED) {
    return FromData(view.data());
  }
  return nullptr;
}
/**
 * Returns the nsStringBuffer backing |aStr|, or null when the string's data
 * is not flagged REFCOUNTED. No reference is added.
 */
nsStringBuffer*
nsStringBuffer::FromString(const nsACString& aStr)
{
  const nsACStringAccessor& view =
    static_cast<const nsACStringAccessor&>(aStr);

  if (view.flags() & nsACString::DataFlags::REFCOUNTED) {
    return FromData(view.data());
  }
  return nullptr;
}
void
nsStringBuffer::ToString(uint32_t aLen, nsAString& aStr,
bool aMoveOwnership)
{
char16_t* data = static_cast<char16_t*>(Data());
nsAStringAccessor* accessor = static_cast<nsAStringAccessor*>(&aStr);
MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char16_t(0),
"data should be null terminated");
nsAString::DataFlags flags =
nsAString::DataFlags::REFCOUNTED | nsAString::DataFlags::TERMINATED;
if (!aMoveOwnership) {
AddRef();
}
accessor->set(data, aLen, flags);
}
void
nsStringBuffer::ToString(uint32_t aLen, nsACString& aStr,
bool aMoveOwnership)
{
char* data = static_cast<char*>(Data());
nsACStringAccessor* accessor = static_cast<nsACStringAccessor*>(&aStr);
MOZ_DIAGNOSTIC_ASSERT(data[aLen] == char(0),
"data should be null terminated");
nsACString::DataFlags flags =
nsACString::DataFlags::REFCOUNTED | nsACString::DataFlags::TERMINATED;
if (!aMoveOwnership) {
AddRef();
}
accessor->set(data, aLen, flags);
}
/**
 * Memory-reporting helper: returns the size |aMallocSizeOf| measures for
 * this buffer, or 0 when IsReadonly() is true.
 */
size_t
nsStringBuffer::SizeOfIncludingThisIfUnshared(mozilla::MallocSizeOf aMallocSizeOf) const
{
  if (IsReadonly()) {
    return 0;
  }
  return aMallocSizeOf(this);
}
// Memory-reporting helper: always returns the size |aMallocSizeOf| measures
// for this buffer, without consulting IsReadonly().
size_t
nsStringBuffer::SizeOfIncludingThisEvenIfShared(mozilla::MallocSizeOf aMallocSizeOf) const
{
return aMallocSizeOf(this);
}
#ifdef STRING_BUFFER_CANARY
// Crashes with a message that includes the current canary value. Only
// compiled when STRING_BUFFER_CANARY is defined; judging by the name and the
// message it is the failure path of a canary check in FromData (that check
// is not visible in this file).
void
nsStringBuffer::FromDataCanaryCheckFailed() const
{
MOZ_CRASH_UNSAFE_PRINTF("Bad canary value 0x%x in FromData", mCanary);
}
#endif
// ---------------------------------------------------------------------------
// define nsAString
#include "nsTSubstring.cpp"
// Provide rust bindings to the nsA[C]String types
extern "C" {
// This is a no-op on release, so we ifdef it out such that using it in release
// results in a linker error.
#ifdef DEBUG
// Records the construction of a "StringAdopt" object for |aData| in the
// leak log; ReleaseData logs the matching destruction when it frees OWNED
// data.
void Gecko_IncrementStringAdoptCount(void* aData)
{
MOZ_LOG_CTOR(aData, "StringAdopt", 1);
}
#elif defined(MOZ_DEBUG_RUST)
// Non-DEBUG build with MOZ_DEBUG_RUST defined: provide an empty definition so
// the symbol exists.
void Gecko_IncrementStringAdoptCount(void *aData)
{
}
#endif
// ---- nsACString (8-bit string) bindings ----
// Runs the nsACString destructor on |aThis| in place. The memory holding the
// object itself is not deallocated here.
void Gecko_FinalizeCString(nsACString* aThis)
{
aThis->~nsACString();
}
// Assigns the contents of |aOther| to |aThis| (non-fallible overload).
void Gecko_AssignCString(nsACString* aThis, const nsACString* aOther)
{
aThis->Assign(*aOther);
}
// Assigns |aOther| to |aThis| through the rvalue overload of Assign, which
// is allowed to take over |aOther|'s contents.
void Gecko_TakeFromCString(nsACString* aThis, nsACString* aOther)
{
aThis->Assign(std::move(*aOther));
}
// Appends the contents of |aOther| to |aThis| (non-fallible overload).
void Gecko_AppendCString(nsACString* aThis, const nsACString* aOther)
{
aThis->Append(*aOther);
}
// Sets the length of |aThis| to |aLength| (non-fallible overload).
void Gecko_SetLengthCString(nsACString* aThis, uint32_t aLength)
{
aThis->SetLength(aLength);
}
// Fallible variant of Gecko_AssignCString: returns the bool reported by the
// mozilla::fallible overload of Assign (presumably false on failure).
bool Gecko_FallibleAssignCString(nsACString* aThis, const nsACString* aOther)
{
return aThis->Assign(*aOther, mozilla::fallible);
}
// Fallible variant of Gecko_TakeFromCString: returns the bool reported by
// the mozilla::fallible rvalue overload of Assign.
bool Gecko_FallibleTakeFromCString(nsACString* aThis, nsACString* aOther)
{
return aThis->Assign(std::move(*aOther), mozilla::fallible);
}
// Fallible variant of Gecko_AppendCString: returns the bool reported by the
// mozilla::fallible overload of Append.
bool Gecko_FallibleAppendCString(nsACString* aThis, const nsACString* aOther)
{
return aThis->Append(*aOther, mozilla::fallible);
}
// Fallible variant of Gecko_SetLengthCString: returns the bool reported by
// the mozilla::fallible overload of SetLength.
bool Gecko_FallibleSetLengthCString(nsACString* aThis, uint32_t aLength)
{
return aThis->SetLength(aLength, mozilla::fallible);
}
// Returns the pointer produced by BeginWriting() on |aThis|.
char* Gecko_BeginWritingCString(nsACString* aThis)
{
return aThis->BeginWriting();
}
// Returns the pointer produced by the mozilla::fallible overload of
// BeginWriting() (presumably null on failure; confirm against nsTSubstring).
char* Gecko_FallibleBeginWritingCString(nsACString* aThis)
{
return aThis->BeginWriting(mozilla::fallible);
}
// Forwards to StartBulkWrite and returns its value, substituting UINT32_MAX
// via unwrapOr when the call did not produce a value.
uint32_t
Gecko_StartBulkWriteCString(nsACString* aThis,
uint32_t aCapacity,
uint32_t aUnitsToPreserve,
bool aAllowShrinking)
{
return aThis->StartBulkWrite(aCapacity, aUnitsToPreserve, aAllowShrinking).unwrapOr(UINT32_MAX);
}
// ---- nsAString (16-bit string) bindings ----
// Runs the nsAString destructor on |aThis| in place. The memory holding the
// object itself is not deallocated here.
void Gecko_FinalizeString(nsAString* aThis)
{
aThis->~nsAString();
}
// Assigns the contents of |aOther| to |aThis| (non-fallible overload).
void Gecko_AssignString(nsAString* aThis, const nsAString* aOther)
{
aThis->Assign(*aOther);
}
// Assigns |aOther| to |aThis| through the rvalue overload of Assign, which
// is allowed to take over |aOther|'s contents.
void Gecko_TakeFromString(nsAString* aThis, nsAString* aOther)
{
aThis->Assign(std::move(*aOther));
}
// Appends the contents of |aOther| to |aThis| (non-fallible overload).
void Gecko_AppendString(nsAString* aThis, const nsAString* aOther)
{
aThis->Append(*aOther);
}
// Sets the length of |aThis| to |aLength| (non-fallible overload).
void Gecko_SetLengthString(nsAString* aThis, uint32_t aLength)
{
aThis->SetLength(aLength);
}
// Fallible variant of Gecko_AssignString: returns the bool reported by the
// mozilla::fallible overload of Assign (presumably false on failure).
bool Gecko_FallibleAssignString(nsAString* aThis, const nsAString* aOther)
{
return aThis->Assign(*aOther, mozilla::fallible);
}
// Fallible variant of Gecko_TakeFromString: returns the bool reported by the
// mozilla::fallible rvalue overload of Assign.
bool Gecko_FallibleTakeFromString(nsAString* aThis, nsAString* aOther)
{
return aThis->Assign(std::move(*aOther), mozilla::fallible);
}
// Fallible variant of Gecko_AppendString: returns the bool reported by the
// mozilla::fallible overload of Append.
bool Gecko_FallibleAppendString(nsAString* aThis, const nsAString* aOther)
{
return aThis->Append(*aOther, mozilla::fallible);
}
// Fallible variant of Gecko_SetLengthString: returns the bool reported by
// the mozilla::fallible overload of SetLength.
bool Gecko_FallibleSetLengthString(nsAString* aThis, uint32_t aLength)
{
return aThis->SetLength(aLength, mozilla::fallible);
}
// Returns the pointer produced by BeginWriting() on |aThis|.
char16_t* Gecko_BeginWritingString(nsAString* aThis)
{
return aThis->BeginWriting();
}
// Returns the pointer produced by the mozilla::fallible overload of
// BeginWriting() (presumably null on failure; confirm against nsTSubstring).
char16_t* Gecko_FallibleBeginWritingString(nsAString* aThis)
{
return aThis->BeginWriting(mozilla::fallible);
}
// Forwards to StartBulkWrite and returns its value, substituting UINT32_MAX
// via unwrapOr when the call did not produce a value.
uint32_t
Gecko_StartBulkWriteString(nsAString* aThis,
uint32_t aCapacity,
uint32_t aUnitsToPreserve,
bool aAllowShrinking)
{
return aThis->StartBulkWrite(aCapacity, aUnitsToPreserve, aAllowShrinking).unwrapOr(UINT32_MAX);
}
} // extern "C"