gecko-dev/gfx/thebes/gfxScriptItemizer.cpp

247 lines
8.0 KiB
C++

/* -*- Mode: C++; tab-width: 20; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
* This file is based on usc_impl.c from ICU 4.2.0.1, slightly adapted
* for use within Mozilla Gecko, separate from a standard ICU build.
*
* The original ICU license of the code follows:
*
* ICU License - ICU 1.8.1 and later
*
* COPYRIGHT AND PERMISSION NOTICE
*
* Copyright (c) 1995-2009 International Business Machines Corporation and
* others
*
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, provided that the above copyright notice(s) and this
* permission notice appear in all copies of the Software and that both the
* above copyright notice(s) and this permission notice appear in supporting
* documentation.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
* BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Except as contained in this notice, the name of a copyright holder shall
* not be used in advertising or otherwise to promote the sale, use or other
* dealings in this Software without prior written authorization of the
* copyright holder.
*
* All trademarks and registered trademarks mentioned herein are the property
* of their respective owners.
*/
#include "gfxScriptItemizer.h"
#include "nsUnicodeProperties.h"
#include "nsCharTraits.h"
#define MOD(sp) ((sp) % PAREN_STACK_DEPTH)
#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH)
#define INC(sp,count) (MOD((sp) + (count)))
#define INC1(sp) (INC(sp, 1))
#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count)))
#define DEC1(sp) (DEC(sp, 1))
#define STACK_IS_EMPTY() (pushCount <= 0)
#define STACK_IS_NOT_EMPTY() (! STACK_IS_EMPTY())
#define TOP() (parenStack[parenSP])
#define SYNC_FIXUP() (fixupCount = 0)
void
gfxScriptItemizer::push(PRUint32 endPairChar, PRInt32 scriptCode)
{
pushCount = LIMIT_INC(pushCount);
fixupCount = LIMIT_INC(fixupCount);
parenSP = INC1(parenSP);
parenStack[parenSP].endPairChar = endPairChar;
parenStack[parenSP].scriptCode = scriptCode;
}
void
gfxScriptItemizer::pop()
{
if (STACK_IS_EMPTY()) {
return;
}
if (fixupCount > 0) {
fixupCount -= 1;
}
pushCount -= 1;
parenSP = DEC1(parenSP);
/* If the stack is now empty, reset the stack
pointers to their initial values.
*/
if (STACK_IS_EMPTY()) {
parenSP = -1;
}
}
void
gfxScriptItemizer::fixup(PRInt32 scriptCode)
{
PRInt32 fixupSP = DEC(parenSP, fixupCount);
while (fixupCount-- > 0) {
fixupSP = INC1(fixupSP);
parenStack[fixupSP].scriptCode = scriptCode;
}
}
static inline bool
SameScript(PRInt32 runScript, PRInt32 currCharScript)
{
return runScript <= MOZ_SCRIPT_INHERITED ||
currCharScript <= MOZ_SCRIPT_INHERITED ||
currCharScript == runScript;
}
// Return whether the char has a mirrored-pair counterpart.
// NOTE that this depends on the implementation of nsCharProps records in
// nsUnicodeProperties, and may need to be updated if those structures change
static inline bool
HasMirroredChar(PRUint32 aCh)
{
return GetCharProps1(aCh).mMirrorOffsetIndex != 0;
}
gfxScriptItemizer::gfxScriptItemizer(const PRUnichar *src, PRUint32 length)
: textPtr(src), textLength(length)
{
reset();
}
void
gfxScriptItemizer::SetText(const PRUnichar *src, PRUint32 length)
{
textPtr = src;
textLength = length;
reset();
}
bool
gfxScriptItemizer::Next(PRUint32& aRunStart, PRUint32& aRunLimit,
PRInt32& aRunScript)
{
/* if we've fallen off the end of the text, we're done */
if (scriptLimit >= textLength) {
return false;
}
SYNC_FIXUP();
scriptCode = MOZ_SCRIPT_COMMON;
for (scriptStart = scriptLimit; scriptLimit < textLength; scriptLimit += 1) {
PRUint32 ch;
PRInt32 sc;
PRUint32 startOfChar = scriptLimit;
ch = textPtr[scriptLimit];
/* decode UTF-16 (may be surrogate pair) */
if (NS_IS_HIGH_SURROGATE(ch) && scriptLimit < textLength - 1) {
PRUint32 low = textPtr[scriptLimit + 1];
if (NS_IS_LOW_SURROGATE(low)) {
ch = SURROGATE_TO_UCS4(ch, low);
scriptLimit += 1;
}
}
// Get the nsCharProps2 record for the current character,
// so we can read the script and (if needed) the gen category
// without needing to do two multi-level lookups.
// NOTE that this means we're relying on an implementation detail
// of the nsUnicodeProperties tables, and might have to revise this
// if the nsCharProps records used there are modified in future.
const nsCharProps2& charProps = GetCharProps2(ch);
// Initialize gc to UNASSIGNED; we'll only set it to the true GC
// if the character has script=COMMON, otherwise we don't care.
PRUint8 gc = HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED;
sc = charProps.mScriptCode;
if (sc == MOZ_SCRIPT_COMMON) {
/*
* Paired character handling:
*
* if it's an open character, push it onto the stack.
* if it's a close character, find the matching open on the
* stack, and use that script code. Any non-matching open
* characters above it on the stack will be popped.
*
* We only do this if the script is COMMON; for chars with
* specific script assignments, we just use them as-is.
*/
gc = charProps.mCategory;
if (gc == HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION) {
PRUint32 endPairChar = mozilla::unicode::GetMirroredChar(ch);
if (endPairChar != ch) {
push(endPairChar, scriptCode);
}
} else if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
HasMirroredChar(ch))
{
while (STACK_IS_NOT_EMPTY() && TOP().endPairChar != ch) {
pop();
}
if (STACK_IS_NOT_EMPTY()) {
sc = TOP().scriptCode;
}
}
}
if (SameScript(scriptCode, sc)) {
if (scriptCode <= MOZ_SCRIPT_INHERITED &&
sc > MOZ_SCRIPT_INHERITED)
{
scriptCode = sc;
fixup(scriptCode);
}
/*
* if this character is a close paired character,
* pop the matching open character from the stack
*/
if (gc == HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION &&
HasMirroredChar(ch)) {
pop();
}
} else {
/*
* reset scriptLimit in case it was advanced during reading a
* multiple-code-unit character
*/
scriptLimit = startOfChar;
break;
}
}
aRunStart = scriptStart;
aRunLimit = scriptLimit;
aRunScript = scriptCode;
return true;
}