gecko-dev/intl/icu/source/i18n/rbt.cpp
Jeff Walden 013fc50cd5 Bug 924839 - Update our embedded ICU to 52.1, plus a very few local patches. r=lots of people, see subsequent lines in this commit message for the original subcomponents (merged together for landing), and the original bug for the original patch divisions
Bug 924839 - Remove a patch already part of ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10283 but also note the relevant code was removed completely upstream.  r=glandium
* * *
Bug 924839 - Remove another patch already part of ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10290 for that.  r=gaston
* * *
Bug 924839 - Remove another patch already in ICU 52.1.  See http://bugs.icu-project.org/trac/ticket/10045 for more.  r=Norbert
* * *
Bug 924839 - Remove another patch already applied upstream.  See http://bugs.icu-project.org/trac/changeset/32937 for more.  r=gaston
* * *
Bug 924839 - Update the ICU update script to update to 52.1, *without* applying any of our local patches.  r=glandium
* * *
Bug 924839 - Make the ICU update script only do updating within intl/icu/source and nowhere else.  r=glandium
* * *
Bug 924839 - Implement the changes that would be made by |cd intl/; ./update-icu.sh http://source.icu-project.org/repos/icu/icu/tags/release-52-1/;|, run with the prior changesets' changes made (thus not applying any of our local patches).  These changes don't actually work without subsequent adjustments, but this provides a codebase upon which those adjustments can be made, for the purpose of generating local patches to be kept in intl/icu-patches/.  rs=the-usual-suspects
* * *
Bug 924839 - Update the bug 899722 local patch to make runConfigureICU not override CC/CXX on BSD systems.  r=gaston
* * *
Bug 924839 - Update the bug 724533 patch that makes ICU builds with MozillaBuild on Windows.  r=glandium
* * *
Bug 924839 - Import an upstream patch fixing the genrb tool to properly handle the -R (--omitCollationRules) option.  See http://bugs.icu-project.org/trac/ticket/10043 for the original bug report and a link to the ultimate upstream landing.  r=Norbert
* * *
Bug 924839 - Import the upstream fix for http://bugs.icu-project.org/trac/ticket/10486 so that ICU with -DU_USING_ICU_NAMESPACE=0 will compile on Windows.  r=Norbert
* * *
Bug 924839 - Adjust the update script to update ICU, then to apply all local patches (rather than skipping the second step).  Thus if the update script is properly run, now, the final result should be no changes at all to the tree.  NOT REVIEWED YET
* * *
Bug 924839 - Update jstests that depend on CLDR locale data to match CLDR 24.  r=Norbert
2013-11-12 16:23:48 -08:00

299 lines
10 KiB
C++

/*
**********************************************************************
* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation.
**********************************************************************
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/rep.h"
#include "unicode/uniset.h"
#include "rbt_pars.h"
#include "rbt_data.h"
#include "rbt_rule.h"
#include "rbt.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedTransliterator)
// Mutex taken by handleTransliterate() around the rule-application loop when
// the transliterator does not own its rule data (isDataOwned == FALSE).
static UMutex transliteratorDataMutex = U_MUTEX_INITIALIZER;
// Address of the text for which transliteratorDataMutex was acquired, or NULL.
// handleTransliterate() sets it right after taking the mutex and resets it to
// NULL just before releasing it; on entry it compares the incoming text against
// this pointer (inside umtx_lock(NULL)/umtx_unlock(NULL)) so that a recursive
// entry on the same text does not try to take the mutex a second time.
static Replaceable *gLockedText = NULL;
void RuleBasedTransliterator::_construct(const UnicodeString& rules,
UTransDirection direction,
UParseError& parseError,
UErrorCode& status) {
fData = 0;
isDataOwned = TRUE;
if (U_FAILURE(status)) {
return;
}
TransliteratorParser parser(status);
parser.parse(rules, direction, parseError, status);
if (U_FAILURE(status)) {
return;
}
if (parser.idBlockVector.size() != 0 ||
parser.compoundFilter != NULL ||
parser.dataVector.size() == 0) {
status = U_INVALID_RBT_SYNTAX; // ::ID blocks disallowed in RBT
return;
}
fData = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
}
/**
 * Builds a transliterator by parsing a rule string.
 *
 * @param id            the ID for the transliterator (forwarded to the
 *                      Transliterator base class).
 * @param rules         the rule source to parse.
 * @param direction     direction passed through to the rule parser.
 * @param adoptedFilter the filter for this transliterator (forwarded to the
 *                      Transliterator base class).
 * @param parseError    struct to receive information on the position of an
 *                      error, if one is encountered.
 * @param status        output parameter set to the success/failure code.
 *                      Failures are reported here, not by throwing; after a
 *                      failure the object holds no rule data.
 */
RuleBasedTransliterator::RuleBasedTransliterator(
        const UnicodeString& id,
        const UnicodeString& rules,
        UTransDirection direction,
        UnicodeFilter* adoptedFilter,
        UParseError& parseError,
        UErrorCode& status)
    : Transliterator(id, adoptedFilter)
{
    // All parsing and member initialisation lives in the shared helper.
    _construct(rules, direction, parseError, status);
}
/**
* Constructs a new transliterator from the given rules.
* @param id the id for the transliterator.
* @param rules rules, separated by ';'
* @param direction either FORWARD or REVERSE.
* @param adoptedFilter the filter for this transliterator.
* @param status Output param set to success/failure code.
* @exception IllegalArgumentException if rules are malformed
* or direction is invalid.
*/
/*RuleBasedTransliterator::RuleBasedTransliterator(
const UnicodeString& id,
const UnicodeString& rules,
UTransDirection direction,
UnicodeFilter* adoptedFilter,
UErrorCode& status) :
Transliterator(id, adoptedFilter) {
UParseError parseError;
_construct(rules, direction,parseError, status);
}*/
/**
* Convenience constructor with no filter.
*/
/*RuleBasedTransliterator::RuleBasedTransliterator(
const UnicodeString& id,
const UnicodeString& rules,
UTransDirection direction,
UErrorCode& status) :
Transliterator(id, 0) {
UParseError parseError;
_construct(rules, direction,parseError, status);
}*/
/**
* Convenience constructor with no filter and FORWARD direction.
*/
/*RuleBasedTransliterator::RuleBasedTransliterator(
const UnicodeString& id,
const UnicodeString& rules,
UErrorCode& status) :
Transliterator(id, 0) {
UParseError parseError;
_construct(rules, UTRANS_FORWARD, parseError, status);
}*/
/**
* Convenience constructor with FORWARD direction.
*/
/*RuleBasedTransliterator::RuleBasedTransliterator(
const UnicodeString& id,
const UnicodeString& rules,
UnicodeFilter* adoptedFilter,
UErrorCode& status) :
Transliterator(id, adoptedFilter) {
UParseError parseError;
_construct(rules, UTRANS_FORWARD,parseError, status);
}*/
/**
 * Builds a transliterator on top of rule data it does not own.
 *
 * isDataOwned is set to FALSE, so the destructor will not delete `theData`.
 *
 * @param id            the ID for the transliterator.
 * @param theData       rule data to use; it is dereferenced here, so it must
 *                      not be NULL.
 * @param adoptedFilter the filter, forwarded to the Transliterator base class.
 */
RuleBasedTransliterator::RuleBasedTransliterator(
        const UnicodeString& id,
        const TransliterationRuleData* theData,
        UnicodeFilter* adoptedFilter)
    : Transliterator(id, adoptedFilter),
      // The member is a non-const pointer; strip constness to store it.
      fData(const_cast<TransliterationRuleData*>(theData)),
      isDataOwned(FALSE)
{
    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
}
/**
 * Internal constructor: wraps existing rule data, with no filter.
 *
 * @param id            the ID for the transliterator.
 * @param theData       rule data to use; it is dereferenced here, so it must
 *                      not be NULL.
 * @param isDataAdopted stored as isDataOwned; when TRUE the destructor
 *                      deletes `theData`.
 */
RuleBasedTransliterator::RuleBasedTransliterator(
        const UnicodeString& id,
        TransliterationRuleData* theData,
        UBool isDataAdopted)
    : Transliterator(id, NULL),
      fData(theData),
      isDataOwned(isDataAdopted)
{
    setMaximumContextLength(fData->ruleSet.getMaximumContextLength());
}
/**
 * Copy constructor.
 *
 * Non-owned rule data is shared with `other`; owned rule data is deep-copied
 * so that the two objects never delete the same TransliterationRuleData.
 * If `other` owns its data but has none (for example because its rule
 * parsing failed), the copy likewise ends up with no data instead of
 * dereferencing a null pointer.
 */
RuleBasedTransliterator::RuleBasedTransliterator(
        const RuleBasedTransliterator& other) :
    Transliterator(other), fData(other.fData),
    isDataOwned(other.isDataOwned) {

    // The data object may or may not be owned.  If it is not owned we
    // share it; it is invariant.  If it is owned, it's still
    // invariant, but we need to copy it to prevent double-deletion.
    // If this becomes a performance issue (if people do a lot of RBT
    // copying -- unlikely) we can reference count the data object.

    // Only do a deep copy if this is owned data, that is, data that
    // will be later deleted.  System transliterators contain
    // non-owned data.
    //
    // other.fData can be NULL when the data is owned: _construct() leaves
    // it that way on any failure.  There is nothing to copy in that case;
    // fData is already NULL from the initializer list.
    if (isDataOwned && other.fData != NULL) {
        fData = new TransliterationRuleData(*other.fData);
    }
}
/**
 * Destructor. Releases the rule data only when this object owns it.
 */
RuleBasedTransliterator::~RuleBasedTransliterator() {
    if (!isDataOwned) {
        // Rule data that is not owned must not be deleted here.
        return;
    }
    delete fData;
}
/**
 * Returns a heap-allocated copy of this transliterator, made with the copy
 * constructor. The declared return type is the base class on purpose:
 * covariant return is NOT ALLOWED here (for portability).
 */
Transliterator*
RuleBasedTransliterator::clone(void) const {
    RuleBasedTransliterator* duplicate = new RuleBasedTransliterator(*this);
    return duplicate;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Repeatedly applies the rule set to `text` between index.start and
 * index.limit until the rule set reports no further progress, start reaches
 * limit, or the iteration cap is hit.
 *
 * @param text          the text being transliterated (modified in place).
 * @param index         the positions to work within; updated by the rule set.
 * @param isIncremental passed through to the rule set.
 */
void
RuleBasedTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index,
                                             UBool isIncremental) const {
    /* We keep contextStart and contextLimit fixed the entire time,
     * relative to the text -- contextLimit may move numerically if
     * text is inserted or removed.  The start offset moves toward
     * limit, with replacements happening under it.
     *
     * Example: rules 1. ab>x|y
     *                2. yc>z
     *
     * |eabcd   begin - no match, advance start
     * e|abcd   match rule 1 - change text & adjust start
     * ex|ycd   match rule 2 - change text & adjust start
     * exz|d    no match, advance start
     * exzd|    done
     */

    /* A rule like
     *   a>b|a
     * creates an infinite loop. To prevent that, we put an arbitrary
     * limit on the number of iterations that we take, one that is
     * high enough that any reasonable rules are ok, but low enough to
     * prevent a server from hanging.  The limit is 16 times the
     * number of characters n, unless n is so large that 16n exceeds a
     * uint32_t.
     */
    uint32_t loopCount = 0;
    uint32_t loopLimit = index.limit - index.start;
    if (loopLimit >= 0x10000000) {
        loopLimit = 0xFFFFFFFF;
    } else {
        loopLimit <<= 4;
    }

    // Transliterator locking.  Rule-based Transliterators are not thread safe; concurrent
    //   operations must be prevented.
    // A Complication: compound transliterators can result in recursive entries to this
    //   function, sometimes with different "This" objects, always with the same text.
    //   Double-locking must be prevented in these cases.
    //
    // If the transliteration data is exclusively owned by this transliterator object,
    //   we don't need to do any locking.  No sharing between transliterators is possible,
    //   so no concurrent access from multiple threads is possible.
    //
    // Two locks are involved:
    //   - transliteratorDataMutex guards the shared rule data for the whole loop.
    //   - the global ICU mutex (umtx_lock(NULL)) guards gLockedText itself.  Every
    //     read AND write of gLockedText happens with the global mutex held, so a
    //     thread checking gLockedText never races with the thread that is setting
    //     or clearing it.
    //   The global mutex is only ever taken after (never before) transliteratorDataMutex
    //   and is held just long enough to touch the pointer.

    UBool    lockedMutexAtThisLevel = FALSE;
    if (isDataOwned == FALSE) {
        // Test whether this request is operating on the same text string as
        //   some other transliteration that is still in progress and holding the
        //   transliteration mutex.  If so, do not lock the transliteration
        //   mutex again.
        // TODO(andy): Need a better scheme for handling this.
        UBool needToLock;
        umtx_lock(NULL);
        needToLock = (&text != gLockedText);
        umtx_unlock(NULL);
        if (needToLock) {
            umtx_lock(&transliteratorDataMutex);  // may wait for another transliteration
            umtx_lock(NULL);
            gLockedText = &text;
            umtx_unlock(NULL);
            lockedMutexAtThisLevel = TRUE;
        }
    }

    // Check to make sure we don't dereference a null pointer.
    if (fData != NULL) {
        while (index.start < index.limit &&
               loopCount <= loopLimit &&
               fData->ruleSet.transliterate(text, index, isIncremental)) {
            ++loopCount;
        }
    }

    if (lockedMutexAtThisLevel) {
        // Clear the marker under the global mutex before letting the next
        // transliteration in.
        umtx_lock(NULL);
        gLockedText = NULL;
        umtx_unlock(NULL);
        umtx_unlock(&transliteratorDataMutex);
    }
}
/**
 * Produces the rule source for this transliterator by delegating to the
 * rule set held in fData.
 *
 * @param rulesSource       string handed to the rule set to receive the rules.
 * @param escapeUnprintable passed through to the rule set.
 * @return the reference returned by the rule set's toRules().
 */
UnicodeString&
RuleBasedTransliterator::toRules(UnicodeString& rulesSource,
                                 UBool escapeUnprintable) const {
    UnicodeString& rendered = fData->ruleSet.toRules(rulesSource, escapeUnprintable);
    return rendered;
}
/**
 * Implement Transliterator framework.
 *
 * Asks the rule set to fill `result`, passing FALSE as the flag that
 * getTargetSet() passes as TRUE. The rule set's return value is not used.
 */
void
RuleBasedTransliterator::handleGetSourceSet(UnicodeSet& result) const {
    const UBool wantTargetSet = FALSE;
    fData->ruleSet.getSourceTargetSet(result, wantTargetSet);
}
/**
 * Override Transliterator framework.
 *
 * Asks the rule set to fill `result`, passing TRUE as the flag that
 * handleGetSourceSet() passes as FALSE.
 *
 * @return the reference returned by the rule set's getSourceTargetSet().
 */
UnicodeSet&
RuleBasedTransliterator::getTargetSet(UnicodeSet& result) const {
    const UBool wantTargetSet = TRUE;
    UnicodeSet& filled = fData->ruleSet.getSourceTargetSet(result, wantTargetSet);
    return filled;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_TRANSLITERATION */