Bug 1714933 - Part 2: Update in-tree ICU to release 69.1. r=tcampbell

Update to ICU 69.1 by running "update-icu.sh" with "maint/maint-69" as the target.

Differential Revision: https://phabricator.services.mozilla.com/D116968
This commit is contained in:
André Bargull 2021-06-15 07:53:58 +00:00
parent ffd5b23c02
commit e3d2b6377c
856 changed files with 45827 additions and 27925 deletions

View File

@ -1,5 +1,5 @@
commit 6fe67a037c07135b6b3edafca18326a824e338c3
Author: Jeff Genovy <29107334+jefgen@users.noreply.github.com>
Date: Wed Jan 27 12:48:40 2021 -0800
commit 0e7b4428866f3133b4abba2d932ee3faa708db1d
Author: Long Nguyen <nguyen.long.908132@gmail.com>
Date: Thu Mar 25 22:26:56 2021 +0700
ICU-21473 Disable LayoutEx in GHA CI script for ICU 68, was disabled by default in ICU 69
ICU-21560 mingw: Remove version numbers from link flags

View File

@ -1677,7 +1677,7 @@ COMPACT_LATEX = NO
# The default value is: a4.
# This tag requires that the tag GENERATE_LATEX is set to YES.
PAPER_TYPE = a4wide
PAPER_TYPE = a4
# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
# that should be included in the LaTeX output. The package can be specified just

View File

@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
return write(i);
} else {
char intBytes[5];
return write(intBytes, internalEncodeDelta(i, intBytes));
}
char intBytes[5];
int32_t length;
}
int32_t
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
U_ASSERT(i>=0);
if(i<=BytesTrie::kMaxOneByteDelta) {
intBytes[0]=(char)i;
return 1;
}
int32_t length=1;
if(i<=BytesTrie::kMaxTwoByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
length=1;
} else {
if(i<=BytesTrie::kMaxThreeByteDelta) {
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
length=2;
} else {
if(i<=0xffffff) {
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
length=3;
} else {
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
intBytes[1]=(char)(i>>24);
length=4;
length=2;
}
intBytes[1]=(char)(i>>16);
intBytes[length++]=(char)(i>>16);
}
intBytes[1]=(char)(i>>8);
intBytes[length++]=(char)(i>>8);
}
intBytes[length++]=(char)i;
return write(intBytes, length);
return length;
}
U_NAMESPACE_END

View File

@ -14,6 +14,8 @@
* created by: Markus W. Scherer
*/
#include <cstdlib>
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "charstr.h"
@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
return *this;
}
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
if (number < 0) {
this->append('-', status);
if (U_FAILURE(status)) {
return *this;
}
}
if (number == 0) {
this->append('0', status);
return *this;
}
int32_t numLen = 0;
while (number != 0) {
int32_t residue = number % 10;
number /= 10;
this->append(std::abs(residue) + '0', status);
numLen++;
if (U_FAILURE(status)) {
return *this;
}
}
int32_t start = this->length() - numLen, end = this->length() - 1;
while(start < end) {
std::swap(this->data()[start++], this->data()[end--]);
}
return *this;
}
char *CharString::getAppendBuffer(int32_t minCapacity,
int32_t desiredCapacityHint,
int32_t &resultCapacity,

View File

@ -127,6 +127,9 @@ public:
return append(s.data(), s.length(), errorCode);
}
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
CharString &appendNumber(int32_t number, UErrorCode &status);
/**
* Returns a writable buffer for appending and writes the buffer's capacity to
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().

View File

@ -31,14 +31,63 @@
#include <stddef.h>
#include <string.h>
#include "unicode/localpointer.h"
#include "uassert.h"
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
#include <stdio.h>
#endif
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
// uprv_memcpy and uprv_memmove
#if defined(__clang__)
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("clang diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("clang diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#elif defined(__GNUC__)
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("GCC diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
/* Suppress warnings about addresses that will never be NULL */ \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
_Pragma("GCC diagnostic pop") \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#else
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
U_ASSERT(dst != NULL); \
U_ASSERT(src != NULL); \
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
} UPRV_BLOCK_MACRO_END
#endif
/**
* \def UPRV_LENGTHOF

View File

@ -58,7 +58,7 @@
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
</ClCompile>
<Link>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
</Link>
@ -70,7 +70,7 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
</ClCompile>
<Link>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
</Link>

View File

@ -125,7 +125,7 @@
<Link>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<AdditionalDependencies>vccorlib.lib;msvcrt.lib;vcruntime.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
</Link>
@ -148,7 +148,7 @@
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>vccorlibd.lib;msvcrtd.lib;vcruntimed.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69d.dll</OutputFile>
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
</Link>

View File

@ -265,13 +265,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -503,13 +499,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -699,13 +691,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
@ -908,13 +896,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
goto foundBest;
}
do {
int32_t wordsMatched = 1;
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
if (wordsMatched < 2) {
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
wordsMatched = 2;
}
// Followed by another dictionary word; mark first word as a good candidate
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
// If we're already at the end of the range, we're done
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {

View File

@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
}
Edits &Edits::operator=(const Edits &other) {
if (this == &other) { return *this; } // self-assignment: no-op
length = other.length;
delta = other.delta;
numChanges = other.numChanges;

View File

@ -20,6 +20,7 @@
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
#include "uvector.h"
#include "cmemory.h"
#include "umutex.h"
U_NAMESPACE_BEGIN
@ -139,13 +140,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
public:
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
virtual ~SimpleFilteredSentenceBreakData();
SimpleFilteredSentenceBreakData *incr() {
umtx_atomic_inc(&refcount);
return this;
}
SimpleFilteredSentenceBreakData *decr() {
if(umtx_atomic_dec(&refcount) <= 0) {
delete this;
}
return 0;
}
virtual ~SimpleFilteredSentenceBreakData();
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
int32_t refcount;
bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
private:
// These tries own their data arrays.
// They are shared and must therefore not be modified.
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
u_atomic_int32_t refcount;
};
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
@ -244,7 +262,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
fDelegate(adopt)
{
// all set..
if (fData == nullptr) {
delete forwards;
delete backwards;
if (U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
}
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
@ -261,59 +285,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t bestValue = -1;
// loops while 'n' points to an exception.
utext_setNativeIndex(fText.getAlias(), n); // from n..
fData->fBackwardsTrie->reset();
UChar32 uch;
//if(debug2) u_printf(" n@ %d\n", n);
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
// TODO only do this the 1st time?
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
} else {
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
uch = utext_next32(fText.getAlias());
utext_next32(fText.getAlias());
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
}
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = fData->fBackwardsTrie->getValue();
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
{
// Do not modify the shared trie!
UCharsTrie iter(fData->getBackwardsTrie());
UChar32 uch;
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
UStringTrieResult r = iter.nextForCodePoint(uch);
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
bestPosn = utext_getNativeIndex(fText.getAlias());
bestValue = iter.getValue();
}
if(!USTRINGTRIE_HAS_NEXT(r)) {
break;
}
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
}
}
if(USTRINGTRIE_MATCHES(r)) { // exact match?
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
bestValue = fData->fBackwardsTrie->getValue();
bestPosn = utext_getNativeIndex(fText.getAlias());
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
}
//if(bestValue >= 0) {
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//}
if(bestPosn>=0) {
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
//int32_t bestValue = fBackwardsTrie->getValue();
//int32_t bestValue = iter.getValue();
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
if(bestValue == kMATCH) { // exact match!
//if(debug2) u_printf(" exact backward match\n");
return kExceptionHere; // See if the next is another exception.
} else if(bestValue == kPARTIAL
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
&& fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
//if(debug2) u_printf(" partial backward match\n");
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
// to see if it matches something going forward.
fData->fForwardsPartialTrie->reset();
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
// Do not modify the shared trie!
UCharsTrie iter(fData->getForwardsPartialTrie());
UChar32 uch;
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
}
if(USTRINGTRIE_MATCHES(rfwd)) {
@ -339,7 +366,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
if(n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@ -369,7 +396,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
int32_t
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
if(n == 0 || n == UBRK_DONE || // at end or
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
return n;
}
// OK, do we need to break here?
@ -420,7 +447,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
UErrorCode status = U_ZERO_ERROR;
resetState(status);

View File

@ -85,16 +85,22 @@ public:
inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);
inline void* get(const UnicodeString& key) const;
inline int32_t geti(const UnicodeString& key) const;
inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;
inline void* remove(const UnicodeString& key);
inline int32_t removei(const UnicodeString& key);
inline void removeAll(void);
inline UBool containsKey(const UnicodeString& key) const;
inline const UHashElement* find(const UnicodeString& key) const;
/**
@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
return uhash_puti(hash, new UnicodeString(key), value, &status);
}
inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
UErrorCode& status) {
return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status);
}
inline void* Hashtable::get(const UnicodeString& key) const {
return uhash_get(hash, &key);
}
@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
return uhash_geti(hash, &key);
}
inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
return uhash_getiAndFound(hash, &key, &found);
}
inline void* Hashtable::remove(const UnicodeString& key) {
return uhash_remove(hash, &key);
}
@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
return uhash_removei(hash, &key);
}
inline UBool Hashtable::containsKey(const UnicodeString& key) const {
return uhash_containsKey(hash, &key);
}
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
return uhash_find(hash, &key);
}

View File

@ -345,9 +345,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return suppLength; }
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
if (index == 0) {
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
if (U_SUCCESS(errorCode)) {
supportedLSRs[suppLength] = &lsr;
supportedIndexes[suppLength++] = i;
@ -685,12 +684,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
int32_t bestSupportedLsrIndex = -1;
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
// Quick check for exact maximized LSR.
// Returns suppIndex+1 where 0 means not found.
if (supportedLsrToIndex != nullptr) {
desiredLSR.setHashCode();
int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
if (index != 0) {
int32_t suppIndex = index - 1;
UBool found = false;
int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
if (found) {
if (remainingIter != nullptr) {
remainingIter->rememberCurrent(desiredIndex, errorCode);
}

View File

@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
if (U_FAILURE(errorCode)) { return false; }
}
LocalPointer<Locale> clone;
int32_t index = uhash_geti(map, &locale);
if (index != 0) {
UBool found = false;
int32_t index = uhash_getiAndFound(map, &locale, &found);
if (found) {
// Duplicate: Remove the old item and append it anew.
LocaleAndWeight &lw = list->array[index - 1];
LocaleAndWeight &lw = list->array[index];
clone.adoptInstead(lw.locale);
lw.locale = nullptr;
lw.weight = 0;
++numRemoved;
}
if (weight <= 0) { // do not add q=0
if (index != 0) {
if (found) {
// Not strictly necessary but cleaner.
uhash_removei(map, &locale);
}
@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
return false;
}
}
uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
if (U_FAILURE(errorCode)) { return false; }
LocaleAndWeight &lw = list->array[listLength];
lw.locale = clone.orphan();

View File

@ -698,7 +698,7 @@ uloc_getDisplayName(const char *locale,
} /* end switch */
if (len>0) {
/* we addeed a component, so add separator and write it if there's room. */
/* we added a component, so add separator and write it if there's room. */
if(len+sepLen<=cap) {
const UChar * plimit = p + len;
for (; p < plimit; p++) {

View File

@ -254,7 +254,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
Locale::~Locale()
{
if (baseName != fullName) {
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@ -466,17 +466,21 @@ Locale& Locale::operator=(const Locale& other) {
}
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
if (baseName != fullName) uprv_free(baseName);
if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
if (fullName != fullNameBuffer) uprv_free(fullName);
if (other.fullName == other.fullNameBuffer) {
if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
}
if (other.fullName == other.fullNameBuffer) {
fullName = fullNameBuffer;
} else {
fullName = other.fullName;
}
if (other.baseName == other.fullName) {
if (other.baseName == other.fullNameBuffer) {
baseName = fullNameBuffer;
} else if (other.baseName == other.fullName) {
baseName = fullName;
} else {
baseName = other.baseName;
@ -524,7 +528,7 @@ static const char* const KNOWN_CANONICALIZED[] = {
"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
"nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
"nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
@ -627,6 +631,17 @@ private:
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
// Read the subdivisionAlias data from alias to
// strings+types+replacementIndexes
// Allocate length items for types, to store the type field.
// Allocate length items for replacementIndexes,
// to store the index in the strings for the replacement variant.
void readSubdivisionAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
};
/**
@ -647,6 +662,7 @@ public:
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
const CharStringMap& subdivisionMap() const { return subdivision; }
static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
@ -658,11 +674,13 @@ private:
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
subdivision(std::move(subdivisionMap)),
strings(strings) {
}
@ -676,6 +694,7 @@ private:
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
CharStringMap subdivision;
CharString* strings;
friend class AliasDataBuilder;
@ -866,6 +885,34 @@ AliasDataBuilder::readVariantAlias(
status);
}
/**
* Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
* Allocate length items for types, to store the type field. Allocate length
* items for replacementIndexes, to store the index in the strings for the
* replacement regions.
*/
void
AliasDataBuilder::readSubdivisionAlias(
UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length,
UErrorCode &status)
{
return readAlias(
alias, strings, types, replacementIndexes, length,
#if U_DEBUG
[](const char* type) {
U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
},
#else
[](const char*) {},
#endif
[](const UnicodeString&) { },
status);
}
/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains alias of language, country, script and variants.
@ -905,12 +952,14 @@ AliasDataBuilder::build(UErrorCode &status) {
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
LocalUResourceBundlePointer subdivisionAlias(
ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
variantLength = 0;
variantLength = 0, subdivisionLength = 0;
// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
@ -955,6 +1004,16 @@ AliasDataBuilder::build(UErrorCode &status) {
variantReplacementIndexes,
variantLength, status);
// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
// and strings
LocalMemory<const char*> subdivisionTypes;
LocalMemory<int32_t> subdivisionReplacementIndexes;
readSubdivisionAlias(subdivisionAlias.getAlias(),
&strings,
subdivisionTypes,
subdivisionReplacementIndexes,
subdivisionLength, status);
if (U_FAILURE(status)) {
return nullptr;
}
@ -994,6 +1053,14 @@ AliasDataBuilder::build(UErrorCode &status) {
status);
}
// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
CharStringMap subdivisionMap(2, status);
for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
subdivisionMap.put(subdivisionTypes[i],
strings.get(subdivisionReplacementIndexes[i]),
status);
}
if (U_FAILURE(status)) {
return nullptr;
}
@ -1004,6 +1071,7 @@ AliasDataBuilder::build(UErrorCode &status) {
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
std::move(subdivisionMap),
strings.orphanCharStrings());
if (data == nullptr) {
@ -1105,6 +1173,14 @@ private:
// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);
// Replace by using subdivisionAlias.
bool replaceSubdivision(StringPiece subdivision,
CharString& output, UErrorCode& status);
// Replace transformed extensions.
bool replaceTransformedExtensions(
CharString& transformedExtensions, CharString& output, UErrorCode& status);
};
CharString&
@ -1294,7 +1370,6 @@ AliasReplacer::replaceLanguage(
}
}
if (replacedExtensions != nullptr) {
// TODO(ICU-21292)
// DO NOTHING
// UTS35 does not specifiy what should we do if we have extensions in the
// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
@ -1435,6 +1510,110 @@ AliasReplacer::replaceVariant(UErrorCode& status)
return false;
}
bool
AliasReplacer::replaceSubdivision(
StringPiece subdivision, CharString& output, UErrorCode& status)
{
if (U_FAILURE(status)) {
return false;
}
const char *replacement = data->subdivisionMap().get(subdivision.data());
if (replacement != nullptr) {
const char* firstSpace = uprv_strchr(replacement, ' ');
// Found replacement data for this subdivision.
size_t len = (firstSpace != nullptr) ?
(firstSpace - replacement) : uprv_strlen(replacement);
if (2 <= len && len <= 8) {
output.append(replacement, (int32_t)len, status);
if (2 == len) {
// Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
output.append("zzzz", 4, status);
}
}
return true;
}
return false;
}
bool
AliasReplacer::replaceTransformedExtensions(
CharString& transformedExtensions, CharString& output, UErrorCode& status)
{
// The content of the transformedExtensions will be modified in this
// function to NULL-terminating (tkey-tvalue) pairs.
if (U_FAILURE(status)) {
return false;
}
int32_t len = transformedExtensions.length();
const char* str = transformedExtensions.data();
const char* tkey = ultag_getTKeyStart(str);
int32_t tlangLen = (tkey == str) ? 0 :
((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
CharStringByteSink sink(&output);
if (tlangLen > 0) {
Locale tlang = LocaleBuilder()
.setLanguageTag(StringPiece(str, tlangLen))
.build(status);
tlang.canonicalize(status);
tlang.toLanguageTag(sink, status);
if (U_FAILURE(status)) {
return false;
}
T_CString_toLowerCase(output.data());
}
if (tkey != nullptr) {
// We need to sort the tfields by tkey
UVector tfields(status);
if (U_FAILURE(status)) {
return false;
}
do {
const char* tvalue = uprv_strchr(tkey, '-');
if (tvalue == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return false;
}
const char* nextTKey = ultag_getTKeyStart(tvalue);
if (nextTKey != nullptr) {
*((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue
}
tfields.insertElementAt((void*)tkey, tfields.size(), status);
if (U_FAILURE(status)) {
return false;
}
tkey = nextTKey;
} while (tkey != nullptr);
tfields.sort([](UElement e1, UElement e2) -> int8_t {
// uprv_strcmp return int and in some platform, such as arm64-v8a,
// it may return positive values > 127 which cause the casted value
// of int8_t negative.
int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
for (int32_t i = 0; i < tfields.size(); i++) {
if (output.length() > 0) {
output.append('-', status);
}
const char* tfield = (const char*) tfields.elementAt(i);
const char* tvalue = uprv_strchr(tfield, '-');
if (tvalue == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return false;
}
// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
*((char*)tvalue++) = '\0'; // NULL terminate tkey
output.append(tfield, status).append('-', status);
const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
}
}
if (U_FAILURE(status)) {
return false;
}
return true;
}
CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
@ -1453,8 +1632,12 @@ AliasReplacer::outputToString(
out.append(SEP_CHAR, status);
}
variants.sort([](UElement e1, UElement e2) -> int8_t {
return uprv_strcmp(
// uprv_strcmp return int and in some platform, such as arm64-v8a,
// it may return positive values > 127 which cause the casted value
// of int8_t negative.
int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
int32_t variantsStart = out.length();
for (int32_t i = 0; i < variants.size(); i++) {
@ -1497,7 +1680,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
region = nullptr;
}
const char* variantsStr = locale.getVariant();
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
@ -1516,8 +1698,12 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
// Sort the variants
variants.sort([](UElement e1, UElement e2) -> int8_t {
return uprv_strcmp(
// uprv_strcmp return int and in some platform, such as arm64-v8a,
// it may return positive values > 127 which cause the casted value
// of int8_t negative.
int res = uprv_strcmp(
(const char*)e1.pointer, (const char*)e2.pointer);
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
}, status);
// A changed count to assert when loop too many times.
@ -1561,11 +1747,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
if (U_FAILURE(status)) { return false; }
// Nothing changed and we know the order of the vaiants are not change
// because we have no variant or only one.
if (changed == 0 && variants.size() <= 1) {
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
if (U_FAILURE(status)) {
return false;
}
if (extensionsStr != nullptr) {
changed = 0;
Locale temp(locale);
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
if (U_SUCCESS(status) && !iter.isNull()) {
const char* key;
while ((key = iter->next(nullptr, status)) != nullptr) {
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
uprv_strcmp("t", key) == 0) {
CharString value;
CharStringByteSink valueSink(&value);
locale.getKeywordValue(key, valueSink, status);
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
continue;
}
CharString replacement;
if (uprv_strlen(key) == 2) {
if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
changed++;
temp.setKeywordValue(key, replacement.data(), status);
}
} else {
U_ASSERT(uprv_strcmp(key, "t") == 0);
if (replaceTransformedExtensions(value, replacement, status)) {
changed++;
temp.setKeywordValue(key, replacement.data(), status);
}
}
if (U_FAILURE(status)) {
return false;
}
}
}
}
if (changed != 0) {
extensionsStr = locale_getKeywordsStart(temp.getName());
}
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
@ -1573,8 +1800,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
}
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
U_ASSERT(changed == 0);
U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
@ -1636,7 +1861,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
{
fIsBogus = FALSE;
/* Free our current storage */
if (baseName != fullName) {
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@ -1672,6 +1897,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
U_ASSERT(baseName == nullptr);
/*Go to heap for the fullName if necessary*/
fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
if(fullName == 0) {
@ -1825,7 +2051,7 @@ Locale::hashCode() const
void
Locale::setToBogus() {
/* Free our current storage */
if(baseName != fullName) {
if((baseName != fullName) && (baseName != fullNameBuffer)) {
uprv_free(baseName);
}
baseName = NULL;
@ -2312,16 +2538,16 @@ public:
virtual const char* next(int32_t* resultLength, UErrorCode& status) {
const char* legacy_key = KeywordEnumeration::next(nullptr, status);
if (U_SUCCESS(status) && legacy_key != nullptr) {
while (U_SUCCESS(status) && legacy_key != nullptr) {
const char* key = uloc_toUnicodeLocaleKey(legacy_key);
if (key == nullptr) {
status = U_ILLEGAL_ARGUMENT_ERROR;
} else {
if (key != nullptr) {
if (resultLength != nullptr) {
*resultLength = static_cast<int32_t>(uprv_strlen(key));
}
return key;
}
// Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
legacy_key = KeywordEnumeration::next(nullptr, status);
}
if (resultLength != nullptr) *resultLength = 0;
return nullptr;
@ -2478,6 +2704,9 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
if (fullName != fullNameBuffer) {
// if full Name is already on the heap, need to free it.
uprv_free(fullName);
if (baseName == fullName) {
baseName = newFullName; // baseName should not point to freed memory.
}
}
fullName = newFullName;
status = U_ZERO_ERROR;

View File

@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() {
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
const char *name = locale.getName();
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
// Private use language tag x-subtag-subtag...
// Private use language tag x-subtag-subtag... which CLDR changes to
// und-x-subtag-subtag...
return LSR(name, "", "", LSR::EXPLICIT_LSR);
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),

View File

@ -38,7 +38,7 @@ public:
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const {
UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
dest.setToBogus();
return dest;
@ -64,13 +64,13 @@ public:
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
UErrorCode &errorCode) const U_OVERRIDE {
return normalizeSecondAndAppend(first, second, true, errorCode);
}
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
UErrorCode &errorCode) const {
UErrorCode &errorCode) const U_OVERRIDE {
return normalizeSecondAndAppend(first, second, false, errorCode);
}
UnicodeString &
@ -107,7 +107,7 @@ public:
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
virtual UBool
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
UChar buffer[4];
int32_t length;
const UChar *d=impl.getDecomposition(c, buffer, length);
@ -122,7 +122,7 @@ public:
return true;
}
virtual UBool
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
UChar buffer[30];
int32_t length;
const UChar *d=impl.getRawDecomposition(c, buffer, length);
@ -137,18 +137,18 @@ public:
return true;
}
virtual UChar32
composePair(UChar32 a, UChar32 b) const {
composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
return impl.composePair(a, b);
}
virtual uint8_t
getCombiningClass(UChar32 c) const {
getCombiningClass(UChar32 c) const U_OVERRIDE {
return impl.getCC(impl.getNorm16(c));
}
// quick checks
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return false;
}
@ -161,11 +161,11 @@ public:
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
}
virtual UNormalizationCheckResult
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
}
virtual int32_t
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return 0;
}
@ -194,27 +194,57 @@ public:
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.decompose(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
void
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
if (U_FAILURE(errorCode)) {
return;
}
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
edits->reset();
}
const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
sink.Flush();
}
virtual UBool
isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
if(U_FAILURE(errorCode)) {
return false;
}
const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
const uint8_t *sLimit = s + sp.length();
return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
return impl.decompose(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
}
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
return impl.hasDecompBoundaryBefore(c);
}
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
return impl.hasDecompBoundaryAfter(c);
}
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
return impl.isDecompInert(c);
}
};
class ComposeNormalizer2 : public Normalizer2WithImpl {
@ -321,24 +351,30 @@ public:
private:
virtual void
normalize(const UChar *src, const UChar *limit,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.makeFCD(src, limit, &buffer, errorCode);
}
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
virtual void
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
UnicodeString &safeMiddle,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
}
virtual const UChar *
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
return impl.makeFCD(src, limit, NULL, errorCode);
}
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
return impl.hasFCDBoundaryBefore(c);
}
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
return impl.hasFCDBoundaryAfter(c);
}
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
return impl.isFCDInert(c);
}
};
struct Norm2AllModes : public UMemory {

View File

@ -731,9 +731,131 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
}
// Dual functionality:
// sink != nullptr: normalize
// sink == nullptr: isNormalized/spanQuickCheckYes
const uint8_t *
Normalizer2Impl::decomposeUTF8(uint32_t options,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
U_ASSERT(limit != nullptr);
UnicodeString s16;
uint8_t minNoLead = leadByteForCP(minDecompNoCP);
const uint8_t *prevBoundary = src;
// only for quick check
uint8_t prevCC = 0;
for (;;) {
// Fast path: Scan over a sequence of characters below the minimum "no" code point,
// or with (decompYes && ccc==0) properties.
const uint8_t *fastStart = src;
const uint8_t *prevSrc;
uint16_t norm16 = 0;
for (;;) {
if (src == limit) {
if (prevBoundary != limit && sink != nullptr) {
ByteSinkUtil::appendUnchanged(prevBoundary, limit,
*sink, options, edits, errorCode);
}
return src;
}
if (*src < minNoLead) {
++src;
} else {
prevSrc = src;
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
if (!isMostDecompYesAndZeroCC(norm16)) {
break;
}
}
}
// isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo,
// and the current character at [prevSrc..src[ is not a common case with cc=0
// (MIN_NORMAL_MAYBE_YES or JAMO_VT).
// It could still be a maybeYes with cc=0.
if (prevSrc != fastStart) {
// The fast path looped over yes/0 characters before the current one.
if (sink != nullptr &&
!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
*sink, options, edits, errorCode)) {
break;
}
prevBoundary = prevSrc;
prevCC = 0;
}
// Medium-fast path: Quick check.
if (isMaybeOrNonZeroCC(norm16)) {
// Does not decompose.
uint8_t cc = getCCFromYesOrMaybe(norm16);
if (prevCC <= cc || cc == 0) {
prevCC = cc;
if (cc <= 1) {
if (sink != nullptr &&
!ByteSinkUtil::appendUnchanged(prevBoundary, src,
*sink, options, edits, errorCode)) {
break;
}
prevBoundary = src;
}
continue;
}
}
if (sink == nullptr) {
return prevBoundary; // quick check: "no" or cc out of order
}
// Slow path
// Decompose up to and including the current character.
if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) {
if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
*sink, options, edits, errorCode)) {
break;
}
prevBoundary = prevSrc;
}
ReorderingBuffer buffer(*this, s16, errorCode);
if (U_FAILURE(errorCode)) {
break;
}
decomposeShort(prevBoundary, src, STOP_AT_LIMIT, FALSE /* onlyContiguous */,
buffer, errorCode);
// Decompose until the next boundary.
if (buffer.getLastCC() > 1) {
src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, FALSE /* onlyContiguous */,
buffer, errorCode);
}
if (U_FAILURE(errorCode)) {
break;
}
if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
break;
}
// We already know there was a change if the original character decomposed;
// otherwise compare.
if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
*sink, options, edits, errorCode)) {
break;
}
} else {
if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(),
*sink, edits, errorCode)) {
break;
}
}
prevBoundary = src;
prevCC = 0;
}
return src;
}
const uint8_t *
Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
StopAt stopAt, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
if (U_FAILURE(errorCode)) {
return nullptr;
@ -746,21 +868,28 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
UChar32 c = U_SENTINEL;
if (norm16 >= limitNoNo) {
if (isMaybeOrNonZeroCC(norm16)) {
// No boundaries around this character.
// No comp boundaries around this character.
uint8_t cc = getCCFromYesOrMaybe(norm16);
if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
return prevSrc;
}
c = codePointFromValidUTF8(prevSrc, src);
if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) {
if (!buffer.append(c, cc, errorCode)) {
return nullptr;
}
if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) {
return src;
}
continue;
}
// Maps to an isCompYesAndZeroCC.
if (stopAtCompBoundary) {
if (stopAt != STOP_AT_LIMIT) {
return prevSrc;
}
c = codePointFromValidUTF8(prevSrc, src);
c = mapAlgorithmic(c, norm16);
norm16 = getRawNorm16(c);
} else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
} else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
return prevSrc;
}
// norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
@ -768,7 +897,8 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
// its norm16==INERT is normalization-inert,
// so it gets copied unchanged in the fast path,
// and we stop the slow path where invalid UTF-8 begins.
U_ASSERT(norm16 != INERT);
// c >= 0 is the result of an algorithmic mapping.
U_ASSERT(c >= 0 || norm16 != INERT);
if (norm16 < minYesNo) {
if (c < 0) {
c = codePointFromValidUTF8(prevSrc, src);
@ -798,11 +928,15 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
} else {
leadCC = 0;
}
if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
return prevSrc;
}
if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
return nullptr;
}
}
if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) ||
(stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) {
return src;
}
}
@ -1954,10 +2088,10 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
break;
}
// We know there is not a boundary here.
decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous,
buffer, errorCode);
// Decompose until the next boundary.
src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,
buffer, errorCode);
if (U_FAILURE(errorCode)) {
break;

View File

@ -491,6 +491,12 @@ public:
UnicodeString &safeMiddle,
ReorderingBuffer &buffer,
UErrorCode &errorCode) const;
/** sink==nullptr: isNormalized()/spanQuickCheckYes() */
const uint8_t *decomposeUTF8(uint32_t options,
const uint8_t *src, const uint8_t *limit,
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const;
UBool compose(const UChar *src, const UChar *limit,
UBool onlyContiguous,
UBool doCompose,
@ -649,6 +655,9 @@ private:
UChar32 minNeedDataCP,
ReorderingBuffer *buffer,
UErrorCode &errorCode) const;
enum StopAt { STOP_AT_LIMIT, STOP_AT_DECOMP_BOUNDARY, STOP_AT_COMP_BOUNDARY };
const UChar *decomposeShort(const UChar *src, const UChar *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
@ -656,7 +665,7 @@ private:
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
UBool stopAtCompBoundary, UBool onlyContiguous,
StopAt stopAt, UBool onlyContiguous,
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
static int32_t combine(const uint16_t *list, UChar32 trail);

View File

@ -24,7 +24,7 @@ class U_COMMON_API PluralMapBase : public UMemory {
public:
/**
* The names of all the plural categories. NONE is not an actual plural
* category, but rather represents the absense of a plural category.
* category, but rather represents the absence of a plural category.
*/
enum Category {
NONE = -1,

View File

@ -1139,7 +1139,7 @@ uprv_tzname(int n)
#endif
if (tzid != NULL && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
/* When TZ equals localtime on Solaris, check the /etc/localtime file. */
/* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
&& uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
) {
@ -1361,7 +1361,7 @@ uprv_pathIsAbsolute(const char *path)
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
(needed for some Darwin ICU build environments) */
#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
# endif

View File

@ -533,7 +533,7 @@ U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base);
* on the destination pointer and capacity cannot overflow.
*
* The pinned capacity must fulfill the following conditions (for positive capacities):
* - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.)
* - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
* - (dest + capacity) >= dest
* - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
*

View File

@ -812,7 +812,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
}
#endif
// handleNext alway sets the break tag value.
// handleNext always sets the break tag value.
// Set the default for it.
fRuleStatusIndex = 0;

View File

@ -258,7 +258,7 @@ void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode
previous(status);
} else {
// seek() leaves the BreakCache positioned at the preceding boundary
// if the requested position is between two bounaries.
// if the requested position is between two boundaries.
// current() pushes the BreakCache position out to the BreakIterator itself.
U_ASSERT(startPos > fTextIdx);
current();

View File

@ -284,7 +284,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
case doEndAssign:
{
// We have reached the end of an assignement statement.
// We have reached the end of an assignment statement.
// Current scan char is the ';' that terminates the assignment.
// Terminate expression, leaves expression parse tree rooted in TOS node.
@ -856,6 +856,10 @@ UChar32 RBBIRuleScanner::nextCharLL() {
return (UChar32)-1;
}
ch = fRB->fRules.char32At(fNextIndex);
if (U_IS_SURROGATE(ch)) {
error(U_ILLEGAL_CHAR_FOUND);
return U_SENTINEL;
}
fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
if (ch == chCR ||

View File

@ -151,7 +151,7 @@ void RBBITableBuilder::buildForwardTable() {
//
// calculate the functions nullable, firstpos, lastpos and followpos on
// nodes in the parse tree.
// See the alogrithm description in Aho.
// See the algorithm description in Aho.
// Understanding how this works by looking at the code alone will be
// nearly impossible.
//

View File

@ -274,8 +274,10 @@ public:
*
* @param key The key string of the enumeration-start resource.
* Empty if the enumeration starts at the top level of the bundle.
* @param value Call getArray() or getTable() as appropriate.
* Then reuse for output values from Array and Table getters.
* @param value Call getArray() or getTable() as appropriate. Then reuse for
* output values from Array and Table getters. Note: ResourceTable and
* ResourceArray instances must outlive the ResourceValue instance for
* ResourceTracer to be happy.
* @param noFallback true if the bundle has no parent;
* that is, its top-level table has the nofallback attribute,
* or it is the root bundle of a locale tree.

View File

@ -54,6 +54,9 @@ void ResourceTracer::traceOpen() const {
CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const {
if (fResB) {
// Note: if you get a segfault around here, check that ResourceTable and
// ResourceArray instances outlive ResourceValue instances referring to
// their contents:
output.append(fResB->fData->fPath, status);
output.append('/', status);
output.append(fResB->fData->fName, status);

View File

@ -82,7 +82,7 @@ public:
/**
* Add a listener to be notified when notifyChanged is called.
* The listener must not be null. AcceptsListener must return
* true for the listener. Attempts to concurrently
* true for the listener. Attempts to concurrently
* register the identical listener more than once will be
* silently ignored.
*/
@ -90,7 +90,7 @@ public:
/**
* Stop notifying this listener. The listener must
* not be null. Attemps to remove a listener that is
* not be null. Attempts to remove a listener that is
* not registered will be silently ignored.
*/
virtual void removeListener(const EventListener* l, UErrorCode& status);

View File

@ -174,6 +174,18 @@ ubrk_safeClone(
return (UBreakIterator *)newBI;
}
U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
if (U_FAILURE(*status)) {
return nullptr;
}
BreakIterator *newBI = ((BreakIterator *)bi)->clone();
if (newBI == nullptr) {
*status = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
return (UBreakIterator *)newBI;
}
U_CAPI void U_EXPORT2

View File

@ -681,7 +681,7 @@ ucase_isCaseSensitive(UChar32 c) {
* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
* - Given D = NFD(C), then it is not the case that:
* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
* (This third criterium does not add any characters to the list
* (This third criterion does not add any characters to the list
* for Unicode 3.2. Ignored.)
*
* D2. A character C is defined to be case-ignorable

View File

@ -194,7 +194,7 @@ u_isISOControl(UChar32 c) {
/* Some control characters that are used as space. */
#define IS_THAT_CONTROL_SPACE(c) \
(c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
(c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==0x85))
/* Java has decided that U+0085 New Line is not whitespace any more. */
#define IS_THAT_ASCII_CONTROL_SPACE(c) \
@ -677,14 +677,14 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
sa->add(sa->set, CR+1); /* range TAB..CR */
sa->add(sa->set, 0x1c);
sa->add(sa->set, 0x1f+1);
USET_ADD_CP_AND_NEXT(sa, NL);
USET_ADD_CP_AND_NEXT(sa, 0x85); // NEXT LINE (NEL)
/* add for u_isIDIgnorable() what was not added above */
sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
sa->add(sa->set, 0x7f); /* range DEL..NBSP-1, NBSP added below */
sa->add(sa->set, HAIRSP);
sa->add(sa->set, RLM+1);
sa->add(sa->set, INHSWAP);
sa->add(sa->set, NOMDIG+1);
sa->add(sa->set, 0x206a); // INHIBIT SYMMETRIC SWAPPING
sa->add(sa->set, 0x206f+1); // NOMINAL DIGIT SHAPES
USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
/* add no-break spaces for u_isWhitespace() what was not added above */
@ -693,23 +693,25 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
USET_ADD_CP_AND_NEXT(sa, NNBSP);
/* add for u_digit() */
sa->add(sa->set, U_a);
sa->add(sa->set, U_z+1);
sa->add(sa->set, U_A);
sa->add(sa->set, U_Z+1);
sa->add(sa->set, U_FW_a);
sa->add(sa->set, U_FW_z+1);
sa->add(sa->set, U_FW_A);
sa->add(sa->set, U_FW_Z+1);
sa->add(sa->set, u'a');
sa->add(sa->set, u'z'+1);
sa->add(sa->set, u'A');
sa->add(sa->set, u'Z'+1);
// fullwidth
sa->add(sa->set, u'');
sa->add(sa->set, u''+1);
sa->add(sa->set, u'');
sa->add(sa->set, u''+1);
/* add for u_isxdigit() */
sa->add(sa->set, U_f+1);
sa->add(sa->set, U_F+1);
sa->add(sa->set, U_FW_f+1);
sa->add(sa->set, U_FW_F+1);
sa->add(sa->set, u'f'+1);
sa->add(sa->set, u'F'+1);
// fullwidth
sa->add(sa->set, u''+1);
sa->add(sa->set, u''+1);
/* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
sa->add(sa->set, WJ); /* range WJ..NOMDIG */
sa->add(sa->set, 0x2060); /* range 2060..206f */
sa->add(sa->set, 0xfff0);
sa->add(sa->set, 0xfffb+1);
sa->add(sa->set, 0xe0000);

View File

@ -820,7 +820,7 @@ getKey_2022(char c,int32_t* key,int32_t* offset){
return INVALID_2022;
}
/*runs through a state machine to determine the escape sequence - codepage correspondance
/*runs through a state machine to determine the escape sequence - codepage correspondence
*/
static void
changeState_2022(UConverter* _this,
@ -1424,7 +1424,7 @@ toUnicodeCallback(UConverter *cnv,
* KSC5601 : alias to ibm-949 mapping table
* GB2312 : alias to ibm-1386 mapping table
* ISO-8859-1 : Algorithmic implemented as LATIN1 case
* ISO-8859-7 : alisas to ibm-9409 mapping table
* ISO-8859-7 : alias to ibm-9409 mapping table
*/
/* preference order of JP charsets */
@ -2324,7 +2324,7 @@ endloop:
/***************************************************************
* Rules for ISO-2022-KR encoding
* i) The KSC5601 designator sequence should appear only once in a file,
* at the begining of a line before any KSC5601 characters. This usually
* at the beginning of a line before any KSC5601 characters. This usually
* means that it appears by itself on the first line of the file
* ii) There are only 2 shifting sequences SO to shift into double byte mode
* and SI to shift into single byte mode

View File

@ -427,7 +427,7 @@ getAlgorithmicTypeFromName(const char *realName)
#define UCNV_CACHE_LOAD_FACTOR 2
/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
/* Will always be called with the cnvCacheMutex alrady being held */
/* Will always be called with the cnvCacheMutex already being held */
/* by the calling function. */
/* Stores the shared data in the SHARED_DATA_HASHTABLE
* @param data The shared data

View File

@ -321,7 +321,7 @@ UCNV_FROM_U_CALLBACK_ESCAPE (
case UCNV_PRV_ESCAPE_CSS2:
valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
/* Always add space character, becase the next character might be whitespace,
/* Always add space character, because the next character might be whitespace,
which would erroneously be considered the termination of the escape sequence. */
valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
break;

View File

@ -81,7 +81,7 @@
[G] D1 [D2]
That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2
data bytes. The maximum size of a LMBCS chjaracter is 3 bytes:
data bytes. The maximum size of a LMBCS character is 3 bytes:
*/
#define ULMBCS_CHARSIZE_MAX 3
/*
@ -164,7 +164,7 @@ beginning of internal 'system' range names: */
/* Then we needed a place to put all the other ansi control characters
that must be moved to different values because LMBCS reserves those
values for other purposes. To represent the control characters, we start
with a first byte of 0xF & add the control chaarcter value as the
with a first byte of 0xF & add the control character value as the
second byte */
#define ULMBCS_GRP_CTRL 0x0F

View File

@ -814,7 +814,7 @@ const UConverterSharedData _UTF7Data=
* the use of "~" in some servers as a home directory indicator.
*
* 5) UTF-7 permits multiple alternate forms to represent the same
* string; in particular, printable US-ASCII chararacters can be
* string; in particular, printable US-ASCII characters can be
* represented in encoded form.
*
* In modified UTF-7, printable US-ASCII characters except for "&"

View File

@ -992,7 +992,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
if (sourceChar == PNJ_TIPPI) {
/* Make sure Tippi is converterd to Bindi. */
/* Make sure Tippi is converted to Bindi. */
sourceChar = PNJ_BINDI;
} else if (sourceChar == PNJ_ADHAK) {
/* This is for consonant cluster handling. */
@ -1147,7 +1147,7 @@ static const uint16_t lookupTable[][2]={
/* is the code point valid in current script? */ \
if(sourceChar> ASCII_END && \
(validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
/* Vocallic RR is assigne in ISCII Telugu and Unicode */ \
/* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
targetUniChar!=VOCALLIC_RR){ \
targetUniChar=missingCharMarker; \
@ -1272,7 +1272,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCo
goto CALLBACK;
} else if (*contextCharToUnicode==ISCII_INV) {
if (sourceChar==ISCII_HALANT) {
targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
} else {
targetUniChar = ZWJ;
}

View File

@ -844,7 +844,7 @@ typedef struct {
#endif
// Comparason function used in quick sort.
// Comparison function used in quick sort.
static int U_CALLCONV currencyNameComparator(const void* a, const void* b) {
const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a;
const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b;
@ -1530,7 +1530,7 @@ uprv_parseCurrency(const char* locale,
int32_t max = 0;
int32_t matchIndex = -1;
// case in-sensitive comparision against currency names
// case in-sensitive comparison against currency names
searchCurrencyName(currencyNames, total_currency_name_count,
upperText, textLen, partialMatchLen, &max, &matchIndex);

View File

@ -133,8 +133,10 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = {
* or a pointer. If a hint bit is zero, then the associated
* token is assumed to be an integer.
*/
#define HINT_BOTH_INTEGERS (0)
#define HINT_KEY_POINTER (1)
#define HINT_VALUE_POINTER (2)
#define HINT_ALLOW_ZERO (4)
/********************************************************************
* PRIVATE Implementation
@ -479,8 +481,9 @@ _uhash_put(UHashtable *hash,
goto err;
}
U_ASSERT(hash != NULL);
/* Cannot always check pointer here or iSeries sees NULL every time. */
if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
if ((hint & HINT_VALUE_POINTER) ?
value.pointer == NULL :
value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
/* Disallow storage of NULL values, since NULL is returned by
* get() to indicate an absent key. Storing NULL == removing.
*/
@ -687,6 +690,28 @@ uhash_igeti(const UHashtable *hash,
return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
}
U_CAPI int32_t U_EXPORT2
uhash_getiAndFound(const UHashtable *hash,
const void *key,
UBool *found) {
UHashTok keyholder;
keyholder.pointer = (void *)key;
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
*found = !IS_EMPTY_OR_DELETED(e->hashcode);
return e->value.integer;
}
U_CAPI int32_t U_EXPORT2
uhash_igetiAndFound(const UHashtable *hash,
int32_t key,
UBool *found) {
UHashTok keyholder;
keyholder.integer = key;
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
*found = !IS_EMPTY_OR_DELETED(e->hashcode);
return e->value.integer;
}
U_CAPI void* U_EXPORT2
uhash_put(UHashtable *hash,
void* key,
@ -736,7 +761,34 @@ uhash_iputi(UHashtable *hash,
keyholder.integer = key;
valueholder.integer = value;
return _uhash_put(hash, keyholder, valueholder,
0, /* neither is a ptr */
HINT_BOTH_INTEGERS,
status).integer;
}
U_CAPI int32_t U_EXPORT2
uhash_putiAllowZero(UHashtable *hash,
void *key,
int32_t value,
UErrorCode *status) {
UHashTok keyholder, valueholder;
keyholder.pointer = key;
valueholder.integer = value;
return _uhash_put(hash, keyholder, valueholder,
HINT_KEY_POINTER | HINT_ALLOW_ZERO,
status).integer;
}
U_CAPI int32_t U_EXPORT2
uhash_iputiAllowZero(UHashtable *hash,
int32_t key,
int32_t value,
UErrorCode *status) {
UHashTok keyholder, valueholder;
keyholder.integer = key;
valueholder.integer = value;
return _uhash_put(hash, keyholder, valueholder,
HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO,
status).integer;
}
@ -785,6 +837,29 @@ uhash_removeAll(UHashtable *hash) {
U_ASSERT(hash->count == 0);
}
U_CAPI UBool U_EXPORT2
uhash_containsKey(const UHashtable *hash, const void *key) {
UHashTok keyholder;
keyholder.pointer = (void *)key;
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
return !IS_EMPTY_OR_DELETED(e->hashcode);
}
/**
* Returns true if the UHashtable contains an item with this integer key.
*
* @param hash The target UHashtable.
* @param key An integer key stored in a hashtable
* @return true if the key is found.
*/
U_CAPI UBool U_EXPORT2
uhash_icontainsKey(const UHashtable *hash, int32_t key) {
UHashTok keyholder;
keyholder.integer = key;
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
return !IS_EMPTY_OR_DELETED(e->hashcode);
}
U_CAPI const UHashElement* U_EXPORT2
uhash_find(const UHashtable *hash, const void* key) {
UHashTok keyholder;

View File

@ -23,7 +23,7 @@
/**
* UHashtable stores key-value pairs and does moderately fast lookup
* based on keys. It provides a good tradeoff between access time and
* storage space. As elements are added to it, it grows to accomodate
* storage space. As elements are added to it, it grows to accommodate
* them. By default, the table never shrinks, even if all elements
* are removed from it.
*
@ -54,6 +54,13 @@
* uhash_remove() on that key. This keeps uhash_get(), uhash_count(),
* and uhash_nextElement() consistent with one another.
*
* Keys and values can be integers.
* Functions that work with an integer key have an "i" prefix.
* Functions that work with an integer value have an "i" suffix.
* As with putting a NULL value pointer, putting a zero value integer removes the item.
* Except, there are pairs of functions that allow setting zero values
* and fetching (value, found) pairs.
*
* To see everything in a hashtable, use uhash_nextElement() to
* iterate through its contents. Each call to this function returns a
* UHashElement pointer. A hash element contains a key, value, and
@ -405,6 +412,44 @@ uhash_iputi(UHashtable *hash,
int32_t value,
UErrorCode *status);
/**
* Put a (key=pointer, value=integer) item in a UHashtable. If the
* keyDeleter is non-NULL, then the hashtable owns 'key' after this
* call. valueDeleter must be NULL.
* Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero.
*
* @param hash The target UHashtable.
* @param key The key to store.
* @param value The integer value to store.
* @param status A pointer to an UErrorCode to receive any errors.
* @return The previous value, or 0 if none.
* @see uhash_getiAndFound
*/
U_CAPI int32_t U_EXPORT2
uhash_putiAllowZero(UHashtable *hash,
void *key,
int32_t value,
UErrorCode *status);
/**
* Put a (key=integer, value=integer) item in a UHashtable. If the
* keyDeleter is non-NULL, then the hashtable owns 'key' after this
* call. valueDeleter must be NULL.
* Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero.
*
* @param hash The target UHashtable.
* @param key The key to store.
* @param value The integer value to store.
* @param status A pointer to an UErrorCode to receive any errors.
* @return The previous value, or 0 if none.
* @see uhash_igetiAndFound
*/
U_CAPI int32_t U_EXPORT2
uhash_iputiAllowZero(UHashtable *hash,
int32_t key,
int32_t value,
UErrorCode *status);
/**
* Retrieve a pointer value from a UHashtable using a pointer key,
* as previously stored by uhash_put().
@ -448,6 +493,34 @@ U_CAPI int32_t U_EXPORT2
uhash_igeti(const UHashtable *hash,
int32_t key);
/**
* Retrieves an integer value from a UHashtable using a pointer key,
* as previously stored by uhash_putiAllowZero() or uhash_puti().
*
* @param hash The target UHashtable.
* @param key A pointer key stored in a hashtable
* @param found A pointer to a boolean which will be set for whether the key was found.
* @return The requested item, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
uhash_getiAndFound(const UHashtable *hash,
const void *key,
UBool *found);
/**
* Retrieves an integer value from a UHashtable using an integer key,
* as previously stored by uhash_iputiAllowZero() or uhash_iputi().
*
* @param hash The target UHashtable.
* @param key An integer key stored in a hashtable
* @param found A pointer to a boolean which will be set for whether the key was found.
* @return The requested item, or 0 if not found.
*/
U_CAPI int32_t U_EXPORT2
uhash_igetiAndFound(const UHashtable *hash,
int32_t key,
UBool *found);
/**
* Remove an item from a UHashtable stored by uhash_put().
* @param hash The target UHashtable.
@ -495,6 +568,26 @@ uhash_iremovei(UHashtable *hash,
U_CAPI void U_EXPORT2
uhash_removeAll(UHashtable *hash);
/**
* Returns true if the UHashtable contains an item with this pointer key.
*
* @param hash The target UHashtable.
* @param key A pointer key stored in a hashtable
* @return true if the key is found.
*/
U_CAPI UBool U_EXPORT2
uhash_containsKey(const UHashtable *hash, const void *key);
/**
* Returns true if the UHashtable contains an item with this integer key.
*
* @param hash The target UHashtable.
* @param key An integer key stored in a hashtable
* @return true if the key is found.
*/
U_CAPI UBool U_EXPORT2
uhash_icontainsKey(const UHashtable *hash, int32_t key);
/**
* Locate an element of a UHashtable. The caller must not modify the
* returned object. The primary use of this function is to obtain the

View File

@ -143,7 +143,7 @@ static const char * const LANGUAGES[] = {
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
"ml", "mn", "mnc", "mni", "mo",
"ml", "mn", "mnc", "mni",
"moh", "mos", "mr", "mrj",
"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
"my", "mye", "myv", "mzn",
@ -166,9 +166,9 @@ static const char * const LANGUAGES[] = {
"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
"sv", "sw", "swb", "swc", "syc", "syr", "szl",
"sv", "sw", "swb", "syc", "syr", "szl",
"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr",
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
@ -181,7 +181,7 @@ static const char * const LANGUAGES[] = {
"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
"zun", "zxx", "zza",
NULL,
"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
"in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", /* obsolete language codes */
NULL
};
@ -260,7 +260,7 @@ static const char * const LANGUAGES_3[] = {
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
"mal", "mon", "mnc", "mni", "mol",
"mal", "mon", "mnc", "mni",
"moh", "mos", "mar", "mrj",
"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
"mya", "mye", "myv", "mzn",
@ -283,9 +283,9 @@ static const char * const LANGUAGES_3[] = {
"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
"swe", "swa", "swb", "swc", "syc", "syr", "szl",
"swe", "swa", "swb", "syc", "syr", "szl",
"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
@ -298,8 +298,8 @@ static const char * const LANGUAGES_3[] = {
"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
"zun", "zxx", "zza",
NULL,
/* "in", "iw", "ji", "jw", "sh", */
"ind", "heb", "yid", "jaw", "srp",
/* "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", */
"ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
NULL
};
@ -334,13 +334,13 @@ static const char * const COUNTRIES[] = {
"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
"DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
"DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
"ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
"IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
@ -357,7 +357,7 @@ static const char * const COUNTRIES[] = {
"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
"WS", "YE", "YT", "ZA", "ZM", "ZW",
"WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
NULL,
"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
NULL
@ -397,10 +397,10 @@ static const char * const COUNTRIES_3[] = {
"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
"DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
"DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
@ -409,8 +409,8 @@ static const char * const COUNTRIES_3[] = {
"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
"IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
"XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
@ -443,8 +443,8 @@ static const char * const COUNTRIES_3[] = {
"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
"WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
"WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",

View File

@ -271,7 +271,7 @@ initFromResourceBundle(UErrorCode& sts) {
if (U_FAILURE(sts)) {
break;
}
// check if this is an alias of canoncal legacy type
// check if this is an alias of canonical legacy type
if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
const char* from = ures_getKey(typeAliasDataEntry.getAlias());
if (isTZ) {

View File

@ -129,7 +129,6 @@ static const char* const LEGACY[] = {
// Legacy tags with no preferred value in the IANA
// registry. Kept for now for the backward compatibility
// because ICU has mapped them this way.
"cel-gaulish", "xtg-x-cel-gaulish",
"i-default", "en-x-i-default",
"i-enochian", "und-x-i-enochian",
"i-mingo", "see-x-i-mingo",
@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len)
return FALSE;
}
U_CAPI const char * U_EXPORT2
ultag_getTKeyStart(const char *localeID) {
const char *result = localeID;
const char *sep;
while((sep = uprv_strchr(result, SEP)) != nullptr) {
if (_isTKey(result, static_cast<int32_t>(sep - result))) {
return result;
}
result = ++sep;
}
if (_isTKey(result, -1)) {
return result;
}
return nullptr;
}
static UBool
_isTValue(const char* s, int32_t len)
{
@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
if (len < 0) {
len = (int32_t)uprv_strlen(s);
}
switch (state) {
case kStart:
if (ultag_isLanguageSubtag(s, len)) {
if (ultag_isLanguageSubtag(s, len) && len != 4) {
state = kGotLanguage;
return TRUE;
}
@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
return;
}
/* Determine if variants already exists */
if (ultag_getVariantsSize(langtag)) {
posixVariant = TRUE;
}
n = ultag_getExtensionsSize(langtag);
/* resolve locale keywords and reordering keys */
@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
key = ultag_getExtensionKey(langtag, i);
type = ultag_getExtensionValue(langtag, i);
if (*key == LDMLEXT) {
/* Determine if variants already exists */
if (ultag_getVariantsSize(langtag)) {
posixVariant = TRUE;
}
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
if (U_FAILURE(*status)) {
break;
@ -2028,7 +2047,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
uprv_memcpy(tagBuf, tag, tagLen);
if (tagLen > 0) {
uprv_memcpy(tagBuf, tag, tagLen);
}
*(tagBuf + tagLen) = 0;
/* create a ULanguageTag */
@ -2692,8 +2714,7 @@ ulocimp_toLanguageTag(const char* localeID,
if (U_SUCCESS(tmpStatus)) {
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
/* return private use only tag */
static const char PREFIX[] = { PRIVATEUSE, SEP };
sink.Append(PREFIX, sizeof(PREFIX));
sink.Append("und-x-", 6);
sink.Append(buf.data(), buf.length());
done = TRUE;
} else if (strict) {

View File

@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len);
U_CFUNC UBool
ultag_isVariantSubtags(const char* s, int32_t len);
U_CAPI const char * U_EXPORT2
ultag_getTKeyStart(const char *localeID);
U_CFUNC const char*
ulocimp_toBcpKey(const char* key);

View File

@ -71,7 +71,6 @@ public:
*/
virtual void Append(const char* bytes, int32_t n) = 0;
#ifndef U_HIDE_DRAFT_API
/**
* Appends n bytes to this. Same as Append().
* Call AppendU8() with u8"string literals" which are const char * in C++11
@ -81,7 +80,7 @@ public:
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @draft ICU 67
* @stable ICU 67
*/
inline void AppendU8(const char* bytes, int32_t n) {
Append(bytes, n);
@ -97,13 +96,12 @@ public:
*
* @param bytes the pointer to the bytes
* @param n the number of bytes; must be non-negative
* @draft ICU 67
* @stable ICU 67
*/
inline void AppendU8(const char8_t* bytes, int32_t n) {
Append(reinterpret_cast<const char*>(bytes), n);
}
#endif
#endif // U_HIDE_DRAFT_API
/**
* Returns a writable buffer for appending and writes the buffer's capacity to

View File

@ -30,6 +30,8 @@
#include "unicode/uobject.h"
#include "unicode/ustringtrie.h"
class BytesTrieTest;
U_NAMESPACE_BEGIN
class ByteSink;
@ -378,6 +380,7 @@ public:
private:
friend class BytesTrieBuilder;
friend class ::BytesTrieTest;
/**
* Constructs a BytesTrie reader instance.

View File

@ -30,6 +30,8 @@
#include "unicode/stringpiece.h"
#include "unicode/stringtriebuilder.h"
class BytesTrieTest;
U_NAMESPACE_BEGIN
class BytesTrieElement;
@ -125,6 +127,8 @@ public:
BytesTrieBuilder &clear();
private:
friend class ::BytesTrieTest;
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
@ -168,6 +172,7 @@ private:
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
virtual int32_t writeDeltaTo(int32_t jumpTarget);
static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
BytesTrieElement *elements;

View File

@ -15,7 +15,7 @@
* \file
* \brief (Non API- contains Doxygen definitions)
*
* This file contains documentation for Doxygen and doesnot have
* This file contains documentation for Doxygen and does not have
* any significance with respect to C or C++ API
*/
@ -74,7 +74,7 @@
* </tr>
* <tr>
* <td>Strings and Character Iteration</td>
* <td>ustring.h, utf8.h, utf16.h, UText, UCharIterator</td>
* <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
* <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
* </tr>
* <tr>
@ -128,9 +128,9 @@
* <td>icu::Normalizer2</td>
* </tr>
* <tr>
* <td>Calendars</td>
* <td>Calendars and Time Zones</td>
* <td>ucal.h</td>
* <td>icu::Calendar</td>
* <td>icu::Calendar, icu::TimeZone</td>
* </tr>
* <tr>
* <td>Date and Time Formatting</td>

View File

@ -117,14 +117,13 @@
/* === Basic types === */
#ifndef U_HIDE_INTERNAL_API
struct UPlugData;
/**
* @{
* Opaque structure passed to/from a plugin.
* use the APIs to access it.
* Typedef for opaque structure passed to/from a plugin.
* Use the APIs to access it.
* @internal ICU 4.4 Technology Preview
*/
struct UPlugData;
typedef struct UPlugData UPlugData;
/** @} */

View File

@ -91,8 +91,6 @@ enum ULocMatchDemotion {
typedef enum ULocMatchDemotion ULocMatchDemotion;
#endif
#ifndef U_FORCE_HIDE_DRAFT_API
/**
* Builder option for whether to include or ignore one-way (fallback) match data.
* The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
@ -108,20 +106,20 @@ typedef enum ULocMatchDemotion ULocMatchDemotion;
* but not if it is merely a fallback.
*
* @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
* @draft ICU 67
* @stable ICU 67
*/
enum ULocMatchDirection {
/**
* Locale matching includes one-way matches such as BretonFrench. (default)
*
* @draft ICU 67
* @stable ICU 67
*/
ULOCMATCH_DIRECTION_WITH_ONE_WAY,
/**
* Locale matching limited to two-way matches including e.g. DanishNorwegian
* but ignoring one-way matches.
*
* @draft ICU 67
* @stable ICU 67
*/
ULOCMATCH_DIRECTION_ONLY_TWO_WAY
};
@ -129,8 +127,6 @@ enum ULocMatchDirection {
typedef enum ULocMatchDirection ULocMatchDirection;
#endif
#endif // U_FORCE_HIDE_DRAFT_API
struct UHashtable;
U_NAMESPACE_BEGIN
@ -463,14 +459,13 @@ public:
*/
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
#ifndef U_HIDE_DRAFT_API
/**
* Option for whether to include or ignore one-way (fallback) match data.
* By default, they are included.
*
* @param direction the match direction to set.
* @return this Builder object
* @draft ICU 67
* @stable ICU 67
*/
Builder &setDirection(ULocMatchDirection direction) {
if (U_SUCCESS(errorCode_)) {
@ -478,7 +473,6 @@ public:
}
return *this;
}
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DRAFT_API
/**
@ -704,7 +698,7 @@ private:
LSR *lsrs;
int32_t supportedLocalesLength;
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
UHashtable *supportedLsrToIndex; // Map<LSR, Integer>
// Array versions of the supportedLsrToIndex keys and values.
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
const LSR **supportedLSRs;

View File

@ -571,15 +571,13 @@ public:
*/
void minimizeSubtags(UErrorCode& status);
#ifndef U_HIDE_DRAFT_API
/**
* Canonicalize the locale ID of this object according to CLDR.
* @param status the status code
* @draft ICU 67
* @stable ICU 67
* @see createCanonical
*/
void canonicalize(UErrorCode& status);
#endif // U_HIDE_DRAFT_API
/**
* Gets the list of keywords for the specified locale.

View File

@ -225,10 +225,8 @@ public:
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
* Implemented completely for all built-in modes except for FCD.
* The base class implementation converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
@ -381,11 +379,9 @@ public:
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
* This works for all normalization modes.
* It is optimized for UTF-8 for all built-in modes except for FCD.
* The base class implementation converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must
@ -543,10 +539,8 @@ public:
* Normalizes a UTF-8 string and optionally records how source substrings
* relate to changed and unchanged result substrings.
*
* Currently implemented completely only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* Otherwise currently converts to & from UTF-16 and does not support edits.
* Implemented completely for most built-in modes except for FCD.
* The base class implementation converts to & from UTF-16 and does not support edits.
*
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src Source UTF-8 string.
@ -676,11 +670,9 @@ public:
* resolves to "yes" or "no" to provide a definitive result,
* at the cost of doing more work in those cases.
*
* This works for all normalization modes,
* but it is currently optimized for UTF-8 only for "compose" modes,
* such as for NFC, NFKC, and NFKC_Casefold
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
* For other modes it currently converts to UTF-16 and calls isNormalized().
* This works for all normalization modes.
* It is optimized for UTF-8 for all built-in modes except for FCD.
* The base class implementation converts to UTF-16 and calls isNormalized().
*
* @param s UTF-8 input string
* @param errorCode Standard ICU error code. Its input value must

View File

@ -880,6 +880,6 @@ namespace std {
#else
# define U_CALLCONV_FPTR
#endif
/* @} */
/** @} */
#endif // _PLATFORM_H

View File

@ -75,12 +75,11 @@ class U_COMMON_API StringPiece : public UMemory {
* @stable ICU 4.2
*/
StringPiece(const char* str);
#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a NUL-terminated const char8_t * pointer.
* @param str a NUL-terminated const char8_t * pointer
* @draft ICU 67
* @stable ICU 67
*/
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
#endif
@ -88,10 +87,9 @@ class U_COMMON_API StringPiece : public UMemory {
* Constructs an empty StringPiece.
* Needed for type disambiguation from multiple other overloads.
* @param p nullptr
* @draft ICU 67
* @stable ICU 67
*/
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
#endif // U_HIDE_DRAFT_API
/**
* Constructs from a std::string.
@ -99,17 +97,15 @@ class U_COMMON_API StringPiece : public UMemory {
*/
StringPiece(const std::string& str)
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a std::u8string.
* @draft ICU 67
* @stable ICU 67
*/
StringPiece(const std::u8string& str)
: ptr_(reinterpret_cast<const char*>(str.data())),
length_(static_cast<int32_t>(str.size())) { }
#endif
#endif // U_HIDE_DRAFT_API
/**
* Constructs from some other implementation of a string piece class, from any
@ -152,18 +148,16 @@ class U_COMMON_API StringPiece : public UMemory {
* @stable ICU 4.2
*/
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Constructs from a const char8_t * pointer and a specified length.
* @param str a const char8_t * pointer (need not be terminated)
* @param len the length of the string; must be non-negative
* @draft ICU 67
* @stable ICU 67
*/
StringPiece(const char8_t* str, int32_t len) :
StringPiece(reinterpret_cast<const char*>(str), len) {}
#endif
#endif // U_HIDE_DRAFT_API
/**
* Substring of another StringPiece.
@ -233,13 +227,12 @@ class U_COMMON_API StringPiece : public UMemory {
*/
void set(const char* str);
#ifndef U_HIDE_DRAFT_API
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
/**
* Resets the stringpiece to refer to new data.
* @param xdata pointer the new string data. Need not be NUL-terminated.
* @param len the length of the new data
* @draft ICU 67
* @stable ICU 67
*/
inline void set(const char8_t* xdata, int32_t len) {
set(reinterpret_cast<const char*>(xdata), len);
@ -248,13 +241,12 @@ class U_COMMON_API StringPiece : public UMemory {
/**
* Resets the stringpiece to refer to new data.
* @param str a pointer to a NUL-terminated string.
* @draft ICU 67
* @stable ICU 67
*/
inline void set(const char8_t* str) {
set(reinterpret_cast<const char*>(str));
}
#endif
#endif // U_HIDE_DRAFT_API
/**
* Removes the first n string units.
@ -286,13 +278,12 @@ class U_COMMON_API StringPiece : public UMemory {
}
}
#ifndef U_HIDE_DRAFT_API
/**
* Searches the StringPiece for the given search string (needle);
* @param needle The string for which to search.
* @param offset Where to start searching within this string (haystack).
* @return The offset of needle in haystack, or -1 if not found.
* @draft ICU 67
* @stable ICU 67
*/
int32_t find(StringPiece needle, int32_t offset);
@ -301,10 +292,9 @@ class U_COMMON_API StringPiece : public UMemory {
* similar to std::string::compare().
* @param other The string to compare to.
* @return below zero if this < other; above zero if this > other; 0 if this == other.
* @draft ICU 67
* @stable ICU 67
*/
int32_t compare(StringPiece other);
#endif // U_HIDE_DRAFT_API
/**
* Maximum integer, used as a default value for substring methods.

View File

@ -296,6 +296,8 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
const UChar * text, int32_t textLength,
UErrorCode * status);
#ifndef U_HIDE_DEPRECATED_API
/**
* Thread safe cloning operation
* @param bi iterator to be cloned
@ -312,7 +314,7 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
* @return pointer to the new clone
* @stable ICU 2.0
* @deprecated ICU 69 Use ubrk_clone() instead.
*/
U_CAPI UBreakIterator * U_EXPORT2
ubrk_safeClone(
@ -321,6 +323,23 @@ ubrk_safeClone(
int32_t *pBufferSize,
UErrorCode *status);
#endif /* U_HIDE_DEPRECATED_API */
#ifndef U_HIDE_DRAFT_API
/**
* Thread safe cloning operation.
* @param bi iterator to be cloned
* @param status to indicate whether the operation went on smoothly or there were errors
* @return pointer to the new clone
* @draft ICU 69
*/
U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator *bi,
UErrorCode *status);
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_DEPRECATED_API
/**

View File

@ -1699,10 +1699,10 @@ ucnv_countAvailable(void);
/**
* Gets the canonical converter name of the specified converter from a list of
* all available converters contaied in the alias file. All converters
* all available converters contained in the alias file. All converters
* in this list can be opened.
*
* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>)
* @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
* @see ucnv_countAvailable
* @stable ICU 2.0

View File

@ -45,11 +45,11 @@
* from the serialized form.
*/
struct UConverterSelector;
/**
* @{
* The selector data structure
* Typedef for selector data structure.
*/
struct UConverterSelector;
typedef struct UConverterSelector UConverterSelector;
/** @} */

View File

@ -40,8 +40,8 @@ U_NAMESPACE_BEGIN
*
* <code>UnicodeFilter</code> defines a protocol for selecting a
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
* Currently, filters are used in conjunction with classes like {@link
* Transliterator} to only process selected characters through a
* Currently, filters are used in conjunction with classes like
* {@link Transliterator} to only process selected characters through a
* transformation.
*
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods

View File

@ -178,8 +178,6 @@ class RuleCharacterIterator;
* Unicode property
* </table>
*
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
*
* <p><b>Formal syntax</b></p>
*
* \htmlonly<blockquote>\endhtmlonly
@ -601,7 +599,7 @@ public:
/**
* Make this object represent the range `start - end`.
* If `end > start` then this object is set to an empty range.
* If `start > end` then this object is set to an empty range.
* A frozen set will not be modified.
*
* @param start first character in the set, inclusive
@ -1077,7 +1075,7 @@ public:
/**
* Adds the specified range to this set if it is not already
* present. If this set already contains the specified range,
* the call leaves this set unchanged. If <code>end > start</code>
* the call leaves this set unchanged. If <code>start > end</code>
* then an empty range is added, leaving the set unchanged.
* This is equivalent to a boolean logic OR, or a set UNION.
* A frozen set will not be modified.
@ -1095,6 +1093,9 @@ public:
* present. If this set already contains the specified character,
* the call leaves this set unchanged.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& add(UChar32 c);
@ -1104,8 +1105,8 @@ public:
* present. If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus "ch" => {"ch"}
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
* A frozen set will not be modified.
*
* @param s the source string
* @return this object, for chaining
* @stable ICU 2.4
@ -1124,8 +1125,8 @@ public:
public:
/**
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
* If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1135,7 +1136,6 @@ public:
/**
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1145,7 +1145,6 @@ public:
/**
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1155,7 +1154,6 @@ public:
/**
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param s the source string
* @return this object, for chaining
@ -1165,7 +1163,7 @@ public:
/**
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
*
* @param s the source string
* @return a newly created set containing the given string.
* The caller owns the return object and is responsible for deleting it.
@ -1185,15 +1183,13 @@ public:
/**
* Retain only the elements in this set that are contained in the
* specified range. If <code>end > start</code> then an empty range is
* specified range. If <code>start > end</code> then an empty range is
* retained, leaving the set empty. This is equivalent to
* a boolean logic AND, or a set INTERSECTION.
* A frozen set will not be modified.
*
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
@ -1202,14 +1198,31 @@ public:
/**
* Retain the specified character from this set if it is present.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& retain(UChar32 c);
#ifndef U_HIDE_DRAFT_API
/**
* Retains only the specified string from this set if it is present.
* Upon return this set will be empty if it did not contain s, or
* will only contain s if it did contain s.
* A frozen set will not be modified.
*
* @param s the source string
* @return this object, for chaining
* @draft ICU 69
*/
UnicodeSet& retain(const UnicodeString &s);
#endif // U_HIDE_DRAFT_API
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
* returns. If <code>end > start</code> then an empty range is
* returns. If <code>start > end</code> then an empty range is
* removed, leaving the set unchanged.
* A frozen set will not be modified.
*
@ -1226,6 +1239,9 @@ public:
* The set will not contain the specified range once the call
* returns.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& remove(UChar32 c);
@ -1253,15 +1269,13 @@ public:
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set. If <code>end > start</code>
* added if it is not in this set. If <code>start > end</code>
* then an empty range is complemented, leaving the set unchanged.
* This is equivalent to a boolean logic XOR.
* A frozen set will not be modified.
*
* @param start first character, inclusive, of range to be removed
* from this set.
* @param end last character, inclusive, of range to be removed
* from this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 2.0
*/
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
@ -1271,16 +1285,18 @@ public:
* will be removed if it is in this set, or will be added if it is
* not in this set.
* A frozen set will not be modified.
*
* @param c the character (code point)
* @return this object, for chaining
* @stable ICU 2.0
*/
UnicodeSet& complement(UChar32 c);
/**
* Complement the specified string in this set.
* The set will not contain the specified string once the call
* returns.
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
* The string will be removed if it is in this set, or will be added if it is not in this set.
* A frozen set will not be modified.
*
* @param s the string to complement
* @return this object, for chaining
* @stable ICU 2.4

View File

@ -44,9 +44,10 @@ struct UConverter; // unicode/ucnv.h
#ifndef USTRING_H
/**
* \ingroup ustring_ustrlen
* @param s Pointer to sequence of UChars.
* @return Length of sequence.
*/
U_CAPI int32_t U_EXPORT2
u_strlen(const UChar *s);
U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
#endif
U_NAMESPACE_BEGIN
@ -2766,7 +2767,6 @@ public:
* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param options Options bit set, see ucasemap_open().
* @return A reference to this.
* @stable ICU 3.8
*/
@ -3614,7 +3614,7 @@ private:
// turn a bogus string into an empty one
void unBogus();
// implements assigment operator, copy constructor, and fastCopyFrom()
// implements assignment operator, copy constructor, and fastCopyFrom()
UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
// Copies just the fields without memory management.

View File

@ -482,6 +482,7 @@
#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone)
#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
@ -534,6 +535,7 @@
#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
#define ucal_getTimeZoneOffsetFromLocal U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneOffsetFromLocal)
#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
@ -962,6 +964,7 @@
#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
#define uhash_containsKey U_ICU_ENTRY_POINT_RENAME(uhash_containsKey)
#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
@ -970,6 +973,7 @@
#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
#define uhash_getiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_getiAndFound)
#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
@ -977,12 +981,15 @@
#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
#define uhash_icontainsKey U_ICU_ENTRY_POINT_RENAME(uhash_icontainsKey)
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
#define uhash_igetiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_igetiAndFound)
#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
#define uhash_iputiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_iputiAllowZero)
#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
@ -990,6 +997,7 @@
#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
#define uhash_putiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_putiAllowZero)
#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
@ -1150,6 +1158,8 @@
#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
#define umeas_getPrefixBase U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixBase)
#define umeas_getPrefixPower U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixPower)
#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
@ -1672,6 +1682,9 @@
#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
#define uset_complementAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_complementAllCodePoints)
#define uset_complementRange U_ICU_ENTRY_POINT_RENAME(uset_complementRange)
#define uset_complementString U_ICU_ENTRY_POINT_RENAME(uset_complementString)
#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
@ -1695,12 +1708,15 @@
#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
#define uset_removeAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_removeAllCodePoints)
#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
#define uset_retainAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_retainAllCodePoints)
#define uset_retainString U_ICU_ENTRY_POINT_RENAME(uset_retainString)
#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)

View File

@ -582,8 +582,8 @@ U_CAPI void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen);
/**
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
* If this set already any particular character, it has no effect on that character.
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
* If this set already contains any particular character, it has no effect on that character.
* A frozen set will not be modified.
* @param set the object to which to add the character
* @param str the source string
@ -628,6 +628,20 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
U_CAPI void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen);
#ifndef U_HIDE_DRAFT_API
/**
* Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Removes from this set all of its elements that are contained in the
* specified set. This operation effectively modifies this
@ -650,15 +664,41 @@ uset_removeAll(USet* set, const USet* removeSet);
* A frozen set will not be modified.
*
* @param set the object for which to retain only the specified range
* @param start first character, inclusive, of range to be retained
* to this set.
* @param end last character, inclusive, of range to be retained
* to this set.
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @stable ICU 3.2
*/
U_CAPI void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end);
#ifndef U_HIDE_DRAFT_API
/**
* Retains only the specified string from this set if it is present.
* Upon return this set will be empty if it did not contain s, or
* will only contain s if it did contain s.
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length);
/**
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Retains only the elements in this set that are contained in the
* specified set. In other words, removes from this set all of
@ -696,6 +736,49 @@ uset_compact(USet* set);
U_CAPI void U_EXPORT2
uset_complement(USet* set);
#ifndef U_HIDE_DRAFT_API
/**
* Complements the specified range in this set. Any character in
* the range will be removed if it is in this set, or will be
* added if it is not in this set. If <code>start > end</code>
* then an empty range is complemented, leaving the set unchanged.
* This is equivalent to a boolean logic XOR.
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param start first character, inclusive, of range
* @param end last character, inclusive, of range
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end);
/**
* Complements the specified string in this set.
* The string will be removed if it is in this set, or will be added if it is not in this set.
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length);
/**
* Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
* A frozen set will not be modified.
*
* @param set the object to be modified
* @param str the string
* @param length the length of the string, or -1 if NUL-terminated
* @draft ICU 69
*/
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
#endif // U_HIDE_DRAFT_API
/**
* Complements in this set all elements contained in the specified
* set. Any character in the other set will be removed if it is

View File

@ -323,7 +323,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
/** Presentation form option:
* Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with
* their unshaped correspondants in range 0+06xx, before shaping.
* their unshaped correspondents in range 0+06xx, before shaping.
* @stable ICU 3.6
*/
#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0

View File

@ -173,24 +173,23 @@ typedef enum UTraceFunctionNumber {
UTRACE_RES_DATA_LIMIT,
#endif // U_HIDE_INTERNAL_API
#ifndef U_HIDE_DRAFT_API
/**
* The lowest break iterator location.
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_START=0x4000,
/**
* Indicates that a character instance of break iterator was created.
*
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
/**
* Indicates that a word instance of break iterator was created.
*
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_CREATE_WORD,
@ -200,21 +199,21 @@ typedef enum UTraceFunctionNumber {
* Provides one C-style string to UTraceData: the lb value ("",
* "loose", "strict", or "normal").
*
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_CREATE_LINE,
/**
* Indicates that a sentence instance of break iterator was created.
*
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_CREATE_SENTENCE,
/**
* Indicates that a title instance of break iterator was created.
*
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_CREATE_TITLE,
@ -224,12 +223,10 @@ typedef enum UTraceFunctionNumber {
* Provides one C-style string to UTraceData: the script code of what
* the break engine cover ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
*
* @draft ICU 67
* @stable ICU 67
*/
UTRACE_UBRK_CREATE_BREAK_ENGINE,
#endif // U_HIDE_DRAFT_API
#ifndef U_HIDE_INTERNAL_API
/**
* One more than the highest normal break iterator trace location.

View File

@ -60,13 +60,13 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION_MAJOR_NUM 68
#define U_ICU_VERSION_MAJOR_NUM 69
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_MINOR_NUM 2
#define U_ICU_VERSION_MINOR_NUM 1
/** The current ICU patchlevel version as an integer.
* This value will change in the subsequent releases of ICU
@ -86,7 +86,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SUFFIX _68
#define U_ICU_VERSION_SUFFIX _69
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@ -139,7 +139,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
#define U_ICU_VERSION "68.2"
#define U_ICU_VERSION "69.1"
/**
* The current ICU library major version number as a string, for library name suffixes.
@ -152,13 +152,13 @@
*
* @stable ICU 2.6
*/
#define U_ICU_VERSION_SHORT "68"
#define U_ICU_VERSION_SHORT "69"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
#define U_ICU_DATA_VERSION "68.2"
#define U_ICU_DATA_VERSION "69.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================

View File

@ -30,24 +30,6 @@
#include "bmpset.h"
#include "unisetspan.h"
// Define UChar constants using hex for EBCDIC compatibility
// Used #define to reduce private static exports and memory access time.
#define SET_OPEN ((UChar)0x005B) /*[*/
#define SET_CLOSE ((UChar)0x005D) /*]*/
#define HYPHEN ((UChar)0x002D) /*-*/
#define COMPLEMENT ((UChar)0x005E) /*^*/
#define COLON ((UChar)0x003A) /*:*/
#define BACKSLASH ((UChar)0x005C) /*\*/
#define INTERSECTION ((UChar)0x0026) /*&*/
#define UPPER_U ((UChar)0x0055) /*U*/
#define LOWER_U ((UChar)0x0075) /*u*/
#define OPEN_BRACE ((UChar)123) /*{*/
#define CLOSE_BRACE ((UChar)125) /*}*/
#define UPPER_P ((UChar)0x0050) /*P*/
#define LOWER_P ((UChar)0x0070) /*p*/
#define UPPER_N ((UChar)78) /*N*/
#define EQUALS ((UChar)0x003D) /*=*/
// HIGH_VALUE > all valid values. 110000 for codepoints
#define UNICODESET_HIGH 0x0110000
@ -444,7 +426,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
* @return <tt>true</tt> if this set contains the specified string
*/
UBool UnicodeSet::contains(const UnicodeString& s) const {
if (s.length() == 0) return FALSE;
int32_t cp = getSingleCP(s);
if (cp < 0) {
return stringsContains(s);
@ -559,11 +540,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
if (hasStrings()) {
for (i=0; i<strings->size(); ++i) {
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
//if (s.length() == 0) {
// // Empty strings match everything
// return TRUE;
//}
// assert(s.length() != 0); // We enforce this elsewhere
if (s.isEmpty()) {
continue; // skip the empty string
}
UChar32 c = s.char32At(0);
if ((c & 0xFF) == v) {
return TRUE;
@ -582,9 +561,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
int32_t limit,
UBool incremental) {
if (offset == limit) {
// Strings, if any, have length != 0, so we don't worry
// about them here. If we ever allow zero-length strings
// we much check for them here.
if (contains(U_ETHER)) {
return incremental ? U_PARTIAL_MATCH : U_MATCH;
} else {
@ -614,11 +590,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
for (i=0; i<strings->size(); ++i) {
const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
//if (trial.length() == 0) {
// return U_MATCH; // null-string always matches
//}
// assert(trial.length() != 0); // We ensure this elsewhere
if (trial.isEmpty()) {
continue; // skip the empty string
}
UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
@ -971,12 +945,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
* present. If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus "ch" => {"ch"}
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
*
* @param s the source string
* @return the modified set, for chaining
*/
UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (!stringsContains(s)) {
@ -991,8 +965,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
/**
* Adds the given string, in order, to 'strings'. The given string
* must have been checked by the caller to not be empty and to not
* already be in 'strings'.
* must have been checked by the caller to not already be in 'strings'.
*/
void UnicodeSet::_add(const UnicodeString& s) {
if (isFrozen() || isBogus()) {
@ -1021,16 +994,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
* @param string to test
*/
int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
//if (s.length() < 1) {
// throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
//}
if (s.length() > 2) return -1;
if (s.length() == 1) return s.charAt(0);
// at this point, len = 2
UChar32 cp = s.char32At(0);
if (cp > 0xFFFF) { // is surrogate pair
return cp;
int32_t sLength = s.length();
if (sLength == 1) return s.charAt(0);
if (sLength == 2) {
UChar32 cp = s.char32At(0);
if (cp > 0xFFFF) { // is surrogate pair
return cp;
}
}
return -1;
}
@ -1150,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) {
return retain(c, c);
}
UnicodeSet& UnicodeSet::retain(const UnicodeString &s) {
if (isFrozen() || isBogus()) { return *this; }
UChar32 cp = getSingleCP(s);
if (cp < 0) {
bool isIn = stringsContains(s);
// Check for getRangeCount() first to avoid somewhat-expensive size()
// when there are single code points.
if (isIn && getRangeCount() == 0 && size() == 1) {
return *this;
}
clear();
if (isIn) {
_add(s);
}
} else {
retain(cp, cp);
}
return *this;
}
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
@ -1186,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
* @return the modified set, for chaining
*/
UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (strings != nullptr && strings->removeElement((void*) &s)) {
@ -1252,12 +1242,12 @@ UnicodeSet& UnicodeSet::complement(void) {
* Complement the specified string in this set.
* The set will not contain the specified string once the call
* returns.
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
*
* @param s the string to complement
* @return this object, for chaining
*/
UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (stringsContains(s)) {
@ -2001,22 +1991,22 @@ escapeUnprintable) {
}
// Okay to let ':' pass through
switch (c) {
case SET_OPEN:
case SET_CLOSE:
case HYPHEN:
case COMPLEMENT:
case INTERSECTION:
case BACKSLASH:
case OPEN_BRACE:
case CLOSE_BRACE:
case COLON:
case u'[':
case u']':
case u'-':
case u'^':
case u'&':
case u'\\':
case u'{':
case u'}':
case u':':
case SymbolTable::SYMBOL_REF:
buf.append(BACKSLASH);
buf.append(u'\\');
break;
default:
// Escape whitespace
if (PatternProps::isWhiteSpace(c)) {
buf.append(BACKSLASH);
buf.append(u'\\');
}
break;
}
@ -2049,7 +2039,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
backslashCount = 0;
} else {
result.append(c);
if (c == BACKSLASH) {
if (c == u'\\') {
++backslashCount;
} else {
backslashCount = 0;
@ -2082,13 +2072,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
UBool escapeUnprintable) const
{
result.append(SET_OPEN);
result.append(u'[');
// // Check against the predefined categories. We implicitly build
// // up ALL category sets the first time toPattern() is called.
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
// if (*this == getCategorySet(cat)) {
// result.append(COLON);
// result.append(u':');
// result.append(CATEGORY_NAMES, cat*2, 2);
// return result.append(CATEGORY_CLOSE);
// }
@ -2104,7 +2094,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
getRangeEnd(count-1) == MAX_VALUE) {
// Emit the inverse
result.append(COMPLEMENT);
result.append(u'^');
for (int32_t i = 1; i < count; ++i) {
UChar32 start = getRangeEnd(i-1)+1;
@ -2112,7 +2102,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
_appendToPat(result, start, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
result.append(HYPHEN);
result.append(u'-');
}
_appendToPat(result, end, escapeUnprintable);
}
@ -2127,7 +2117,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
_appendToPat(result, start, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
result.append(HYPHEN);
result.append(u'-');
}
_appendToPat(result, end, escapeUnprintable);
}
@ -2136,14 +2126,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
if (strings != nullptr) {
for (int32_t i = 0; i<strings->size(); ++i) {
result.append(OPEN_BRACE);
result.append(u'{');
_appendToPat(result,
*(const UnicodeString*) strings->elementAt(i),
escapeUnprintable);
result.append(CLOSE_BRACE);
result.append(u'}');
}
}
return result.append(SET_CLOSE);
return result.append(u']');
}
/**

View File

@ -47,31 +47,6 @@
U_NAMESPACE_USE
// Define UChar constants using hex for EBCDIC compatibility
// Used #define to reduce private static exports and memory access time.
#define SET_OPEN ((UChar)0x005B) /*[*/
#define SET_CLOSE ((UChar)0x005D) /*]*/
#define HYPHEN ((UChar)0x002D) /*-*/
#define COMPLEMENT ((UChar)0x005E) /*^*/
#define COLON ((UChar)0x003A) /*:*/
#define BACKSLASH ((UChar)0x005C) /*\*/
#define INTERSECTION ((UChar)0x0026) /*&*/
#define UPPER_U ((UChar)0x0055) /*U*/
#define LOWER_U ((UChar)0x0075) /*u*/
#define OPEN_BRACE ((UChar)123) /*{*/
#define CLOSE_BRACE ((UChar)125) /*}*/
#define UPPER_P ((UChar)0x0050) /*P*/
#define LOWER_P ((UChar)0x0070) /*p*/
#define UPPER_N ((UChar)78) /*N*/
#define EQUALS ((UChar)0x003D) /*=*/
//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
// Special property set IDs
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
#define NAME_PROP "na"
#define NAME_PROP_LENGTH 2
/**
* Delimiter string used in patterns to close a category reference:
* ":]". Example: "[:Lu:]".
*/
//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
// Cached sets ------------------------------------------------------------- ***
U_CDECL_BEGIN
@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
static inline UBool
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
UChar c;
return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
}
/*static inline UBool
isPerlClose(const UnicodeString &pattern, int32_t pos) {
return pattern.charAt(pos)==CLOSE_BRACE;
return pattern.charAt(pos)==u'}';
}*/
static inline UBool
isNameOpen(const UnicodeString &pattern, int32_t pos) {
return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
}
static inline UBool
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
}
/*static inline UBool
isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
}*/
// TODO memory debugging provided inside uniset.cpp
@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
while (mode != 2 && !chars.atEnd()) {
U_ASSERT((lastItem == 0 && op == 0) ||
(lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
(lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
op == INTERSECTION /*'&'*/)));
(lastItem == 1 && (op == 0 || op == u'-')) ||
(lastItem == 2 && (op == 0 || op == u'-' || op == u'&')));
UChar32 c = 0;
UBool literal = FALSE;
@ -356,27 +324,27 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5B /*'['*/ && !literal) {
if (c == u'[' && !literal) {
if (mode == 1) {
chars.setPos(backup); // backup
setMode = 1;
} else {
// Handle opening '[' delimiter
mode = 1;
patLocal.append((UChar) 0x5B /*'['*/);
patLocal.append(u'[');
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5E /*'^'*/ && !literal) {
if (c == u'^' && !literal) {
invert = TRUE;
patLocal.append((UChar) 0x5E /*'^'*/);
patLocal.append(u'^');
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
}
// Fall through to handle special leading '-';
// otherwise restart loop for nested [], \p{}, etc.
if (c == HYPHEN /*'-'*/) {
if (c == u'-') {
literal = TRUE;
// Fall through to handle literal '-' below
} else {
@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
op = 0;
}
if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
if (op == u'-' || op == u'&') {
patLocal.append(op);
}
@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
}
switch (op) {
case HYPHEN: /*'-'*/
case u'-':
removeAll(*nested);
break;
case INTERSECTION: /*'&'*/
case u'&':
retainAll(*nested);
break;
case 0:
@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
if (!literal) {
switch (c) {
case 0x5D /*']'*/:
case u']':
if (lastItem == 1) {
add(lastChar, lastChar);
_appendToPat(patLocal, lastChar, FALSE);
}
// Treat final trailing '-' as a literal
if (op == HYPHEN /*'-'*/) {
if (op == u'-') {
add(op, op);
patLocal.append(op);
} else if (op == INTERSECTION /*'&'*/) {
} else if (op == u'&') {
// syntaxError(chars, "Trailing '&'");
ec = U_MALFORMED_SET;
return;
}
patLocal.append((UChar) 0x5D /*']'*/);
patLocal.append(u']');
mode = 2;
continue;
case HYPHEN /*'-'*/:
case u'-':
if (op == 0) {
if (lastItem != 0) {
op = (UChar) c;
@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
add(c, c);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5D /*']'*/ && !literal) {
patLocal.append(HYPHEN_RIGHT_BRACE, 2);
if (c == u']' && !literal) {
patLocal.append(u"-]", 2);
mode = 2;
continue;
}
@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// syntaxError(chars, "'-' not after char or set");
ec = U_MALFORMED_SET;
return;
case INTERSECTION /*'&'*/:
case u'&':
if (lastItem == 2 && op == 0) {
op = (UChar) c;
continue;
@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// syntaxError(chars, "'&' not after set");
ec = U_MALFORMED_SET;
return;
case 0x5E /*'^'*/:
case u'^':
// syntaxError(chars, "'^' not after '['");
ec = U_MALFORMED_SET;
return;
case 0x7B /*'{'*/:
case u'{':
if (op != 0) {
// syntaxError(chars, "Missing operand after operator");
ec = U_MALFORMED_SET;
@ -549,13 +517,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
while (!chars.atEnd()) {
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x7D /*'}'*/ && !literal) {
if (c == u'}' && !literal) {
ok = TRUE;
break;
}
buf.append(c);
}
if (buf.length() < 1 || !ok) {
if (!ok) {
// syntaxError(chars, "Invalid multicharacter string");
ec = U_MALFORMED_SET;
return;
@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
// we don't need to drop through to the further
// processing
add(buf);
patLocal.append((UChar) 0x7B /*'{'*/);
patLocal.append(u'{');
_appendToPat(patLocal, buf, FALSE);
patLocal.append((UChar) 0x7D /*'}'*/);
patLocal.append(u'}');
continue;
case SymbolTable::SYMBOL_REF:
// symbols nosymbols
@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
chars.getPos(backup);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
UBool anchor = (c == 0x5D /*']'*/ && !literal);
UBool anchor = (c == u']' && !literal);
if (symbols == 0 && !anchor) {
c = SymbolTable::SYMBOL_REF;
chars.setPos(backup);
@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
add(U_ETHER);
usePat = TRUE;
patLocal.append((UChar) SymbolTable::SYMBOL_REF);
patLocal.append((UChar) 0x5D /*']'*/);
patLocal.append(u']');
mode = 2;
continue;
}
@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
lastChar = c;
break;
case 1:
if (op == HYPHEN /*'-'*/) {
if (op == u'-') {
if (lastChar >= c) {
// Don't allow redundant (a-a) or empty (b-a) ranges;
// these are most likely typos.
@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
RuleCharacterIterator::Pos pos;
chars.getPos(pos);
UChar32 c = chars.next(iterOpts, literal, ec);
if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
if (c == u'[' || c == u'\\') {
UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
literal, ec);
result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
(d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
result = (c == u'[') ? (d == u':') :
(d == u'N' || d == u'p' || d == u'P');
}
chars.setPos(pos);
return result && U_SUCCESS(ec);
@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
posix = TRUE;
pos += 2;
pos = ICU_Utility::skipWhitespace(pattern, pos);
if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
++pos;
invert = TRUE;
}
} else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
UChar c = pattern.charAt(pos+1);
invert = (c == UPPER_P);
isName = (c == UPPER_N);
invert = (c == u'P');
isName = (c == u'N');
pos += 2;
pos = ICU_Utility::skipWhitespace(pattern, pos);
if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
// Syntax error; "\p" or "\P" not followed by "{"
FAIL(ec);
}
@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// Look for the matching close delimiter, either :] or }
int32_t close;
if (posix) {
close = pattern.indexOf(POSIX_CLOSE, 2, pos);
close = pattern.indexOf(u":]", 2, pos);
} else {
close = pattern.indexOf(CLOSE_BRACE, pos);
close = pattern.indexOf(u'}', pos);
}
if (close < 0) {
// Syntax error; close delimiter missing
@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
// Look for an '=' sign. If this is present, we will parse a
// medium \p{gc=Cf} or long \p{GeneralCategory=Format}
// pattern.
int32_t equals = pattern.indexOf(EQUALS, pos);
int32_t equals = pattern.indexOf(u'=', pos);
UnicodeString propName, valueName;
if (equals >= 0 && equals < close && !isName) {
// Equals seen; parse medium/long pattern

View File

@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
if (length16==0) {
continue; // skip the empty string
}
UBool thisRelevant;
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
if(spanLength<length16) { // Relevant string.
@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
if(spanLength<length16) { // Relevant string.
if(spanLength<length16 && length16>0) { // Relevant string.
if(which&UTF16) {
if(which&CONTAINED) {
if(which&FWD) {
@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
addToSpanNotSet(c);
}
}
} else { // Irrelevant string.
} else { // Irrelevant string. (Also the empty string.)
if(which&UTF8) {
if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
uint8_t *s8=utf8+utf8Count;
@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
for(i=0; i<stringsLength; ++i) {
int32_t overlap=spanLengths[i];
if(overlap==ALL_CP_CONTAINED) {
continue; // Irrelevant string.
continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
U_ASSERT(length>0);
// Try to match this string at pos-overlap..pos.
if(overlap>=LONG_SPAN) {
@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
if (length16==0) {
continue; // skip the empty string
}
// Try to match this string at pos-overlap..pos.
if(overlap>=LONG_SPAN) {
@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
for(i=0; i<stringsLength; ++i) {
int32_t overlap=spanBackLengths[i];
if(overlap==ALL_CP_CONTAINED) {
continue; // Irrelevant string.
continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
U_ASSERT(length>0);
// Try to match this string at pos-(length16-overlap)..pos-length16.
if(overlap>=LONG_SPAN) {
@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
if (length16==0) {
continue; // skip the empty string
}
// Try to match this string at pos-(length16-overlap)..pos-length16.
if(overlap>=LONG_SPAN) {
@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
// Try to match the strings at pos.
for(i=0; i<stringsLength; ++i) {
if(spanLengths[i]==ALL_CP_CONTAINED) {
continue; // Irrelevant string.
continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
U_ASSERT(length>0);
if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
return pos; // There is a set element at pos.
}
@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
// it is easier and we only need to know whether the string is irrelevant
// which is the same in either array.
if(spanLengths[i]==ALL_CP_CONTAINED) {
continue; // Irrelevant string.
continue; // Irrelevant string. (Also the empty string.)
}
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
const UChar *s16=string.getBuffer();
int32_t length16=string.length();
U_ASSERT(length>0);
if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
return pos; // There is a set element at pos.
}

View File

@ -310,55 +310,12 @@ u_isgraphPOSIX(UChar32 c);
U_CFUNC UBool
u_isprintPOSIX(UChar32 c);
/** Turn a bit index into a bit flag. @internal */
#define FLAG(n) ((uint32_t)1<<(n))
/** Flags for general categories in the order of UCharCategory. @internal */
#define _Cn FLAG(U_GENERAL_OTHER_TYPES)
#define _Lu FLAG(U_UPPERCASE_LETTER)
#define _Ll FLAG(U_LOWERCASE_LETTER)
#define _Lt FLAG(U_TITLECASE_LETTER)
#define _Lm FLAG(U_MODIFIER_LETTER)
/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
#define _Mn FLAG(U_NON_SPACING_MARK)
#define _Me FLAG(U_ENCLOSING_MARK)
#define _Mc FLAG(U_COMBINING_SPACING_MARK)
#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER)
#define _Nl FLAG(U_LETTER_NUMBER)
#define _No FLAG(U_OTHER_NUMBER)
#define _Zs FLAG(U_SPACE_SEPARATOR)
#define _Zl FLAG(U_LINE_SEPARATOR)
#define _Zp FLAG(U_PARAGRAPH_SEPARATOR)
#define _Cc FLAG(U_CONTROL_CHAR)
#define _Cf FLAG(U_FORMAT_CHAR)
#define _Co FLAG(U_PRIVATE_USE_CHAR)
#define _Cs FLAG(U_SURROGATE)
#define _Pd FLAG(U_DASH_PUNCTUATION)
#define _Ps FLAG(U_START_PUNCTUATION)
/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
#define _Po FLAG(U_OTHER_PUNCTUATION)
#define _Sm FLAG(U_MATH_SYMBOL)
#define _Sc FLAG(U_CURRENCY_SYMBOL)
#define _Sk FLAG(U_MODIFIER_SYMBOL)
#define _So FLAG(U_OTHER_SYMBOL)
#define _Pi FLAG(U_INITIAL_PUNCTUATION)
/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
/** Some code points. @internal */
enum {
TAB =0x0009,
LF =0x000a,
FF =0x000c,
CR =0x000d,
U_A =0x0041,
U_F =0x0046,
U_Z =0x005a,
U_a =0x0061,
U_f =0x0066,
U_z =0x007a,
DEL =0x007f,
NL =0x0085,
NBSP =0x00a0,
CGJ =0x034f,
FIGURESP=0x2007,
@ -367,15 +324,6 @@ enum {
ZWJ =0x200d,
RLM =0x200f,
NNBSP =0x202f,
WJ =0x2060,
INHSWAP =0x206a,
NOMDIG =0x206f,
U_FW_A =0xff21,
U_FW_F =0xff26,
U_FW_Z =0xff3a,
U_FW_a =0xff41,
U_FW_f =0xff46,
U_FW_z =0xff5a,
ZWNBSP =0xfeff
};

View File

@ -91,6 +91,15 @@ static UBool chopLocale(char *name) {
return FALSE;
}
/**
* Called to check whether a name without '_' needs to be checked for a parent.
* Some code had assumed that locale IDs with '_' could not have a non-root parent.
* We may want a better way of doing this.
*/
static UBool mayHaveParent(char *name) {
return (name[0] != 0 && uprv_strstr("nb nn",name) != nullptr);
}
/**
* Internal function
*/
@ -529,8 +538,8 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
char name[], int32_t nameCapacity,
UBool usingUSRData, char usrDataPath[], UErrorCode *status) {
if (U_FAILURE(*status)) { return FALSE; }
UBool hasChopped = TRUE;
while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback &&
UBool checkParent = TRUE;
while (checkParent && t1->fParent == NULL && !t1->fData.noFallback &&
res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) {
Resource parentRes = res_getResource(&t1->fData, "%%Parent");
if (parentRes != RES_BOGUS) { // An explicit parent was found.
@ -573,7 +582,7 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
}
}
t1 = t2;
hasChopped = chopLocale(name);
checkParent = chopLocale(name) || mayHaveParent(name);
}
return TRUE;
}
@ -692,7 +701,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
}
}
}
if (hasChopped && !isRoot) {
if ((hasChopped || mayHaveParent(name)) && !isRoot) {
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
goto finish;
}
@ -716,7 +725,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
hasRealData = TRUE;
isDefault = TRUE;
// TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path?
if (hasChopped && !isRoot) {
if ((hasChopped || mayHaveParent(name)) && !isRoot) {
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
goto finish;
}
@ -1908,6 +1917,8 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
} else {
break;
}
} else if (res == RES_BOGUS) {
break;
}
} while(*myPath); /* Continue until the whole path is consumed */
}
@ -3019,7 +3030,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status)
U_CAPI UBool U_EXPORT2
ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){
if(res1==NULL || res2==NULL){
return res1==res2; /* pointer comparision */
return res1==res2; /* pointer comparison */
}
if(res1->fKey==NULL|| res2->fKey==NULL){
return (res1->fKey==res2->fKey);

View File

@ -960,14 +960,6 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch
if(URES_IS_TABLE(type)) {
*key = pathP;
t2 = res_getTableItemByKey(pResData, t1, &indexR, key);
if(t2 == RES_BOGUS) {
/* if we fail to get the resource by key, maybe we got an index */
indexR = uprv_strtol(pathP, &closeIndex, 10);
if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) {
/* if we indeed have an index, try to get the item by index */
t2 = res_getTableItemByIndex(pResData, t1, indexR, key);
} // else t2 is already RES_BOGUS
}
} else if(URES_IS_ARRAY(type)) {
indexR = uprv_strtol(pathP, &closeIndex, 10);
if(indexR >= 0 && *closeIndex == 0) {

View File

@ -270,11 +270,13 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
* function can perform fallback on the sub-resources of the table.
* @param resB a resource
* @param inKey a key associated with the requested resource
* @param len if not NULL, used to return the length of the string
* @param status: fills in the outgoing error code
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
* could be a non-failing error
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
* @return returns a pointer to a zero-terminated UChar array which lives in a
* memory mapped/DLL file.
*/
U_CAPI const UChar* U_EXPORT2
ures_getStringByKeyWithFallback(const UResourceBundle *resB,

View File

@ -116,6 +116,12 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen) {
((UnicodeSet*) set)->UnicodeSet::remove(s);
}
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) {
UnicodeString s(length==-1, str, length);
((UnicodeSet*) set)->UnicodeSet::removeAll(s);
}
U_CAPI void U_EXPORT2
uset_removeAll(USet* set, const USet* remove) {
((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
@ -126,6 +132,18 @@ uset_retain(USet* set, UChar32 start, UChar32 end) {
((UnicodeSet*) set)->UnicodeSet::retain(start, end);
}
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length) {
UnicodeString s(length==-1, str, length);
((UnicodeSet*) set)->UnicodeSet::retain(s);
}
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) {
UnicodeString s(length==-1, str, length);
((UnicodeSet*) set)->UnicodeSet::retainAll(s);
}
U_CAPI void U_EXPORT2
uset_retainAll(USet* set, const USet* retain) {
((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
@ -141,6 +159,23 @@ uset_complement(USet* set) {
((UnicodeSet*) set)->UnicodeSet::complement();
}
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end) {
((UnicodeSet*) set)->UnicodeSet::complement(start, end);
}
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length) {
UnicodeString s(length==-1, str, length);
((UnicodeSet*) set)->UnicodeSet::complement(s);
}
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) {
UnicodeString s(length==-1, str, length);
((UnicodeSet*) set)->UnicodeSet::complementAll(s);
}
U_CAPI void U_EXPORT2
uset_complementAll(USet* set, const USet* complement) {
((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);

View File

@ -575,7 +575,7 @@ usprep_map( const UStringPrepProfile* profile,
}
}else if(type==USPREP_DELETE){
// just consume the codepoint and contine
// just consume the codepoint and continue
continue;
}
//copy the code point into destination

View File

@ -364,7 +364,7 @@ _strFromWCS( UChar *dest,
}
/* we have found a null so convert the
* chunk from begining of non-null char to null
* chunk from beginning of non-null char to null
*/
retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
@ -387,7 +387,7 @@ _strFromWCS( UChar *dest,
* null terminate it and convert wchar_ts to chars
*/
if(nulLen >= _STACK_BUFFER_CAPACITY){
/* Should rarely occcur */
/* Should rarely occur */
/* allocate new buffer buffer */
pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
if(pWStack==NULL){

View File

@ -382,7 +382,7 @@ utext_previous32From(UText *ut, int64_t index) {
//
UChar32 cPrev; // The character preceding cCurr, which is what we will return.
// Address the chunk containg the position preceding the incoming index
// Address the chunk containing the position preceding the incoming index
// A tricky edge case:
// We try to test the requested native index against the chunkNativeStart to determine
// whether the character preceding the one at the index is in the current chunk.
@ -894,7 +894,7 @@ struct UTF8Buf {
// one for a supplementary starting in the last normal position,
// and one for an entry for the buffer limit position.
uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
// correspoding offset in filled part of buf.
// corresponding offset in filled part of buf.
int32_t align;
};
@ -1545,7 +1545,7 @@ utf8TextMapOffsetToNative(const UText *ut) {
}
//
// Map a native index to the corrsponding chunk offset
// Map a native index to the corresponding chunk offset
//
static int32_t U_CALLCONV
utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {

View File

@ -13,10 +13,10 @@
#ifndef ICU_UTIL_H
#define ICU_UTIL_H
#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "charstr.h"
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "unicode/utypes.h"
//--------------------------------------------------------------------
// class ICU_Utility
// i18n utility functions, scoped into the class ICU_Utility.

View File

@ -193,7 +193,7 @@ UPRV_BLOCK_MACRO_BEGIN { \
* Trace statement for each exit point of a function that has a UTRACE_ENTRY()
* statement, and that returns a value.
*
* @param val The function's return value, int32_t or comatible type.
* @param val The function's return value, int32_t or compatible type.
*
* @internal
*/

View File

@ -312,7 +312,7 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
} else {
for (i=startIndex; i<count; ++i) {
/* Pointers are not always the same size as ints so to perform
* a valid comparision we need to know whether we are being
* a valid comparison we need to know whether we are being
* provided an int or a pointer. */
if (hint & HINT_KEY_POINTER) {
if (key.pointer == elements[i].pointer) {
@ -518,7 +518,7 @@ sortiComparator(const void * /*context */, const void *left, const void *right)
}
/**
* Sort the vector, assuming it constains ints.
* Sort the vector, assuming it contains ints.
* (A more general sort would take a comparison function, but it's
* not clear whether UVector's UElementComparator or
* UComparator from uprv_sortAray would be more appropriate.)

View File

@ -57,7 +57,7 @@ LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS)
#LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS)
## Shared library options
LD_SOOPTIONS= -Wl,-Bsymbolic
LD_SOOPTIONS=
## Commands to make a shared library
SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -shared $(LD_SOOPTIONS) -Wl,--enable-auto-import -Wl,--out-implib=$(dir $@)lib$(notdir $(@:$(SO_TARGET_VERSION_MAJOR).$(SO)=))$(IMPORT_LIB_EXT)#M#
@ -101,11 +101,14 @@ LIBICU = $(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)
#SH#ICULIBS_COMMON_LIB_NAME="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}${ICULIBSUFFIX_VERSION}.${SO}"
#SH#ICULIBS_COMMON_LIB_NAME_A="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}.${A}"
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_LX="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(LAYOUTEX_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_CTESTFW="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(CTESTFW_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_TOOLUTIL="-l$(ICUPREFIX)tu$(ICULIBSUFFIX)"
#SH#
#SH## ICULIBS is the set of libraries your application should link
#SH## with usually. Many applications will want to add ${ICULIBS_I18N} as well.

View File

@ -57,7 +57,7 @@ LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS)
#LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS)
## Shared library options
LD_SOOPTIONS= -Wl,-Bsymbolic
LD_SOOPTIONS=
## Commands to make a shared library
SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -shared $(LD_SOOPTIONS) -Wl,--enable-auto-import -Wl,--out-implib=$(dir $@)lib$(notdir $(@:$(SO_TARGET_VERSION_MAJOR).$(SO)=))$(IMPORT_LIB_EXT)#M#
@ -101,11 +101,14 @@ LIBICU = $(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)
#SH#ICULIBS_COMMON_LIB_NAME="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}${ICULIBSUFFIX_VERSION}.${SO}"
#SH#ICULIBS_COMMON_LIB_NAME_A="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}.${A}"
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_LX="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(LAYOUTEX_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_CTESTFW="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(CTESTFW_STUBNAME)$(ICULIBSUFFIX)"
ICULIBS_TOOLUTIL="-l$(ICUPREFIX)tu$(ICULIBSUFFIX)"
#SH#
#SH## ICULIBS is the set of libraries your application should link
#SH## with usually. Many applications will want to add ${ICULIBS_I18N} as well.

View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for ICU 68.2.
# Generated by GNU Autoconf 2.69 for ICU 69.1.
#
# Report bugs to <http://icu-project.org/bugs>.
#
@ -582,8 +582,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ICU'
PACKAGE_TARNAME='International Components for Unicode'
PACKAGE_VERSION='68.2'
PACKAGE_STRING='ICU 68.2'
PACKAGE_VERSION='69.1'
PACKAGE_STRING='ICU 69.1'
PACKAGE_BUGREPORT='http://icu-project.org/bugs'
PACKAGE_URL='http://icu-project.org'
@ -1364,7 +1364,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ICU 68.2 to adapt to many kinds of systems.
\`configure' configures ICU 69.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1430,7 +1430,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ICU 68.2:";;
short | recursive ) echo "Configuration of ICU 69.1:";;
esac
cat <<\_ACEOF
@ -1455,7 +1455,7 @@ Optional Features:
--enable-weak-threads weakly reference the threading library default=no
--enable-extras build ICU extras default=yes
--enable-icuio build ICU's icuio library default=yes
--enable-layoutex build ICU's Paragraph Layout library default=yes.
--enable-layoutex build ICU's Paragraph Layout library default=no.
icu-le-hb must be installed via pkg-config. See http://harfbuzz.org
--enable-tools build ICU's tools default=yes
@ -1568,7 +1568,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ICU configure 68.2
ICU configure 69.1
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@ -2314,7 +2314,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ICU $as_me 68.2, which was
It was created by ICU $as_me 69.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@ -7708,7 +7708,7 @@ if test "${enable_layoutex+set}" = set; then :
*) as_fn_error $? "bad value ${enableval} for --enable-layoutex" "$LINENO" 5 ;;
esac
else
layoutex=$have_icu_le_hb
layoutex=false
fi
@ -8550,7 +8550,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ICU $as_me 68.2, which was
This file was extended by ICU $as_me 69.1, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -8604,7 +8604,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
ICU config.status 68.2
ICU config.status 69.1
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"

View File

@ -1101,14 +1101,14 @@ ICU_CONDITIONAL(ICUIO, test "$icuio" = true)
# Enable/disable layoutex
AC_ARG_ENABLE(layoutex,
[ --enable-layoutex build ICU's Paragraph Layout library [default=yes].
[ --enable-layoutex build ICU's Paragraph Layout library [default=no].
icu-le-hb must be installed via pkg-config. See http://harfbuzz.org],
[case "${enableval}" in
yes) layoutex=$have_icu_le_hb ;;
no) layoutex=false ;;
*) AC_MSG_ERROR(bad value ${enableval} for --enable-layoutex) ;;
esac],
layoutex=$have_icu_le_hb)
layoutex=false)
ICU_CONDITIONAL(LAYOUTEX, test "$layoutex" = true)
# Enable/disable layout
@ -1399,6 +1399,7 @@ AC_CONFIG_FILES([icudefs.mk \
test/perf/ubrkperf/Makefile \
test/perf/charperf/Makefile \
test/perf/convperf/Makefile \
test/perf/localecanperf/Makefile \
test/perf/normperf/Makefile \
test/perf/DateFmtPerf/Makefile \
test/perf/howExpensiveIs/Makefile \

View File

@ -3,5 +3,5 @@
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
{
"cldrVersion": "38.1"
"cldrVersion": "39"
}

View File

@ -2,7 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
root{
Version{"38.1"}
Version{"39"}
boundaries{
grapheme:process(dependency){"char.brk"}
line:process(dependency){"line.brk"}

View File

@ -30,18 +30,18 @@
<fail unless="is.cldr.dir.set" message="Please set the CLDR_DIR environment variable to the top level CLDR source dir (containing 'common')."/>
<available property="cldrtools.dir" value="${env.CLDR_DIR}/cldr-tools" file="${env.CLDR_DIR}/cldr-tools" type="dir"/>
<available property="cldrtools.dir" value="${env.CLDR_DIR}/tools/java" file="${env.CLDR_DIR}/tools/java" type="dir"/>
<available property="cldrtools.dir" value="${env.CLDR_DIR}/tools" file="${env.CLDR_DIR}/tools" type="dir"/>
<fail unless="cldrtools.dir" message="Please make sure that the CLDR tools directory is checked out into CLDR_DIR"/>
<available property="env.CLDR_CLASSES" value="${cldrtools.dir}/classes" file="${cldrtools.dir}/classes" type="dir"/>
<available property="cldrtools.jar" value="${cldrtools.dir}/cldr.jar" file="${cldrtools.dir}/cldr.jar" type="file"/>
<available property="env.CLDR_CLASSES" value="${cldrtools.dir}/cldr-code/target/classes" file="${cldrtools.dir}/cldr-code/target/classes" type="dir"/>
<available property="cldrtools.jar" value="${cldrtools.dir}/cldr-code/target/cldr-code.jar" file="${cldrtools.dir}/cldr-code/target/cldr-code.jar" type="file"/>
<condition property="is.cldr.classes.set">
<or>
<isset property="env.CLDR_CLASSES" />
<isset property="cldrtools.jar" />
</or>
</condition>
<fail unless="is.cldr.classes.set" message="CLDR classes not found in ${cldrtools.dir}. Please either set the CLDR_CLASSES environment variable or build cldr.jar."/>
<fail unless="is.cldr.classes.set" message="CLDR classes not found in ${cldrtools.dir}/cldr-code/target/classes. Please either set the CLDR_CLASSES environment variable or build cldr-code.jar."/>
<property name="env.CLDR_TMP_DIR" location="${env.CLDR_DIR}/../cldr-aux" /> <!-- Hack: see CLDRPaths -->
<property name="cldr.prod.dir" location="${env.CLDR_TMP_DIR}/production/" />

View File

@ -30,6 +30,9 @@
# plus the xml-apis.jar from the Apache xalan package
# (http://xml.apache.org/xalan-j/downloads.html).
#
# You will also need to have performed the CLDR Maven setup (non-Eclipse version)
# per http://cldr.unicode.org/development/maven
#
# Note: Enough things can (and will) fail in this process that it is best to
# run the commands separately from an interactive shell. They should all
# copy and paste without problems.
@ -195,7 +198,7 @@ ant copy-cldr-testdata
# 4d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt
# into icu4c/source/test/testdata/localeCanonicalization.txt
# and icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
# and add the following line to the begginning of these two files
# and add the following line to the beginning of these two files
# # File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
# 5. Check which data files have modifications, which have been added or removed

View File

@ -3,7 +3,7 @@
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
{
"cldrVersion": "38.1",
"cldrVersion": "39",
"aliases": {
"ars": "ar_SA",
"in": "id",
@ -11,8 +11,7 @@
"iw": "he",
"iw_IL": "he_IL",
"mo": "ro",
"no": "nb",
"no_NO": "nb_NO",
"no_NO": "no",
"pa_IN": "pa_Guru_IN",
"sh": "sr_Latn",
"sh_BA": "sr_Latn_BA",
@ -33,6 +32,8 @@
"zh_TW": "zh_Hant_TW"
},
"parents": {
"ff_Adlm": "root"
"ff_Adlm": "root",
"nb": "no",
"nn": "no"
}
}

View File

@ -5,7 +5,7 @@ af{
collations{
standard{
Sequence{"&N<<<ʼn"}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -5,7 +5,7 @@ am{
collations{
standard{
Sequence{"[reorder Ethi]"}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -9,7 +9,7 @@ ar{
"&ت<<ة<<<ﺔ<<<ﺓ"
"&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ"
}
Version{"38.1"}
Version{"39"}
}
standard{
Sequence{
@ -397,7 +397,7 @@ ar{
"&ۓ‎=ﮰ‎=ﮱ"
"&ۀ‎=ﮤ‎=ﮥ"
}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -11,7 +11,7 @@ as{
"&[before 1]ত<ৎ=ত্\u200D"
"&হ<ক্ষ"
}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -9,7 +9,7 @@ az{
"[import az-u-co-standard]"
"[reorder others]"
}
Version{"38.1"}
Version{"39"}
}
standard{
Sequence{
@ -26,7 +26,7 @@ az{
"&H<x<<<X"
"&Z<w<<<W"
}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -9,7 +9,7 @@ be{
"&Е<ё<<<Ё"
"&у<ў<<<Ў"
}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -5,7 +5,7 @@ bg{
collations{
standard{
Sequence{"[reorder Cyrl]"}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -9,7 +9,7 @@ bn{
"[reorder Beng Deva Guru Gujr Orya Taml Telu Knda Mlym Sinh]"
"&ঔ<ং<ঃ<ঁ"
}
Version{"38.1"}
Version{"39"}
}
traditional{
Sequence{
@ -629,7 +629,7 @@ bn{
"&যৌ<<<য়ৌ"
"&য্<<<য়্"
}
Version{"38.1"}
Version{"39"}
}
}
}

View File

@ -5,7 +5,7 @@ br{
collations{
standard{
Sequence{"&C<ch<<<Ch<<<CH<c''h=c\u2019h<<<C''h=C\u2019h<<<C''H=C\u2019H"}
Version{"38.1"}
Version{"39"}
}
}
}

Some files were not shown because too many files have changed in this diff Show More