mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 21:01:08 +00:00
Bug 1714933 - Part 2: Update in-tree ICU to release 69.1. r=tcampbell
Update to ICU 69.1 by running "update-icu.sh" with "maint/maint-69" as the target.
Differential Revision: https://phabricator.services.mozilla.com/D116968
This commit is contained in:
parent
ffd5b23c02
commit
e3d2b6377c
Binary file not shown.
@ -1,5 +1,5 @@
|
||||
commit 6fe67a037c07135b6b3edafca18326a824e338c3
|
||||
Author: Jeff Genovy <29107334+jefgen@users.noreply.github.com>
|
||||
Date: Wed Jan 27 12:48:40 2021 -0800
|
||||
commit 0e7b4428866f3133b4abba2d932ee3faa708db1d
|
||||
Author: Long Nguyen <nguyen.long.908132@gmail.com>
|
||||
Date: Thu Mar 25 22:26:56 2021 +0700
|
||||
|
||||
ICU-21473 Disable LayoutEx in GHA CI script for ICU 68, was disabled by default in ICU 69
|
||||
ICU-21560 mingw: Remove version numbers from link flags
|
||||
|
@ -1677,7 +1677,7 @@ COMPACT_LATEX = NO
|
||||
# The default value is: a4.
|
||||
# This tag requires that the tag GENERATE_LATEX is set to YES.
|
||||
|
||||
PAPER_TYPE = a4wide
|
||||
PAPER_TYPE = a4
|
||||
|
||||
# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
|
||||
# that should be included in the LaTeX output. The package can be specified just
|
||||
|
@ -474,31 +474,39 @@ BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
return write(i);
|
||||
} else {
|
||||
char intBytes[5];
|
||||
return write(intBytes, internalEncodeDelta(i, intBytes));
|
||||
}
|
||||
char intBytes[5];
|
||||
int32_t length;
|
||||
}
|
||||
|
||||
int32_t
|
||||
BytesTrieBuilder::internalEncodeDelta(int32_t i, char intBytes[]) {
|
||||
U_ASSERT(i>=0);
|
||||
if(i<=BytesTrie::kMaxOneByteDelta) {
|
||||
intBytes[0]=(char)i;
|
||||
return 1;
|
||||
}
|
||||
int32_t length=1;
|
||||
if(i<=BytesTrie::kMaxTwoByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
|
||||
length=1;
|
||||
} else {
|
||||
if(i<=BytesTrie::kMaxThreeByteDelta) {
|
||||
intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
|
||||
length=2;
|
||||
} else {
|
||||
if(i<=0xffffff) {
|
||||
intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
|
||||
length=3;
|
||||
} else {
|
||||
intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
|
||||
intBytes[1]=(char)(i>>24);
|
||||
length=4;
|
||||
length=2;
|
||||
}
|
||||
intBytes[1]=(char)(i>>16);
|
||||
intBytes[length++]=(char)(i>>16);
|
||||
}
|
||||
intBytes[1]=(char)(i>>8);
|
||||
intBytes[length++]=(char)(i>>8);
|
||||
}
|
||||
intBytes[length++]=(char)i;
|
||||
return write(intBytes, length);
|
||||
return length;
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
@ -14,6 +14,8 @@
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "charstr.h"
|
||||
@ -141,6 +143,38 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error
|
||||
return *this;
|
||||
}
|
||||
|
||||
CharString &CharString::appendNumber(int32_t number, UErrorCode &status) {
|
||||
if (number < 0) {
|
||||
this->append('-', status);
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
if (number == 0) {
|
||||
this->append('0', status);
|
||||
return *this;
|
||||
}
|
||||
|
||||
int32_t numLen = 0;
|
||||
while (number != 0) {
|
||||
int32_t residue = number % 10;
|
||||
number /= 10;
|
||||
this->append(std::abs(residue) + '0', status);
|
||||
numLen++;
|
||||
if (U_FAILURE(status)) {
|
||||
return *this;
|
||||
}
|
||||
}
|
||||
|
||||
int32_t start = this->length() - numLen, end = this->length() - 1;
|
||||
while(start < end) {
|
||||
std::swap(this->data()[start++], this->data()[end--]);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
char *CharString::getAppendBuffer(int32_t minCapacity,
|
||||
int32_t desiredCapacityHint,
|
||||
int32_t &resultCapacity,
|
||||
|
@ -127,6 +127,9 @@ public:
|
||||
return append(s.data(), s.length(), errorCode);
|
||||
}
|
||||
CharString &append(const char *s, int32_t sLength, UErrorCode &status);
|
||||
|
||||
CharString &appendNumber(int32_t number, UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
* resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS().
|
||||
|
@ -31,14 +31,63 @@
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "unicode/localpointer.h"
|
||||
#include "uassert.h"
|
||||
|
||||
#if U_DEBUG && defined(UPRV_MALLOC_COUNT)
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size)
|
||||
// uprv_memcpy and uprv_memmove
|
||||
#if defined(__clang__)
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("clang diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("clang diagnostic push") \
|
||||
_Pragma("clang diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("clang diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#elif defined(__GNUC__)
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
/* Suppress warnings about addresses that will never be NULL */ \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Waddress\"") \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
_Pragma("GCC diagnostic pop") \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#else
|
||||
#define uprv_memcpy(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#define uprv_memmove(dst, src, size) UPRV_BLOCK_MACRO_BEGIN { \
|
||||
U_ASSERT(dst != NULL); \
|
||||
U_ASSERT(src != NULL); \
|
||||
U_STANDARD_CPP_NAMESPACE memmove(dst, src, size); \
|
||||
} UPRV_BLOCK_MACRO_END
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UPRV_LENGTHOF
|
||||
|
@ -58,7 +58,7 @@
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
@ -70,7 +70,7 @@
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -125,7 +125,7 @@
|
||||
<Link>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<AdditionalDependencies>vccorlib.lib;msvcrt.lib;vcruntime.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuuc.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuuc.lib</ImportLibrary>
|
||||
</Link>
|
||||
@ -148,7 +148,7 @@
|
||||
<Link>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalDependencies>vccorlibd.lib;msvcrtd.lib;vcruntimed.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc68d.dll</OutputFile>
|
||||
<OutputFile>..\..\$(IcuBinOutputDir)\icuuc69d.dll</OutputFile>
|
||||
<ProgramDatabaseFile>.\..\..\$(IcuLibOutputDir)\icuucd.pdb</ProgramDatabaseFile>
|
||||
<ImportLibrary>..\..\$(IcuLibOutputDir)\icuucd.lib</ImportLibrary>
|
||||
</Link>
|
||||
|
@ -265,13 +265,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%THAI_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -503,13 +499,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%LAO_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -699,13 +691,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound%BURMESE_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
@ -908,13 +896,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
|
||||
goto foundBest;
|
||||
}
|
||||
do {
|
||||
int32_t wordsMatched = 1;
|
||||
if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
|
||||
if (wordsMatched < 2) {
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
|
||||
wordsMatched = 2;
|
||||
}
|
||||
// Followed by another dictionary word; mark first word as a good candidate
|
||||
words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
|
||||
|
||||
// If we're already at the end of the range, we're done
|
||||
if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
|
||||
|
@ -86,6 +86,7 @@ Edits &Edits::moveArray(Edits &src) U_NOEXCEPT {
|
||||
}
|
||||
|
||||
Edits &Edits::operator=(const Edits &other) {
|
||||
if (this == &other) { return *this; } // self-assignment: no-op
|
||||
length = other.length;
|
||||
delta = other.delta;
|
||||
numChanges = other.numChanges;
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "ubrkimpl.h" // U_ICUDATA_BRKITR
|
||||
#include "uvector.h"
|
||||
#include "cmemory.h"
|
||||
#include "umutex.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
@ -139,13 +140,30 @@ class SimpleFilteredSentenceBreakData : public UMemory {
|
||||
public:
|
||||
SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
|
||||
: fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
|
||||
SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
|
||||
SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
|
||||
virtual ~SimpleFilteredSentenceBreakData();
|
||||
SimpleFilteredSentenceBreakData *incr() {
|
||||
umtx_atomic_inc(&refcount);
|
||||
return this;
|
||||
}
|
||||
SimpleFilteredSentenceBreakData *decr() {
|
||||
if(umtx_atomic_dec(&refcount) <= 0) {
|
||||
delete this;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
virtual ~SimpleFilteredSentenceBreakData();
|
||||
|
||||
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
|
||||
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
|
||||
int32_t refcount;
|
||||
bool hasForwardsPartialTrie() const { return fForwardsPartialTrie.isValid(); }
|
||||
bool hasBackwardsTrie() const { return fBackwardsTrie.isValid(); }
|
||||
|
||||
const UCharsTrie &getForwardsPartialTrie() const { return *fForwardsPartialTrie; }
|
||||
const UCharsTrie &getBackwardsTrie() const { return *fBackwardsTrie; }
|
||||
|
||||
private:
|
||||
// These tries own their data arrays.
|
||||
// They are shared and must therefore not be modified.
|
||||
LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
|
||||
LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
|
||||
u_atomic_int32_t refcount;
|
||||
};
|
||||
|
||||
SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
|
||||
@ -244,7 +262,13 @@ SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
|
||||
fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
|
||||
fDelegate(adopt)
|
||||
{
|
||||
// all set..
|
||||
if (fData == nullptr) {
|
||||
delete forwards;
|
||||
delete backwards;
|
||||
if (U_SUCCESS(status)) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
|
||||
@ -261,59 +285,62 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
|
||||
int32_t bestValue = -1;
|
||||
// loops while 'n' points to an exception.
|
||||
utext_setNativeIndex(fText.getAlias(), n); // from n..
|
||||
fData->fBackwardsTrie->reset();
|
||||
UChar32 uch;
|
||||
|
||||
//if(debug2) u_printf(" n@ %d\n", n);
|
||||
// Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
|
||||
if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
|
||||
if(utext_previous32(fText.getAlias())==u' ') { // TODO: skip a class of chars here??
|
||||
// TODO only do this the 1st time?
|
||||
//if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
|
||||
} else {
|
||||
//if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
|
||||
uch = utext_next32(fText.getAlias());
|
||||
utext_next32(fText.getAlias());
|
||||
//if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
|
||||
}
|
||||
|
||||
UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
|
||||
|
||||
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
|
||||
USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
|
||||
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
bestValue = fData->fBackwardsTrie->getValue();
|
||||
}
|
||||
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
|
||||
{
|
||||
// Do not modify the shared trie!
|
||||
UCharsTrie iter(fData->getBackwardsTrie());
|
||||
UChar32 uch;
|
||||
while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL) { // more to consume backwards
|
||||
UStringTrieResult r = iter.nextForCodePoint(uch);
|
||||
if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
bestValue = iter.getValue();
|
||||
}
|
||||
if(!USTRINGTRIE_HAS_NEXT(r)) {
|
||||
break;
|
||||
}
|
||||
//if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
|
||||
}
|
||||
}
|
||||
|
||||
if(USTRINGTRIE_MATCHES(r)) { // exact match?
|
||||
//if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
bestValue = fData->fBackwardsTrie->getValue();
|
||||
bestPosn = utext_getNativeIndex(fText.getAlias());
|
||||
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
}
|
||||
//if(bestValue >= 0) {
|
||||
//if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
//}
|
||||
|
||||
if(bestPosn>=0) {
|
||||
//if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
|
||||
|
||||
//if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
|
||||
//int32_t bestValue = fBackwardsTrie->getValue();
|
||||
//int32_t bestValue = iter.getValue();
|
||||
////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
|
||||
|
||||
if(bestValue == kMATCH) { // exact match!
|
||||
//if(debug2) u_printf(" exact backward match\n");
|
||||
return kExceptionHere; // See if the next is another exception.
|
||||
} else if(bestValue == kPARTIAL
|
||||
&& fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
|
||||
&& fData->hasForwardsPartialTrie()) { // make sure there's a forward trie
|
||||
//if(debug2) u_printf(" partial backward match\n");
|
||||
// We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
|
||||
// to see if it matches something going forward.
|
||||
fData->fForwardsPartialTrie->reset();
|
||||
UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
|
||||
utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
|
||||
//if(debug2) u_printf("Retrying at %d\n", bestPosn);
|
||||
// Do not modify the shared trie!
|
||||
UCharsTrie iter(fData->getForwardsPartialTrie());
|
||||
UChar32 uch;
|
||||
while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
|
||||
USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) {
|
||||
USTRINGTRIE_HAS_NEXT(rfwd=iter.nextForCodePoint(uch))) {
|
||||
//if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
|
||||
}
|
||||
if(USTRINGTRIE_MATCHES(rfwd)) {
|
||||
@ -339,7 +366,7 @@ SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
|
||||
int32_t
|
||||
SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
|
||||
if(n == UBRK_DONE || // at end or
|
||||
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
|
||||
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// OK, do we need to break here?
|
||||
@ -369,7 +396,7 @@ SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
|
||||
int32_t
|
||||
SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
|
||||
if(n == 0 || n == UBRK_DONE || // at end or
|
||||
fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
|
||||
!fData->hasBackwardsTrie()) { // .. no backwards table loaded == no exceptions
|
||||
return n;
|
||||
}
|
||||
// OK, do we need to break here?
|
||||
@ -420,7 +447,7 @@ SimpleFilteredSentenceBreakIterator::previous(void) {
|
||||
UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
|
||||
if (!fDelegate->isBoundary(offset)) return false; // no break to suppress
|
||||
|
||||
if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions
|
||||
if (!fData->hasBackwardsTrie()) return true; // no data = no suppressions
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
resetState(status);
|
||||
|
@ -85,16 +85,22 @@ public:
|
||||
|
||||
inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
inline int32_t putiAllowZero(const UnicodeString& key, int32_t value, UErrorCode& status);
|
||||
|
||||
inline void* get(const UnicodeString& key) const;
|
||||
|
||||
inline int32_t geti(const UnicodeString& key) const;
|
||||
|
||||
inline int32_t getiAndFound(const UnicodeString& key, UBool &found) const;
|
||||
|
||||
inline void* remove(const UnicodeString& key);
|
||||
|
||||
inline int32_t removei(const UnicodeString& key);
|
||||
|
||||
inline void removeAll(void);
|
||||
|
||||
inline UBool containsKey(const UnicodeString& key) const;
|
||||
|
||||
inline const UHashElement* find(const UnicodeString& key) const;
|
||||
|
||||
/**
|
||||
@ -203,6 +209,11 @@ inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCo
|
||||
return uhash_puti(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
inline int32_t Hashtable::putiAllowZero(const UnicodeString& key, int32_t value,
|
||||
UErrorCode& status) {
|
||||
return uhash_putiAllowZero(hash, new UnicodeString(key), value, &status);
|
||||
}
|
||||
|
||||
inline void* Hashtable::get(const UnicodeString& key) const {
|
||||
return uhash_get(hash, &key);
|
||||
}
|
||||
@ -211,6 +222,10 @@ inline int32_t Hashtable::geti(const UnicodeString& key) const {
|
||||
return uhash_geti(hash, &key);
|
||||
}
|
||||
|
||||
inline int32_t Hashtable::getiAndFound(const UnicodeString& key, UBool &found) const {
|
||||
return uhash_getiAndFound(hash, &key, &found);
|
||||
}
|
||||
|
||||
inline void* Hashtable::remove(const UnicodeString& key) {
|
||||
return uhash_remove(hash, &key);
|
||||
}
|
||||
@ -219,6 +234,10 @@ inline int32_t Hashtable::removei(const UnicodeString& key) {
|
||||
return uhash_removei(hash, &key);
|
||||
}
|
||||
|
||||
inline UBool Hashtable::containsKey(const UnicodeString& key) const {
|
||||
return uhash_containsKey(hash, &key);
|
||||
}
|
||||
|
||||
inline const UHashElement* Hashtable::find(const UnicodeString& key) const {
|
||||
return uhash_find(hash, &key);
|
||||
}
|
||||
|
@ -345,9 +345,8 @@ UBool compareLSRs(const UHashTok t1, const UHashTok t2) {
|
||||
int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) { return suppLength; }
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &lsr);
|
||||
if (index == 0) {
|
||||
uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode);
|
||||
if (!uhash_containsKey(supportedLsrToIndex, &lsr)) {
|
||||
uhash_putiAllowZero(supportedLsrToIndex, const_cast<LSR *>(&lsr), i, &errorCode);
|
||||
if (U_SUCCESS(errorCode)) {
|
||||
supportedLSRs[suppLength] = &lsr;
|
||||
supportedIndexes[suppLength++] = i;
|
||||
@ -685,12 +684,11 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
|
||||
int32_t bestSupportedLsrIndex = -1;
|
||||
for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
|
||||
// Quick check for exact maximized LSR.
|
||||
// Returns suppIndex+1 where 0 means not found.
|
||||
if (supportedLsrToIndex != nullptr) {
|
||||
desiredLSR.setHashCode();
|
||||
int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR);
|
||||
if (index != 0) {
|
||||
int32_t suppIndex = index - 1;
|
||||
UBool found = false;
|
||||
int32_t suppIndex = uhash_getiAndFound(supportedLsrToIndex, &desiredLSR, &found);
|
||||
if (found) {
|
||||
if (remainingIter != nullptr) {
|
||||
remainingIter->rememberCurrent(desiredIndex, errorCode);
|
||||
}
|
||||
|
@ -187,17 +187,18 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
}
|
||||
LocalPointer<Locale> clone;
|
||||
int32_t index = uhash_geti(map, &locale);
|
||||
if (index != 0) {
|
||||
UBool found = false;
|
||||
int32_t index = uhash_getiAndFound(map, &locale, &found);
|
||||
if (found) {
|
||||
// Duplicate: Remove the old item and append it anew.
|
||||
LocaleAndWeight &lw = list->array[index - 1];
|
||||
LocaleAndWeight &lw = list->array[index];
|
||||
clone.adoptInstead(lw.locale);
|
||||
lw.locale = nullptr;
|
||||
lw.weight = 0;
|
||||
++numRemoved;
|
||||
}
|
||||
if (weight <= 0) { // do not add q=0
|
||||
if (index != 0) {
|
||||
if (found) {
|
||||
// Not strictly necessary but cleaner.
|
||||
uhash_removei(map, &locale);
|
||||
}
|
||||
@ -217,7 +218,7 @@ bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &e
|
||||
return false;
|
||||
}
|
||||
}
|
||||
uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode);
|
||||
uhash_putiAllowZero(map, clone.getAlias(), listLength, &errorCode);
|
||||
if (U_FAILURE(errorCode)) { return false; }
|
||||
LocaleAndWeight &lw = list->array[listLength];
|
||||
lw.locale = clone.orphan();
|
||||
|
@ -698,7 +698,7 @@ uloc_getDisplayName(const char *locale,
|
||||
} /* end switch */
|
||||
|
||||
if (len>0) {
|
||||
/* we addeed a component, so add separator and write it if there's room. */
|
||||
/* we added a component, so add separator and write it if there's room. */
|
||||
if(len+sepLen<=cap) {
|
||||
const UChar * plimit = p + len;
|
||||
for (; p < plimit; p++) {
|
||||
|
@ -254,7 +254,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
|
||||
|
||||
Locale::~Locale()
|
||||
{
|
||||
if (baseName != fullName) {
|
||||
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
@ -466,17 +466,21 @@ Locale& Locale::operator=(const Locale& other) {
|
||||
}
|
||||
|
||||
Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
|
||||
if (baseName != fullName) uprv_free(baseName);
|
||||
if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
|
||||
if (fullName != fullNameBuffer) uprv_free(fullName);
|
||||
|
||||
if (other.fullName == other.fullNameBuffer) {
|
||||
if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
|
||||
uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
|
||||
}
|
||||
if (other.fullName == other.fullNameBuffer) {
|
||||
fullName = fullNameBuffer;
|
||||
} else {
|
||||
fullName = other.fullName;
|
||||
}
|
||||
|
||||
if (other.baseName == other.fullName) {
|
||||
if (other.baseName == other.fullNameBuffer) {
|
||||
baseName = fullNameBuffer;
|
||||
} else if (other.baseName == other.fullName) {
|
||||
baseName = fullName;
|
||||
} else {
|
||||
baseName = other.baseName;
|
||||
@ -524,7 +528,7 @@ static const char* const KNOWN_CANONICALIZED[] = {
|
||||
"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
|
||||
"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
|
||||
"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
|
||||
"nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
|
||||
"nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
|
||||
"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
|
||||
"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
|
||||
"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
|
||||
@ -627,6 +631,17 @@ private:
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length, UErrorCode &status);
|
||||
|
||||
// Read the subdivisionAlias data from alias to
|
||||
// strings+types+replacementIndexes
|
||||
// Allocate length items for types, to store the type field.
|
||||
// Allocate length items for replacementIndexes,
|
||||
// to store the index in the strings for the replacement value.
|
||||
void readSubdivisionAlias(UResourceBundle* alias,
|
||||
UniqueCharStrings* strings,
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length, UErrorCode &status);
|
||||
};
|
||||
|
||||
/**
|
||||
@ -647,6 +662,7 @@ public:
|
||||
const CharStringMap& scriptMap() const { return script; }
|
||||
const CharStringMap& territoryMap() const { return territory; }
|
||||
const CharStringMap& variantMap() const { return variant; }
|
||||
const CharStringMap& subdivisionMap() const { return subdivision; }
|
||||
|
||||
static void U_CALLCONV loadData(UErrorCode &status);
|
||||
static UBool U_CALLCONV cleanup();
|
||||
@ -658,11 +674,13 @@ private:
|
||||
CharStringMap scriptMap,
|
||||
CharStringMap territoryMap,
|
||||
CharStringMap variantMap,
|
||||
CharStringMap subdivisionMap,
|
||||
CharString* strings)
|
||||
: language(std::move(languageMap)),
|
||||
script(std::move(scriptMap)),
|
||||
territory(std::move(territoryMap)),
|
||||
variant(std::move(variantMap)),
|
||||
subdivision(std::move(subdivisionMap)),
|
||||
strings(strings) {
|
||||
}
|
||||
|
||||
@ -676,6 +694,7 @@ private:
|
||||
CharStringMap script;
|
||||
CharStringMap territory;
|
||||
CharStringMap variant;
|
||||
CharStringMap subdivision;
|
||||
CharString* strings;
|
||||
|
||||
friend class AliasDataBuilder;
|
||||
@ -866,6 +885,34 @@ AliasDataBuilder::readVariantAlias(
|
||||
status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
|
||||
* Allocate length items for types, to store the type field. Allocate length
|
||||
* items for replacementIndexes, to store the index in the strings for the
|
||||
* replacement regions.
|
||||
*/
|
||||
void
|
||||
AliasDataBuilder::readSubdivisionAlias(
|
||||
UResourceBundle* alias,
|
||||
UniqueCharStrings* strings,
|
||||
LocalMemory<const char*>& types,
|
||||
LocalMemory<int32_t>& replacementIndexes,
|
||||
int32_t &length,
|
||||
UErrorCode &status)
|
||||
{
|
||||
return readAlias(
|
||||
alias, strings, types, replacementIndexes, length,
|
||||
#if U_DEBUG
|
||||
[](const char* type) {
|
||||
U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
|
||||
},
|
||||
#else
|
||||
[](const char*) {},
|
||||
#endif
|
||||
[](const UnicodeString&) { },
|
||||
status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the alias data from the ICU resource bundles. The alias data
|
||||
* contains alias of language, country, script and variants.
|
||||
@ -905,12 +952,14 @@ AliasDataBuilder::build(UErrorCode &status) {
|
||||
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
|
||||
LocalUResourceBundlePointer variantAlias(
|
||||
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
|
||||
LocalUResourceBundlePointer subdivisionAlias(
|
||||
ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
|
||||
variantLength = 0;
|
||||
variantLength = 0, subdivisionLength = 0;
|
||||
|
||||
// Read the languageAlias into languageTypes, languageReplacementIndexes
|
||||
// and strings
|
||||
@ -955,6 +1004,16 @@ AliasDataBuilder::build(UErrorCode &status) {
|
||||
variantReplacementIndexes,
|
||||
variantLength, status);
|
||||
|
||||
// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
|
||||
// and strings
|
||||
LocalMemory<const char*> subdivisionTypes;
|
||||
LocalMemory<int32_t> subdivisionReplacementIndexes;
|
||||
readSubdivisionAlias(subdivisionAlias.getAlias(),
|
||||
&strings,
|
||||
subdivisionTypes,
|
||||
subdivisionReplacementIndexes,
|
||||
subdivisionLength, status);
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -994,6 +1053,14 @@ AliasDataBuilder::build(UErrorCode &status) {
|
||||
status);
|
||||
}
|
||||
|
||||
// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
|
||||
CharStringMap subdivisionMap(2, status);
|
||||
for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
|
||||
subdivisionMap.put(subdivisionTypes[i],
|
||||
strings.get(subdivisionReplacementIndexes[i]),
|
||||
status);
|
||||
}
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
return nullptr;
|
||||
}
|
||||
@ -1004,6 +1071,7 @@ AliasDataBuilder::build(UErrorCode &status) {
|
||||
std::move(scriptMap),
|
||||
std::move(territoryMap),
|
||||
std::move(variantMap),
|
||||
std::move(subdivisionMap),
|
||||
strings.orphanCharStrings());
|
||||
|
||||
if (data == nullptr) {
|
||||
@ -1105,6 +1173,14 @@ private:
|
||||
|
||||
// Replace by using variantAlias.
|
||||
bool replaceVariant(UErrorCode& status);
|
||||
|
||||
// Replace by using subdivisionAlias.
|
||||
bool replaceSubdivision(StringPiece subdivision,
|
||||
CharString& output, UErrorCode& status);
|
||||
|
||||
// Replace transformed extensions.
|
||||
bool replaceTransformedExtensions(
|
||||
CharString& transformedExtensions, CharString& output, UErrorCode& status);
|
||||
};
|
||||
|
||||
CharString&
|
||||
@ -1294,7 +1370,6 @@ AliasReplacer::replaceLanguage(
|
||||
}
|
||||
}
|
||||
if (replacedExtensions != nullptr) {
|
||||
// TODO(ICU-21292)
|
||||
// DO NOTHING
|
||||
// UTS35 does not specify what we should do if we have extensions in the
|
||||
// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
|
||||
@ -1435,6 +1510,110 @@ AliasReplacer::replaceVariant(UErrorCode& status)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AliasReplacer::replaceSubdivision(
|
||||
StringPiece subdivision, CharString& output, UErrorCode& status)
|
||||
{
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
const char *replacement = data->subdivisionMap().get(subdivision.data());
|
||||
if (replacement != nullptr) {
|
||||
const char* firstSpace = uprv_strchr(replacement, ' ');
|
||||
// Found replacement data for this subdivision.
|
||||
size_t len = (firstSpace != nullptr) ?
|
||||
(firstSpace - replacement) : uprv_strlen(replacement);
|
||||
if (2 <= len && len <= 8) {
|
||||
output.append(replacement, (int32_t)len, status);
|
||||
if (2 == len) {
|
||||
// Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
|
||||
output.append("zzzz", 4, status);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AliasReplacer::replaceTransformedExtensions(
|
||||
CharString& transformedExtensions, CharString& output, UErrorCode& status)
|
||||
{
|
||||
// The content of the transformedExtensions will be modified in this
|
||||
// function to NULL-terminating (tkey-tvalue) pairs.
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
int32_t len = transformedExtensions.length();
|
||||
const char* str = transformedExtensions.data();
|
||||
const char* tkey = ultag_getTKeyStart(str);
|
||||
int32_t tlangLen = (tkey == str) ? 0 :
|
||||
((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
|
||||
CharStringByteSink sink(&output);
|
||||
if (tlangLen > 0) {
|
||||
Locale tlang = LocaleBuilder()
|
||||
.setLanguageTag(StringPiece(str, tlangLen))
|
||||
.build(status);
|
||||
tlang.canonicalize(status);
|
||||
tlang.toLanguageTag(sink, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
T_CString_toLowerCase(output.data());
|
||||
}
|
||||
if (tkey != nullptr) {
|
||||
// We need to sort the tfields by tkey
|
||||
UVector tfields(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
do {
|
||||
const char* tvalue = uprv_strchr(tkey, '-');
|
||||
if (tvalue == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return false;
|
||||
}
|
||||
const char* nextTKey = ultag_getTKeyStart(tvalue);
|
||||
if (nextTKey != nullptr) {
|
||||
*((char*)(nextTKey-1)) = '\0'; // NULL terminate tvalue
|
||||
}
|
||||
tfields.insertElementAt((void*)tkey, tfields.size(), status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
tkey = nextTKey;
|
||||
} while (tkey != nullptr);
|
||||
tfields.sort([](UElement e1, UElement e2) -> int8_t {
|
||||
// uprv_strcmp return int and in some platform, such as arm64-v8a,
|
||||
// it may return positive values > 127 which cause the casted value
|
||||
// of int8_t negative.
|
||||
int res = uprv_strcmp(
|
||||
(const char*)e1.pointer, (const char*)e2.pointer);
|
||||
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
|
||||
}, status);
|
||||
for (int32_t i = 0; i < tfields.size(); i++) {
|
||||
if (output.length() > 0) {
|
||||
output.append('-', status);
|
||||
}
|
||||
const char* tfield = (const char*) tfields.elementAt(i);
|
||||
const char* tvalue = uprv_strchr(tfield, '-');
|
||||
if (tvalue == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return false;
|
||||
}
|
||||
// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
|
||||
*((char*)tvalue++) = '\0'; // NULL terminate tkey
|
||||
output.append(tfield, status).append('-', status);
|
||||
const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
|
||||
output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
CharString&
|
||||
AliasReplacer::outputToString(
|
||||
CharString& out, UErrorCode status)
|
||||
@ -1453,8 +1632,12 @@ AliasReplacer::outputToString(
|
||||
out.append(SEP_CHAR, status);
|
||||
}
|
||||
variants.sort([](UElement e1, UElement e2) -> int8_t {
|
||||
return uprv_strcmp(
|
||||
// uprv_strcmp return int and in some platform, such as arm64-v8a,
|
||||
// it may return positive values > 127 which cause the casted value
|
||||
// of int8_t negative.
|
||||
int res = uprv_strcmp(
|
||||
(const char*)e1.pointer, (const char*)e2.pointer);
|
||||
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
|
||||
}, status);
|
||||
int32_t variantsStart = out.length();
|
||||
for (int32_t i = 0; i < variants.size(); i++) {
|
||||
@ -1497,7 +1680,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
||||
region = nullptr;
|
||||
}
|
||||
const char* variantsStr = locale.getVariant();
|
||||
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
|
||||
CharString variantsBuff(variantsStr, -1, status);
|
||||
if (!variantsBuff.isEmpty()) {
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
@ -1516,8 +1698,12 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
||||
|
||||
// Sort the variants
|
||||
variants.sort([](UElement e1, UElement e2) -> int8_t {
|
||||
return uprv_strcmp(
|
||||
// uprv_strcmp return int and in some platform, such as arm64-v8a,
|
||||
// it may return positive values > 127 which cause the casted value
|
||||
// of int8_t negative.
|
||||
int res = uprv_strcmp(
|
||||
(const char*)e1.pointer, (const char*)e2.pointer);
|
||||
return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
|
||||
}, status);
|
||||
|
||||
// A changed count to assert when loop too many times.
|
||||
@ -1561,11 +1747,52 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
||||
if (U_FAILURE(status)) { return false; }
|
||||
// Nothing changed and we know the order of the variants is not changed
|
||||
// because we have no variant or only one.
|
||||
if (changed == 0 && variants.size() <= 1) {
|
||||
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
|
||||
if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
outputToString(out, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
if (extensionsStr != nullptr) {
|
||||
changed = 0;
|
||||
Locale temp(locale);
|
||||
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
|
||||
if (U_SUCCESS(status) && !iter.isNull()) {
|
||||
const char* key;
|
||||
while ((key = iter->next(nullptr, status)) != nullptr) {
|
||||
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
|
||||
uprv_strcmp("t", key) == 0) {
|
||||
CharString value;
|
||||
CharStringByteSink valueSink(&value);
|
||||
locale.getKeywordValue(key, valueSink, status);
|
||||
if (U_FAILURE(status)) {
|
||||
status = U_ZERO_ERROR;
|
||||
continue;
|
||||
}
|
||||
CharString replacement;
|
||||
if (uprv_strlen(key) == 2) {
|
||||
if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
|
||||
changed++;
|
||||
temp.setKeywordValue(key, replacement.data(), status);
|
||||
}
|
||||
} else {
|
||||
U_ASSERT(uprv_strcmp(key, "t") == 0);
|
||||
if (replaceTransformedExtensions(value, replacement, status)) {
|
||||
changed++;
|
||||
temp.setKeywordValue(key, replacement.data(), status);
|
||||
}
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (changed != 0) {
|
||||
extensionsStr = locale_getKeywordsStart(temp.getName());
|
||||
}
|
||||
out.append(extensionsStr, status);
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
@ -1573,8 +1800,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
|
||||
}
|
||||
// If the tag is not changed, return.
|
||||
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
|
||||
U_ASSERT(changed == 0);
|
||||
U_ASSERT(variants.size() > 1);
|
||||
out.clear();
|
||||
return false;
|
||||
}
|
||||
@ -1636,7 +1861,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
|
||||
{
|
||||
fIsBogus = FALSE;
|
||||
/* Free our current storage */
|
||||
if (baseName != fullName) {
|
||||
if ((baseName != fullName) && (baseName != fullNameBuffer)) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
@ -1672,6 +1897,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
|
||||
uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
|
||||
|
||||
if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
|
||||
U_ASSERT(baseName == nullptr);
|
||||
/*Go to heap for the fullName if necessary*/
|
||||
fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
|
||||
if(fullName == 0) {
|
||||
@ -1825,7 +2051,7 @@ Locale::hashCode() const
|
||||
void
|
||||
Locale::setToBogus() {
|
||||
/* Free our current storage */
|
||||
if(baseName != fullName) {
|
||||
if((baseName != fullName) && (baseName != fullNameBuffer)) {
|
||||
uprv_free(baseName);
|
||||
}
|
||||
baseName = NULL;
|
||||
@ -2312,16 +2538,16 @@ public:
|
||||
|
||||
virtual const char* next(int32_t* resultLength, UErrorCode& status) {
|
||||
const char* legacy_key = KeywordEnumeration::next(nullptr, status);
|
||||
if (U_SUCCESS(status) && legacy_key != nullptr) {
|
||||
while (U_SUCCESS(status) && legacy_key != nullptr) {
|
||||
const char* key = uloc_toUnicodeLocaleKey(legacy_key);
|
||||
if (key == nullptr) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
} else {
|
||||
if (key != nullptr) {
|
||||
if (resultLength != nullptr) {
|
||||
*resultLength = static_cast<int32_t>(uprv_strlen(key));
|
||||
}
|
||||
return key;
|
||||
}
|
||||
// Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
|
||||
legacy_key = KeywordEnumeration::next(nullptr, status);
|
||||
}
|
||||
if (resultLength != nullptr) *resultLength = 0;
|
||||
return nullptr;
|
||||
@ -2478,6 +2704,9 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
|
||||
if (fullName != fullNameBuffer) {
|
||||
// if full Name is already on the heap, need to free it.
|
||||
uprv_free(fullName);
|
||||
if (baseName == fullName) {
|
||||
baseName = newFullName; // baseName should not point to freed memory.
|
||||
}
|
||||
}
|
||||
fullName = newFullName;
|
||||
status = U_ZERO_ERROR;
|
||||
|
@ -320,7 +320,8 @@ XLikelySubtags::~XLikelySubtags() {
|
||||
LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
|
||||
const char *name = locale.getName();
|
||||
if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=")
|
||||
// Private use language tag x-subtag-subtag...
|
||||
// Private use language tag x-subtag-subtag... which CLDR changes to
|
||||
// und-x-subtag-subtag...
|
||||
return LSR(name, "", "", LSR::EXPLICIT_LSR);
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
virtual UnicodeString &
|
||||
normalize(const UnicodeString &src,
|
||||
UnicodeString &dest,
|
||||
UErrorCode &errorCode) const {
|
||||
UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
dest.setToBogus();
|
||||
return dest;
|
||||
@ -64,13 +64,13 @@ public:
|
||||
virtual UnicodeString &
|
||||
normalizeSecondAndAppend(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return normalizeSecondAndAppend(first, second, true, errorCode);
|
||||
}
|
||||
virtual UnicodeString &
|
||||
append(UnicodeString &first,
|
||||
const UnicodeString &second,
|
||||
UErrorCode &errorCode) const {
|
||||
UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return normalizeSecondAndAppend(first, second, false, errorCode);
|
||||
}
|
||||
UnicodeString &
|
||||
@ -107,7 +107,7 @@ public:
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
|
||||
virtual UBool
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
|
||||
UChar buffer[4];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getDecomposition(c, buffer, length);
|
||||
@ -122,7 +122,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
virtual UBool
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
|
||||
getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
|
||||
UChar buffer[30];
|
||||
int32_t length;
|
||||
const UChar *d=impl.getRawDecomposition(c, buffer, length);
|
||||
@ -137,18 +137,18 @@ public:
|
||||
return true;
|
||||
}
|
||||
virtual UChar32
|
||||
composePair(UChar32 a, UChar32 b) const {
|
||||
composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
|
||||
return impl.composePair(a, b);
|
||||
}
|
||||
|
||||
virtual uint8_t
|
||||
getCombiningClass(UChar32 c) const {
|
||||
getCombiningClass(UChar32 c) const U_OVERRIDE {
|
||||
return impl.getCC(impl.getNorm16(c));
|
||||
}
|
||||
|
||||
// quick checks
|
||||
virtual UBool
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return false;
|
||||
}
|
||||
@ -161,11 +161,11 @@ public:
|
||||
return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
|
||||
}
|
||||
virtual UNormalizationCheckResult
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual int32_t
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
|
||||
spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if(U_FAILURE(errorCode)) {
|
||||
return 0;
|
||||
}
|
||||
@ -194,27 +194,57 @@ public:
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.decompose(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
|
||||
void
|
||||
normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
|
||||
Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
|
||||
edits->reset();
|
||||
}
|
||||
const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
|
||||
impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
|
||||
sink.Flush();
|
||||
}
|
||||
// Reports whether the UTF-8 text in sp is already normalized: runs
// impl.decomposeUTF8() without a sink (quick-check mode) and checks that it
// got all the way to the end of the input. Returns false if errorCode
// already indicates a failure.
virtual UBool
isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
    if (U_FAILURE(errorCode)) {
        return false;
    }
    const uint8_t *const first = reinterpret_cast<const uint8_t *>(sp.data());
    const uint8_t *const last = first + sp.length();
    const uint8_t *const stoppedAt =
        impl.decomposeUTF8(0, first, last, nullptr, nullptr, errorCode);
    return stoppedAt == last;
}
|
||||
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return impl.decompose(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
|
||||
virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
|
||||
return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
|
||||
}
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasDecompBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasDecompBoundaryAfter(c);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
|
||||
return impl.isDecompInert(c);
|
||||
}
|
||||
};
|
||||
|
||||
class ComposeNormalizer2 : public Normalizer2WithImpl {
|
||||
@ -321,24 +351,30 @@ public:
|
||||
private:
|
||||
virtual void
|
||||
normalize(const UChar *src, const UChar *limit,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.makeFCD(src, limit, &buffer, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
|
||||
virtual void
|
||||
normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
|
||||
}
|
||||
virtual const UChar *
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
|
||||
spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
|
||||
return impl.makeFCD(src, limit, NULL, errorCode);
|
||||
}
|
||||
using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
|
||||
virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
|
||||
virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasFCDBoundaryBefore(c);
|
||||
}
|
||||
virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
|
||||
return impl.hasFCDBoundaryAfter(c);
|
||||
}
|
||||
virtual UBool isInert(UChar32 c) const U_OVERRIDE {
|
||||
return impl.isFCDInert(c);
|
||||
}
|
||||
};
|
||||
|
||||
struct Norm2AllModes : public UMemory {
|
||||
|
@ -731,9 +731,131 @@ UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16,
|
||||
return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode);
|
||||
}
|
||||
|
||||
// Dual functionality:
|
||||
// sink != nullptr: normalize
|
||||
// sink == nullptr: isNormalized/spanQuickCheckYes
|
||||
const uint8_t *
|
||||
Normalizer2Impl::decomposeUTF8(uint32_t options,
|
||||
const uint8_t *src, const uint8_t *limit,
|
||||
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
|
||||
U_ASSERT(limit != nullptr);
|
||||
UnicodeString s16;
|
||||
uint8_t minNoLead = leadByteForCP(minDecompNoCP);
|
||||
|
||||
const uint8_t *prevBoundary = src;
|
||||
// only for quick check
|
||||
uint8_t prevCC = 0;
|
||||
|
||||
for (;;) {
|
||||
// Fast path: Scan over a sequence of characters below the minimum "no" code point,
|
||||
// or with (decompYes && ccc==0) properties.
|
||||
const uint8_t *fastStart = src;
|
||||
const uint8_t *prevSrc;
|
||||
uint16_t norm16 = 0;
|
||||
|
||||
for (;;) {
|
||||
if (src == limit) {
|
||||
if (prevBoundary != limit && sink != nullptr) {
|
||||
ByteSinkUtil::appendUnchanged(prevBoundary, limit,
|
||||
*sink, options, edits, errorCode);
|
||||
}
|
||||
return src;
|
||||
}
|
||||
if (*src < minNoLead) {
|
||||
++src;
|
||||
} else {
|
||||
prevSrc = src;
|
||||
UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16);
|
||||
if (!isMostDecompYesAndZeroCC(norm16)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// isMostDecompYesAndZeroCC(norm16) is false, that is, norm16>=minYesNo,
|
||||
// and the current character at [prevSrc..src[ is not a common case with cc=0
|
||||
// (MIN_NORMAL_MAYBE_YES or JAMO_VT).
|
||||
// It could still be a maybeYes with cc=0.
|
||||
if (prevSrc != fastStart) {
|
||||
// The fast path looped over yes/0 characters before the current one.
|
||||
if (sink != nullptr &&
|
||||
!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
prevBoundary = prevSrc;
|
||||
prevCC = 0;
|
||||
}
|
||||
|
||||
// Medium-fast path: Quick check.
|
||||
if (isMaybeOrNonZeroCC(norm16)) {
|
||||
// Does not decompose.
|
||||
uint8_t cc = getCCFromYesOrMaybe(norm16);
|
||||
if (prevCC <= cc || cc == 0) {
|
||||
prevCC = cc;
|
||||
if (cc <= 1) {
|
||||
if (sink != nullptr &&
|
||||
!ByteSinkUtil::appendUnchanged(prevBoundary, src,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
prevBoundary = src;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (sink == nullptr) {
|
||||
return prevBoundary; // quick check: "no" or cc out of order
|
||||
}
|
||||
|
||||
// Slow path
|
||||
// Decompose up to and including the current character.
|
||||
if (prevBoundary != prevSrc && norm16HasDecompBoundaryBefore(norm16)) {
|
||||
if (!ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
prevBoundary = prevSrc;
|
||||
}
|
||||
ReorderingBuffer buffer(*this, s16, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
break;
|
||||
}
|
||||
decomposeShort(prevBoundary, src, STOP_AT_LIMIT, FALSE /* onlyContiguous */,
|
||||
buffer, errorCode);
|
||||
// Decompose until the next boundary.
|
||||
if (buffer.getLastCC() > 1) {
|
||||
src = decomposeShort(src, limit, STOP_AT_DECOMP_BOUNDARY, FALSE /* onlyContiguous */,
|
||||
buffer, errorCode);
|
||||
}
|
||||
if (U_FAILURE(errorCode)) {
|
||||
break;
|
||||
}
|
||||
if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
|
||||
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
break;
|
||||
}
|
||||
// We already know there was a change if the original character decomposed;
|
||||
// otherwise compare.
|
||||
if (isMaybeOrNonZeroCC(norm16) && buffer.equals(prevBoundary, src)) {
|
||||
if (!ByteSinkUtil::appendUnchanged(prevBoundary, src,
|
||||
*sink, options, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (!ByteSinkUtil::appendChange(prevBoundary, src, buffer.getStart(), buffer.length(),
|
||||
*sink, edits, errorCode)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
prevBoundary = src;
|
||||
prevCC = 0;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
|
||||
const uint8_t *
|
||||
Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
UBool stopAtCompBoundary, UBool onlyContiguous,
|
||||
StopAt stopAt, UBool onlyContiguous,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return nullptr;
|
||||
@ -746,21 +868,28 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
UChar32 c = U_SENTINEL;
|
||||
if (norm16 >= limitNoNo) {
|
||||
if (isMaybeOrNonZeroCC(norm16)) {
|
||||
// No boundaries around this character.
|
||||
// No comp boundaries around this character.
|
||||
uint8_t cc = getCCFromYesOrMaybe(norm16);
|
||||
if (cc == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
|
||||
return prevSrc;
|
||||
}
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) {
|
||||
if (!buffer.append(c, cc, errorCode)) {
|
||||
return nullptr;
|
||||
}
|
||||
if (stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1) {
|
||||
return src;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Maps to an isCompYesAndZeroCC.
|
||||
if (stopAtCompBoundary) {
|
||||
if (stopAt != STOP_AT_LIMIT) {
|
||||
return prevSrc;
|
||||
}
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
c = mapAlgorithmic(c, norm16);
|
||||
norm16 = getRawNorm16(c);
|
||||
} else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) {
|
||||
} else if (stopAt != STOP_AT_LIMIT && norm16 < minNoNoCompNoMaybeCC) {
|
||||
return prevSrc;
|
||||
}
|
||||
// norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8.
|
||||
@ -768,7 +897,8 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
// its norm16==INERT is normalization-inert,
|
||||
// so it gets copied unchanged in the fast path,
|
||||
// and we stop the slow path where invalid UTF-8 begins.
|
||||
U_ASSERT(norm16 != INERT);
|
||||
// c >= 0 is the result of an algorithmic mapping.
|
||||
U_ASSERT(c >= 0 || norm16 != INERT);
|
||||
if (norm16 < minYesNo) {
|
||||
if (c < 0) {
|
||||
c = codePointFromValidUTF8(prevSrc, src);
|
||||
@ -798,11 +928,15 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
} else {
|
||||
leadCC = 0;
|
||||
}
|
||||
if (leadCC == 0 && stopAt == STOP_AT_DECOMP_BOUNDARY) {
|
||||
return prevSrc;
|
||||
}
|
||||
if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
|
||||
if ((stopAt == STOP_AT_COMP_BOUNDARY && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) ||
|
||||
(stopAt == STOP_AT_DECOMP_BOUNDARY && buffer.getLastCC() <= 1)) {
|
||||
return src;
|
||||
}
|
||||
}
|
||||
@ -1954,10 +2088,10 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
|
||||
break;
|
||||
}
|
||||
// We know there is not a boundary here.
|
||||
decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
|
||||
decomposeShort(prevSrc, src, STOP_AT_LIMIT, onlyContiguous,
|
||||
buffer, errorCode);
|
||||
// Decompose until the next boundary.
|
||||
src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
|
||||
src = decomposeShort(src, limit, STOP_AT_COMP_BOUNDARY, onlyContiguous,
|
||||
buffer, errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
break;
|
||||
|
@ -491,6 +491,12 @@ public:
|
||||
UnicodeString &safeMiddle,
|
||||
ReorderingBuffer &buffer,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
/** sink==nullptr: isNormalized()/spanQuickCheckYes() */
|
||||
const uint8_t *decomposeUTF8(uint32_t options,
|
||||
const uint8_t *src, const uint8_t *limit,
|
||||
ByteSink *sink, Edits *edits, UErrorCode &errorCode) const;
|
||||
|
||||
UBool compose(const UChar *src, const UChar *limit,
|
||||
UBool onlyContiguous,
|
||||
UBool doCompose,
|
||||
@ -649,6 +655,9 @@ private:
|
||||
UChar32 minNeedDataCP,
|
||||
ReorderingBuffer *buffer,
|
||||
UErrorCode &errorCode) const;
|
||||
|
||||
enum StopAt { STOP_AT_LIMIT, STOP_AT_DECOMP_BOUNDARY, STOP_AT_COMP_BOUNDARY };
|
||||
|
||||
const UChar *decomposeShort(const UChar *src, const UChar *limit,
|
||||
UBool stopAtCompBoundary, UBool onlyContiguous,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
|
||||
@ -656,7 +665,7 @@ private:
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
|
||||
|
||||
const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit,
|
||||
UBool stopAtCompBoundary, UBool onlyContiguous,
|
||||
StopAt stopAt, UBool onlyContiguous,
|
||||
ReorderingBuffer &buffer, UErrorCode &errorCode) const;
|
||||
|
||||
static int32_t combine(const uint16_t *list, UChar32 trail);
|
||||
|
@ -24,7 +24,7 @@ class U_COMMON_API PluralMapBase : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* The names of all the plural categories. NONE is not an actual plural
|
||||
* category, but rather represents the absense of a plural category.
|
||||
* category, but rather represents the absence of a plural category.
|
||||
*/
|
||||
enum Category {
|
||||
NONE = -1,
|
||||
|
@ -1139,7 +1139,7 @@ uprv_tzname(int n)
|
||||
#endif
|
||||
if (tzid != NULL && isValidOlsonID(tzid)
|
||||
#if U_PLATFORM == U_PF_SOLARIS
|
||||
/* When TZ equals localtime on Solaris, check the /etc/localtime file. */
|
||||
/* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
|
||||
&& uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
|
||||
#endif
|
||||
) {
|
||||
@ -1361,7 +1361,7 @@ uprv_pathIsAbsolute(const char *path)
|
||||
|
||||
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
|
||||
(needed for some Darwin ICU build environments) */
|
||||
#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR
|
||||
#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
|
||||
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
|
||||
# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
|
||||
# endif
|
||||
|
@ -533,7 +533,7 @@ U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base);
|
||||
* on the destination pointer and capacity cannot overflow.
|
||||
*
|
||||
* The pinned capacity must fulfill the following conditions (for positive capacities):
|
||||
* - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.)
|
||||
* - dest + capacity is a valid pointer according to the machine architecture (AS/400, 64-bit, etc.)
|
||||
* - (dest + capacity) >= dest
|
||||
* - The size (in bytes) of T[capacity] does not exceed 0x7fffffff
|
||||
*
|
||||
|
@ -812,7 +812,7 @@ int32_t RuleBasedBreakIterator::handleNext() {
|
||||
}
|
||||
#endif
|
||||
|
||||
// handleNext alway sets the break tag value.
|
||||
// handleNext always sets the break tag value.
|
||||
// Set the default for it.
|
||||
fRuleStatusIndex = 0;
|
||||
|
||||
|
@ -258,7 +258,7 @@ void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode
|
||||
previous(status);
|
||||
} else {
|
||||
// seek() leaves the BreakCache positioned at the preceding boundary
|
||||
// if the requested position is between two bounaries.
|
||||
// if the requested position is between two boundaries.
|
||||
// current() pushes the BreakCache position out to the BreakIterator itself.
|
||||
U_ASSERT(startPos > fTextIdx);
|
||||
current();
|
||||
|
@ -284,7 +284,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
|
||||
|
||||
case doEndAssign:
|
||||
{
|
||||
// We have reached the end of an assignement statement.
|
||||
// We have reached the end of an assignment statement.
|
||||
// Current scan char is the ';' that terminates the assignment.
|
||||
|
||||
// Terminate expression, leaves expression parse tree rooted in TOS node.
|
||||
@ -856,6 +856,10 @@ UChar32 RBBIRuleScanner::nextCharLL() {
|
||||
return (UChar32)-1;
|
||||
}
|
||||
ch = fRB->fRules.char32At(fNextIndex);
|
||||
if (U_IS_SURROGATE(ch)) {
|
||||
error(U_ILLEGAL_CHAR_FOUND);
|
||||
return U_SENTINEL;
|
||||
}
|
||||
fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1);
|
||||
|
||||
if (ch == chCR ||
|
||||
|
@ -151,7 +151,7 @@ void RBBITableBuilder::buildForwardTable() {
|
||||
//
|
||||
// calculate the functions nullable, firstpos, lastpos and followpos on
|
||||
// nodes in the parse tree.
|
||||
// See the alogrithm description in Aho.
|
||||
// See the algorithm description in Aho.
|
||||
// Understanding how this works by looking at the code alone will be
|
||||
// nearly impossible.
|
||||
//
|
||||
|
@ -274,8 +274,10 @@ public:
|
||||
*
|
||||
* @param key The key string of the enumeration-start resource.
|
||||
* Empty if the enumeration starts at the top level of the bundle.
|
||||
* @param value Call getArray() or getTable() as appropriate.
|
||||
* Then reuse for output values from Array and Table getters.
|
||||
* @param value Call getArray() or getTable() as appropriate. Then reuse for
|
||||
* output values from Array and Table getters. Note: ResourceTable and
|
||||
* ResourceArray instances must outlive the ResourceValue instance for
|
||||
* ResourceTracer to be happy.
|
||||
* @param noFallback true if the bundle has no parent;
|
||||
* that is, its top-level table has the nofallback attribute,
|
||||
* or it is the root bundle of a locale tree.
|
||||
|
@ -54,6 +54,9 @@ void ResourceTracer::traceOpen() const {
|
||||
|
||||
CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const {
|
||||
if (fResB) {
|
||||
// Note: if you get a segfault around here, check that ResourceTable and
|
||||
// ResourceArray instances outlive ResourceValue instances referring to
|
||||
// their contents:
|
||||
output.append(fResB->fData->fPath, status);
|
||||
output.append('/', status);
|
||||
output.append(fResB->fData->fName, status);
|
||||
|
@ -82,7 +82,7 @@ public:
|
||||
/**
|
||||
* Add a listener to be notified when notifyChanged is called.
|
||||
* The listener must not be null. AcceptsListener must return
|
||||
* true for the listener. Attempts to concurrently
|
||||
* true for the listener. Attempts to concurrently
|
||||
* register the identical listener more than once will be
|
||||
* silently ignored.
|
||||
*/
|
||||
@ -90,7 +90,7 @@ public:
|
||||
|
||||
/**
|
||||
* Stop notifying this listener. The listener must
|
||||
* not be null. Attemps to remove a listener that is
|
||||
* not be null. Attempts to remove a listener that is
|
||||
* not registered will be silently ignored.
|
||||
*/
|
||||
virtual void removeListener(const EventListener* l, UErrorCode& status);
|
||||
|
@ -174,6 +174,18 @@ ubrk_safeClone(
|
||||
return (UBreakIterator *)newBI;
|
||||
}
|
||||
|
||||
/**
 * Creates a heap-allocated copy of a break iterator.
 *
 * @param bi     The iterator to copy. Must not be null.
 * @param status In/out error code. Nothing is done if it is null or already
 *               indicates a failure. Set to U_ILLEGAL_ARGUMENT_ERROR when
 *               bi is null and to U_MEMORY_ALLOCATION_ERROR when the copy
 *               could not be created.
 * @return The new iterator, or nullptr on any failure.
 */
U_CAPI UBreakIterator * U_EXPORT2
ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
    // Guard the status pointer itself before reading through it.
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }
    // Calling clone() through a null iterator would be undefined behaviour;
    // report it as a caller error instead.
    if (bi == nullptr) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    BreakIterator *newBI = ((BreakIterator *)bi)->clone();
    if (newBI == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    return (UBreakIterator *)newBI;
}
|
||||
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -681,7 +681,7 @@ ucase_isCaseSensitive(UChar32 c) {
|
||||
* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
|
||||
* - Given D = NFD(C), then it is not the case that:
|
||||
* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
|
||||
* (This third criterium does not add any characters to the list
|
||||
* (This third criterion does not add any characters to the list
|
||||
* for Unicode 3.2. Ignored.)
|
||||
*
|
||||
* D2. A character C is defined to be case-ignorable
|
||||
|
@ -194,7 +194,7 @@ u_isISOControl(UChar32 c) {
|
||||
|
||||
/* Some control characters that are used as space. */
|
||||
#define IS_THAT_CONTROL_SPACE(c) \
|
||||
(c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
|
||||
(c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==0x85))
|
||||
|
||||
/* Java has decided that U+0085 New Line is not whitespace any more. */
|
||||
#define IS_THAT_ASCII_CONTROL_SPACE(c) \
|
||||
@ -677,14 +677,14 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
sa->add(sa->set, CR+1); /* range TAB..CR */
|
||||
sa->add(sa->set, 0x1c);
|
||||
sa->add(sa->set, 0x1f+1);
|
||||
USET_ADD_CP_AND_NEXT(sa, NL);
|
||||
USET_ADD_CP_AND_NEXT(sa, 0x85); // NEXT LINE (NEL)
|
||||
|
||||
/* add for u_isIDIgnorable() what was not added above */
|
||||
sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
|
||||
sa->add(sa->set, 0x7f); /* range DEL..NBSP-1, NBSP added below */
|
||||
sa->add(sa->set, HAIRSP);
|
||||
sa->add(sa->set, RLM+1);
|
||||
sa->add(sa->set, INHSWAP);
|
||||
sa->add(sa->set, NOMDIG+1);
|
||||
sa->add(sa->set, 0x206a); // INHIBIT SYMMETRIC SWAPPING
|
||||
sa->add(sa->set, 0x206f+1); // NOMINAL DIGIT SHAPES
|
||||
USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
|
||||
|
||||
/* add no-break spaces for u_isWhitespace() what was not added above */
|
||||
@ -693,23 +693,25 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
|
||||
USET_ADD_CP_AND_NEXT(sa, NNBSP);
|
||||
|
||||
/* add for u_digit() */
|
||||
sa->add(sa->set, U_a);
|
||||
sa->add(sa->set, U_z+1);
|
||||
sa->add(sa->set, U_A);
|
||||
sa->add(sa->set, U_Z+1);
|
||||
sa->add(sa->set, U_FW_a);
|
||||
sa->add(sa->set, U_FW_z+1);
|
||||
sa->add(sa->set, U_FW_A);
|
||||
sa->add(sa->set, U_FW_Z+1);
|
||||
sa->add(sa->set, u'a');
|
||||
sa->add(sa->set, u'z'+1);
|
||||
sa->add(sa->set, u'A');
|
||||
sa->add(sa->set, u'Z'+1);
|
||||
// fullwidth
|
||||
sa->add(sa->set, u'a');
|
||||
sa->add(sa->set, u'z'+1);
|
||||
sa->add(sa->set, u'A');
|
||||
sa->add(sa->set, u'Z'+1);
|
||||
|
||||
/* add for u_isxdigit() */
|
||||
sa->add(sa->set, U_f+1);
|
||||
sa->add(sa->set, U_F+1);
|
||||
sa->add(sa->set, U_FW_f+1);
|
||||
sa->add(sa->set, U_FW_F+1);
|
||||
sa->add(sa->set, u'f'+1);
|
||||
sa->add(sa->set, u'F'+1);
|
||||
// fullwidth
|
||||
sa->add(sa->set, u'f'+1);
|
||||
sa->add(sa->set, u'F'+1);
|
||||
|
||||
/* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
|
||||
sa->add(sa->set, WJ); /* range WJ..NOMDIG */
|
||||
sa->add(sa->set, 0x2060); /* range 2060..206f */
|
||||
sa->add(sa->set, 0xfff0);
|
||||
sa->add(sa->set, 0xfffb+1);
|
||||
sa->add(sa->set, 0xe0000);
|
||||
|
@ -820,7 +820,7 @@ getKey_2022(char c,int32_t* key,int32_t* offset){
|
||||
return INVALID_2022;
|
||||
}
|
||||
|
||||
/*runs through a state machine to determine the escape sequence - codepage correspondance
|
||||
/*runs through a state machine to determine the escape sequence - codepage correspondence
|
||||
*/
|
||||
static void
|
||||
changeState_2022(UConverter* _this,
|
||||
@ -1424,7 +1424,7 @@ toUnicodeCallback(UConverter *cnv,
|
||||
* KSC5601 : alias to ibm-949 mapping table
|
||||
* GB2312 : alias to ibm-1386 mapping table
|
||||
* ISO-8859-1 : Algorithmic implemented as LATIN1 case
|
||||
* ISO-8859-7 : alisas to ibm-9409 mapping table
|
||||
* ISO-8859-7 : alias to ibm-9409 mapping table
|
||||
*/
|
||||
|
||||
/* preference order of JP charsets */
|
||||
@ -2324,7 +2324,7 @@ endloop:
|
||||
/***************************************************************
|
||||
* Rules for ISO-2022-KR encoding
|
||||
* i) The KSC5601 designator sequence should appear only once in a file,
|
||||
* at the begining of a line before any KSC5601 characters. This usually
|
||||
* at the beginning of a line before any KSC5601 characters. This usually
|
||||
* means that it appears by itself on the first line of the file
|
||||
* ii) There are only 2 shifting sequences SO to shift into double byte mode
|
||||
* and SI to shift into single byte mode
|
||||
|
@ -427,7 +427,7 @@ getAlgorithmicTypeFromName(const char *realName)
|
||||
#define UCNV_CACHE_LOAD_FACTOR 2
|
||||
|
||||
/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */
|
||||
/* Will always be called with the cnvCacheMutex alrady being held */
|
||||
/* Will always be called with the cnvCacheMutex already being held */
|
||||
/* by the calling function. */
|
||||
/* Stores the shared data in the SHARED_DATA_HASHTABLE
|
||||
* @param data The shared data
|
||||
|
@ -321,7 +321,7 @@ UCNV_FROM_U_CALLBACK_ESCAPE (
|
||||
case UCNV_PRV_ESCAPE_CSS2:
|
||||
valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
|
||||
valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
|
||||
/* Always add space character, becase the next character might be whitespace,
|
||||
/* Always add space character, because the next character might be whitespace,
|
||||
which would erroneously be considered the termination of the escape sequence. */
|
||||
valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
|
||||
break;
|
||||
|
@ -81,7 +81,7 @@
|
||||
[G] D1 [D2]
|
||||
|
||||
That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2
|
||||
data bytes. The maximum size of a LMBCS chjaracter is 3 bytes:
|
||||
data bytes. The maximum size of a LMBCS character is 3 bytes:
|
||||
*/
|
||||
#define ULMBCS_CHARSIZE_MAX 3
|
||||
/*
|
||||
@ -164,7 +164,7 @@ beginning of internal 'system' range names: */
|
||||
/* Then we needed a place to put all the other ansi control characters
|
||||
that must be moved to different values because LMBCS reserves those
|
||||
values for other purposes. To represent the control characters, we start
|
||||
with a first byte of 0xF & add the control chaarcter value as the
|
||||
with a first byte of 0xF & add the control character value as the
|
||||
second byte */
|
||||
#define ULMBCS_GRP_CTRL 0x0F
|
||||
|
||||
|
@ -814,7 +814,7 @@ const UConverterSharedData _UTF7Data=
|
||||
* the use of "~" in some servers as a home directory indicator.
|
||||
*
|
||||
* 5) UTF-7 permits multiple alternate forms to represent the same
|
||||
* string; in particular, printable US-ASCII chararacters can be
|
||||
* string; in particular, printable US-ASCII characters can be
|
||||
* represented in encoded form.
|
||||
*
|
||||
* In modified UTF-7, printable US-ASCII characters except for "&"
|
||||
|
@ -992,7 +992,7 @@ UConverter_fromUnicode_ISCII_OFFSETS_LOGIC(
|
||||
|
||||
if (converterData->currentDeltaFromUnicode == PNJ_DELTA) {
|
||||
if (sourceChar == PNJ_TIPPI) {
|
||||
/* Make sure Tippi is converterd to Bindi. */
|
||||
/* Make sure Tippi is converted to Bindi. */
|
||||
sourceChar = PNJ_BINDI;
|
||||
} else if (sourceChar == PNJ_ADHAK) {
|
||||
/* This is for consonant cluster handling. */
|
||||
@ -1147,7 +1147,7 @@ static const uint16_t lookupTable[][2]={
|
||||
/* is the code point valid in current script? */ \
|
||||
if(sourceChar> ASCII_END && \
|
||||
(validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \
|
||||
/* Vocallic RR is assigne in ISCII Telugu and Unicode */ \
|
||||
/* Vocallic RR is assigned in ISCII Telugu and Unicode */ \
|
||||
if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \
|
||||
targetUniChar!=VOCALLIC_RR){ \
|
||||
targetUniChar=missingCharMarker; \
|
||||
@ -1272,7 +1272,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCo
|
||||
goto CALLBACK;
|
||||
} else if (*contextCharToUnicode==ISCII_INV) {
|
||||
if (sourceChar==ISCII_HALANT) {
|
||||
targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */
|
||||
targetUniChar = 0x0020; /* replace with space according to Indic FAQ */
|
||||
} else {
|
||||
targetUniChar = ZWJ;
|
||||
}
|
||||
|
@ -844,7 +844,7 @@ typedef struct {
|
||||
#endif
|
||||
|
||||
|
||||
// Comparason function used in quick sort.
|
||||
// Comparison function used in quick sort.
|
||||
static int U_CALLCONV currencyNameComparator(const void* a, const void* b) {
|
||||
const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a;
|
||||
const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b;
|
||||
@ -1530,7 +1530,7 @@ uprv_parseCurrency(const char* locale,
|
||||
|
||||
int32_t max = 0;
|
||||
int32_t matchIndex = -1;
|
||||
// case in-sensitive comparision against currency names
|
||||
// case in-sensitive comparison against currency names
|
||||
searchCurrencyName(currencyNames, total_currency_name_count,
|
||||
upperText, textLen, partialMatchLen, &max, &matchIndex);
|
||||
|
||||
|
@ -133,8 +133,10 @@ static const float RESIZE_POLICY_RATIO_TABLE[6] = {
|
||||
* or a pointer. If a hint bit is zero, then the associated
|
||||
* token is assumed to be an integer.
|
||||
*/
|
||||
#define HINT_BOTH_INTEGERS (0)
|
||||
#define HINT_KEY_POINTER (1)
|
||||
#define HINT_VALUE_POINTER (2)
|
||||
#define HINT_ALLOW_ZERO (4)
|
||||
|
||||
/********************************************************************
|
||||
* PRIVATE Implementation
|
||||
@ -479,8 +481,9 @@ _uhash_put(UHashtable *hash,
|
||||
goto err;
|
||||
}
|
||||
U_ASSERT(hash != NULL);
|
||||
/* Cannot always check pointer here or iSeries sees NULL every time. */
|
||||
if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) {
|
||||
if ((hint & HINT_VALUE_POINTER) ?
|
||||
value.pointer == NULL :
|
||||
value.integer == 0 && (hint & HINT_ALLOW_ZERO) == 0) {
|
||||
/* Disallow storage of NULL values, since NULL is returned by
|
||||
* get() to indicate an absent key. Storing NULL == removing.
|
||||
*/
|
||||
@ -687,6 +690,28 @@ uhash_igeti(const UHashtable *hash,
|
||||
return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer;
|
||||
}
|
||||
|
||||
/**
 * Looks up a pointer key and reports both the integer stored in the located
 * slot and whether that slot actually holds an entry.
 *
 * @param hash  The table to search.
 * @param key   The pointer key to look for.
 * @param found Output: set to whether the located slot is neither empty nor
 *              deleted. Must point to writable storage.
 * @return The integer value held by the located slot.
 */
U_CAPI int32_t U_EXPORT2
uhash_getiAndFound(const UHashtable *hash,
                   const void *key,
                   UBool *found) {
    // Wrap the raw pointer in the token type the hasher and finder expect.
    UHashTok lookupKey;
    lookupKey.pointer = (void *)key;

    const UHashElement *slot =
        _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));

    *found = !IS_EMPTY_OR_DELETED(slot->hashcode);
    return slot->value.integer;
}
|
||||
|
||||
/**
 * Looks up an integer key and reports both the integer stored in the located
 * slot and whether that slot actually holds an entry.
 *
 * @param hash  The table to search.
 * @param key   The integer key to look for.
 * @param found Output: set to whether the located slot is neither empty nor
 *              deleted. Must point to writable storage.
 * @return The integer value held by the located slot.
 */
U_CAPI int32_t U_EXPORT2
uhash_igetiAndFound(const UHashtable *hash,
                    int32_t key,
                    UBool *found) {
    // Wrap the integer in the token type the hasher and finder expect.
    UHashTok lookupKey;
    lookupKey.integer = key;

    const UHashElement *slot =
        _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));

    *found = !IS_EMPTY_OR_DELETED(slot->hashcode);
    return slot->value.integer;
}
|
||||
|
||||
U_CAPI void* U_EXPORT2
|
||||
uhash_put(UHashtable *hash,
|
||||
void* key,
|
||||
@ -736,7 +761,34 @@ uhash_iputi(UHashtable *hash,
|
||||
keyholder.integer = key;
|
||||
valueholder.integer = value;
|
||||
return _uhash_put(hash, keyholder, valueholder,
|
||||
0, /* neither is a ptr */
|
||||
HINT_BOTH_INTEGERS,
|
||||
status).integer;
|
||||
}
|
||||
|
||||
/**
 * Stores an integer value under a pointer key, passing HINT_ALLOW_ZERO so
 * that a value of 0 is stored rather than treated as a removal.
 *
 * @param hash   The table to modify.
 * @param key    The pointer key.
 * @param value  The integer value to store; 0 is permitted.
 * @param status In/out error code, forwarded to _uhash_put.
 * @return The integer member of the token returned by _uhash_put.
 */
U_CAPI int32_t U_EXPORT2
uhash_putiAllowZero(UHashtable *hash,
                    void *key,
                    int32_t value,
                    UErrorCode *status) {
    UHashTok keyTok;
    UHashTok valueTok;
    valueTok.integer = value;
    keyTok.pointer = key;
    // Key is a pointer, value is an integer, and zero values are allowed.
    return _uhash_put(hash, keyTok, valueTok,
                      HINT_KEY_POINTER | HINT_ALLOW_ZERO,
                      status).integer;
}
|
||||
|
||||
|
||||
/**
 * Stores an integer value under an integer key, passing HINT_ALLOW_ZERO so
 * that a value of 0 is stored rather than treated as a removal.
 *
 * @param hash   The table to modify.
 * @param key    The integer key.
 * @param value  The integer value to store; 0 is permitted.
 * @param status In/out error code, forwarded to _uhash_put.
 * @return The integer member of the token returned by _uhash_put.
 */
U_CAPI int32_t U_EXPORT2
uhash_iputiAllowZero(UHashtable *hash,
                     int32_t key,
                     int32_t value,
                     UErrorCode *status) {
    UHashTok keyTok;
    UHashTok valueTok;
    valueTok.integer = value;
    keyTok.integer = key;
    // Neither token is a pointer, and zero values are allowed.
    return _uhash_put(hash, keyTok, valueTok,
                      HINT_BOTH_INTEGERS | HINT_ALLOW_ZERO,
                      status).integer;
}
|
||||
|
||||
@ -785,6 +837,29 @@ uhash_removeAll(UHashtable *hash) {
|
||||
U_ASSERT(hash->count == 0);
|
||||
}
|
||||
|
||||
/**
 * Tests whether the table holds an entry for the given pointer key.
 *
 * @param hash The table to search.
 * @param key  The pointer key to look for.
 * @return Whether the slot located for the key is neither empty nor deleted.
 */
U_CAPI UBool U_EXPORT2
uhash_containsKey(const UHashtable *hash, const void *key) {
    UHashTok lookupKey;
    lookupKey.pointer = (void *)key;

    const UHashElement *slot =
        _uhash_find(hash, lookupKey, hash->keyHasher(lookupKey));

    return !IS_EMPTY_OR_DELETED(slot->hashcode);
}
|
||||
|
||||
/**
|
||||
* Returns true if the UHashtable contains an item with this integer key.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key An integer key stored in a hashtable
|
||||
* @return true if the key is found.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uhash_icontainsKey(const UHashtable *hash, int32_t key) {
|
||||
UHashTok keyholder;
|
||||
keyholder.integer = key;
|
||||
const UHashElement *e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder));
|
||||
return !IS_EMPTY_OR_DELETED(e->hashcode);
|
||||
}
|
||||
|
||||
U_CAPI const UHashElement* U_EXPORT2
|
||||
uhash_find(const UHashtable *hash, const void* key) {
|
||||
UHashTok keyholder;
|
||||
|
@ -23,7 +23,7 @@
|
||||
/**
|
||||
* UHashtable stores key-value pairs and does moderately fast lookup
|
||||
* based on keys. It provides a good tradeoff between access time and
|
||||
* storage space. As elements are added to it, it grows to accomodate
|
||||
* storage space. As elements are added to it, it grows to accommodate
|
||||
* them. By default, the table never shrinks, even if all elements
|
||||
* are removed from it.
|
||||
*
|
||||
@ -54,6 +54,13 @@
|
||||
* uhash_remove() on that key. This keeps uhash_get(), uhash_count(),
|
||||
* and uhash_nextElement() consistent with one another.
|
||||
*
|
||||
* Keys and values can be integers.
|
||||
* Functions that work with an integer key have an "i" prefix.
|
||||
* Functions that work with an integer value have an "i" suffix.
|
||||
* As with putting a NULL value pointer, putting a zero value integer removes the item.
|
||||
* Except, there are pairs of functions that allow setting zero values
|
||||
* and fetching (value, found) pairs.
|
||||
*
|
||||
* To see everything in a hashtable, use uhash_nextElement() to
|
||||
* iterate through its contents. Each call to this function returns a
|
||||
* UHashElement pointer. A hash element contains a key, value, and
|
||||
@ -405,6 +412,44 @@ uhash_iputi(UHashtable *hash,
|
||||
int32_t value,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Put a (key=pointer, value=integer) item in a UHashtable. If the
|
||||
* keyDeleter is non-NULL, then the hashtable owns 'key' after this
|
||||
* call. valueDeleter must be NULL.
|
||||
* Storing a 0 value is possible; call uhash_getiAndFound() to retrieve values including zero.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key The key to store.
|
||||
* @param value The integer value to store.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The previous value, or 0 if none.
|
||||
* @see uhash_getiAndFound
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_putiAllowZero(UHashtable *hash,
|
||||
void *key,
|
||||
int32_t value,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Put a (key=integer, value=integer) item in a UHashtable. If the
|
||||
* keyDeleter is non-NULL, then the hashtable owns 'key' after this
|
||||
* call. valueDeleter must be NULL.
|
||||
* Storing a 0 value is possible; call uhash_igetiAndFound() to retrieve values including zero.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key The key to store.
|
||||
* @param value The integer value to store.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The previous value, or 0 if none.
|
||||
* @see uhash_igetiAndFound
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_iputiAllowZero(UHashtable *hash,
|
||||
int32_t key,
|
||||
int32_t value,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Retrieve a pointer value from a UHashtable using a pointer key,
|
||||
* as previously stored by uhash_put().
|
||||
@ -448,6 +493,34 @@ U_CAPI int32_t U_EXPORT2
|
||||
uhash_igeti(const UHashtable *hash,
|
||||
int32_t key);
|
||||
|
||||
/**
|
||||
* Retrieves an integer value from a UHashtable using a pointer key,
|
||||
* as previously stored by uhash_putiAllowZero() or uhash_puti().
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key A pointer key stored in a hashtable
|
||||
* @param found A pointer to a boolean which will be set for whether the key was found.
|
||||
* @return The requested item, or 0 if not found.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_getiAndFound(const UHashtable *hash,
|
||||
const void *key,
|
||||
UBool *found);
|
||||
|
||||
/**
|
||||
* Retrieves an integer value from a UHashtable using an integer key,
|
||||
* as previously stored by uhash_iputiAllowZero() or uhash_iputi().
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key An integer key stored in a hashtable
|
||||
* @param found A pointer to a boolean which will be set for whether the key was found.
|
||||
* @return The requested item, or 0 if not found.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uhash_igetiAndFound(const UHashtable *hash,
|
||||
int32_t key,
|
||||
UBool *found);
|
||||
|
||||
/**
|
||||
* Remove an item from a UHashtable stored by uhash_put().
|
||||
* @param hash The target UHashtable.
|
||||
@ -495,6 +568,26 @@ uhash_iremovei(UHashtable *hash,
|
||||
U_CAPI void U_EXPORT2
|
||||
uhash_removeAll(UHashtable *hash);
|
||||
|
||||
/**
|
||||
* Returns true if the UHashtable contains an item with this pointer key.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key A pointer key stored in a hashtable
|
||||
* @return true if the key is found.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uhash_containsKey(const UHashtable *hash, const void *key);
|
||||
|
||||
/**
|
||||
* Returns true if the UHashtable contains an item with this integer key.
|
||||
*
|
||||
* @param hash The target UHashtable.
|
||||
* @param key An integer key stored in a hashtable
|
||||
* @return true if the key is found.
|
||||
*/
|
||||
U_CAPI UBool U_EXPORT2
|
||||
uhash_icontainsKey(const UHashtable *hash, int32_t key);
|
||||
|
||||
/**
|
||||
* Locate an element of a UHashtable. The caller must not modify the
|
||||
* returned object. The primary use of this function is to obtain the
|
||||
|
@ -143,7 +143,7 @@ static const char * const LANGUAGES[] = {
|
||||
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
|
||||
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga",
|
||||
"mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk",
|
||||
"ml", "mn", "mnc", "mni", "mo",
|
||||
"ml", "mn", "mnc", "mni",
|
||||
"moh", "mos", "mr", "mrj",
|
||||
"ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
|
||||
"my", "mye", "myv", "mzn",
|
||||
@ -166,9 +166,9 @@ static const char * const LANGUAGES[] = {
|
||||
"sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms",
|
||||
"sn", "snk", "so", "sog", "sq", "sr", "srn", "srr",
|
||||
"ss", "ssy", "st", "stq", "su", "suk", "sus", "sux",
|
||||
"sv", "sw", "swb", "swc", "syc", "syr", "szl",
|
||||
"sv", "sw", "swb", "syc", "syr", "szl",
|
||||
"ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg",
|
||||
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl",
|
||||
"th", "ti", "tig", "tiv", "tk", "tkl", "tkr",
|
||||
"tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi",
|
||||
"tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt",
|
||||
"tum", "tvl", "tw", "twq", "ty", "tyv", "tzm",
|
||||
@ -181,7 +181,7 @@ static const char * const LANGUAGES[] = {
|
||||
"za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu",
|
||||
"zun", "zxx", "zza",
|
||||
NULL,
|
||||
"in", "iw", "ji", "jw", "sh", /* obsolete language codes */
|
||||
"in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", /* obsolete language codes */
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -260,7 +260,7 @@ static const char * const LANGUAGES_3[] = {
|
||||
"mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
|
||||
"mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
|
||||
"mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
|
||||
"mal", "mon", "mnc", "mni", "mol",
|
||||
"mal", "mon", "mnc", "mni",
|
||||
"moh", "mos", "mar", "mrj",
|
||||
"msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
|
||||
"mya", "mye", "myv", "mzn",
|
||||
@ -283,9 +283,9 @@ static const char * const LANGUAGES_3[] = {
|
||||
"slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
|
||||
"sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
|
||||
"ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
|
||||
"swe", "swa", "swb", "swc", "syc", "syr", "szl",
|
||||
"swe", "swa", "swb", "syc", "syr", "szl",
|
||||
"tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
|
||||
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
|
||||
"tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
|
||||
"tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
|
||||
"tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
|
||||
"tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
|
||||
@ -298,8 +298,8 @@ static const char * const LANGUAGES_3[] = {
|
||||
"zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
|
||||
"zun", "zxx", "zza",
|
||||
NULL,
|
||||
/* "in", "iw", "ji", "jw", "sh", */
|
||||
"ind", "heb", "yid", "jaw", "srp",
|
||||
/* "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", */
|
||||
"ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -334,13 +334,13 @@ static const char * const COUNTRIES[] = {
|
||||
"BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV",
|
||||
"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG",
|
||||
"CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR",
|
||||
"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK",
|
||||
"DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER",
|
||||
"CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK",
|
||||
"DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER",
|
||||
"ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR",
|
||||
"GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL",
|
||||
"GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU",
|
||||
"GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU",
|
||||
"ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
|
||||
"IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS",
|
||||
"IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
|
||||
"KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",
|
||||
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU",
|
||||
@ -357,7 +357,7 @@ static const char * const COUNTRIES[] = {
|
||||
"TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV",
|
||||
"TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ",
|
||||
"VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF",
|
||||
"WS", "YE", "YT", "ZA", "ZM", "ZW",
|
||||
"WS", "XK", "YE", "YT", "ZA", "ZM", "ZW",
|
||||
NULL,
|
||||
"AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */
|
||||
NULL
|
||||
@ -397,10 +397,10 @@ static const char * const COUNTRIES_3[] = {
|
||||
"BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
|
||||
/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */
|
||||
"CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
|
||||
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */
|
||||
"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
|
||||
/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */
|
||||
"DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
|
||||
/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */
|
||||
"CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
|
||||
/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */
|
||||
"DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
|
||||
/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */
|
||||
"ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
|
||||
/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */
|
||||
@ -409,8 +409,8 @@ static const char * const COUNTRIES_3[] = {
|
||||
"GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
|
||||
/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */
|
||||
"GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
|
||||
/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
|
||||
"IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
|
||||
/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */
|
||||
"XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
|
||||
/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */
|
||||
"ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
|
||||
/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */
|
||||
@ -443,8 +443,8 @@ static const char * const COUNTRIES_3[] = {
|
||||
"TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
|
||||
/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */
|
||||
"VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
|
||||
/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */
|
||||
"WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
|
||||
/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */
|
||||
"WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
|
||||
NULL,
|
||||
/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */
|
||||
"ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
|
||||
|
@ -271,7 +271,7 @@ initFromResourceBundle(UErrorCode& sts) {
|
||||
if (U_FAILURE(sts)) {
|
||||
break;
|
||||
}
|
||||
// check if this is an alias of canoncal legacy type
|
||||
// check if this is an alias of canonical legacy type
|
||||
if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) {
|
||||
const char* from = ures_getKey(typeAliasDataEntry.getAlias());
|
||||
if (isTZ) {
|
||||
|
@ -129,7 +129,6 @@ static const char* const LEGACY[] = {
|
||||
// Legacy tags with no preferred value in the IANA
|
||||
// registry. Kept for now for the backward compatibility
|
||||
// because ICU has mapped them this way.
|
||||
"cel-gaulish", "xtg-x-cel-gaulish",
|
||||
"i-default", "en-x-i-default",
|
||||
"i-enochian", "und-x-i-enochian",
|
||||
"i-mingo", "see-x-i-mingo",
|
||||
@ -647,6 +646,22 @@ _isTKey(const char* s, int32_t len)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/**
 * Walks the SEP-delimited segments of localeID from left to right and
 * returns a pointer to the first segment that _isTKey() accepts.
 *
 * @param localeID NUL-terminated string to scan.
 * @return Pointer into localeID at the start of the first accepted segment,
 *         or nullptr if no segment is accepted.
 */
U_CAPI const char * U_EXPORT2
ultag_getTKeyStart(const char *localeID) {
    const char *segment = localeID;
    for (;;) {
        const char *sepPos = uprv_strchr(segment, SEP);
        if (sepPos == nullptr) {
            break;
        }
        if (_isTKey(segment, static_cast<int32_t>(sepPos - segment))) {
            return segment;
        }
        // Continue with the segment that follows this separator.
        segment = sepPos + 1;
    }
    // Last segment has no trailing separator; it is passed with length -1.
    // NOTE(review): presumably _isTKey() measures the string itself when
    // len is negative -- confirm against its definition.
    return _isTKey(segment, -1) ? segment : nullptr;
}
|
||||
|
||||
static UBool
|
||||
_isTValue(const char* s, int32_t len)
|
||||
{
|
||||
@ -671,9 +686,13 @@ _isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len)
|
||||
const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here.
|
||||
const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end
|
||||
|
||||
|
||||
if (len < 0) {
|
||||
len = (int32_t)uprv_strlen(s);
|
||||
}
|
||||
switch (state) {
|
||||
case kStart:
|
||||
if (ultag_isLanguageSubtag(s, len)) {
|
||||
if (ultag_isLanguageSubtag(s, len) && len != 4) {
|
||||
state = kGotLanguage;
|
||||
return TRUE;
|
||||
}
|
||||
@ -1775,11 +1794,6 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Determine if variants already exists */
|
||||
if (ultag_getVariantsSize(langtag)) {
|
||||
posixVariant = TRUE;
|
||||
}
|
||||
|
||||
n = ultag_getExtensionsSize(langtag);
|
||||
|
||||
/* resolve locale keywords and reordering keys */
|
||||
@ -1787,6 +1801,11 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status)
|
||||
key = ultag_getExtensionKey(langtag, i);
|
||||
type = ultag_getExtensionValue(langtag, i);
|
||||
if (*key == LDMLEXT) {
|
||||
/* Determine if variants already exists */
|
||||
if (ultag_getVariantsSize(langtag)) {
|
||||
posixVariant = TRUE;
|
||||
}
|
||||
|
||||
_appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status);
|
||||
if (U_FAILURE(*status)) {
|
||||
break;
|
||||
@ -2028,7 +2047,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
uprv_memcpy(tagBuf, tag, tagLen);
|
||||
|
||||
if (tagLen > 0) {
|
||||
uprv_memcpy(tagBuf, tag, tagLen);
|
||||
}
|
||||
*(tagBuf + tagLen) = 0;
|
||||
|
||||
/* create a ULanguageTag */
|
||||
@ -2692,8 +2714,7 @@ ulocimp_toLanguageTag(const char* localeID,
|
||||
if (U_SUCCESS(tmpStatus)) {
|
||||
if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) {
|
||||
/* return private use only tag */
|
||||
static const char PREFIX[] = { PRIVATEUSE, SEP };
|
||||
sink.Append(PREFIX, sizeof(PREFIX));
|
||||
sink.Append("und-x-", 6);
|
||||
sink.Append(buf.data(), buf.length());
|
||||
done = TRUE;
|
||||
} else if (strict) {
|
||||
|
@ -286,6 +286,9 @@ ultag_isUnicodeLocaleType(const char* s, int32_t len);
|
||||
U_CFUNC UBool
|
||||
ultag_isVariantSubtags(const char* s, int32_t len);
|
||||
|
||||
U_CAPI const char * U_EXPORT2
|
||||
ultag_getTKeyStart(const char *localeID);
|
||||
|
||||
U_CFUNC const char*
|
||||
ulocimp_toBcpKey(const char* key);
|
||||
|
||||
|
@ -71,7 +71,6 @@ public:
|
||||
*/
|
||||
virtual void Append(const char* bytes, int32_t n) = 0;
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Appends n bytes to this. Same as Append().
|
||||
* Call AppendU8() with u8"string literals" which are const char * in C++11
|
||||
@ -81,7 +80,7 @@ public:
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char* bytes, int32_t n) {
|
||||
Append(bytes, n);
|
||||
@ -97,13 +96,12 @@ public:
|
||||
*
|
||||
* @param bytes the pointer to the bytes
|
||||
* @param n the number of bytes; must be non-negative
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void AppendU8(const char8_t* bytes, int32_t n) {
|
||||
Append(reinterpret_cast<const char*>(bytes), n);
|
||||
}
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Returns a writable buffer for appending and writes the buffer's capacity to
|
||||
|
@ -30,6 +30,8 @@
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/ustringtrie.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class ByteSink;
|
||||
@ -378,6 +380,7 @@ public:
|
||||
|
||||
private:
|
||||
friend class BytesTrieBuilder;
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
/**
|
||||
* Constructs a BytesTrie reader instance.
|
||||
|
@ -30,6 +30,8 @@
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/stringtriebuilder.h"
|
||||
|
||||
class BytesTrieTest;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class BytesTrieElement;
|
||||
@ -125,6 +127,8 @@ public:
|
||||
BytesTrieBuilder &clear();
|
||||
|
||||
private:
|
||||
friend class ::BytesTrieTest;
|
||||
|
||||
BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor
|
||||
BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator
|
||||
|
||||
@ -168,6 +172,7 @@ private:
|
||||
virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
|
||||
virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
|
||||
virtual int32_t writeDeltaTo(int32_t jumpTarget);
|
||||
static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
|
||||
|
||||
CharString *strings; // Pointer not object so we need not #include internal charstr.h.
|
||||
BytesTrieElement *elements;
|
||||
|
@ -15,7 +15,7 @@
|
||||
* \file
|
||||
* \brief (Non API- contains Doxygen definitions)
|
||||
*
|
||||
* This file contains documentation for Doxygen and doesnot have
|
||||
* This file contains documentation for Doxygen and does not have
|
||||
* any significance with respect to C or C++ API
|
||||
*/
|
||||
|
||||
@ -74,7 +74,7 @@
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Strings and Character Iteration</td>
|
||||
* <td>ustring.h, utf8.h, utf16.h, UText, UCharIterator</td>
|
||||
* <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
|
||||
* <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
@ -128,9 +128,9 @@
|
||||
* <td>icu::Normalizer2</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Calendars</td>
|
||||
* <td>Calendars and Time Zones</td>
|
||||
* <td>ucal.h</td>
|
||||
* <td>icu::Calendar</td>
|
||||
* <td>icu::Calendar, icu::TimeZone</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>Date and Time Formatting</td>
|
||||
|
@ -117,14 +117,13 @@
|
||||
/* === Basic types === */
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
struct UPlugData;
|
||||
/**
|
||||
* @{
|
||||
* Opaque structure passed to/from a plugin.
|
||||
* use the APIs to access it.
|
||||
* Typedef for opaque structure passed to/from a plugin.
|
||||
* Use the APIs to access it.
|
||||
* @internal ICU 4.4 Technology Preview
|
||||
*/
|
||||
|
||||
struct UPlugData;
|
||||
typedef struct UPlugData UPlugData;
|
||||
|
||||
/** @} */
|
||||
|
@ -91,8 +91,6 @@ enum ULocMatchDemotion {
|
||||
typedef enum ULocMatchDemotion ULocMatchDemotion;
|
||||
#endif
|
||||
|
||||
#ifndef U_FORCE_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Builder option for whether to include or ignore one-way (fallback) match data.
|
||||
* The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
|
||||
@ -108,20 +106,20 @@ typedef enum ULocMatchDemotion ULocMatchDemotion;
|
||||
* but not if it is merely a fallback.
|
||||
*
|
||||
* @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
enum ULocMatchDirection {
|
||||
/**
|
||||
* Locale matching includes one-way matches such as Breton→French. (default)
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
ULOCMATCH_DIRECTION_WITH_ONE_WAY,
|
||||
/**
|
||||
* Locale matching limited to two-way matches including e.g. Danish↔Norwegian
|
||||
* but ignoring one-way matches.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
ULOCMATCH_DIRECTION_ONLY_TWO_WAY
|
||||
};
|
||||
@ -129,8 +127,6 @@ enum ULocMatchDirection {
|
||||
typedef enum ULocMatchDirection ULocMatchDirection;
|
||||
#endif
|
||||
|
||||
#endif // U_FORCE_HIDE_DRAFT_API
|
||||
|
||||
struct UHashtable;
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -463,14 +459,13 @@ public:
|
||||
*/
|
||||
Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Option for whether to include or ignore one-way (fallback) match data.
|
||||
* By default, they are included.
|
||||
*
|
||||
* @param direction the match direction to set.
|
||||
* @return this Builder object
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
Builder &setDirection(ULocMatchDirection direction) {
|
||||
if (U_SUCCESS(errorCode_)) {
|
||||
@ -478,7 +473,6 @@ public:
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
@ -704,7 +698,7 @@ private:
|
||||
LSR *lsrs;
|
||||
int32_t supportedLocalesLength;
|
||||
// These are in preference order: 1. Default locale 2. paradigm locales 3. others.
|
||||
UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found"
|
||||
UHashtable *supportedLsrToIndex; // Map<LSR, Integer>
|
||||
// Array versions of the supportedLsrToIndex keys and values.
|
||||
// The distance lookup loops over the supportedLSRs and returns the index of the best match.
|
||||
const LSR **supportedLSRs;
|
||||
|
@ -571,15 +571,13 @@ public:
|
||||
*/
|
||||
void minimizeSubtags(UErrorCode& status);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Canonicalize the locale ID of this object according to CLDR.
|
||||
* @param status the status code
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
* @see createCanonical
|
||||
*/
|
||||
void canonicalize(UErrorCode& status);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Gets the list of keywords for the specified locale.
|
||||
|
@ -225,10 +225,8 @@ public:
|
||||
* Normalizes a UTF-8 string and optionally records how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
*
|
||||
* Currently implemented completely only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* Otherwise currently converts to & from UTF-16 and does not support edits.
|
||||
* Implemented completely for all built-in modes except for FCD.
|
||||
* The base class implementation converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
@ -381,11 +379,9 @@ public:
|
||||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
*
|
||||
* This works for all normalization modes,
|
||||
* but it is currently optimized for UTF-8 only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* For other modes it currently converts to UTF-16 and calls isNormalized().
|
||||
* This works for all normalization modes.
|
||||
* It is optimized for UTF-8 for all built-in modes except for FCD.
|
||||
* The base class implementation converts to UTF-16 and calls isNormalized().
|
||||
*
|
||||
* @param s UTF-8 input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
@ -543,10 +539,8 @@ public:
|
||||
* Normalizes a UTF-8 string and optionally records how source substrings
|
||||
* relate to changed and unchanged result substrings.
|
||||
*
|
||||
* Currently implemented completely only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* Otherwise currently converts to & from UTF-16 and does not support edits.
|
||||
* Implemented completely for most built-in modes except for FCD.
|
||||
* The base class implementation converts to & from UTF-16 and does not support edits.
|
||||
*
|
||||
* @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
|
||||
* @param src Source UTF-8 string.
|
||||
@ -676,11 +670,9 @@ public:
|
||||
* resolves to "yes" or "no" to provide a definitive result,
|
||||
* at the cost of doing more work in those cases.
|
||||
*
|
||||
* This works for all normalization modes,
|
||||
* but it is currently optimized for UTF-8 only for "compose" modes,
|
||||
* such as for NFC, NFKC, and NFKC_Casefold
|
||||
* (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
|
||||
* For other modes it currently converts to UTF-16 and calls isNormalized().
|
||||
* This works for all normalization modes.
|
||||
* It is optimized for UTF-8 for all built-in modes except for FCD.
|
||||
* The base class implementation converts to UTF-16 and calls isNormalized().
|
||||
*
|
||||
* @param s UTF-8 input string
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
|
@ -880,6 +880,6 @@ namespace std {
|
||||
#else
|
||||
# define U_CALLCONV_FPTR
|
||||
#endif
|
||||
/* @} */
|
||||
/** @} */
|
||||
|
||||
#endif // _PLATFORM_H
|
||||
|
@ -75,12 +75,11 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* str);
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a NUL-terminated const char8_t * pointer.
|
||||
* @param str a NUL-terminated const char8_t * pointer
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
|
||||
#endif
|
||||
@ -88,10 +87,9 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
* Constructs an empty StringPiece.
|
||||
* Needed for type disambiguation from multiple other overloads.
|
||||
* @param p nullptr
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Constructs from a std::string.
|
||||
@ -99,17 +97,15 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
*/
|
||||
StringPiece(const std::string& str)
|
||||
: ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a std::u8string.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const std::u8string& str)
|
||||
: ptr_(reinterpret_cast<const char*>(str.data())),
|
||||
length_(static_cast<int32_t>(str.size())) { }
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Constructs from some other implementation of a string piece class, from any
|
||||
@ -152,18 +148,16 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
* @stable ICU 4.2
|
||||
*/
|
||||
StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Constructs from a const char8_t * pointer and a specified length.
|
||||
* @param str a const char8_t * pointer (need not be terminated)
|
||||
* @param len the length of the string; must be non-negative
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
StringPiece(const char8_t* str, int32_t len) :
|
||||
StringPiece(reinterpret_cast<const char*>(str), len) {}
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Substring of another StringPiece.
|
||||
@ -233,13 +227,12 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
*/
|
||||
void set(const char* str);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param xdata pointer the new string data. Need not be NUL-terminated.
|
||||
* @param len the length of the new data
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* xdata, int32_t len) {
|
||||
set(reinterpret_cast<const char*>(xdata), len);
|
||||
@ -248,13 +241,12 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
/**
|
||||
* Resets the stringpiece to refer to new data.
|
||||
* @param str a pointer to a NUL-terminated string.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
inline void set(const char8_t* str) {
|
||||
set(reinterpret_cast<const char*>(str));
|
||||
}
|
||||
#endif
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes the first n string units.
|
||||
@ -286,13 +278,12 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Searches the StringPiece for the given search string (needle);
|
||||
* @param needle The string for which to search.
|
||||
* @param offset Where to start searching within this string (haystack).
|
||||
* @return The offset of needle in haystack, or -1 if not found.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t find(StringPiece needle, int32_t offset);
|
||||
|
||||
@ -301,10 +292,9 @@ class U_COMMON_API StringPiece : public UMemory {
|
||||
* similar to std::string::compare().
|
||||
* @param other The string to compare to.
|
||||
* @return below zero if this < other; above zero if this > other; 0 if this == other.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
int32_t compare(StringPiece other);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Maximum integer, used as a default value for substring methods.
|
||||
|
@ -296,6 +296,8 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
||||
const UChar * text, int32_t textLength,
|
||||
UErrorCode * status);
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation
|
||||
* @param bi iterator to be cloned
|
||||
@ -312,7 +314,7 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
|
||||
* @return pointer to the new clone
|
||||
* @stable ICU 2.0
|
||||
* @deprecated ICU 69 Use ubrk_clone() instead.
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_safeClone(
|
||||
@ -321,6 +323,23 @@ ubrk_safeClone(
|
||||
int32_t *pBufferSize,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif /* U_HIDE_DEPRECATED_API */
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Thread safe cloning operation.
|
||||
* @param bi iterator to be cloned
|
||||
* @param status to indicate whether the operation went on smoothly or there were errors
|
||||
* @return pointer to the new clone
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI UBreakIterator * U_EXPORT2
|
||||
ubrk_clone(const UBreakIterator *bi,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_DEPRECATED_API
|
||||
|
||||
/**
|
||||
|
@ -1699,10 +1699,10 @@ ucnv_countAvailable(void);
|
||||
|
||||
/**
|
||||
* Gets the canonical converter name of the specified converter from a list of
|
||||
* all available converters contaied in the alias file. All converters
|
||||
* all available converters contained in the alias file. All converters
|
||||
* in this list can be opened.
|
||||
*
|
||||
* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>)
|
||||
* @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>)
|
||||
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
|
||||
* @see ucnv_countAvailable
|
||||
* @stable ICU 2.0
|
||||
|
@ -45,11 +45,11 @@
|
||||
* from the serialized form.
|
||||
*/
|
||||
|
||||
struct UConverterSelector;
|
||||
/**
|
||||
* @{
|
||||
* The selector data structure
|
||||
* Typedef for selector data structure.
|
||||
*/
|
||||
struct UConverterSelector;
|
||||
typedef struct UConverterSelector UConverterSelector;
|
||||
/** @} */
|
||||
|
||||
|
@ -40,8 +40,8 @@ U_NAMESPACE_BEGIN
|
||||
*
|
||||
* <code>UnicodeFilter</code> defines a protocol for selecting a
|
||||
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
|
||||
* Currently, filters are used in conjunction with classes like {@link
|
||||
* Transliterator} to only process selected characters through a
|
||||
* Currently, filters are used in conjunction with classes like
|
||||
* {@link Transliterator} to only process selected characters through a
|
||||
* transformation.
|
||||
*
|
||||
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods
|
||||
|
@ -178,8 +178,6 @@ class RuleCharacterIterator;
|
||||
* Unicode property
|
||||
* </table>
|
||||
*
|
||||
* <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p>
|
||||
*
|
||||
* <p><b>Formal syntax</b></p>
|
||||
*
|
||||
* \htmlonly<blockquote>\endhtmlonly
|
||||
@ -601,7 +599,7 @@ public:
|
||||
|
||||
/**
|
||||
* Make this object represent the range `start - end`.
|
||||
* If `end > start` then this object is set to an empty range.
|
||||
* If `start > end` then this object is set to an empty range.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param start first character in the set, inclusive
|
||||
@ -1077,7 +1075,7 @@ public:
|
||||
/**
|
||||
* Adds the specified range to this set if it is not already
|
||||
* present. If this set already contains the specified range,
|
||||
* the call leaves this set unchanged. If <code>end > start</code>
|
||||
* the call leaves this set unchanged. If <code>start > end</code>
|
||||
* then an empty range is added, leaving the set unchanged.
|
||||
* This is equivalent to a boolean logic OR, or a set UNION.
|
||||
* A frozen set will not be modified.
|
||||
@ -1095,6 +1093,9 @@ public:
|
||||
* present. If this set already contains the specified character,
|
||||
* the call leaves this set unchanged.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& add(UChar32 c);
|
||||
@ -1104,8 +1105,8 @@ public:
|
||||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.4
|
||||
@ -1124,8 +1125,8 @@ public:
|
||||
|
||||
public:
|
||||
/**
|
||||
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
|
||||
* If this set already contains any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
@ -1135,7 +1136,6 @@ public:
|
||||
|
||||
/**
|
||||
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
@ -1145,7 +1145,6 @@ public:
|
||||
|
||||
/**
|
||||
* Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
@ -1155,7 +1154,6 @@ public:
|
||||
|
||||
/**
|
||||
* Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
@ -1165,7 +1163,7 @@ public:
|
||||
|
||||
/**
|
||||
* Makes a set from a multicharacter string. Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return a newly created set containing the given string.
|
||||
* The caller owns the return object and is responsible for deleting it.
|
||||
@ -1185,15 +1183,13 @@ public:
|
||||
|
||||
/**
|
||||
* Retain only the elements in this set that are contained in the
|
||||
* specified range. If <code>end > start</code> then an empty range is
|
||||
* specified range. If <code>start > end</code> then an empty range is
|
||||
* retained, leaving the set empty. This is equivalent to
|
||||
* a boolean logic AND, or a set INTERSECTION.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param start first character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param end last character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UnicodeSet& retain(UChar32 start, UChar32 end);
|
||||
@ -1202,14 +1198,31 @@ public:
|
||||
/**
|
||||
* Retain the specified character from this set if it is present.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& retain(UChar32 c);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Retains only the specified string from this set if it is present.
|
||||
* Upon return this set will be empty if it did not contain s, or
|
||||
* will only contain s if it did contain s.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the source string
|
||||
* @return this object, for chaining
|
||||
* @draft ICU 69
|
||||
*/
|
||||
UnicodeSet& retain(const UnicodeString &s);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes the specified range from this set if it is present.
|
||||
* The set will not contain the specified range once the call
|
||||
* returns. If <code>end > start</code> then an empty range is
|
||||
* returns. If <code>start > end</code> then an empty range is
|
||||
* removed, leaving the set unchanged.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
@ -1226,6 +1239,9 @@ public:
|
||||
* The set will not contain the specified range once the call
|
||||
* returns.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& remove(UChar32 c);
|
||||
@ -1253,15 +1269,13 @@ public:
|
||||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
* added if it is not in this set. If <code>end > start</code>
|
||||
* added if it is not in this set. If <code>start > end</code>
|
||||
* then an empty range is complemented, leaving the set unchanged.
|
||||
* This is equivalent to a boolean logic XOR.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param start first character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param end last character, inclusive, of range to be removed
|
||||
* from this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual UnicodeSet& complement(UChar32 start, UChar32 end);
|
||||
@ -1271,16 +1285,18 @@ public:
|
||||
* will be removed if it is in this set, or will be added if it is
|
||||
* not in this set.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param c the character (code point)
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
UnicodeSet& complement(UChar32 c);
|
||||
|
||||
/**
|
||||
* Complement the specified string in this set.
|
||||
* The set will not contain the specified string once the call
|
||||
* returns.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
* The string will be removed if it is in this set, or will be added if it is not in this set.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
* @stable ICU 2.4
|
||||
|
@ -44,9 +44,10 @@ struct UConverter; // unicode/ucnv.h
|
||||
#ifndef USTRING_H
|
||||
/**
|
||||
* \ingroup ustring_ustrlen
|
||||
* @param s Pointer to sequence of UChars.
|
||||
* @return Length of sequence.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_strlen(const UChar *s);
|
||||
U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
|
||||
#endif
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
@ -2766,7 +2767,6 @@ public:
|
||||
* @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
|
||||
* U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
|
||||
* U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
|
||||
* @param options Options bit set, see ucasemap_open().
|
||||
* @return A reference to this.
|
||||
* @stable ICU 3.8
|
||||
*/
|
||||
@ -3614,7 +3614,7 @@ private:
|
||||
// turn a bogus string into an empty one
|
||||
void unBogus();
|
||||
|
||||
// implements assigment operator, copy constructor, and fastCopyFrom()
|
||||
// implements assignment operator, copy constructor, and fastCopyFrom()
|
||||
UnicodeString ©From(const UnicodeString &src, UBool fastCopy=false);
|
||||
|
||||
// Copies just the fields without memory management.
|
||||
|
@ -482,6 +482,7 @@
|
||||
#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
|
||||
#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
|
||||
#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
|
||||
#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone)
|
||||
#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
|
||||
#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
|
||||
#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
|
||||
@ -534,6 +535,7 @@
|
||||
#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
|
||||
#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
|
||||
#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
|
||||
#define ucal_getTimeZoneOffsetFromLocal U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneOffsetFromLocal)
|
||||
#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
|
||||
#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
|
||||
#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
|
||||
@ -962,6 +964,7 @@
|
||||
#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
|
||||
#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
|
||||
#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
|
||||
#define uhash_containsKey U_ICU_ENTRY_POINT_RENAME(uhash_containsKey)
|
||||
#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
|
||||
#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
|
||||
#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
|
||||
@ -970,6 +973,7 @@
|
||||
#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
|
||||
#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
|
||||
#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
|
||||
#define uhash_getiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_getiAndFound)
|
||||
#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
|
||||
#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
|
||||
#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
|
||||
@ -977,12 +981,15 @@
|
||||
#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
|
||||
#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
|
||||
#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
|
||||
#define uhash_icontainsKey U_ICU_ENTRY_POINT_RENAME(uhash_icontainsKey)
|
||||
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
|
||||
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
|
||||
#define uhash_igetiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_igetiAndFound)
|
||||
#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
|
||||
#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
|
||||
#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
|
||||
#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
|
||||
#define uhash_iputiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_iputiAllowZero)
|
||||
#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
|
||||
#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
|
||||
#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
|
||||
@ -990,6 +997,7 @@
|
||||
#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
|
||||
#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
|
||||
#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
|
||||
#define uhash_putiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_putiAllowZero)
|
||||
#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
|
||||
#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
|
||||
#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
|
||||
@ -1150,6 +1158,8 @@
|
||||
#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
|
||||
#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
|
||||
#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
|
||||
#define umeas_getPrefixBase U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixBase)
|
||||
#define umeas_getPrefixPower U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixPower)
|
||||
#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
|
||||
#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
|
||||
#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
|
||||
@ -1672,6 +1682,9 @@
|
||||
#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
|
||||
#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
|
||||
#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
|
||||
#define uset_complementAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_complementAllCodePoints)
|
||||
#define uset_complementRange U_ICU_ENTRY_POINT_RENAME(uset_complementRange)
|
||||
#define uset_complementString U_ICU_ENTRY_POINT_RENAME(uset_complementString)
|
||||
#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
|
||||
#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
|
||||
#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
|
||||
@ -1695,12 +1708,15 @@
|
||||
#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
|
||||
#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
|
||||
#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
|
||||
#define uset_removeAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_removeAllCodePoints)
|
||||
#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
|
||||
#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
|
||||
#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
|
||||
#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
|
||||
#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
|
||||
#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
|
||||
#define uset_retainAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_retainAllCodePoints)
|
||||
#define uset_retainString U_ICU_ENTRY_POINT_RENAME(uset_retainString)
|
||||
#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
|
||||
#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
|
||||
#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
|
||||
|
@ -582,8 +582,8 @@ U_CAPI void U_EXPORT2
|
||||
uset_addString(USet* set, const UChar* str, int32_t strLen);
|
||||
|
||||
/**
|
||||
* Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
|
||||
* If this set already any particular character, it has no effect on that character.
|
||||
* Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
|
||||
* If this set already contains any particular character, it has no effect on that character.
|
||||
* A frozen set will not be modified.
|
||||
* @param set the object to which to add the character
|
||||
* @param str the source string
|
||||
@ -628,6 +628,20 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end);
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_removeString(USet* set, const UChar* str, int32_t strLen);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Removes from this set all of its elements that are contained in the
|
||||
* specified set. This operation effectively modifies this
|
||||
@ -650,15 +664,41 @@ uset_removeAll(USet* set, const USet* removeSet);
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object for which to retain only the specified range
|
||||
* @param start first character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param end last character, inclusive, of range to be retained
|
||||
* to this set.
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retain(USet* set, UChar32 start, UChar32 end);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Retains only the specified string from this set if it is present.
|
||||
* Upon return this set will be empty if it did not contain s, or
|
||||
* will only contain s if it did contain s.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainString(USet *set, const UChar *str, int32_t length);
|
||||
|
||||
/**
|
||||
* Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Retains only the elements in this set that are contained in the
|
||||
* specified set. In other words, removes from this set all of
|
||||
@ -696,6 +736,49 @@ uset_compact(USet* set);
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complement(USet* set);
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* Complements the specified range in this set. Any character in
|
||||
* the range will be removed if it is in this set, or will be
|
||||
* added if it is not in this set. If <code>start > end</code>
|
||||
* then an empty range is complemented, leaving the set unchanged.
|
||||
* This is equivalent to a boolean logic XOR.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param start first character, inclusive, of range
|
||||
* @param end last character, inclusive, of range
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementRange(USet *set, UChar32 start, UChar32 end);
|
||||
|
||||
/**
|
||||
* Complements the specified string in this set.
|
||||
* The string will be removed if it is in this set, or will be added if it is not in this set.
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementString(USet *set, const UChar *str, int32_t length);
|
||||
|
||||
/**
|
||||
* Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
|
||||
* A frozen set will not be modified.
|
||||
*
|
||||
* @param set the object to be modified
|
||||
* @param str the string
|
||||
* @param length the length of the string, or -1 if NUL-terminated
|
||||
* @draft ICU 69
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
/**
|
||||
* Complements in this set all elements contained in the specified
|
||||
* set. Any character in the other set will be removed if it is
|
||||
|
@ -323,7 +323,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
|
||||
#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
|
||||
/** Presentation form option:
|
||||
* Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with
|
||||
* their unshaped correspondants in range 0+06xx, before shaping.
|
||||
* their unshaped correspondents in range U+06xx, before shaping.
|
||||
* @stable ICU 3.6
|
||||
*/
|
||||
#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
|
||||
|
@ -173,24 +173,23 @@ typedef enum UTraceFunctionNumber {
|
||||
UTRACE_RES_DATA_LIMIT,
|
||||
#endif // U_HIDE_INTERNAL_API
|
||||
|
||||
#ifndef U_HIDE_DRAFT_API
|
||||
/**
|
||||
* The lowest break iterator location.
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_START=0x4000,
|
||||
|
||||
/**
|
||||
* Indicates that a character instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
|
||||
|
||||
/**
|
||||
* Indicates that a word instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_WORD,
|
||||
|
||||
@ -200,21 +199,21 @@ typedef enum UTraceFunctionNumber {
|
||||
* Provides one C-style string to UTraceData: the lb value ("",
|
||||
* "loose", "strict", or "normal").
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_LINE,
|
||||
|
||||
/**
|
||||
* Indicates that a sentence instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_SENTENCE,
|
||||
|
||||
/**
|
||||
* Indicates that a title instance of break iterator was created.
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_TITLE,
|
||||
|
||||
@ -224,12 +223,10 @@ typedef enum UTraceFunctionNumber {
|
||||
* Provides one C-style string to UTraceData: the script code of what
|
||||
* the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
|
||||
*
|
||||
* @draft ICU 67
|
||||
* @stable ICU 67
|
||||
*/
|
||||
UTRACE_UBRK_CREATE_BREAK_ENGINE,
|
||||
|
||||
#endif // U_HIDE_DRAFT_API
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* One more than the highest normal break iterator trace location.
|
||||
|
@ -60,13 +60,13 @@
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION_MAJOR_NUM 68
|
||||
#define U_ICU_VERSION_MAJOR_NUM 69
|
||||
|
||||
/** The current ICU minor version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_MINOR_NUM 2
|
||||
#define U_ICU_VERSION_MINOR_NUM 1
|
||||
|
||||
/** The current ICU patchlevel version as an integer.
|
||||
* This value will change in the subsequent releases of ICU
|
||||
@ -86,7 +86,7 @@
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SUFFIX _68
|
||||
#define U_ICU_VERSION_SUFFIX _69
|
||||
|
||||
/**
|
||||
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
|
||||
@ -139,7 +139,7 @@
|
||||
* This value will change in the subsequent releases of ICU
|
||||
* @stable ICU 2.4
|
||||
*/
|
||||
#define U_ICU_VERSION "68.2"
|
||||
#define U_ICU_VERSION "69.1"
|
||||
|
||||
/**
|
||||
* The current ICU library major version number as a string, for library name suffixes.
|
||||
@ -152,13 +152,13 @@
|
||||
*
|
||||
* @stable ICU 2.6
|
||||
*/
|
||||
#define U_ICU_VERSION_SHORT "68"
|
||||
#define U_ICU_VERSION_SHORT "69"
|
||||
|
||||
#ifndef U_HIDE_INTERNAL_API
|
||||
/** Data version in ICU4C.
|
||||
* @internal ICU 4.4 Internal Use Only
|
||||
**/
|
||||
#define U_ICU_DATA_VERSION "68.2"
|
||||
#define U_ICU_DATA_VERSION "69.1"
|
||||
#endif /* U_HIDE_INTERNAL_API */
|
||||
|
||||
/*===========================================================================
|
||||
|
@ -30,24 +30,6 @@
|
||||
#include "bmpset.h"
|
||||
#include "unisetspan.h"
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||
#define COLON ((UChar)0x003A) /*:*/
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||
#define UPPER_N ((UChar)78) /*N*/
|
||||
#define EQUALS ((UChar)0x003D) /*=*/
|
||||
|
||||
// HIGH_VALUE > all valid values. 110000 for codepoints
|
||||
#define UNICODESET_HIGH 0x0110000
|
||||
|
||||
@ -444,7 +426,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
|
||||
* @return <tt>true</tt> if this set contains the specified string
|
||||
*/
|
||||
UBool UnicodeSet::contains(const UnicodeString& s) const {
|
||||
if (s.length() == 0) return FALSE;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
return stringsContains(s);
|
||||
@ -559,11 +540,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
|
||||
if (hasStrings()) {
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
|
||||
//if (s.length() == 0) {
|
||||
// // Empty strings match everything
|
||||
// return TRUE;
|
||||
//}
|
||||
// assert(s.length() != 0); // We enforce this elsewhere
|
||||
if (s.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
UChar32 c = s.char32At(0);
|
||||
if ((c & 0xFF) == v) {
|
||||
return TRUE;
|
||||
@ -582,9 +561,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
||||
int32_t limit,
|
||||
UBool incremental) {
|
||||
if (offset == limit) {
|
||||
// Strings, if any, have length != 0, so we don't worry
|
||||
// about them here. If we ever allow zero-length strings
|
||||
// we much check for them here.
|
||||
if (contains(U_ETHER)) {
|
||||
return incremental ? U_PARTIAL_MATCH : U_MATCH;
|
||||
} else {
|
||||
@ -614,11 +590,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
|
||||
|
||||
for (i=0; i<strings->size(); ++i) {
|
||||
const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
|
||||
|
||||
//if (trial.length() == 0) {
|
||||
// return U_MATCH; // null-string always matches
|
||||
//}
|
||||
// assert(trial.length() != 0); // We ensure this elsewhere
|
||||
if (trial.isEmpty()) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
|
||||
|
||||
@ -971,12 +945,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
|
||||
* present. If this set already contains the multicharacter,
|
||||
* the call leaves this set unchanged.
|
||||
* Thus "ch" => {"ch"}
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the source string
|
||||
* @return the modified set, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (!stringsContains(s)) {
|
||||
@ -991,8 +965,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
|
||||
|
||||
/**
|
||||
* Adds the given string, in order, to 'strings'. The given string
|
||||
* must have been checked by the caller to not be empty and to not
|
||||
* already be in 'strings'.
|
||||
* must have been checked by the caller to not already be in 'strings'.
|
||||
*/
|
||||
void UnicodeSet::_add(const UnicodeString& s) {
|
||||
if (isFrozen() || isBogus()) {
|
||||
@ -1021,16 +994,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
|
||||
* @param string to test
|
||||
*/
|
||||
int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
|
||||
//if (s.length() < 1) {
|
||||
// throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
|
||||
//}
|
||||
if (s.length() > 2) return -1;
|
||||
if (s.length() == 1) return s.charAt(0);
|
||||
|
||||
// at this point, len = 2
|
||||
UChar32 cp = s.char32At(0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
int32_t sLength = s.length();
|
||||
if (sLength == 1) return s.charAt(0);
|
||||
if (sLength == 2) {
|
||||
UChar32 cp = s.char32At(0);
|
||||
if (cp > 0xFFFF) { // is surrogate pair
|
||||
return cp;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
@ -1150,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) {
|
||||
return retain(c, c);
|
||||
}
|
||||
|
||||
/**
 * Retains only the specified string in this set if it is present.
 * On return the set contains just s if it contained s before the call,
 * and is empty otherwise. A frozen or bogus set is returned unchanged.
 *
 * @param s the string to retain
 * @return this object, for chaining
 */
UnicodeSet& UnicodeSet::retain(const UnicodeString &s) {
|
||||
if (isFrozen() || isBogus()) { return *this; }
|
||||
UChar32 cp = getSingleCP(s);
|
||||
// A negative result means s is not a single code point, so it has to be
// looked up among the set's strings rather than its code point ranges.
if (cp < 0) {
|
||||
bool isIn = stringsContains(s);
|
||||
// Check for getRangeCount() first to avoid somewhat-expensive size()
|
||||
// when there are single code points.
|
||||
// If s is already the only element, there is nothing to change.
if (isIn && getRangeCount() == 0 && size() == 1) {
|
||||
return *this;
|
||||
}
|
||||
// Otherwise empty the set and put s back only if it was present.
clear();
|
||||
if (isIn) {
|
||||
_add(s);
|
||||
}
|
||||
} else {
|
||||
// s is a single code point: delegate to the range overload.
retain(cp, cp);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the specified range from this set if it is present.
|
||||
* The set will not contain the specified range once the call
|
||||
@ -1186,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
|
||||
* @return the modified set, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (strings != nullptr && strings->removeElement((void*) &s)) {
|
||||
@ -1252,12 +1242,12 @@ UnicodeSet& UnicodeSet::complement(void) {
|
||||
* Complement the specified string in this set.
|
||||
* The string will be removed if it is in this set, or will be
|
||||
* added if it is not in this set.
|
||||
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
|
||||
*
|
||||
* @param s the string to complement
|
||||
* @return this object, for chaining
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
|
||||
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
|
||||
if (isFrozen() || isBogus()) return *this;
|
||||
int32_t cp = getSingleCP(s);
|
||||
if (cp < 0) {
|
||||
if (stringsContains(s)) {
|
||||
@ -2001,22 +1991,22 @@ escapeUnprintable) {
|
||||
}
|
||||
// Okay to let ':' pass through
|
||||
switch (c) {
|
||||
case SET_OPEN:
|
||||
case SET_CLOSE:
|
||||
case HYPHEN:
|
||||
case COMPLEMENT:
|
||||
case INTERSECTION:
|
||||
case BACKSLASH:
|
||||
case OPEN_BRACE:
|
||||
case CLOSE_BRACE:
|
||||
case COLON:
|
||||
case u'[':
|
||||
case u']':
|
||||
case u'-':
|
||||
case u'^':
|
||||
case u'&':
|
||||
case u'\\':
|
||||
case u'{':
|
||||
case u'}':
|
||||
case u':':
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
buf.append(BACKSLASH);
|
||||
buf.append(u'\\');
|
||||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
buf.append(BACKSLASH);
|
||||
buf.append(u'\\');
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -2049,7 +2039,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
|
||||
backslashCount = 0;
|
||||
} else {
|
||||
result.append(c);
|
||||
if (c == BACKSLASH) {
|
||||
if (c == u'\\') {
|
||||
++backslashCount;
|
||||
} else {
|
||||
backslashCount = 0;
|
||||
@ -2082,13 +2072,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
|
||||
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
UBool escapeUnprintable) const
|
||||
{
|
||||
result.append(SET_OPEN);
|
||||
result.append(u'[');
|
||||
|
||||
// // Check against the predefined categories. We implicitly build
|
||||
// // up ALL category sets the first time toPattern() is called.
|
||||
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
|
||||
// if (*this == getCategorySet(cat)) {
|
||||
// result.append(COLON);
|
||||
// result.append(u':');
|
||||
// result.append(CATEGORY_NAMES, cat*2, 2);
|
||||
// return result.append(CATEGORY_CLOSE);
|
||||
// }
|
||||
@ -2104,7 +2094,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
getRangeEnd(count-1) == MAX_VALUE) {
|
||||
|
||||
// Emit the inverse
|
||||
result.append(COMPLEMENT);
|
||||
result.append(u'^');
|
||||
|
||||
for (int32_t i = 1; i < count; ++i) {
|
||||
UChar32 start = getRangeEnd(i-1)+1;
|
||||
@ -2112,7 +2102,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
_appendToPat(result, start, escapeUnprintable);
|
||||
if (start != end) {
|
||||
if ((start+1) != end) {
|
||||
result.append(HYPHEN);
|
||||
result.append(u'-');
|
||||
}
|
||||
_appendToPat(result, end, escapeUnprintable);
|
||||
}
|
||||
@ -2127,7 +2117,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
_appendToPat(result, start, escapeUnprintable);
|
||||
if (start != end) {
|
||||
if ((start+1) != end) {
|
||||
result.append(HYPHEN);
|
||||
result.append(u'-');
|
||||
}
|
||||
_appendToPat(result, end, escapeUnprintable);
|
||||
}
|
||||
@ -2136,14 +2126,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
|
||||
|
||||
if (strings != nullptr) {
|
||||
for (int32_t i = 0; i<strings->size(); ++i) {
|
||||
result.append(OPEN_BRACE);
|
||||
result.append(u'{');
|
||||
_appendToPat(result,
|
||||
*(const UnicodeString*) strings->elementAt(i),
|
||||
escapeUnprintable);
|
||||
result.append(CLOSE_BRACE);
|
||||
result.append(u'}');
|
||||
}
|
||||
}
|
||||
return result.append(SET_CLOSE);
|
||||
return result.append(u']');
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -47,31 +47,6 @@
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
// Define UChar constants using hex for EBCDIC compatibility
|
||||
// Used #define to reduce private static exports and memory access time.
|
||||
#define SET_OPEN ((UChar)0x005B) /*[*/
|
||||
#define SET_CLOSE ((UChar)0x005D) /*]*/
|
||||
#define HYPHEN ((UChar)0x002D) /*-*/
|
||||
#define COMPLEMENT ((UChar)0x005E) /*^*/
|
||||
#define COLON ((UChar)0x003A) /*:*/
|
||||
#define BACKSLASH ((UChar)0x005C) /*\*/
|
||||
#define INTERSECTION ((UChar)0x0026) /*&*/
|
||||
#define UPPER_U ((UChar)0x0055) /*U*/
|
||||
#define LOWER_U ((UChar)0x0075) /*u*/
|
||||
#define OPEN_BRACE ((UChar)123) /*{*/
|
||||
#define CLOSE_BRACE ((UChar)125) /*}*/
|
||||
#define UPPER_P ((UChar)0x0050) /*P*/
|
||||
#define LOWER_P ((UChar)0x0070) /*p*/
|
||||
#define UPPER_N ((UChar)78) /*N*/
|
||||
#define EQUALS ((UChar)0x003D) /*=*/
|
||||
|
||||
//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:"
|
||||
static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]"
|
||||
//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p"
|
||||
//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}"
|
||||
//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N"
|
||||
static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
|
||||
|
||||
// Special property set IDs
|
||||
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
|
||||
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
|
||||
@ -81,12 +56,6 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
|
||||
#define NAME_PROP "na"
|
||||
#define NAME_PROP_LENGTH 2
|
||||
|
||||
/**
|
||||
* Delimiter string used in patterns to close a category reference:
|
||||
* ":]". Example: "[:Lu:]".
|
||||
*/
|
||||
//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */
|
||||
|
||||
// Cached sets ------------------------------------------------------------- ***
|
||||
|
||||
U_CDECL_BEGIN
|
||||
@ -140,27 +109,27 @@ uniset_getUnicode32Instance(UErrorCode &errorCode) {
|
||||
static inline UBool
|
||||
isPerlOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
UChar c;
|
||||
return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P);
|
||||
return pattern.charAt(pos)==u'\\' && ((c=pattern.charAt(pos+1))==u'p' || c==u'P');
|
||||
}
|
||||
|
||||
/*static inline UBool
|
||||
isPerlClose(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==CLOSE_BRACE;
|
||||
return pattern.charAt(pos)==u'}';
|
||||
}*/
|
||||
|
||||
static inline UBool
|
||||
isNameOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N;
|
||||
return pattern.charAt(pos)==u'\\' && pattern.charAt(pos+1)==u'N';
|
||||
}
|
||||
|
||||
static inline UBool
|
||||
isPOSIXOpen(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON;
|
||||
return pattern.charAt(pos)==u'[' && pattern.charAt(pos+1)==u':';
|
||||
}
|
||||
|
||||
/*static inline UBool
|
||||
isPOSIXClose(const UnicodeString &pattern, int32_t pos) {
|
||||
return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE;
|
||||
return pattern.charAt(pos)==u':' && pattern.charAt(pos+1)==u']';
|
||||
}*/
|
||||
|
||||
// TODO memory debugging provided inside uniset.cpp
|
||||
@ -326,9 +295,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
|
||||
while (mode != 2 && !chars.atEnd()) {
|
||||
U_ASSERT((lastItem == 0 && op == 0) ||
|
||||
(lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
|
||||
(lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
|
||||
op == INTERSECTION /*'&'*/)));
|
||||
(lastItem == 1 && (op == 0 || op == u'-')) ||
|
||||
(lastItem == 2 && (op == 0 || op == u'-' || op == u'&')));
|
||||
|
||||
UChar32 c = 0;
|
||||
UBool literal = FALSE;
|
||||
@ -356,27 +324,27 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
|
||||
if (c == 0x5B /*'['*/ && !literal) {
|
||||
if (c == u'[' && !literal) {
|
||||
if (mode == 1) {
|
||||
chars.setPos(backup); // backup
|
||||
setMode = 1;
|
||||
} else {
|
||||
// Handle opening '[' delimiter
|
||||
mode = 1;
|
||||
patLocal.append((UChar) 0x5B /*'['*/);
|
||||
patLocal.append(u'[');
|
||||
chars.getPos(backup); // prepare to backup
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x5E /*'^'*/ && !literal) {
|
||||
if (c == u'^' && !literal) {
|
||||
invert = TRUE;
|
||||
patLocal.append((UChar) 0x5E /*'^'*/);
|
||||
patLocal.append(u'^');
|
||||
chars.getPos(backup); // prepare to backup
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
}
|
||||
// Fall through to handle special leading '-';
|
||||
// otherwise restart loop for nested [], \p{}, etc.
|
||||
if (c == HYPHEN /*'-'*/) {
|
||||
if (c == u'-') {
|
||||
literal = TRUE;
|
||||
// Fall through to handle literal '-' below
|
||||
} else {
|
||||
@ -418,7 +386,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
op = 0;
|
||||
}
|
||||
|
||||
if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) {
|
||||
if (op == u'-' || op == u'&') {
|
||||
patLocal.append(op);
|
||||
}
|
||||
|
||||
@ -454,10 +422,10 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case HYPHEN: /*'-'*/
|
||||
case u'-':
|
||||
removeAll(*nested);
|
||||
break;
|
||||
case INTERSECTION: /*'&'*/
|
||||
case u'&':
|
||||
retainAll(*nested);
|
||||
break;
|
||||
case 0:
|
||||
@ -483,24 +451,24 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
|
||||
if (!literal) {
|
||||
switch (c) {
|
||||
case 0x5D /*']'*/:
|
||||
case u']':
|
||||
if (lastItem == 1) {
|
||||
add(lastChar, lastChar);
|
||||
_appendToPat(patLocal, lastChar, FALSE);
|
||||
}
|
||||
// Treat final trailing '-' as a literal
|
||||
if (op == HYPHEN /*'-'*/) {
|
||||
if (op == u'-') {
|
||||
add(op, op);
|
||||
patLocal.append(op);
|
||||
} else if (op == INTERSECTION /*'&'*/) {
|
||||
} else if (op == u'&') {
|
||||
// syntaxError(chars, "Trailing '&'");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
}
|
||||
patLocal.append((UChar) 0x5D /*']'*/);
|
||||
patLocal.append(u']');
|
||||
mode = 2;
|
||||
continue;
|
||||
case HYPHEN /*'-'*/:
|
||||
case u'-':
|
||||
if (op == 0) {
|
||||
if (lastItem != 0) {
|
||||
op = (UChar) c;
|
||||
@ -510,8 +478,8 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
add(c, c);
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x5D /*']'*/ && !literal) {
|
||||
patLocal.append(HYPHEN_RIGHT_BRACE, 2);
|
||||
if (c == u']' && !literal) {
|
||||
patLocal.append(u"-]", 2);
|
||||
mode = 2;
|
||||
continue;
|
||||
}
|
||||
@ -520,7 +488,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
// syntaxError(chars, "'-' not after char or set");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case INTERSECTION /*'&'*/:
|
||||
case u'&':
|
||||
if (lastItem == 2 && op == 0) {
|
||||
op = (UChar) c;
|
||||
continue;
|
||||
@ -528,11 +496,11 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
// syntaxError(chars, "'&' not after set");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case 0x5E /*'^'*/:
|
||||
case u'^':
|
||||
// syntaxError(chars, "'^' not after '['");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
case 0x7B /*'{'*/:
|
||||
case u'{':
|
||||
if (op != 0) {
|
||||
// syntaxError(chars, "Missing operand after operator");
|
||||
ec = U_MALFORMED_SET;
|
||||
@ -549,13 +517,13 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
while (!chars.atEnd()) {
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
if (c == 0x7D /*'}'*/ && !literal) {
|
||||
if (c == u'}' && !literal) {
|
||||
ok = TRUE;
|
||||
break;
|
||||
}
|
||||
buf.append(c);
|
||||
}
|
||||
if (buf.length() < 1 || !ok) {
|
||||
if (!ok) {
|
||||
// syntaxError(chars, "Invalid multicharacter string");
|
||||
ec = U_MALFORMED_SET;
|
||||
return;
|
||||
@ -565,9 +533,9 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
// we don't need to drop through to the further
|
||||
// processing
|
||||
add(buf);
|
||||
patLocal.append((UChar) 0x7B /*'{'*/);
|
||||
patLocal.append(u'{');
|
||||
_appendToPat(patLocal, buf, FALSE);
|
||||
patLocal.append((UChar) 0x7D /*'}'*/);
|
||||
patLocal.append(u'}');
|
||||
continue;
|
||||
case SymbolTable::SYMBOL_REF:
|
||||
// symbols nosymbols
|
||||
@ -580,7 +548,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
chars.getPos(backup);
|
||||
c = chars.next(opts, literal, ec);
|
||||
if (U_FAILURE(ec)) return;
|
||||
UBool anchor = (c == 0x5D /*']'*/ && !literal);
|
||||
UBool anchor = (c == u']' && !literal);
|
||||
if (symbols == 0 && !anchor) {
|
||||
c = SymbolTable::SYMBOL_REF;
|
||||
chars.setPos(backup);
|
||||
@ -594,7 +562,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
add(U_ETHER);
|
||||
usePat = TRUE;
|
||||
patLocal.append((UChar) SymbolTable::SYMBOL_REF);
|
||||
patLocal.append((UChar) 0x5D /*']'*/);
|
||||
patLocal.append(u']');
|
||||
mode = 2;
|
||||
continue;
|
||||
}
|
||||
@ -617,7 +585,7 @@ void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
|
||||
lastChar = c;
|
||||
break;
|
||||
case 1:
|
||||
if (op == HYPHEN /*'-'*/) {
|
||||
if (op == u'-') {
|
||||
if (lastChar >= c) {
|
||||
// Don't allow redundant (a-a) or empty (b-a) ranges;
|
||||
// these are most likely typos.
|
||||
@ -1036,11 +1004,11 @@ UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars,
|
||||
RuleCharacterIterator::Pos pos;
|
||||
chars.getPos(pos);
|
||||
UChar32 c = chars.next(iterOpts, literal, ec);
|
||||
if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) {
|
||||
if (c == u'[' || c == u'\\') {
|
||||
UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE,
|
||||
literal, ec);
|
||||
result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) :
|
||||
(d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/);
|
||||
result = (c == u'[') ? (d == u':') :
|
||||
(d == u'N' || d == u'p' || d == u'P');
|
||||
}
|
||||
chars.setPos(pos);
|
||||
return result && U_SUCCESS(ec);
|
||||
@ -1071,17 +1039,17 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
||||
posix = TRUE;
|
||||
pos += 2;
|
||||
pos = ICU_Utility::skipWhitespace(pattern, pos);
|
||||
if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) {
|
||||
if (pos < pattern.length() && pattern.charAt(pos) == u'^') {
|
||||
++pos;
|
||||
invert = TRUE;
|
||||
}
|
||||
} else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) {
|
||||
UChar c = pattern.charAt(pos+1);
|
||||
invert = (c == UPPER_P);
|
||||
isName = (c == UPPER_N);
|
||||
invert = (c == u'P');
|
||||
isName = (c == u'N');
|
||||
pos += 2;
|
||||
pos = ICU_Utility::skipWhitespace(pattern, pos);
|
||||
if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) {
|
||||
if (pos == pattern.length() || pattern.charAt(pos++) != u'{') {
|
||||
// Syntax error; "\p" or "\P" not followed by "{"
|
||||
FAIL(ec);
|
||||
}
|
||||
@ -1093,9 +1061,9 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
||||
// Look for the matching close delimiter, either :] or }
|
||||
int32_t close;
|
||||
if (posix) {
|
||||
close = pattern.indexOf(POSIX_CLOSE, 2, pos);
|
||||
close = pattern.indexOf(u":]", 2, pos);
|
||||
} else {
|
||||
close = pattern.indexOf(CLOSE_BRACE, pos);
|
||||
close = pattern.indexOf(u'}', pos);
|
||||
}
|
||||
if (close < 0) {
|
||||
// Syntax error; close delimiter missing
|
||||
@ -1105,7 +1073,7 @@ UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern,
|
||||
// Look for an '=' sign. If this is present, we will parse a
|
||||
// medium \p{gc=Cf} or long \p{GeneralCategory=Format}
|
||||
// pattern.
|
||||
int32_t equals = pattern.indexOf(EQUALS, pos);
|
||||
int32_t equals = pattern.indexOf(u'=', pos);
|
||||
UnicodeString propName, valueName;
|
||||
if (equals >= 0 && equals < close && !isName) {
|
||||
// Equals seen; parse medium/long pattern
|
||||
|
@ -231,6 +231,9 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
UBool thisRelevant;
|
||||
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
|
||||
if(spanLength<length16) { // Relevant string.
|
||||
@ -312,7 +315,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED);
|
||||
if(spanLength<length16) { // Relevant string.
|
||||
if(spanLength<length16 && length16>0) { // Relevant string.
|
||||
if(which&UTF16) {
|
||||
if(which&CONTAINED) {
|
||||
if(which&FWD) {
|
||||
@ -362,7 +365,7 @@ UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set,
|
||||
addToSpanNotSet(c);
|
||||
}
|
||||
}
|
||||
} else { // Irrelevant string.
|
||||
} else { // Irrelevant string. (Also the empty string.)
|
||||
if(which&UTF8) {
|
||||
if(which&CONTAINED) { // Only necessary for LONGEST_MATCH.
|
||||
uint8_t *s8=utf8+utf8Count;
|
||||
@ -653,11 +656,12 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
|
||||
for(i=0; i<stringsLength; ++i) {
|
||||
int32_t overlap=spanLengths[i];
|
||||
if(overlap==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
|
||||
// Try to match this string at pos-overlap..pos.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
@ -697,6 +701,9 @@ int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondi
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
// Try to match this string at pos-overlap..pos.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
@ -817,11 +824,12 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
|
||||
for(i=0; i<stringsLength; ++i) {
|
||||
int32_t overlap=spanBackLengths[i];
|
||||
if(overlap==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
|
||||
// Try to match this string at pos-(length16-overlap)..pos-length16.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
@ -863,6 +871,9 @@ int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanC
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
if (length16==0) {
|
||||
continue; // skip the empty string
|
||||
}
|
||||
|
||||
// Try to match this string at pos-(length16-overlap)..pos-length16.
|
||||
if(overlap>=LONG_SPAN) {
|
||||
@ -1358,11 +1369,12 @@ int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const {
|
||||
// Try to match the strings at pos.
|
||||
for(i=0; i<stringsLength; ++i) {
|
||||
if(spanLengths[i]==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) {
|
||||
return pos; // There is a set element at pos.
|
||||
}
|
||||
@ -1401,11 +1413,12 @@ int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const
|
||||
// it is easier and we only need to know whether the string is irrelevant
|
||||
// which is the same in either array.
|
||||
if(spanLengths[i]==ALL_CP_CONTAINED) {
|
||||
continue; // Irrelevant string.
|
||||
continue; // Irrelevant string. (Also the empty string.)
|
||||
}
|
||||
const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i);
|
||||
const UChar *s16=string.getBuffer();
|
||||
int32_t length16=string.length();
|
||||
U_ASSERT(length>0);
|
||||
if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) {
|
||||
return pos; // There is a set element at pos.
|
||||
}
|
||||
|
@ -310,55 +310,12 @@ u_isgraphPOSIX(UChar32 c);
|
||||
U_CFUNC UBool
|
||||
u_isprintPOSIX(UChar32 c);
|
||||
|
||||
/** Turn a bit index into a bit flag. @internal */
|
||||
#define FLAG(n) ((uint32_t)1<<(n))
|
||||
|
||||
/** Flags for general categories in the order of UCharCategory. @internal */
|
||||
#define _Cn FLAG(U_GENERAL_OTHER_TYPES)
|
||||
#define _Lu FLAG(U_UPPERCASE_LETTER)
|
||||
#define _Ll FLAG(U_LOWERCASE_LETTER)
|
||||
#define _Lt FLAG(U_TITLECASE_LETTER)
|
||||
#define _Lm FLAG(U_MODIFIER_LETTER)
|
||||
/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */
|
||||
#define _Mn FLAG(U_NON_SPACING_MARK)
|
||||
#define _Me FLAG(U_ENCLOSING_MARK)
|
||||
#define _Mc FLAG(U_COMBINING_SPACING_MARK)
|
||||
#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER)
|
||||
#define _Nl FLAG(U_LETTER_NUMBER)
|
||||
#define _No FLAG(U_OTHER_NUMBER)
|
||||
#define _Zs FLAG(U_SPACE_SEPARATOR)
|
||||
#define _Zl FLAG(U_LINE_SEPARATOR)
|
||||
#define _Zp FLAG(U_PARAGRAPH_SEPARATOR)
|
||||
#define _Cc FLAG(U_CONTROL_CHAR)
|
||||
#define _Cf FLAG(U_FORMAT_CHAR)
|
||||
#define _Co FLAG(U_PRIVATE_USE_CHAR)
|
||||
#define _Cs FLAG(U_SURROGATE)
|
||||
#define _Pd FLAG(U_DASH_PUNCTUATION)
|
||||
#define _Ps FLAG(U_START_PUNCTUATION)
|
||||
/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */
|
||||
/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
|
||||
#define _Po FLAG(U_OTHER_PUNCTUATION)
|
||||
#define _Sm FLAG(U_MATH_SYMBOL)
|
||||
#define _Sc FLAG(U_CURRENCY_SYMBOL)
|
||||
#define _Sk FLAG(U_MODIFIER_SYMBOL)
|
||||
#define _So FLAG(U_OTHER_SYMBOL)
|
||||
#define _Pi FLAG(U_INITIAL_PUNCTUATION)
|
||||
/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */
|
||||
|
||||
/** Some code points. @internal */
|
||||
enum {
|
||||
TAB =0x0009,
|
||||
LF =0x000a,
|
||||
FF =0x000c,
|
||||
CR =0x000d,
|
||||
U_A =0x0041,
|
||||
U_F =0x0046,
|
||||
U_Z =0x005a,
|
||||
U_a =0x0061,
|
||||
U_f =0x0066,
|
||||
U_z =0x007a,
|
||||
DEL =0x007f,
|
||||
NL =0x0085,
|
||||
NBSP =0x00a0,
|
||||
CGJ =0x034f,
|
||||
FIGURESP=0x2007,
|
||||
@ -367,15 +324,6 @@ enum {
|
||||
ZWJ =0x200d,
|
||||
RLM =0x200f,
|
||||
NNBSP =0x202f,
|
||||
WJ =0x2060,
|
||||
INHSWAP =0x206a,
|
||||
NOMDIG =0x206f,
|
||||
U_FW_A =0xff21,
|
||||
U_FW_F =0xff26,
|
||||
U_FW_Z =0xff3a,
|
||||
U_FW_a =0xff41,
|
||||
U_FW_f =0xff46,
|
||||
U_FW_z =0xff5a,
|
||||
ZWNBSP =0xfeff
|
||||
};
|
||||
|
||||
|
@ -91,6 +91,15 @@ static UBool chopLocale(char *name) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called to check whether a name without '_' needs to be checked for a parent.
|
||||
* Some code had assumed that locale IDs with '_' could not have a non-root parent.
|
||||
* We may want a better way of doing this.
|
||||
*/
|
||||
static UBool mayHaveParent(char *name) {
|
||||
return (name[0] != 0 && uprv_strstr("nb nn",name) != nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Internal function
|
||||
*/
|
||||
@ -529,8 +538,8 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
|
||||
char name[], int32_t nameCapacity,
|
||||
UBool usingUSRData, char usrDataPath[], UErrorCode *status) {
|
||||
if (U_FAILURE(*status)) { return FALSE; }
|
||||
UBool hasChopped = TRUE;
|
||||
while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback &&
|
||||
UBool checkParent = TRUE;
|
||||
while (checkParent && t1->fParent == NULL && !t1->fData.noFallback &&
|
||||
res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) {
|
||||
Resource parentRes = res_getResource(&t1->fData, "%%Parent");
|
||||
if (parentRes != RES_BOGUS) { // An explicit parent was found.
|
||||
@ -573,7 +582,7 @@ loadParentsExceptRoot(UResourceDataEntry *&t1,
|
||||
}
|
||||
}
|
||||
t1 = t2;
|
||||
hasChopped = chopLocale(name);
|
||||
checkParent = chopLocale(name) || mayHaveParent(name);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
@ -692,7 +701,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (hasChopped && !isRoot) {
|
||||
if ((hasChopped || mayHaveParent(name)) && !isRoot) {
|
||||
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
|
||||
goto finish;
|
||||
}
|
||||
@ -716,7 +725,7 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID,
|
||||
hasRealData = TRUE;
|
||||
isDefault = TRUE;
|
||||
// TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path?
|
||||
if (hasChopped && !isRoot) {
|
||||
if ((hasChopped || mayHaveParent(name)) && !isRoot) {
|
||||
if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) {
|
||||
goto finish;
|
||||
}
|
||||
@ -1908,6 +1917,8 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else if (res == RES_BOGUS) {
|
||||
break;
|
||||
}
|
||||
} while(*myPath); /* Continue until the whole path is consumed */
|
||||
}
|
||||
@ -3019,7 +3030,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status)
|
||||
U_CAPI UBool U_EXPORT2
|
||||
ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){
|
||||
if(res1==NULL || res2==NULL){
|
||||
return res1==res2; /* pointer comparision */
|
||||
return res1==res2; /* pointer comparison */
|
||||
}
|
||||
if(res1->fKey==NULL|| res2->fKey==NULL){
|
||||
return (res1->fKey==res2->fKey);
|
||||
|
@ -960,14 +960,6 @@ res_findResource(const ResourceData *pResData, Resource r, char** path, const ch
|
||||
if(URES_IS_TABLE(type)) {
|
||||
*key = pathP;
|
||||
t2 = res_getTableItemByKey(pResData, t1, &indexR, key);
|
||||
if(t2 == RES_BOGUS) {
|
||||
/* if we fail to get the resource by key, maybe we got an index */
|
||||
indexR = uprv_strtol(pathP, &closeIndex, 10);
|
||||
if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) {
|
||||
/* if we indeed have an index, try to get the item by index */
|
||||
t2 = res_getTableItemByIndex(pResData, t1, indexR, key);
|
||||
} // else t2 is already RES_BOGUS
|
||||
}
|
||||
} else if(URES_IS_ARRAY(type)) {
|
||||
indexR = uprv_strtol(pathP, &closeIndex, 10);
|
||||
if(indexR >= 0 && *closeIndex == 0) {
|
||||
|
@ -270,11 +270,13 @@ ures_getByKeyWithFallback(const UResourceBundle *resB,
|
||||
* function can perform fallback on the sub-resources of the table.
|
||||
* @param resB a resource
|
||||
* @param inKey a key associated with the requested resource
|
||||
* @param len if not NULL, used to return the length of the string
|
||||
* @param status: fills in the outgoing error code
|
||||
* could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
|
||||
* could be a non-failing error
|
||||
* e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
|
||||
* @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it
|
||||
* @return returns a pointer to a zero-terminated UChar array which lives in a
|
||||
* memory mapped/DLL file.
|
||||
*/
|
||||
U_CAPI const UChar* U_EXPORT2
|
||||
ures_getStringByKeyWithFallback(const UResourceBundle *resB,
|
||||
|
@ -116,6 +116,12 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen) {
|
||||
((UnicodeSet*) set)->UnicodeSet::remove(s);
|
||||
}
|
||||
|
||||
// C API wrapper: forwards the (str, length) text to UnicodeSet::removeAll().
// The UnicodeString is built with its first constructor argument true only
// when length is -1 (presumably marking str as NUL-terminated; confirm
// against the UnicodeString constructor documentation).
U_CAPI void U_EXPORT2
uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeString codePoints(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified so that UnicodeSet's own implementation is called.
    target->UnicodeSet::removeAll(codePoints);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_removeAll(USet* set, const USet* remove) {
|
||||
((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove);
|
||||
@ -126,6 +132,18 @@ uset_retain(USet* set, UChar32 start, UChar32 end) {
|
||||
((UnicodeSet*) set)->UnicodeSet::retain(start, end);
|
||||
}
|
||||
|
||||
// C API wrapper: forwards the (str, length) text to
// UnicodeSet::retain(const UnicodeString &).
// The UnicodeString is built with its first constructor argument true only
// when length is -1 (presumably marking str as NUL-terminated; confirm
// against the UnicodeString constructor documentation).
U_CAPI void U_EXPORT2
uset_retainString(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified so that UnicodeSet's own implementation is called.
    target->UnicodeSet::retain(text);
}
|
||||
|
||||
// C API wrapper: forwards the (str, length) text to
// UnicodeSet::retainAll(const UnicodeString &).
// The UnicodeString is built with its first constructor argument true only
// when length is -1 (presumably marking str as NUL-terminated; confirm
// against the UnicodeString constructor documentation).
U_CAPI void U_EXPORT2
uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeString codePoints(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified so that UnicodeSet's own implementation is called.
    target->UnicodeSet::retainAll(codePoints);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_retainAll(USet* set, const USet* retain) {
|
||||
((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain);
|
||||
@ -141,6 +159,23 @@ uset_complement(USet* set) {
|
||||
((UnicodeSet*) set)->UnicodeSet::complement();
|
||||
}
|
||||
|
||||
// C API wrapper: forwards start and end unchanged to
// UnicodeSet::complement(start, end) on the set behind the USet handle.
U_CAPI void U_EXPORT2
uset_complementRange(USet *set, UChar32 start, UChar32 end) {
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified so that UnicodeSet's own implementation is called.
    target->UnicodeSet::complement(start, end);
}
|
||||
|
||||
// C API wrapper: forwards the (str, length) text to
// UnicodeSet::complement(const UnicodeString &).
// The UnicodeString is built with its first constructor argument true only
// when length is -1 (presumably marking str as NUL-terminated; confirm
// against the UnicodeString constructor documentation).
U_CAPI void U_EXPORT2
uset_complementString(USet *set, const UChar *str, int32_t length) {
    UnicodeString text(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified so that UnicodeSet's own implementation is called.
    target->UnicodeSet::complement(text);
}
|
||||
|
||||
// C API wrapper: forwards the (str, length) text to
// UnicodeSet::complementAll(const UnicodeString &).
// The UnicodeString is built with its first constructor argument true only
// when length is -1 (presumably marking str as NUL-terminated; confirm
// against the UnicodeString constructor documentation).
U_CAPI void U_EXPORT2
uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length) {
    UnicodeString codePoints(length == -1, str, length);
    UnicodeSet *target = reinterpret_cast<UnicodeSet *>(set);
    // Explicitly qualified so that UnicodeSet's own implementation is called.
    target->UnicodeSet::complementAll(codePoints);
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
uset_complementAll(USet* set, const USet* complement) {
|
||||
((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement);
|
||||
|
@ -575,7 +575,7 @@ usprep_map( const UStringPrepProfile* profile,
|
||||
}
|
||||
|
||||
}else if(type==USPREP_DELETE){
|
||||
// just consume the codepoint and contine
|
||||
// just consume the codepoint and continue
|
||||
continue;
|
||||
}
|
||||
//copy the code point into destination
|
||||
|
@ -364,7 +364,7 @@ _strFromWCS( UChar *dest,
|
||||
}
|
||||
|
||||
/* we have found a null so convert the
|
||||
* chunk from begining of non-null char to null
|
||||
* chunk from beginning of non-null char to null
|
||||
*/
|
||||
retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
|
||||
|
||||
@ -387,7 +387,7 @@ _strFromWCS( UChar *dest,
|
||||
* null terminate it and convert wchar_ts to chars
|
||||
*/
|
||||
if(nulLen >= _STACK_BUFFER_CAPACITY){
|
||||
/* Should rarely occcur */
|
||||
/* Should rarely occur */
|
||||
/* allocate new buffer */
|
||||
pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
|
||||
if(pWStack==NULL){
|
||||
|
@ -382,7 +382,7 @@ utext_previous32From(UText *ut, int64_t index) {
|
||||
//
|
||||
UChar32 cPrev; // The character preceding cCurr, which is what we will return.
|
||||
|
||||
// Address the chunk containg the position preceding the incoming index
|
||||
// Address the chunk containing the position preceding the incoming index
|
||||
// A tricky edge case:
|
||||
// We try to test the requested native index against the chunkNativeStart to determine
|
||||
// whether the character preceding the one at the index is in the current chunk.
|
||||
@ -894,7 +894,7 @@ struct UTF8Buf {
|
||||
// one for a supplementary starting in the last normal position,
|
||||
// and one for an entry for the buffer limit position.
|
||||
uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to
|
||||
// correspoding offset in filled part of buf.
|
||||
// corresponding offset in filled part of buf.
|
||||
int32_t align;
|
||||
};
|
||||
|
||||
@ -1545,7 +1545,7 @@ utf8TextMapOffsetToNative(const UText *ut) {
|
||||
}
|
||||
|
||||
//
|
||||
// Map a native index to the corrsponding chunk offset
|
||||
// Map a native index to the corresponding chunk offset
|
||||
//
|
||||
static int32_t U_CALLCONV
|
||||
utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
|
||||
|
@ -13,10 +13,10 @@
|
||||
#ifndef ICU_UTIL_H
|
||||
#define ICU_UTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uobject.h"
|
||||
#include "charstr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
#include "unicode/uobject.h"
|
||||
#include "unicode/utypes.h"
|
||||
//--------------------------------------------------------------------
|
||||
// class ICU_Utility
|
||||
// i18n utility functions, scoped into the class ICU_Utility.
|
||||
|
@ -193,7 +193,7 @@ UPRV_BLOCK_MACRO_BEGIN { \
|
||||
* Trace statement for each exit point of a function that has a UTRACE_ENTRY()
|
||||
* statement, and that returns a value.
|
||||
*
|
||||
* @param val The function's return value, int32_t or comatible type.
|
||||
* @param val The function's return value, int32_t or compatible type.
|
||||
*
|
||||
* @internal
|
||||
*/
|
||||
|
@ -312,7 +312,7 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const {
|
||||
} else {
|
||||
for (i=startIndex; i<count; ++i) {
|
||||
/* Pointers are not always the same size as ints so to perform
|
||||
* a valid comparision we need to know whether we are being
|
||||
* a valid comparison we need to know whether we are being
|
||||
* provided an int or a pointer. */
|
||||
if (hint & HINT_KEY_POINTER) {
|
||||
if (key.pointer == elements[i].pointer) {
|
||||
@ -518,7 +518,7 @@ sortiComparator(const void * /*context */, const void *left, const void *right)
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort the vector, assuming it constains ints.
|
||||
* Sort the vector, assuming it contains ints.
|
||||
* (A more general sort would take a comparison function, but it's
|
||||
* not clear whether UVector's UElementComparator or
|
||||
* UComparator from uprv_sortArray would be more appropriate.)
|
||||
|
@ -57,7 +57,7 @@ LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS)
|
||||
#LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS)
|
||||
|
||||
## Shared library options
|
||||
LD_SOOPTIONS= -Wl,-Bsymbolic
|
||||
LD_SOOPTIONS=
|
||||
|
||||
## Commands to make a shared library
|
||||
SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -shared $(LD_SOOPTIONS) -Wl,--enable-auto-import -Wl,--out-implib=$(dir $@)lib$(notdir $(@:$(SO_TARGET_VERSION_MAJOR).$(SO)=))$(IMPORT_LIB_EXT)#M#
|
||||
@ -101,11 +101,14 @@ LIBICU = $(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)
|
||||
#SH#ICULIBS_COMMON_LIB_NAME="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}${ICULIBSUFFIX_VERSION}.${SO}"
|
||||
#SH#ICULIBS_COMMON_LIB_NAME_A="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}.${A}"
|
||||
|
||||
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_LX="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(LAYOUTEX_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_CTESTFW="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(CTESTFW_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_TOOLUTIL="-l$(ICUPREFIX)tu$(ICULIBSUFFIX)"
|
||||
#SH#
|
||||
#SH## ICULIBS is the set of libraries your application should link
|
||||
#SH## with usually. Many applications will want to add ${ICULIBS_I18N} as well.
|
||||
|
@ -57,7 +57,7 @@ LINK.c= $(CXX) $(CXXFLAGS) $(LDFLAGS)
|
||||
#LINK.cc= $(CXX) $(CXXFLAGS) $(LDFLAGS)
|
||||
|
||||
## Shared library options
|
||||
LD_SOOPTIONS= -Wl,-Bsymbolic
|
||||
LD_SOOPTIONS=
|
||||
|
||||
## Commands to make a shared library
|
||||
SHLIB.c= $(CC) $(CFLAGS) $(LDFLAGS) -shared $(LD_SOOPTIONS) -Wl,--enable-auto-import -Wl,--out-implib=$(dir $@)lib$(notdir $(@:$(SO_TARGET_VERSION_MAJOR).$(SO)=))$(IMPORT_LIB_EXT)#M#
|
||||
@ -101,11 +101,14 @@ LIBICU = $(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)
|
||||
#SH#ICULIBS_COMMON_LIB_NAME="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}${ICULIBSUFFIX_VERSION}.${SO}"
|
||||
#SH#ICULIBS_COMMON_LIB_NAME_A="${LIBICU}${COMMON_STUBNAME}${ICULIBSUFFIX}.${A}"
|
||||
|
||||
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)$(ICULIBSUFFIX_VERSION)"
|
||||
#SH#ICULIBS_DATA="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_DT="-l$(ICUPREFIX)$(DATA_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_I18N="-l$(ICUPREFIX)$(I18N_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_LX="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(LAYOUTEX_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_IO="-l$(ICUPREFIX)$(IO_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_UC="-l$(ICUPREFIX)$(COMMON_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_CTESTFW="-l$(STATIC_PREFIX_WHEN_USED)$(ICUPREFIX)$(CTESTFW_STUBNAME)$(ICULIBSUFFIX)"
|
||||
ICULIBS_TOOLUTIL="-l$(ICUPREFIX)tu$(ICULIBSUFFIX)"
|
||||
#SH#
|
||||
#SH## ICULIBS is the set of libraries your application should link
|
||||
#SH## with usually. Many applications will want to add ${ICULIBS_I18N} as well.
|
||||
|
22
intl/icu/source/configure
vendored
22
intl/icu/source/configure
vendored
@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for ICU 68.2.
|
||||
# Generated by GNU Autoconf 2.69 for ICU 69.1.
|
||||
#
|
||||
# Report bugs to <http://icu-project.org/bugs>.
|
||||
#
|
||||
@ -582,8 +582,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='ICU'
|
||||
PACKAGE_TARNAME='International Components for Unicode'
|
||||
PACKAGE_VERSION='68.2'
|
||||
PACKAGE_STRING='ICU 68.2'
|
||||
PACKAGE_VERSION='69.1'
|
||||
PACKAGE_STRING='ICU 69.1'
|
||||
PACKAGE_BUGREPORT='http://icu-project.org/bugs'
|
||||
PACKAGE_URL='http://icu-project.org'
|
||||
|
||||
@ -1364,7 +1364,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures ICU 68.2 to adapt to many kinds of systems.
|
||||
\`configure' configures ICU 69.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@ -1430,7 +1430,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of ICU 68.2:";;
|
||||
short | recursive ) echo "Configuration of ICU 69.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@ -1455,7 +1455,7 @@ Optional Features:
|
||||
--enable-weak-threads weakly reference the threading library default=no
|
||||
--enable-extras build ICU extras default=yes
|
||||
--enable-icuio build ICU's icuio library default=yes
|
||||
--enable-layoutex build ICU's Paragraph Layout library default=yes.
|
||||
--enable-layoutex build ICU's Paragraph Layout library default=no.
|
||||
icu-le-hb must be installed via pkg-config. See http://harfbuzz.org
|
||||
|
||||
--enable-tools build ICU's tools default=yes
|
||||
@ -1568,7 +1568,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
ICU configure 68.2
|
||||
ICU configure 69.1
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@ -2314,7 +2314,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by ICU $as_me 68.2, which was
|
||||
It was created by ICU $as_me 69.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@ -7708,7 +7708,7 @@ if test "${enable_layoutex+set}" = set; then :
|
||||
*) as_fn_error $? "bad value ${enableval} for --enable-layoutex" "$LINENO" 5 ;;
|
||||
esac
|
||||
else
|
||||
layoutex=$have_icu_le_hb
|
||||
layoutex=false
|
||||
fi
|
||||
|
||||
|
||||
@ -8550,7 +8550,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by ICU $as_me 68.2, which was
|
||||
This file was extended by ICU $as_me 69.1, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@ -8604,7 +8604,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
ICU config.status 68.2
|
||||
ICU config.status 69.1
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
@ -1101,14 +1101,14 @@ ICU_CONDITIONAL(ICUIO, test "$icuio" = true)
|
||||
|
||||
# Enable/disable layoutex
|
||||
AC_ARG_ENABLE(layoutex,
|
||||
[ --enable-layoutex build ICU's Paragraph Layout library [default=yes].
|
||||
[ --enable-layoutex build ICU's Paragraph Layout library [default=no].
|
||||
icu-le-hb must be installed via pkg-config. See http://harfbuzz.org],
|
||||
[case "${enableval}" in
|
||||
yes) layoutex=$have_icu_le_hb ;;
|
||||
no) layoutex=false ;;
|
||||
*) AC_MSG_ERROR(bad value ${enableval} for --enable-layoutex) ;;
|
||||
esac],
|
||||
layoutex=$have_icu_le_hb)
|
||||
layoutex=false)
|
||||
ICU_CONDITIONAL(LAYOUTEX, test "$layoutex" = true)
|
||||
|
||||
# Enable/disable layout
|
||||
@ -1399,6 +1399,7 @@ AC_CONFIG_FILES([icudefs.mk \
|
||||
test/perf/ubrkperf/Makefile \
|
||||
test/perf/charperf/Makefile \
|
||||
test/perf/convperf/Makefile \
|
||||
test/perf/localecanperf/Makefile \
|
||||
test/perf/normperf/Makefile \
|
||||
test/perf/DateFmtPerf/Makefile \
|
||||
test/perf/howExpensiveIs/Makefile \
|
||||
|
@ -3,5 +3,5 @@
|
||||
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
||||
|
||||
{
|
||||
"cldrVersion": "38.1"
|
||||
"cldrVersion": "39"
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
||||
root{
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
boundaries{
|
||||
grapheme:process(dependency){"char.brk"}
|
||||
line:process(dependency){"line.brk"}
|
||||
|
@ -30,18 +30,18 @@
|
||||
<fail unless="is.cldr.dir.set" message="Please set the CLDR_DIR environment variable to the top level CLDR source dir (containing 'common')."/>
|
||||
|
||||
<available property="cldrtools.dir" value="${env.CLDR_DIR}/cldr-tools" file="${env.CLDR_DIR}/cldr-tools" type="dir"/>
|
||||
<available property="cldrtools.dir" value="${env.CLDR_DIR}/tools/java" file="${env.CLDR_DIR}/tools/java" type="dir"/>
|
||||
<available property="cldrtools.dir" value="${env.CLDR_DIR}/tools" file="${env.CLDR_DIR}/tools" type="dir"/>
|
||||
<fail unless="cldrtools.dir" message="Please make sure that the CLDR tools directory is checked out into CLDR_DIR"/>
|
||||
|
||||
<available property="env.CLDR_CLASSES" value="${cldrtools.dir}/classes" file="${cldrtools.dir}/classes" type="dir"/>
|
||||
<available property="cldrtools.jar" value="${cldrtools.dir}/cldr.jar" file="${cldrtools.dir}/cldr.jar" type="file"/>
|
||||
<available property="env.CLDR_CLASSES" value="${cldrtools.dir}/cldr-code/target/classes" file="${cldrtools.dir}/cldr-code/target/classes" type="dir"/>
|
||||
<available property="cldrtools.jar" value="${cldrtools.dir}/cldr-code/target/cldr-code.jar" file="${cldrtools.dir}/cldr-code/target/cldr-code.jar" type="file"/>
|
||||
<condition property="is.cldr.classes.set">
|
||||
<or>
|
||||
<isset property="env.CLDR_CLASSES" />
|
||||
<isset property="cldrtools.jar" />
|
||||
</or>
|
||||
</condition>
|
||||
<fail unless="is.cldr.classes.set" message="CLDR classes not found in ${cldrtools.dir}. Please either set the CLDR_CLASSES environment variable or build cldr.jar."/>
|
||||
<fail unless="is.cldr.classes.set" message="CLDR classes not found in ${cldrtools.dir}/cldr-code/target/classes. Please either set the CLDR_CLASSES environment variable or build cldr-code.jar."/>
|
||||
|
||||
<property name="env.CLDR_TMP_DIR" location="${env.CLDR_DIR}/../cldr-aux" /> <!-- Hack: see CLDRPaths -->
|
||||
<property name="cldr.prod.dir" location="${env.CLDR_TMP_DIR}/production/" />
|
||||
|
@ -30,6 +30,9 @@
|
||||
# plus the xml-apis.jar from the Apache xalan package
|
||||
# (http://xml.apache.org/xalan-j/downloads.html).
|
||||
#
|
||||
# You will also need to have performed the CLDR Maven setup (non-Eclipse version)
|
||||
# per http://cldr.unicode.org/development/maven
|
||||
#
|
||||
# Note: Enough things can (and will) fail in this process that it is best to
|
||||
# run the commands separately from an interactive shell. They should all
|
||||
# copy and paste without problems.
|
||||
@ -195,7 +198,7 @@ ant copy-cldr-testdata
|
||||
# 4d. Copy from CLDR common/testData/localeIdentifiers/localeCanonicalization.txt
|
||||
# into icu4c/source/test/testdata/localeCanonicalization.txt
|
||||
# and icu4j/main/tests/core/src/com/ibm/icu/dev/data/unicode/localeCanonicalization.txt
|
||||
# and add the following line to the begginning of these two files
|
||||
# and add the following line to the beginning of these two files
|
||||
# # File copied from cldr common/testData/localeIdentifiers/localeCanonicalization.txt
|
||||
|
||||
# 5. Check which data files have modifications, which have been added or removed
|
||||
|
@ -3,7 +3,7 @@
|
||||
// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
||||
|
||||
{
|
||||
"cldrVersion": "38.1",
|
||||
"cldrVersion": "39",
|
||||
"aliases": {
|
||||
"ars": "ar_SA",
|
||||
"in": "id",
|
||||
@ -11,8 +11,7 @@
|
||||
"iw": "he",
|
||||
"iw_IL": "he_IL",
|
||||
"mo": "ro",
|
||||
"no": "nb",
|
||||
"no_NO": "nb_NO",
|
||||
"no_NO": "no",
|
||||
"pa_IN": "pa_Guru_IN",
|
||||
"sh": "sr_Latn",
|
||||
"sh_BA": "sr_Latn_BA",
|
||||
@ -33,6 +32,8 @@
|
||||
"zh_TW": "zh_Hant_TW"
|
||||
},
|
||||
"parents": {
|
||||
"ff_Adlm": "root"
|
||||
"ff_Adlm": "root",
|
||||
"nb": "no",
|
||||
"nn": "no"
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ af{
|
||||
collations{
|
||||
standard{
|
||||
Sequence{"&N<<<ʼn"}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ am{
|
||||
collations{
|
||||
standard{
|
||||
Sequence{"[reorder Ethi]"}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ ar{
|
||||
"&ت<<ة<<<ﺔ<<<ﺓ"
|
||||
"&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
standard{
|
||||
Sequence{
|
||||
@ -397,7 +397,7 @@ ar{
|
||||
"&ۓ=ﮰ=ﮱ"
|
||||
"&ۀ=ﮤ=ﮥ"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -11,7 +11,7 @@ as{
|
||||
"&[before 1]ত<ৎ=ত্\u200D"
|
||||
"&হ<ক্ষ"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ az{
|
||||
"[import az-u-co-standard]"
|
||||
"[reorder others]"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
standard{
|
||||
Sequence{
|
||||
@ -26,7 +26,7 @@ az{
|
||||
"&H<x<<<X"
|
||||
"&Z<w<<<W"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ be{
|
||||
"&Е<ё<<<Ё"
|
||||
"&у<ў<<<Ў"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ bg{
|
||||
collations{
|
||||
standard{
|
||||
Sequence{"[reorder Cyrl]"}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ bn{
|
||||
"[reorder Beng Deva Guru Gujr Orya Taml Telu Knda Mlym Sinh]"
|
||||
"&ঔ<ং<ঃ<ঁ"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
traditional{
|
||||
Sequence{
|
||||
@ -629,7 +629,7 @@ bn{
|
||||
"&যৌ<<<য়ৌ"
|
||||
"&য্<<<য়্"
|
||||
}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ br{
|
||||
collations{
|
||||
standard{
|
||||
Sequence{"&C<ch<<<Ch<<<CH<c''h=c\u2019h<<<C''h=C\u2019h<<<C''H=C\u2019H"}
|
||||
Version{"38.1"}
|
||||
Version{"39"}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user