darling-corefoundation/CFUniChar.c
Lubos Dolezel 7f08a72856 CF-1153.18
2017-02-04 00:16:03 +01:00

1465 lines
58 KiB
C

/*
* Copyright (c) 2015 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this
* file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
/* CFUniChar.c
Copyright (c) 2001-2014, Apple Inc. All rights reserved.
Responsibility: Aki Inoue
*/
#include <CoreFoundation/CFByteOrder.h>
#include "CFInternal.h"
#include "CFUniChar.h"
#include "CFStringEncodingConverterExt.h"
#include "CFUnicodeDecomposition.h"
#include "CFUniCharPriv.h"
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdlib.h>
#endif
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#include <mach/mach.h>
#endif
#if DEPLOYMENT_TARGET_WINDOWS
extern void _CFGetFrameworkPath(wchar_t *path, int maxLength);
#endif
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#define __kCFCharacterSetDir "/System/Library/CoreServices"
#elif DEPLOYMENT_TARGET_LINUX || DEPLOYMENT_TARGET_FREEBSD || DEPLOYMENT_TARGET_EMBEDDED_MINI
#define __kCFCharacterSetDir "/usr/local/share/CoreFoundation"
#elif DEPLOYMENT_TARGET_WINDOWS
#define __kCFCharacterSetDir "\\Windows\\CoreFoundation"
#endif
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
#define USE_MACHO_SEGMENT 1
#endif
/* Boundary markers for the ranges of character-set identifiers; the two index-mapping helpers that follow use them to translate set IDs. */
enum {
kCFUniCharLastExternalSet = kCFUniCharNewlineCharacterSet, // last ID of the "external" range
kCFUniCharFirstInternalSet = kCFUniCharCompatibilityDecomposableCharacterSet, // first ID of the "internal" range
kCFUniCharLastInternalSet = kCFUniCharGraphemeExtendCharacterSet, // last ID of the "internal" range
kCFUniCharFirstBitmapSet = kCFUniCharDecimalDigitCharacterSet // subtracted when a set ID is converted into a bitmap-table index
};
/*
 * Converts a character-set ID into an index into the loaded bitmap table.
 * IDs at or above kCFUniCharFirstInternalSet are first folded down so that they
 * continue from kCFUniCharLastExternalSet; the result is then rebased so that
 * kCFUniCharFirstBitmapSet maps to index 0.
 */
CF_INLINE uint32_t __CFUniCharMapExternalSetToInternalIndex(uint32_t cset) {
    uint32_t folded = cset;
    if (cset >= kCFUniCharFirstInternalSet) {
        folded = (cset - kCFUniCharFirstInternalSet) + kCFUniCharLastExternalSet;
    }
    return folded - kCFUniCharFirstBitmapSet;
}
/*
 * Normalizes a character-set ID:
 *  - kCFUniCharControlCharacterSet becomes kCFUniCharControlAndFormatterCharacterSet;
 *  - an ID strictly between kCFUniCharLastExternalSet and kCFUniCharFirstInternalSet
 *    is shifted up into the range that starts at kCFUniCharFirstInternalSet;
 *  - every other ID is returned unchanged.
 */
CF_INLINE uint32_t __CFUniCharMapCompatibilitySetID(uint32_t cset) {
    if (cset == kCFUniCharControlCharacterSet) {
        return kCFUniCharControlAndFormatterCharacterSet;
    }
    if ((cset > kCFUniCharLastExternalSet) && (cset < kCFUniCharFirstInternalSet)) {
        return (cset - kCFUniCharLastExternalSet) + kCFUniCharFirstInternalSet;
    }
    return cset;
}
#if (DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED) && USE_MACHO_SEGMENT
#include <mach-o/getsect.h>
#include <mach-o/dyld.h>
#include <mach-o/ldsyms.h>
// Start/end markers for the __csbitmaps, __properties and __data sections of the __UNICODE segment.
// Each declaration is bound through an asm label to a `section$start$...` / `section$end$...` symbol name;
// only the ADDRESSES of these objects are meaningful (see __CFGetSectDataPtr), never their values.
// NOTE(review): presumably these symbols are synthesized by the static linker - confirm against the ld documentation.
extern const void* unicode_csbitmaps_section_start __asm("section$start$__UNICODE$__csbitmaps");
extern const void* unicode_csbitmaps_section_end __asm("section$end$__UNICODE$__csbitmaps");
extern const void* unicode_properties_section_start __asm("section$start$__UNICODE$__properties");
extern const void* unicode_properties_section_end __asm("section$end$__UNICODE$__properties");
extern const void* unicode_data_section_start __asm("section$start$__UNICODE$__data");
extern const void* unicode_data_section_end __asm("section$end$__UNICODE$__data");
/*
 * Returns a pointer to the contents of section `sectname` in segment `segname`.
 *
 * The three __UNICODE sections (__csbitmaps, __properties, __data) are resolved
 * directly from the section start/end marker symbols. Any other request walks
 * the loaded images, looks only at the image whose header is _mh_dylib_header,
 * and asks getsectbynamefromheader(_64) for the section.
 *
 * segname, sectname: NUL-terminated names; must not be NULL.
 * sizep:             optional; receives the section size in BYTES (0 when the
 *                    section is not found).
 * Returns NULL when the section cannot be located.
 */
static const void *__CFGetSectDataPtr(const char *segname, const char *sectname, uint64_t *sizep) {
    // special case three common sections to have fast access
    if (strcmp(segname, "__UNICODE") == 0) {
        const void *start = NULL;
        const void *end = NULL;

        if (strcmp(sectname, "__csbitmaps") == 0) {
            start = &unicode_csbitmaps_section_start;
            end = &unicode_csbitmaps_section_end;
        } else if (strcmp(sectname, "__properties") == 0) {
            start = &unicode_properties_section_start;
            end = &unicode_properties_section_end;
        } else if (strcmp(sectname, "__data") == 0) {
            start = &unicode_data_section_start;
            end = &unicode_data_section_end;
        }

        if (start) {
            // Subtract as byte pointers: subtracting the marker addresses with their
            // declared type would yield a count of pointer-sized units, not bytes.
            if (sizep) *sizep = (uint64_t)((const char *)end - (const char *)start);
            return start;
        }
    }

    uint32_t idx, cnt = _dyld_image_count();
    for (idx = 0; idx < cnt; idx++) {
        void *mh = (void *)_dyld_get_image_header(idx);
        if (mh != &_mh_dylib_header) continue; // only this library's own image is searched
#if __LP64__
        const struct section_64 *sect = getsectbynamefromheader_64((struct mach_header_64 *)mh, segname, sectname);
#else
        const struct section *sect = getsectbynamefromheader((struct mach_header *)mh, segname, sectname);
#endif
        if (!sect) break;
        if (sizep) *sizep = (uint64_t)sect->size;
        // sect->addr is the unslid address; add this image's slide to get the runtime address.
        return (char *)sect->addr + _dyld_get_image_vmaddr_slide(idx);
    }

    if (sizep) *sizep = 0ULL;
    return NULL;
}
#endif
#if !USE_MACHO_SEGMENT
// Memory map the file
/*
 * Writes the directory that holds the Unicode data files into the caller's buffer.
 * The buffer must have room for MAXPATHLEN elements (char on POSIX-like targets,
 * wchar_t on Windows); the result is always NUL-terminated.
 *
 *  - Mac OS X / embedded / Linux: __kCFCharacterSetDir
 *  - Windows: <framework path>\CoreFoundation.resources\
 *  - remaining targets of the first branch (EMBEDDED_MINI): __kCFCharacterSetDir "/CharacterSets/"
 */
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
CF_INLINE void __CFUniCharCharacterSetPath(char *cpath) {
#elif DEPLOYMENT_TARGET_WINDOWS
CF_INLINE void __CFUniCharCharacterSetPath(wchar_t *wpath) {
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_LINUX
    strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
#elif DEPLOYMENT_TARGET_WINDOWS
    wchar_t frameworkPath[MAXPATHLEN];
    _CFGetFrameworkPath(frameworkPath, MAXPATHLEN);
    // wcsncpy does not terminate the destination when the source fills it, so copy
    // at most MAXPATHLEN - 1 characters and terminate explicitly.
    wcsncpy(wpath, frameworkPath, MAXPATHLEN - 1);
    wpath[MAXPATHLEN - 1] = L'\0';
    // wcsncat appends up to `count` characters PLUS a terminator, so one slot
    // must be reserved for that terminator.
    wcsncat(wpath, L"\\CoreFoundation.resources\\", MAXPATHLEN - wcslen(wpath) - 1);
#else
    strlcpy(cpath, __kCFCharacterSetDir, MAXPATHLEN);
    strlcat(cpath, "/CharacterSets/", MAXPATHLEN);
#endif
}
#if DEPLOYMENT_TARGET_WINDOWS
// Capacity of the mappedBitmapState array below.
#define MAX_BITMAP_STATE 512
//
// Negative cache of bitmap-file names. If a string is placed into this
// array, it has previously been determined that the bitmap file cannot
// be found. We therefore assume it will not be there in future calls
// and avoid hitting the disk unnecessarily. This assumption is not 100%
// correct, because bitmap files can be added later; the application
// would have to be restarted in order to pick up the new bitmap info.
//
// We should probably revisit this.
//
static wchar_t *mappedBitmapState[MAX_BITMAP_STATE];
// Index of the most recently stored entry in mappedBitmapState; -1 while the cache is empty.
static int __nNumStateEntries = -1;
// Guards the two variables above; initialized lazily by the accessor functions on first use.
CRITICAL_SECTION __bitmapStateLock = {0};
bool __GetBitmapStateForName(const wchar_t *bitmapName) {
if (NULL == __bitmapStateLock.DebugInfo)
InitializeCriticalSection(&__bitmapStateLock);
EnterCriticalSection(&__bitmapStateLock);
if (__nNumStateEntries >= 0) {
for (int i = 0; i < __nNumStateEntries; i++) {
if (wcscmp(mappedBitmapState[i], bitmapName) == 0) {
LeaveCriticalSection(&__bitmapStateLock);
return true;
}
}
}
LeaveCriticalSection(&__bitmapStateLock);
return false;
}
void __AddBitmapStateForName(const wchar_t *bitmapName) {
if (NULL == __bitmapStateLock.DebugInfo)
InitializeCriticalSection(&__bitmapStateLock);
EnterCriticalSection(&__bitmapStateLock);
__nNumStateEntries++;
mappedBitmapState[__nNumStateEntries] = (wchar_t *)malloc((lstrlenW(bitmapName)+1) * sizeof(wchar_t));
lstrcpyW(mappedBitmapState[__nNumStateEntries], bitmapName);
LeaveCriticalSection(&__bitmapStateLock);
}
#endif
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
static bool __CFUniCharLoadBytesFromFile(const char *fileName, const void **bytes, int64_t *fileSize) {
#elif DEPLOYMENT_TARGET_WINDOWS
static bool __CFUniCharLoadBytesFromFile(const wchar_t *fileName, const void **bytes, int64_t *fileSize) {
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
#if DEPLOYMENT_TARGET_WINDOWS
HANDLE bitmapFileHandle = NULL;
HANDLE mappingHandle = NULL;
if (__GetBitmapStateForName(fileName)) {
// The fileName has been tried in the past, so just return false
// and move on.
*bytes = NULL;
return false;
}
mappingHandle = OpenFileMappingW(FILE_MAP_READ, TRUE, fileName);
if (NULL == mappingHandle) {
if ((bitmapFileHandle = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)) == INVALID_HANDLE_VALUE) {
// We tried to get the bitmap file for mapping, but it's not there. Add to list of non-existant bitmap-files so
// we don't have to try this again in the future.
__AddBitmapStateForName(fileName);
return false;
}
mappingHandle = CreateFileMapping(bitmapFileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
CloseHandle(bitmapFileHandle);
if (!mappingHandle) return false;
}
*bytes = MapViewOfFileEx(mappingHandle, FILE_MAP_READ, 0, 0, 0, 0);
if (NULL != fileSize) {
MEMORY_BASIC_INFORMATION memoryInfo;
if (0 == VirtualQueryEx(mappingHandle, *bytes, &memoryInfo, sizeof(memoryInfo))) {
*fileSize = 0; // This indicates no checking. Is it right ?
} else {
*fileSize = memoryInfo.RegionSize;
}
}
CloseHandle(mappingHandle);
return (*bytes ? true : false);
#else
struct stat statBuf;
int fd = -1;
if ((fd = open(fileName, O_RDONLY, 0)) < 0) {
return false;
}
if (fstat(fd, &statBuf) < 0 || (*bytes = mmap(0, statBuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0)) == (void *)-1) {
close(fd);
return false;
}
close(fd);
if (NULL != fileSize) *fileSize = statBuf.st_size;
return true;
#endif
}
#endif // USE_MACHO_SEGMENT
/*
 * Locates one of the Unicode data blobs and returns its address.
 *
 * bitmapName: with USE_MACHO_SEGMENT, the name of a section in the __UNICODE segment;
 *             otherwise a file name that is appended to the character-set directory.
 * bytes:      receives the base address of the data.
 * fileSize:   optional; receives the size of the mapped file, or 0 in the Mach-O
 *             section case (the size is not queried there).
 *
 * Returns true when the data is available.
 */
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
static bool __CFUniCharLoadFile(const char *bitmapName, const void **bytes, int64_t *fileSize) {
#elif DEPLOYMENT_TARGET_WINDOWS
static bool __CFUniCharLoadFile(const wchar_t *bitmapName, const void **bytes, int64_t *fileSize) {
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
#if USE_MACHO_SEGMENT
    *bytes = __CFGetSectDataPtr("__UNICODE", bitmapName, NULL);
    if (NULL != fileSize) *fileSize = 0;
    return *bytes ? true : false;
#else
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
    char cpath[MAXPATHLEN];
    __CFUniCharCharacterSetPath(cpath);
    strlcat(cpath, bitmapName, MAXPATHLEN);

    // The helper may hand back either `cpath` itself or a newly allocated string;
    // `needToFree` tells us which, so that the allocation can be released below.
    Boolean needToFree = false;
    const char *possiblyFrameworkRootedCPath = CFPathRelativeToAppleFrameworksRoot(cpath, &needToFree);
    bool result = __CFUniCharLoadBytesFromFile(possiblyFrameworkRootedCPath, bytes, fileSize);
    if (needToFree) free((void *)possiblyFrameworkRootedCPath);
    return result;
#elif DEPLOYMENT_TARGET_WINDOWS
    wchar_t wpath[MAXPATHLEN];
    __CFUniCharCharacterSetPath(wpath);
    // Make sure the directory part is terminated, then append at most as many
    // characters as still fit. wcsncat's count excludes the terminator it writes,
    // so passing MAXPATHLEN here would allow a write past the end of wpath.
    wpath[MAXPATHLEN - 1] = L'\0';
    wcsncat(wpath, bitmapName, MAXPATHLEN - wcslen(wpath) - 1);
    return __CFUniCharLoadBytesFromFile(wpath, bytes, fileSize);
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
#endif
}
// Bitmap functions
/*
Currently unused but left in for symmetry/informative purposes
CF_INLINE bool isControl(UTF32Char theChar, uint16_t charset, const void *data) { // ISO Control
return (((theChar <= 0x001F) || (theChar >= 0x007F && theChar <= 0x009F)) ? true : false);
}*/
/*
 * Hard-coded membership test for the whitespace set: SPACE, TAB, U+00A0, U+1680,
 * U+2000..U+200B, U+202F, U+205F and U+3000.
 * `charset` and `data` are unused.
 */
CF_INLINE bool isWhitespace(UTF32Char theChar, uint16_t charset, const void *data) {
    switch (theChar) {
        case 0x0020:
        case 0x0009:
        case 0x00A0:
        case 0x1680:
        case 0x202F:
        case 0x205F:
        case 0x3000:
            return true;
        default:
            break;
    }
    if (theChar >= 0x2000 && theChar <= 0x200B) {
        return true;
    }
    return false;
}
/*
 * Hard-coded membership test for the newline set: U+000A..U+000D, U+0085,
 * U+2028 and U+2029.
 * `charset` and `data` are unused.
 */
CF_INLINE bool isNewline(UTF32Char theChar, uint16_t charset, const void *data) {
    if (theChar >= 0x000A && theChar <= 0x000D) {
        return true;
    }
    switch (theChar) {
        case 0x0085:
        case 0x2028:
        case 0x2029:
            return true;
        default:
            return false;
    }
}
/* Union of the whitespace and newline sets: true when either isWhitespace or isNewline accepts the character. */
CF_INLINE bool isWhitespaceAndNewline(UTF32Char theChar, uint16_t charset, const void *data) {
    if (isWhitespace(theChar, charset, data)) {
        return true;
    }
    if (isNewline(theChar, charset, data)) {
        return true;
    }
    return false;
}
// __CFSimpleFileSizeVerification(bytes, fileSize): sanity check applied to a loaded Unicode
// database image before it is parsed. Returns true when the image is accepted.
// Both variants that can currently be compiled are stubs that accept any input.
#if USE_MACHO_SEGMENT
CF_INLINE bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#elif 1
// <rdar://problem/8961744> __CFSimpleFileSizeVerification is broken
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) { return true; }
#else
// Disabled implementation: never compiled because of the `#elif 1` branch above.
static bool __CFSimpleFileSizeVerification(const void *bytes, int64_t fileSize) {
bool result = true;
// A non-positive fileSize skips every check (the loaders in this file pass 0 when the size is not known).
if (fileSize > 0) {
// The image must at least hold the two leading 32-bit words.
if ((sizeof(uint32_t) * 2) > fileSize) {
result = false;
} else {
// The second 32-bit word is the big-endian header size.
uint32_t headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
if ((headerSize < (sizeof(uint32_t) * 4)) || (headerSize > fileSize)) {
result = false;
} else {
// The last two 32-bit words of the header.
const uint32_t *lastElement = (uint32_t *)(((uint8_t *)bytes) + headerSize) - 2;
// NOTE(review): the sum is compared against headerSize, so any non-zero pair fails the check;
// presumably fileSize was intended - this may be what the radar above refers to. Confirm.
if ((headerSize + CFSwapInt32BigToHost(lastElement[0]) + CFSwapInt32BigToHost(lastElement[1])) > headerSize) result = false;
}
}
}
if (!result) CFLog(kCFLogLevelCritical, CFSTR("File size verification for Unicode database file failed."));
return result;
}
#endif // USE_MACHO_SEGMENT
// Per-character-set entry of the bitmap table built by __CFUniCharLoadBitmapData.
typedef struct {
uint32_t _numPlanes; // number of entries in _planes
const uint8_t **_planes; // one pointer per plane to that plane's bitmap; NULL for planes without bitmap data
} __CFUniCharBitmapData;
// Unicode version string built from the first four bytes of the bitmap data; empty until the data is loaded.
static char __CFUniCharUnicodeVersionString[8] = {0, 0, 0, 0, 0, 0, 0, 0};
// Number of entries in __CFUniCharBitmapDataArray.
static uint32_t __CFUniCharNumberOfBitmaps = 0;
// Table of per-set bitmap descriptors; NULL until __CFUniCharLoadBitmapData succeeds.
static __CFUniCharBitmapData *__CFUniCharBitmapDataArray = NULL;
// Held by __CFUniCharLoadBitmapData while it builds the table.
static CFLock_t __CFUniCharBitmapLock = CFLockInit;
// CF_UNICHAR_BITMAP_FILE is the name passed to __CFUniCharLoadFile for the character-set bitmaps:
// a section name when USE_MACHO_SEGMENT is set, otherwise a file name (wide string on Windows).
// A definition supplied from outside (e.g. on the compiler command line) takes precedence.
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
#if !defined(CF_UNICHAR_BITMAP_FILE)
#if USE_MACHO_SEGMENT
#define CF_UNICHAR_BITMAP_FILE "__csbitmaps"
#else
#define CF_UNICHAR_BITMAP_FILE "/CFCharacterSetBitmaps.bitmap"
#endif
#endif
#elif DEPLOYMENT_TARGET_WINDOWS
#if !defined(CF_UNICHAR_BITMAP_FILE)
#define CF_UNICHAR_BITMAP_FILE L"CFCharacterSetBitmaps.bitmap"
#endif
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
/*
 * Loads the character-set bitmap data and builds __CFUniCharBitmapDataArray.
 *
 * Layout of the data as read below:
 *   bytes 0..3   Unicode version, one character per component, 0-terminated if shorter
 *   bytes 4..7   big-endian size of the header (including these first 8 bytes)
 *   then         one (offset, size) pair of big-endian uint32_t per bitmap; offsets
 *                are relative to the end of the header
 *   body         per bitmap: an 8 KiB plane bitmap, followed by repeated
 *                [1-byte plane number][8 KiB plane bitmap] records
 *
 * Side effects: fills __CFUniCharUnicodeVersionString ("a.b.c"), sets
 * __CFUniCharNumberOfBitmaps and publishes __CFUniCharBitmapDataArray.
 *
 * Returns true when the table was built by this call. Returns false when the data
 * could not be loaded AND ALSO when the table already existed.
 */
static bool __CFUniCharLoadBitmapData(void) {
    __CFUniCharBitmapData *array;
    uint32_t headerSize;
    uint32_t bitmapSize;
    int numPlanes;
    uint8_t currentPlane;
    const void *bytes;
    const void *bitmapBase;
    const uint8_t *bitmap;
    int idx, bitmapIndex;
    int64_t fileSize;

    __CFLock(&__CFUniCharBitmapLock);

    if (__CFUniCharBitmapDataArray || !__CFUniCharLoadFile(CF_UNICHAR_BITMAP_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
        __CFUnlock(&__CFUniCharBitmapLock);
        return false;
    }

    // Build "a.b.c.d" from up to four version bytes, then replace the trailing '.'
    // with the terminator.
    for (idx = 0; idx < 4 && ((const uint8_t *)bytes)[idx]; idx++) {
        __CFUniCharUnicodeVersionString[idx * 2] = ((const uint8_t *)bytes)[idx];
        __CFUniCharUnicodeVersionString[idx * 2 + 1] = '.';
    }
    if (idx > 0) {
        __CFUniCharUnicodeVersionString[(idx < 4 ? idx * 2 - 1 : 7)] = '\0';
    } else {
        // No version bytes at all: `idx * 2 - 1` would be -1, i.e. a write in front
        // of the array. Leave an empty string instead.
        __CFUniCharUnicodeVersionString[0] = '\0';
    }

    headerSize = CFSwapInt32BigToHost(*((uint32_t *)((char *)bytes + 4)));
    bitmapBase = (uint8_t *)bytes + headerSize;

    // Step over the version and header-size words; what remains of the header is
    // the list of (offset, size) pairs.
    bytes = (uint8_t *)bytes + (sizeof(uint32_t) * 2);
    headerSize -= (sizeof(uint32_t) * 2);

    __CFUniCharNumberOfBitmaps = headerSize / (sizeof(uint32_t) * 2);

    array = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * __CFUniCharNumberOfBitmaps, 0);

    for (idx = 0; idx < (int)__CFUniCharNumberOfBitmaps; idx++) {
        bitmap = (const uint8_t *)bitmapBase + CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);
        bitmapSize = CFSwapInt32BigToHost(*((uint32_t *)bytes)); bytes = (uint8_t *)bytes + sizeof(uint32_t);

        // First estimate the number of stored planes from the size, then read the
        // plane number that precedes the LAST stored plane; the table needs one slot
        // for every plane up to and including that one.
        // NOTE(review): when only one plane is stored the offset below is -1, i.e. the
        // byte in front of this bitmap is read - confirm what the data format guarantees.
        numPlanes = bitmapSize / (8 * 1024);
        numPlanes = *(bitmap + (((numPlanes - 1) * ((8 * 1024) + 1)) - 1)) + 1;

        array[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * numPlanes, 0);
        array[idx]._numPlanes = numPlanes;

        // Plane 0 comes first; after every stored plane a byte names the next stored
        // plane. Planes that are skipped get a NULL entry.
        currentPlane = 0;
        for (bitmapIndex = 0; bitmapIndex < numPlanes; bitmapIndex++) {
            if (bitmapIndex == currentPlane) {
                array[idx]._planes[bitmapIndex] = bitmap;
                bitmap += (8 * 1024);
                currentPlane = *(bitmap++);
            } else {
                array[idx]._planes[bitmapIndex] = NULL;
            }
        }
    }

    __CFUniCharBitmapDataArray = array;

    __CFUnlock(&__CFUniCharBitmapLock);

    return true;
}
/*
 * Returns the Unicode version string of the bitmap data, triggering a load of that
 * data first if it has not been loaded yet. The string is empty when loading fails.
 */
CF_PRIVATE const char *__CFUniCharGetUnicodeVersionString(void) {
    const bool bitmapsLoaded = (__CFUniCharBitmapDataArray != NULL);
    if (!bitmapsLoaded) {
        __CFUniCharLoadBitmapData();
    }
    return __CFUniCharUnicodeVersionString;
}
/*
 * Tests whether `theChar` belongs to the character set `charset`.
 *
 * The whitespace, newline and whitespace-and-newline sets are answered from
 * hard-coded ranges. Every other set is answered from the loaded bitmap table
 * (loaded on first use); when no bitmap is available for the set the result is false.
 *
 * Special handling:
 *  - kCFUniCharIllegalCharacterSet: the stored bitmap is the LEGAL set without
 *    planes 14..16, so the bitmap answer is negated and planes 14..16 are computed here.
 *  - kCFUniCharControlAndFormatterCharacterSet: plane 14 is computed here.
 */
bool CFUniCharIsMemberOf(UTF32Char theChar, uint32_t charset) {
    charset = __CFUniCharMapCompatibilitySetID(charset);

    switch (charset) {
        case kCFUniCharWhitespaceCharacterSet:
            return isWhitespace(theChar, charset, NULL);

        case kCFUniCharWhitespaceAndNewlineCharacterSet:
            return isWhitespaceAndNewline(theChar, charset, NULL);

        case kCFUniCharNewlineCharacterSet:
            return isNewline(theChar, charset, NULL);

        default: {
            uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(charset);

            if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();

            if (tableIndex < __CFUniCharNumberOfBitmaps) {
                __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
                uint8_t planeNo = (theChar >> 16) & 0xFF;

                // The bitmap data for kCFUniCharIllegalCharacterSet is actually LEGAL set less Plane 14 ~ 16
                if (charset == kCFUniCharIllegalCharacterSet) {
                    if (planeNo == 0x0E) { // Plane 14
                        // NOTE(review): only the low 8 bits are examined, so the test
                        // repeats every 256 code points within the plane.
                        theChar &= 0xFF;
                        return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? false : true);
                    } else if (planeNo == 0x0F || planeNo == 0x10) { // Plane 15 & 16
                        // Only the last two code points of the plane (xFFFE and xFFFF) are
                        // illegal. The offset within the plane is 16 bits wide; masking
                        // with 0xFF could never exceed 0xFFFD.
                        return ((theChar & 0xFFFF) > 0xFFFD ? true : false);
                    } else {
                        return (planeNo < data->_numPlanes && data->_planes[planeNo] ? !CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : true);
                    }
                } else if (charset == kCFUniCharControlAndFormatterCharacterSet) {
                    if (planeNo == 0x0E) { // Plane 14
                        theChar &= 0xFF;
                        return (((theChar == 0x01) || ((theChar > 0x1F) && (theChar < 0x80))) ? true : false);
                    } else {
                        return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
                    }
                } else {
                    return (planeNo < data->_numPlanes && data->_planes[planeNo] ? CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) : false);
                }
            }
            return false;
        }
    }
}
/*
 * Returns the stored 8 KiB bitmap of `plane` for character set `charset`, or NULL.
 *
 * NULL is returned for sets that are not served straight from the table (IDs up to
 * kCFUniCharWhitespaceAndNewlineCharacterSet, the illegal set, the newline set),
 * for sets outside the loaded table, and for planes without stored data.
 * The bitmap data is loaded on first use.
 */
const uint8_t *CFUniCharGetBitmapPtrForPlane(uint32_t charset, uint32_t plane) {
    if (__CFUniCharBitmapDataArray == NULL) {
        __CFUniCharLoadBitmapData();
    }

    const uint32_t setID = __CFUniCharMapCompatibilitySetID(charset);

    if (setID <= kCFUniCharWhitespaceAndNewlineCharacterSet) return NULL;
    if (setID == kCFUniCharIllegalCharacterSet) return NULL;
    if (setID == kCFUniCharNewlineCharacterSet) return NULL;

    const uint32_t slot = __CFUniCharMapExternalSetToInternalIndex(setID);
    if (slot >= __CFUniCharNumberOfBitmaps) return NULL;

    const __CFUniCharBitmapData *entry = &__CFUniCharBitmapDataArray[slot];
    if (plane >= entry->_numPlanes) return NULL;

    return entry->_planes[plane];
}
/* Copies one 8 KiB plane bitmap from `src` to `dst`, complementing every byte when `invert` is true. */
static void __CFUniCharCopyPlaneBitmapBytes(uint8_t *dst, const uint8_t *src, bool invert) {
    int remaining = (8 * 1024);
    if (invert) {
        while (remaining-- > 0) *(dst++) = (uint8_t)~(*(src++));
    } else {
        while (remaining-- > 0) *(dst++) = *(src++);
    }
}

/*
 * Produces the bitmap of `plane` for character set `charset`.
 *
 * charset:    character-set ID.
 * plane:      Unicode plane number.
 * bitmap:     caller-provided buffer of 8 KiB; written only when the function
 *             returns kCFUniCharBitmapFilled.
 * isInverted: when true the complement of the set is produced.
 *
 * Returns kCFUniCharBitmapFilled when `bitmap` was written, otherwise
 * kCFUniCharBitmapEmpty or kCFUniCharBitmapAll to describe the whole plane
 * without touching `bitmap`.
 *
 * Sources, in order:
 *  1. the stored bitmap returned by CFUniCharGetBitmapPtrForPlane;
 *  2. for kCFUniCharIllegalCharacterSet: the stored LEGAL bitmap (complemented),
 *     or a synthesized bitmap for planes 14, 15 and 16;
 *  3. for set IDs below kCFUniCharDecimalDigitCharacterSet and for the newline set:
 *     a plane-0 bitmap synthesized from the hard-coded whitespace/newline code points.
 */
CF_PRIVATE uint8_t CFUniCharGetBitmapForPlane(uint32_t charset, uint32_t plane, void *bitmap, bool isInverted) {
    const int numBytes = (8 * 1024);
    uint8_t *dst = (uint8_t *)bitmap;
    const uint8_t *src = CFUniCharGetBitmapPtrForPlane(charset, plane);

    if (src) {
        __CFUniCharCopyPlaneBitmapBytes(dst, src, isInverted);
        return kCFUniCharBitmapFilled;
    }

    if (charset == kCFUniCharIllegalCharacterSet) {
        uint32_t tableIndex = __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(charset));

        // The table may be missing (loading failed) or too short; only consult it
        // when the entry really exists.
        if (__CFUniCharBitmapDataArray != NULL && tableIndex < __CFUniCharNumberOfBitmaps) {
            const __CFUniCharBitmapData *data = __CFUniCharBitmapDataArray + tableIndex;
            if (plane < data->_numPlanes && (src = data->_planes[plane])) {
                // The stored bitmap is the LEGAL set, so the sense of the copy is reversed.
                __CFUniCharCopyPlaneBitmapBytes(dst, src, !isInverted);
                return kCFUniCharBitmapFilled;
            }
        }

        if (plane == 0x0E) { // Plane 14
            int idx;
            uint8_t asciiRange = (isInverted ? (uint8_t)0xFF : (uint8_t)0);
            uint8_t otherRange = (isInverted ? (uint8_t)0 : (uint8_t)0xFF);

            // First byte covers UE0000..UE0007; only UE0001 LANGUAGE TAG is legal there.
            // In the legal (inverted) bitmap that is bit 1 alone; in the illegal bitmap
            // it is every bit except bit 1. This matches CFUniCharIsMemberOf.
            dst[0] = (isInverted ? (uint8_t)0x02 : (uint8_t)0xFD);
            for (idx = 1; idx < numBytes; idx++) {
                dst[idx] = ((idx >= (0x20 / 8) && (idx < (0x80 / 8))) ? asciiRange : otherRange);
            }
            return kCFUniCharBitmapFilled;
        } else if (plane == 0x0F || plane == 0x10) { // Plane 15 & 16
            int idx;
            uint8_t fill = (isInverted ? (uint8_t)0xFF : (uint8_t)0);

            for (idx = 0; idx < numBytes; idx++) dst[idx] = fill;
            // 0xFFFE & 0xFFFF are bits 6 and 7 of the LAST byte of the plane bitmap.
            dst[numBytes - 1] = (isInverted ? (uint8_t)0x3F : (uint8_t)0xC0);
            return kCFUniCharBitmapFilled;
        }
        return (isInverted ? kCFUniCharBitmapEmpty : kCFUniCharBitmapAll);
    }

    if ((charset < kCFUniCharDecimalDigitCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
        CFIndex idx;
        uint8_t nonFillValue = (isInverted ? (uint8_t)0xFF : (uint8_t)0);

        // All hard-coded code points live in plane 0.
        if (plane) return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);

        for (idx = 0; idx < numBytes; idx++) dst[idx] = nonFillValue;

        if ((charset == kCFUniCharWhitespaceAndNewlineCharacterSet) || (charset == kCFUniCharNewlineCharacterSet)) {
            const UniChar newlines[] = {0x000A, 0x000B, 0x000C, 0x000D, 0x0085, 0x2028, 0x2029};

            for (idx = 0; idx < (int)(sizeof(newlines) / sizeof(*newlines)); idx++) {
                if (isInverted) {
                    CFUniCharRemoveCharacterFromBitmap(newlines[idx], dst);
                } else {
                    CFUniCharAddCharacterToBitmap(newlines[idx], dst);
                }
            }
            if (charset == kCFUniCharNewlineCharacterSet) return kCFUniCharBitmapFilled;
        }

        if (isInverted) {
            CFUniCharRemoveCharacterFromBitmap(0x0009, dst);
            CFUniCharRemoveCharacterFromBitmap(0x0020, dst);
            CFUniCharRemoveCharacterFromBitmap(0x00A0, dst);
            CFUniCharRemoveCharacterFromBitmap(0x1680, dst);
            CFUniCharRemoveCharacterFromBitmap(0x202F, dst);
            CFUniCharRemoveCharacterFromBitmap(0x205F, dst);
            CFUniCharRemoveCharacterFromBitmap(0x3000, dst);
        } else {
            CFUniCharAddCharacterToBitmap(0x0009, dst);
            CFUniCharAddCharacterToBitmap(0x0020, dst);
            CFUniCharAddCharacterToBitmap(0x00A0, dst);
            CFUniCharAddCharacterToBitmap(0x1680, dst);
            CFUniCharAddCharacterToBitmap(0x202F, dst);
            CFUniCharAddCharacterToBitmap(0x205F, dst);
            CFUniCharAddCharacterToBitmap(0x3000, dst);
        }

        for (idx = 0x2000; idx <= 0x200B; idx++) {
            if (isInverted) {
                CFUniCharRemoveCharacterFromBitmap(idx, dst);
            } else {
                CFUniCharAddCharacterToBitmap(idx, dst);
            }
        }
        return kCFUniCharBitmapFilled;
    }

    return (isInverted ? kCFUniCharBitmapAll : kCFUniCharBitmapEmpty);
}
/*
 * Returns the number of planes covered by character set `charset`.
 *
 * Fixed answers: 15 (planes 0..14) for the control and control-and-formatter sets,
 * 1 for set IDs below kCFUniCharDecimalDigitCharacterSet, 17 for the illegal set.
 * Every other set is answered from the loaded bitmap table (loaded on first use).
 */
CF_PRIVATE uint32_t CFUniCharGetNumberOfPlanes(uint32_t charset) {
    if ((charset == kCFUniCharControlCharacterSet) || (charset == kCFUniCharControlAndFormatterCharacterSet)) {
        return 15; // 0 to 14
    }
    if (charset < kCFUniCharDecimalDigitCharacterSet) {
        return 1;
    }
    if (charset == kCFUniCharIllegalCharacterSet) {
        return 17;
    }

    if (__CFUniCharBitmapDataArray == NULL) {
        __CFUniCharLoadBitmapData();
    }

    const uint32_t setID = __CFUniCharMapCompatibilitySetID(charset);
    const uint32_t slot = __CFUniCharMapExternalSetToInternalIndex(setID);
    return __CFUniCharBitmapDataArray[slot]._numPlanes;
}
// Mapping data loading
// Array of pointers to the individual mapping tables, indexed by mapping type; NULL until CFUniCharGetMappingData has loaded the data.
static const void **__CFUniCharMappingTables = NULL;
// Serializes the construction of the mapping tables and of the case-mapping tables derived from them.
static CFLock_t __CFUniCharMappingTableLock = CFLockInit;
// MAPPING_TABLE_FILE is the name passed to __CFUniCharLoadFile for the mapping data:
// the "__data" section when USE_MACHO_SEGMENT is set, otherwise a file whose name
// depends on host byte order ("-B" for big-endian, "-L" for little-endian hosts).
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
#if __CF_BIG_ENDIAN__
#if USE_MACHO_SEGMENT
#define MAPPING_TABLE_FILE "__data"
#else
#define MAPPING_TABLE_FILE "/CFUnicodeData-B.mapping"
#endif
#else
#if USE_MACHO_SEGMENT
#define MAPPING_TABLE_FILE "__data"
#else
#define MAPPING_TABLE_FILE "/CFUnicodeData-L.mapping"
#endif
#endif
#elif DEPLOYMENT_TARGET_WINDOWS
#if __CF_BIG_ENDIAN__
#if USE_MACHO_SEGMENT
#define MAPPING_TABLE_FILE "__data"
#else
#define MAPPING_TABLE_FILE L"CFUnicodeData-B.mapping"
#endif
#else
#if USE_MACHO_SEGMENT
#define MAPPING_TABLE_FILE "__data"
#else
#define MAPPING_TABLE_FILE L"CFUnicodeData-L.mapping"
#endif
#endif
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
/*
 * Returns the mapping table of kind `type`, loading the mapping data on first use.
 *
 * Layout of the data as read below (all words in host byte order):
 *   bytes 0..3   Unicode version (skipped)
 *   bytes 4..7   size of the header, including these first 8 bytes
 *   then         one uint32_t offset per table, relative to the end of the header
 *
 * Returns NULL when the data cannot be loaded or when `type` is not smaller than
 * the number of tables found in the header.
 */
CF_PRIVATE const void *CFUniCharGetMappingData(uint32_t type) {
    // Number of entries in __CFUniCharMappingTables; written once under the lock.
    static uint32_t mappingTableCount = 0;
    const void *table;

    __CFLock(&__CFUniCharMappingTableLock);

    if (NULL == __CFUniCharMappingTables) {
        const void *bytes;
        const uint8_t *cursor;
        const char *bodyBase;
        const void **tables;
        int headerSize;
        int idx, count;
        int64_t fileSize;

        if (!__CFUniCharLoadFile(MAPPING_TABLE_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
            __CFUnlock(&__CFUniCharMappingTableLock);
            return NULL;
        }

        // A typed byte cursor is used for both C and C++ builds, so the header size
        // is always read as a full 32-bit word.
        cursor = (const uint8_t *)bytes + 4; // Skip Unicode version
        headerSize = *((const uint32_t *)cursor); cursor += sizeof(uint32_t);
        headerSize -= (sizeof(uint32_t) * 2);
        bodyBase = (const char *)cursor + headerSize;

        count = headerSize / sizeof(uint32_t);

        tables = (const void **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * count, 0);

        for (idx = 0; idx < count; idx++) {
            tables[idx] = bodyBase + *((const uint32_t *)cursor); cursor += sizeof(uint32_t);
        }

        // Publish only after every entry is filled in: other code in this file tests
        // __CFUniCharMappingTables for NULL without holding the lock.
        mappingTableCount = (uint32_t)count;
        __CFUniCharMappingTables = tables;
    }

    // Indexing past the populated entries would read beyond the allocation.
    table = (type < mappingTableCount) ? __CFUniCharMappingTables[type] : NULL;

    __CFUnlock(&__CFUniCharMappingTableLock);

    return table;
}
// Case mapping functions
// When non-zero, CFUniCharMapCaseTo compiles in its language- and context-specific special cases.
#define DO_SPECIAL_CASE_MAPPING 1
// Number of key/value pairs in each case-mapping table, indexed by case type; a non-NULL value also means the case tables are loaded.
static uint32_t *__CFUniCharCaseMappingTableCounts = NULL;
// Start of the key/value pairs of each case-mapping table, indexed by case type.
static uint32_t **__CFUniCharCaseMappingTable = NULL;
// Start of the data that follows the key/value pairs of each table, indexed by case type; used for multi-character mappings.
static const uint32_t **__CFUniCharCaseMappingExtraTable = NULL;
// One entry of a case-mapping table.
typedef struct {
uint32_t _key; // source character the table is searched by
uint32_t _value; // encoded mapping result, as returned by __CFUniCharGetMappedCase
} __CFUniCharCaseMappings;
/*
 * Binary-searches a case-mapping table for `character`.
 *
 * theTable:  array of `numElem` entries; assumed to be sorted by ascending _key,
 *            which the binary search relies on.
 * numElem:   number of entries; 0 is allowed and yields "not found".
 * character: code point to look up.
 *
 * Returns the _value of the matching entry, or 0 when there is none.
 */
static uint32_t __CFUniCharGetMappedCase(const __CFUniCharCaseMappings *theTable, uint32_t numElem, UTF32Char character) {
    const __CFUniCharCaseMappings *p, *q, *divider;

    // An empty table has no first/last entry to compare against; without this
    // guard `numElem - 1` wraps around and indexes far outside the table.
    if (numElem == 0) {
        return 0;
    }

    // Quick rejection of characters outside the key range of the table.
    if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) {
        return 0;
    }

    p = theTable;
    q = p + (numElem-1);
    while (p <= q) {
        divider = p + ((q - p) >> 1); /* divide by 2 */
        if (character < divider->_key) { q = divider - 1; }
        else if (character > divider->_key) { p = divider + 1; }
        else { return divider->_value; }
    }
    return 0;
}
// Number of case-mapping tables: one per case type up to and including kCFUniCharCaseFold.
#define NUM_CASE_MAP_DATA (kCFUniCharCaseFold + 1)

/*
 * Derives the three case-mapping lookup arrays from the raw mapping tables.
 *
 * A single allocation holds, back to back: NUM_CASE_MAP_DATA counts, then
 * NUM_CASE_MAP_DATA table pointers, then NUM_CASE_MAP_DATA extra-table pointers.
 * Each raw table starts with a uint32_t byte length of its key/value area; the
 * pairs follow that word and the extra data follows the pairs.
 *
 * Returns false when the mapping data cannot be loaded, true otherwise
 * (including when the arrays had already been built).
 */
static bool __CFUniCharLoadCaseMappingTable(void) {
    // Make sure the raw mapping tables exist before taking the lock ourselves.
    if (NULL == __CFUniCharMappingTables) {
        (void)CFUniCharGetMappingData(kCFUniCharToLowercase);
    }
    if (NULL == __CFUniCharMappingTables) {
        return false;
    }

    __CFLock(&__CFUniCharMappingTableLock);

    if (NULL == __CFUniCharCaseMappingTableCounts) {
        uint32_t *counts = (uint32_t *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(uint32_t) * NUM_CASE_MAP_DATA + sizeof(uint32_t *) * NUM_CASE_MAP_DATA * 2, 0);
        int caseType;

        // Carve the two pointer arrays out of the space behind the counts.
        __CFUniCharCaseMappingTable = (uint32_t **)((char *)counts + sizeof(uint32_t) * NUM_CASE_MAP_DATA);
        __CFUniCharCaseMappingExtraTable = (const uint32_t **)__CFUniCharCaseMappingTable + NUM_CASE_MAP_DATA;

        for (caseType = 0; caseType < NUM_CASE_MAP_DATA; caseType++) {
            uint32_t *rawTable = (uint32_t *)__CFUniCharMappingTables[caseType];
            uint32_t pairBytes = *rawTable; // byte length of the key/value area

            counts[caseType] = pairBytes / (sizeof(uint32_t) * 2);
            __CFUniCharCaseMappingTable[caseType] = rawTable + 1;
            __CFUniCharCaseMappingExtraTable[caseType] = (const uint32_t *)((char *)__CFUniCharCaseMappingTable[caseType] + pairBytes);
        }

        // Publishing the counts last marks the tables as ready.
        __CFUniCharCaseMappingTableCounts = counts;
    }

    __CFUnlock(&__CFUniCharMappingTableLock);
    return true;
}
// Two-letter language codes packed into 16-bit constants. CFUniCharMapCaseTo compares them
// against the first two bytes of its langCode argument read as one uint16_t, so the byte
// order of each constant depends on host endianness.
#if __CF_BIG_ENDIAN__
#define TURKISH_LANG_CODE (0x7472) // tr
#define LITHUANIAN_LANG_CODE (0x6C74) // lt
#define AZERI_LANG_CODE (0x617A) // az
#define DUTCH_LANG_CODE (0x6E6C) // nl
#define GREEK_LANG_CODE (0x656C) // el
#else
#define TURKISH_LANG_CODE (0x7274) // tr
#define LITHUANIAN_LANG_CODE (0x746C) // lt
#define AZERI_LANG_CODE (0x7A61) // az
#define DUTCH_LANG_CODE (0x6C6E) // nl
#define GREEK_LANG_CODE (0x6C65) // el
#endif
CFIndex CFUniCharMapCaseTo(UTF32Char theChar, UTF16Char *convertedChar, CFIndex maxLength, uint32_t ctype, uint32_t flags, const uint8_t *langCode) {
__CFUniCharBitmapData *data;
uint8_t planeNo = (theChar >> 16) & 0xFF;
caseFoldRetry:
#if DO_SPECIAL_CASE_MAPPING
if (flags & kCFUniCharCaseMapFinalSigma) {
if (theChar == 0x03A3) { // Final sigma
*convertedChar = (ctype == kCFUniCharToLowercase ? 0x03C2 : 0x03A3);
return 1;
}
}
if (langCode) {
if (flags & kCFUniCharCaseMapGreekTonos) { // localized Greek uppercasing
if (theChar == 0x0301) { // GREEK TONOS
return 0;
} else if (theChar == 0x0344) {// COMBINING GREEK DIALYTIKA TONOS
*convertedChar = 0x0308; // COMBINING GREEK DIALYTIKA
return 1;
} else if (CFUniCharIsMemberOf(theChar, kCFUniCharDecomposableCharacterSet)) {
UTF32Char buffer[MAX_DECOMPOSED_LENGTH];
CFIndex length = CFUniCharDecomposeCharacter(theChar, buffer, MAX_DECOMPOSED_LENGTH);
if (length > 1) {
UTF32Char *characters = buffer + 1;
UTF32Char *tail = buffer + length;
while (characters < tail) {
if (*characters == 0x0301) break;
++characters;
}
if (characters < tail) { // found a tonos
CFIndex convertedLength = CFUniCharMapCaseTo(*buffer, convertedChar, maxLength, ctype, 0, langCode);
if (convertedLength == 0) {
*convertedChar = (UTF16Char)*buffer;
convertedLength = 1;
}
characters = buffer + 1;
while (characters < tail) {
if (*characters != 0x0301) { // not tonos
if (*characters < 0x10000) { // BMP
convertedChar[convertedLength] = (UTF16Char)*characters;
++convertedLength;
} else {
UTF32Char character = *characters - 0x10000;
convertedChar[convertedLength++] = (UTF16Char)((character >> 10) + 0xD800UL);
convertedChar[convertedLength++] = (UTF16Char)((character & 0x3FF) + 0xDC00UL);
}
}
++characters;
}
return convertedLength;
}
}
}
}
switch (*(uint16_t *)langCode) {
case LITHUANIAN_LANG_CODE:
if (theChar == 0x0307 && (flags & kCFUniCharCaseMapAfter_i)) {
return 0;
} else if (ctype == kCFUniCharToLowercase) {
if (flags & kCFUniCharCaseMapMoreAbove) {
switch (theChar) {
case 0x0049: // LATIN CAPITAL LETTER I
*(convertedChar++) = 0x0069;
*(convertedChar++) = 0x0307;
return 2;
case 0x004A: // LATIN CAPITAL LETTER J
*(convertedChar++) = 0x006A;
*(convertedChar++) = 0x0307;
return 2;
case 0x012E: // LATIN CAPITAL LETTER I WITH OGONEK
*(convertedChar++) = 0x012F;
*(convertedChar++) = 0x0307;
return 2;
default: break;
}
}
switch (theChar) {
case 0x00CC: // LATIN CAPITAL LETTER I WITH GRAVE
*(convertedChar++) = 0x0069;
*(convertedChar++) = 0x0307;
*(convertedChar++) = 0x0300;
return 3;
case 0x00CD: // LATIN CAPITAL LETTER I WITH ACUTE
*(convertedChar++) = 0x0069;
*(convertedChar++) = 0x0307;
*(convertedChar++) = 0x0301;
return 3;
case 0x0128: // LATIN CAPITAL LETTER I WITH TILDE
*(convertedChar++) = 0x0069;
*(convertedChar++) = 0x0307;
*(convertedChar++) = 0x0303;
return 3;
default: break;
}
}
break;
case TURKISH_LANG_CODE:
case AZERI_LANG_CODE:
if ((theChar == 0x0049) || (theChar == 0x0131)) { // LATIN CAPITAL LETTER I & LATIN SMALL LETTER DOTLESS I
*convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? ((kCFUniCharCaseMapMoreAbove & flags) ? 0x0069 : 0x0131) : 0x0049);
return 1;
} else if ((theChar == 0x0069) || (theChar == 0x0130)) { // LATIN SMALL LETTER I & LATIN CAPITAL LETTER I WITH DOT ABOVE
*convertedChar = (((ctype == kCFUniCharToLowercase) || (ctype == kCFUniCharCaseFold)) ? 0x0069 : 0x0130);
return 1;
} else if (theChar == 0x0307 && (kCFUniCharCaseMapAfter_i & flags)) { // COMBINING DOT ABOVE AFTER_i
if (ctype == kCFUniCharToLowercase) {
return 0;
} else {
*convertedChar = 0x0307;
return 1;
}
}
break;
case DUTCH_LANG_CODE:
if ((theChar == 0x004A) || (theChar == 0x006A)) {
*convertedChar = (((ctype == kCFUniCharToUppercase) || (ctype == kCFUniCharToTitlecase) || (kCFUniCharCaseMapDutchDigraph & flags)) ? 0x004A : 0x006A);
return 1;
}
break;
default: break;
}
}
#endif // DO_SPECIAL_CASE_MAPPING
if (NULL == __CFUniCharBitmapDataArray) __CFUniCharLoadBitmapData();
data = __CFUniCharBitmapDataArray + __CFUniCharMapExternalSetToInternalIndex(__CFUniCharMapCompatibilitySetID(ctype + kCFUniCharHasNonSelfLowercaseCharacterSet));
if (planeNo < data->_numPlanes && data->_planes[planeNo] && CFUniCharIsMemberOfBitmap(theChar, data->_planes[planeNo]) && (__CFUniCharCaseMappingTableCounts || __CFUniCharLoadCaseMappingTable())) {
uint32_t value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[ctype], __CFUniCharCaseMappingTableCounts[ctype], theChar);
if (!value && ctype == kCFUniCharToTitlecase) {
value = __CFUniCharGetMappedCase((const __CFUniCharCaseMappings *)__CFUniCharCaseMappingTable[kCFUniCharToUppercase], __CFUniCharCaseMappingTableCounts[kCFUniCharToUppercase], theChar);
if (value) ctype = kCFUniCharToUppercase;
}
if (value) {
CFIndex count = CFUniCharConvertFlagToCount(value);
if (count == 1) {
if (value & kCFUniCharNonBmpFlag) {
if (maxLength > 1) {
value = (value & 0xFFFFFF) - 0x10000;
*(convertedChar++) = (UTF16Char)(value >> 10) + 0xD800UL;
*(convertedChar++) = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
return 2;
}
} else {
*convertedChar = (UTF16Char)value;
return 1;
}
} else if (count < maxLength) {
const uint32_t *extraMapping = __CFUniCharCaseMappingExtraTable[ctype] + (value & 0xFFFFFF);
if (value & kCFUniCharNonBmpFlag) {
CFIndex copiedLen = 0;
while (count-- > 0) {
value = *(extraMapping++);
if (value > 0xFFFF) {
if (copiedLen + 2 >= maxLength) break;
value = (value & 0xFFFFFF) - 0x10000;
convertedChar[copiedLen++] = (UTF16Char)(value >> 10) + 0xD800UL;
convertedChar[copiedLen++] = (UTF16Char)(value & 0x3FF) + 0xDC00UL;
} else {
if (copiedLen + 1 >= maxLength) break;
convertedChar[copiedLen++] = value;
}
}
if (!count) return copiedLen;
} else {
CFIndex idx;
for (idx = 0;idx < count;idx++) *(convertedChar++) = (UTF16Char)*(extraMapping++);
return count;
}
}
}
} else if (ctype == kCFUniCharCaseFold) {
ctype = kCFUniCharToLowercase;
goto caseFoldRetry;
}
if (theChar > 0xFFFF) { // non-BMP
theChar = (theChar & 0xFFFFFF) - 0x10000;
*(convertedChar++) = (UTF16Char)(theChar >> 10) + 0xD800UL;
*(convertedChar++) = (UTF16Char)(theChar & 0x3FF) + 0xDC00UL;
return 2;
} else {
*convertedChar = theChar;
return 1;
}
}
/*
 * Maps a single UTF-16 unit according to ctype and stores the result in convertedChar.
 *
 * theChar        the character to map
 * convertedChar  output buffer for the mapped UTF-16 units
 * maxLength      capacity of convertedChar; it is only forwarded to CFUniCharMapCaseTo().
 *                The decomposition path below does not consult it.
 * ctype          mapping selector; kCFUniCharCaseFold + 1 selects decomposition, every
 *                other value is handed to CFUniCharMapCaseTo()
 * flags          forwarded unchanged to CFUniCharMapCaseTo()
 *
 * Returns the number of units stored in convertedChar.
 */
CFIndex CFUniCharMapTo(UniChar theChar, UniChar *convertedChar, CFIndex maxLength, uint16_t ctype, uint32_t flags) {
    UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
    CFIndex decomposedLength;
    CFIndex pos;

    // Anything that is not the decomposition selector is a case mapping request.
    if (ctype != kCFUniCharCaseFold + 1) { // kCFUniCharDecompose
        return CFUniCharMapCaseTo(theChar, convertedChar, maxLength, ctype, flags, NULL);
    }

    // Characters without a decomposition map to themselves.
    if (!CFUniCharIsDecomposableCharacter(theChar, false)) {
        convertedChar[0] = theChar;
        return 1;
    }

    decomposedLength = CFUniCharDecomposeCharacter(theChar, decomposed, MAX_DECOMPOSED_LENGTH);

    // Each UTF-32 element is narrowed to a UniChar on assignment.
    for (pos = 0; pos < decomposedLength; pos++) {
        convertedChar[pos] = decomposed[pos];
    }

    return decomposedLength;
}
/*
 * Reports whether the run of non-base characters starting at buffer contains a
 * character whose combining property is 230 ("above").
 *
 * buffer  first UTF-16 unit to examine
 * length  number of UTF-16 units available at buffer
 *
 * The scan stops at the first character that is not a member of
 * kCFUniCharNonBaseCharacterSet. Returns true as soon as a character with
 * combining property 230 is seen, false otherwise (including for length <= 0).
 */
CF_INLINE bool __CFUniCharIsMoreAbove(UTF16Char *buffer, CFIndex length) {
    UTF32Char currentChar;
    uint32_t property;

    while (length-- > 0) {
        currentChar = *(buffer++);

        // buffer has already been advanced, so the unit that may complete a surrogate
        // pair is *buffer itself. (length > 0) guarantees that unit is inside the range.
        if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*buffer)) {
            currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(buffer++));
            --length;
        }

        // Only a run of non-base (combining) characters is of interest.
        if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;

        property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));

        if (property == 230) return true; // Above priority
    }

    return false;
}
/*
 * Examines the text that ends at buffer + length, walking backwards, and reports
 * whether a character with combining property 230 ("above") is found before a
 * stopping character is reached.
 *
 * buffer  start of the UTF-16 text
 * length  number of units preceding the position of interest
 *
 * Returns false (0) when length < 1 or when a character with combining property 230
 * is found, either among the scanned characters or among the trailing elements of
 * the stopping character's decomposition. Returns true otherwise.
 *
 * NOTE(review): nothing in this function compares the stopping character against a
 * particular letter; presumably the caller establishes that through its flags — confirm.
 */
CF_INLINE bool __CFUniCharIsAfter_i(UTF16Char *buffer, CFIndex length) {
UTF32Char currentChar = 0;
uint32_t property;
UTF32Char decomposed[MAX_DECOMPOSED_LENGTH];
CFIndex decompLength;
CFIndex idx;
if (length < 1) return 0;
// Start one past the last unit and walk towards the beginning.
buffer += length;
// The body only runs while at least two units remain, so the very first unit of
// the text is never consumed inside the loop.
while (length-- > 1) {
currentChar = *(--buffer);
if (CFUniCharIsSurrogateLowCharacter(currentChar)) {
// Combine with the preceding high surrogate when one is available; otherwise
// stop scanning with the lone low surrogate left in currentChar.
if ((length > 1) && CFUniCharIsSurrogateHighCharacter(*(buffer - 1))) {
currentChar = CFUniCharGetLongCharacterForSurrogatePair(*(--buffer), currentChar);
--length;
} else {
break;
}
}
// A character outside the non-base set ends the scan.
if (!CFUniCharIsMemberOf(currentChar, kCFUniCharNonBaseCharacterSet)) break;
property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
if (property == 230) return false; // Above priority
}
// length is 0 here only when the loop ended through its condition (every break
// leaves length >= 1); in that case the remaining first unit becomes the character
// to inspect. Otherwise a low surrogate left over from the loop is paired with the
// unit before it when that unit is a high surrogate.
if (length == 0) {
currentChar = *(--buffer);
} else if (CFUniCharIsSurrogateLowCharacter(currentChar) && CFUniCharIsSurrogateHighCharacter(*(--buffer))) {
currentChar = CFUniCharGetLongCharacterForSurrogatePair(*buffer, currentChar);
}
decompLength = CFUniCharDecomposeCharacter(currentChar, decomposed, MAX_DECOMPOSED_LENGTH);
// The value stored by the next statement is overwritten inside the loop and not read
// after it, so it does not influence the result.
// NOTE(review): if CFUniCharDecomposeCharacter can return 0 this reads an
// unwritten array element — confirm.
currentChar = *decomposed;
// Only the elements after the first one of the decomposition are checked.
for (idx = 1;idx < decompLength;idx++) {
currentChar = decomposed[idx];
property = CFUniCharGetCombiningPropertyForCharacter(currentChar, (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, (currentChar >> 16) & 0xFF));
if (property == 230) return false; // Above priority
}
return true;
}
/*
 * Computes the context-dependent case mapping flags for theChar.
 *
 * theChar       character about to be mapped
 * buffer        UTF-16 text containing the character
 * currentIndex  index of the character inside buffer
 * length        number of UTF-16 units in buffer
 * type          requested mapping (kCFUniCharToLowercase, kCFUniCharToUppercase, ...)
 * langCode      optional language code; its first two bytes are compared as one
 *               16-bit value against the *_LANG_CODE constants. May be NULL.
 * lastFlags     flags returned for the previous character
 *
 * Returns a combination of kCFUniCharCaseMap* flags, or 0 when no condition applies.
 * GREEK CAPITAL LETTER SIGMA is handled regardless of langCode; every other rule,
 * including the Greek tonos tracking at the end, is only evaluated when langCode is
 * non-NULL.
 */
CF_PRIVATE uint32_t CFUniCharGetConditionalCaseMappingFlags(UTF32Char theChar, UTF16Char *buffer, CFIndex currentIndex, CFIndex length, uint32_t type, const uint8_t *langCode, uint32_t lastFlags) {
    const uint32_t afterAndAbove = (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove);
    uint16_t langKey;
    bool inGreekRange;

    if (theChar == 0x03A3) { // GREEK CAPITAL LETTER SIGMA
        UTF16Char *const textStart = buffer;
        UTF16Char *textEnd;
        UTF16Char *cursor;
        UTF32Char neighbor;

        if ((type != kCFUniCharToLowercase) || (currentIndex <= 0)) return 0;

        textEnd = textStart + length;

        // Look backwards, skipping case-ignorable characters: the first other character
        // has to be cased.
        cursor = textStart + (currentIndex - 1);
        while (textStart <= cursor) {
            neighbor = *(cursor--);
            if (CFUniCharIsSurrogateLowCharacter(neighbor) && (textStart <= cursor) && CFUniCharIsSurrogateHighCharacter(*cursor)) {
                neighbor = CFUniCharGetLongCharacterForSurrogatePair(*(cursor--), neighbor);
            }
            if (CFUniCharIsMemberOf(neighbor, kCFUniCharCaseIgnorableCharacterSet)) continue;

            // Uppercase set contains titlecase
            if (!CFUniCharIsMemberOf(neighbor, kCFUniCharUppercaseLetterCharacterSet) && !CFUniCharIsMemberOf(neighbor, kCFUniCharLowercaseLetterCharacterSet)) return 0;
            break;
        }

        // Look forwards, skipping case-ignorable characters: the first other character
        // must not be cased.
        cursor = textStart + currentIndex + 1;
        while (cursor < textEnd) {
            neighbor = *(cursor++);
            if (CFUniCharIsSurrogateHighCharacter(neighbor) && (cursor < textEnd) && CFUniCharIsSurrogateLowCharacter(*cursor)) {
                neighbor = CFUniCharGetLongCharacterForSurrogatePair(neighbor, *(cursor++));
            }
            if (CFUniCharIsMemberOf(neighbor, kCFUniCharCaseIgnorableCharacterSet)) continue;

            // Uppercase set contains titlecase
            if (CFUniCharIsMemberOf(neighbor, kCFUniCharUppercaseLetterCharacterSet) || CFUniCharIsMemberOf(neighbor, kCFUniCharLowercaseLetterCharacterSet)) return 0;
            break;
        }

        return kCFUniCharCaseMapFinalSigma;
    }

    if (NULL == langCode) return 0;

    langKey = *((const uint16_t *)langCode);

    if (langKey == LITHUANIAN_LANG_CODE) {
        if ((theChar == 0x0307) && ((afterAndAbove & lastFlags) == afterAndAbove)) {
            return (__CFUniCharIsAfter_i(buffer, currentIndex) ? kCFUniCharCaseMapAfter_i : 0);
        }
        if (type == kCFUniCharToLowercase) {
            if ((theChar == 0x0049) || (theChar == 0x004A) || (theChar == 0x012E)) {
                const CFIndex following = currentIndex + 1;
                return (__CFUniCharIsMoreAbove(buffer + following, length - following) ? kCFUniCharCaseMapMoreAbove : 0);
            }
        } else if ((theChar == 'i') || (theChar == 'j')) {
            const CFIndex following = currentIndex + 1;
            return (__CFUniCharIsMoreAbove(buffer + following, length - following) ? (kCFUniCharCaseMapAfter_i|kCFUniCharCaseMapMoreAbove) : 0);
        }
    } else if ((langKey == TURKISH_LANG_CODE) || (langKey == AZERI_LANG_CODE)) {
        if (type == kCFUniCharToLowercase) {
            if (theChar == 0x0307) {
                return (kCFUniCharCaseMapMoreAbove & lastFlags ? kCFUniCharCaseMapAfter_i : 0);
            }
            if (theChar == 0x0049) {
                const CFIndex following = currentIndex + 1;
                return (((following < length) && (buffer[following] == 0x0307)) ? kCFUniCharCaseMapMoreAbove : 0);
            }
        }
    } else if (langKey == DUTCH_LANG_CODE) {
        if (kCFUniCharCaseMapDutchDigraph & lastFlags) {
            return (((theChar == 0x006A) || (theChar == 0x004A)) ? kCFUniCharCaseMapDutchDigraph : 0);
        }
        if ((type == kCFUniCharToTitlecase) && ((theChar == 0x0069) || (theChar == 0x0049))) {
            const CFIndex following = currentIndex + 1;
            return (((following < length) && ((buffer[following] == 0x006A) || (buffer[following] == 0x004A))) ? kCFUniCharCaseMapDutchDigraph : 0);
        }
    }

    // still searching for tonos
    if ((kCFUniCharCaseMapGreekTonos & lastFlags) && CFUniCharIsMemberOf(theChar, kCFUniCharNonBaseCharacterSet)) {
        return kCFUniCharCaseMapGreekTonos;
    }

    // Greek/Coptic & Greek extended ranges
    inGreekRange = (((theChar >= 0x0370) && (theChar < 0x0400)) || ((theChar >= 0x1F00) && (theChar < 0x2000)));
    if (inGreekRange && (type == kCFUniCharToUppercase) && (CFUniCharIsMemberOf(theChar, kCFUniCharLetterCharacterSet))) {
        return kCFUniCharCaseMapGreekTonos;
    }

    return 0;
}
// Unicode property database
// Table of per-property plane data; it stays NULL until
// CFUniCharGetUnicodePropertyDataForPlane() builds it on first use.
static __CFUniCharBitmapData *__CFUniCharUnicodePropertyTable = NULL;
// Number of entries in __CFUniCharUnicodePropertyTable.
static int __CFUniCharUnicodePropertyTableCount = 0;
// Taken around the lazy construction (and the Windows cleanup) of the table above.
static CFLock_t __CFUniCharPropTableLock = CFLockInit;
// PROP_DB_FILE is the name handed to __CFUniCharLoadFile() for the property database.
// It is "__properties" when USE_MACHO_SEGMENT is set, otherwise a data file name
// (a wide string literal on Windows).
#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX
#if USE_MACHO_SEGMENT
#define PROP_DB_FILE "__properties"
#else
#define PROP_DB_FILE "/CFUniCharPropertyDatabase.data"
#endif
#elif DEPLOYMENT_TARGET_WINDOWS
#if USE_MACHO_SEGMENT
#define PROP_DB_FILE "__properties"
#else
#define PROP_DB_FILE L"CFUniCharPropertyDatabase.data"
#endif
#else
#error Unknown or unspecified DEPLOYMENT_TARGET
#endif
/*
 * Returns the property data for one plane of one Unicode property, loading and
 * indexing the property database on first use.
 *
 * propertyType  index of the property inside the database
 * plane         plane number
 *
 * Returns NULL when the database cannot be loaded, when memory for the index cannot
 * be allocated, when propertyType is not a valid index, when plane is beyond the
 * planes stored for the property, or when the database holds no data for that plane.
 *
 * Layout as consumed below: a 4-byte Unicode version, a big-endian 32-bit header size,
 * then one big-endian 32-bit body size per property, followed by the bodies. A body
 * starts with a one-byte plane count and one size byte per plane (in units of 256
 * bytes); the plane data begins at the body start plus the plane count rounded up to
 * a multiple of 4.
 */
const void *CFUniCharGetUnicodePropertyDataForPlane(uint32_t propertyType, uint32_t plane) {
    __CFLock(&__CFUniCharPropTableLock);

    if (NULL == __CFUniCharUnicodePropertyTable) {
        __CFUniCharBitmapData *table;
        const void *bytes;
        const uint8_t *sizeCursor; // walks the 32-bit body sizes stored in the header
        const uint8_t *bodyBase;
        const uint8_t *planeBase;
        int headerSize;
        int idx, count;
        int planeIndex, planeCount;
        int planeSize;
        int64_t fileSize;

        if (!__CFUniCharLoadFile(PROP_DB_FILE, &bytes, &fileSize) || !__CFSimpleFileSizeVerification(bytes, fileSize)) {
            __CFUnlock(&__CFUniCharPropTableLock);
            return NULL;
        }

        // Byte-typed cursors keep the pointer arithmetic well defined in both C and C++.
        sizeCursor = (const uint8_t *)bytes + 4; // Skip Unicode version
        headerSize = CFSwapInt32BigToHost(*((const uint32_t *)sizeCursor));
        sizeCursor += sizeof(uint32_t);

        headerSize -= (sizeof(uint32_t) * 2);
        bodyBase = sizeCursor + headerSize;

        count = headerSize / sizeof(uint32_t);

        table = (__CFUniCharBitmapData *)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(__CFUniCharBitmapData) * count, 0);
        if (NULL == table) {
            __CFUnlock(&__CFUniCharPropTableLock);
            return NULL;
        }

        for (idx = 0; idx < count; idx++) {
            planeCount = *bodyBase;
            planeBase = bodyBase + planeCount + (planeCount % 4 ? 4 - (planeCount % 4) : 0);
            table[idx]._planes = (const uint8_t **)CFAllocatorAllocate(kCFAllocatorSystemDefault, sizeof(const void *) * planeCount, 0);

            if ((planeCount > 0) && (NULL == table[idx]._planes)) {
                // Out of memory: release what has been built so far and leave the table
                // unset so that a later call can try again.
                while (idx-- > 0) {
                    CFAllocatorDeallocate(kCFAllocatorSystemDefault, table[idx]._planes);
                }
                CFAllocatorDeallocate(kCFAllocatorSystemDefault, table);
                __CFUnlock(&__CFUniCharPropTableLock);
                return NULL;
            }

            for (planeIndex = 0; planeIndex < planeCount; planeIndex++) {
                if ((planeSize = bodyBase[planeIndex + 1])) {
                    table[idx]._planes[planeIndex] = planeBase;
                    planeBase += (planeSize * 256);
                } else {
                    table[idx]._planes[planeIndex] = NULL;
                }
            }

            table[idx]._numPlanes = planeCount;

            // Advance to the next body and to the next size word. The size cursor has to
            // move by a whole 32-bit word; stepping it by a single byte would make every
            // body after the second start at a wrong offset.
            bodyBase += CFSwapInt32BigToHost(*((const uint32_t *)sizeCursor));
            sizeCursor += sizeof(uint32_t);
        }

        __CFUniCharUnicodePropertyTableCount = count;
        __CFUniCharUnicodePropertyTable = table;
    }

    __CFUnlock(&__CFUniCharPropTableLock);

    // Reject property indices the database does not contain instead of reading past the table.
    if (propertyType >= (uint32_t)__CFUniCharUnicodePropertyTableCount) return NULL;

    return (plane < __CFUniCharUnicodePropertyTable[propertyType]._numPlanes ? __CFUniCharUnicodePropertyTable[propertyType]._planes[plane] : NULL);
}
/*
 * Returns the number of planes stored for propertyType in the Unicode property
 * database, or 0 when the database is not available or propertyType is not a valid
 * index into it.
 */
CF_PRIVATE uint32_t CFUniCharGetNumberOfPlanesForUnicodePropertyData(uint32_t propertyType) {
    // Called only for its side effect: it builds the property table on first use.
    (void)CFUniCharGetUnicodePropertyDataForPlane(propertyType, 0);

    // The table stays NULL when the database could not be loaded.
    if (NULL == __CFUniCharUnicodePropertyTable) return 0;
    if (propertyType >= (uint32_t)__CFUniCharUnicodePropertyTableCount) return 0;

    return __CFUniCharUnicodePropertyTable[propertyType]._numPlanes;
}
/*
 * Looks up a Unicode property value for character.
 *
 * character     the code point to look up; bits 16-23 select the plane
 * propertyType  kCFUniCharCombiningProperty or kCFUniCharBidiProperty
 *
 * Returns the property value, or 0 for any other propertyType.
 */
CF_PRIVATE uint32_t CFUniCharGetUnicodeProperty(UTF32Char character, uint32_t propertyType) {
    const uint8_t *planeData;

    // Unsupported properties are answered without touching the property database.
    if ((propertyType != kCFUniCharCombiningProperty) && (propertyType != kCFUniCharBidiProperty)) {
        return 0;
    }

    planeData = (const uint8_t *)CFUniCharGetUnicodePropertyDataForPlane(propertyType, (character >> 16) & 0xFF);

    if (propertyType == kCFUniCharCombiningProperty) {
        return CFUniCharGetCombiningPropertyForCharacter(character, planeData);
    }
    return CFUniCharGetBidiPropertyForCharacter(character, planeData);
}
/*
The UTF8 conversion in the following function is derived from ConvertUTF.c
*/
/*
* Copyright 2001 Unicode, Inc.
*
* Disclaimer
*
* This source code is provided as is by Unicode, Inc. No claims are
* made as to fitness for any particular purpose. No warranties of any
* kind are expressed or implied. The recipient agrees to determine
* applicability of information provided. If this file has been
* purchased on magnetic or optical media from Unicode, Inc., the
* sole remedy for any claim will be exchange of defective media
* within 90 days of receipt.
*
* Limitations on Rights to Redistribute This Code
*
* Unicode, Inc. hereby grants the right to freely use the information
* supplied in this file in the creation of products supporting the
* Unicode Standard, and to make copies of this file in any form
* for internal or external distribution as long as this notice
* remains attached.
*/
#define UNI_REPLACEMENT_CHAR (0x0000FFFDUL)

/*
 * Appends the UTF-32 code points in src to the buffer at *dst, encoded as dstFormat.
 *
 * src           code points to append
 * srcLength     number of code points in src
 * dst           in/out: write position; advanced past the written data on success
 * dstLength     capacity limit, in the units of dstFormat, that the running total
 *               *filledLength may not exceed. 0 means "count only": nothing is
 *               written and no limit applies.
 * filledLength  in/out: running total of units produced; updated on success
 * dstFormat     kCFUniCharUTF16Format, kCFUniCharUTF8Format; any other value copies
 *               the code points unchanged as UTF-32
 *
 * Returns true on success. Returns false as soon as the running total would exceed
 * dstLength; *dst and *filledLength are then left unmodified, although units may
 * already have been written to the buffer.
 */
bool CFUniCharFillDestinationBuffer(const UTF32Char *src, CFIndex srcLength, void **dst, CFIndex dstLength, CFIndex *filledLength, uint32_t dstFormat) {
    UTF32Char currentChar;
    CFIndex usedLength = *filledLength;

    if (dstFormat == kCFUniCharUTF16Format) {
        UTF16Char *dstBuffer = (UTF16Char *)*dst;

        while (srcLength-- > 0) {
            currentChar = *(src++);

            if (currentChar > 0xFFFF) { // Non-BMP
                usedLength += 2;
                if (dstLength) {
                    if (usedLength > dstLength) return false;
                    currentChar -= 0x10000;
                    *(dstBuffer++) = (UTF16Char)((currentChar >> 10) + 0xD800UL);
                    *(dstBuffer++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL);
                }
            } else {
                ++usedLength;
                if (dstLength) {
                    if (usedLength > dstLength) return false;
                    *(dstBuffer++) = (UTF16Char)currentChar;
                }
            }
        }

        *dst = dstBuffer;
    } else if (dstFormat == kCFUniCharUTF8Format) {
        uint8_t *dstBuffer = (uint8_t *)*dst;
        uint16_t bytesToWrite = 0;
        const UTF32Char byteMask = 0xBF;
        const UTF32Char byteMark = 0x80;
        static const uint8_t firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

        while (srcLength-- > 0) {
            currentChar = *(src++);

            /* Figure out how many bytes the result will require */
            if (currentChar < (UTF32Char)0x80) {
                bytesToWrite = 1;
            } else if (currentChar < (UTF32Char)0x800) {
                bytesToWrite = 2;
            } else if (currentChar < (UTF32Char)0x10000) {
                bytesToWrite = 3;
            } else if (currentChar < (UTF32Char)0x200000) {
                bytesToWrite = 4;
            } else {
                // U+FFFD lies in the three-byte range (EF BF BD). Emitting it with two
                // bytes would drop its high bits and produce an invalid lead byte.
                bytesToWrite = 3;
                currentChar = UNI_REPLACEMENT_CHAR;
            }

            usedLength += bytesToWrite;

            if (dstLength) {
                if (usedLength > dstLength) return false;

                // The bytes are produced last-to-first, so start from the end of the sequence.
                dstBuffer += bytesToWrite;
                switch (bytesToWrite) { /* note: everything falls through. */
                    case 4: *--dstBuffer = (uint8_t)((currentChar | byteMark) & byteMask); currentChar >>= 6; /* fallthrough */
                    case 3: *--dstBuffer = (uint8_t)((currentChar | byteMark) & byteMask); currentChar >>= 6; /* fallthrough */
                    case 2: *--dstBuffer = (uint8_t)((currentChar | byteMark) & byteMask); currentChar >>= 6; /* fallthrough */
                    case 1: *--dstBuffer = (uint8_t)(currentChar | firstByteMark[bytesToWrite]);
                }
                dstBuffer += bytesToWrite;
            }
        }

        *dst = dstBuffer;
    } else {
        UTF32Char *dstBuffer = (UTF32Char *)*dst;

        while (srcLength-- > 0) {
            currentChar = *(src++);

            ++usedLength;
            if (dstLength) {
                if (usedLength > dstLength) return false;
                *(dstBuffer++) = currentChar;
            }
        }

        *dst = dstBuffer;
    }

    *filledLength = usedLength;

    return true;
}
#if DEPLOYMENT_TARGET_WINDOWS
void __CFUniCharCleanup(void)
{
int idx;
// cleanup memory allocated by __CFUniCharLoadBitmapData()
__CFLock(&__CFUniCharBitmapLock);
if (__CFUniCharBitmapDataArray != NULL) {
for (idx = 0; idx < (int)__CFUniCharNumberOfBitmaps; idx++) {
CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray[idx]._planes);
__CFUniCharBitmapDataArray[idx]._planes = NULL;
}
CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharBitmapDataArray);
__CFUniCharBitmapDataArray = NULL;
__CFUniCharNumberOfBitmaps = 0;
}
__CFUnlock(&__CFUniCharBitmapLock);
// cleanup memory allocated by CFUniCharGetMappingData()
__CFLock(&__CFUniCharMappingTableLock);
if (__CFUniCharMappingTables != NULL) {
CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharMappingTables);
__CFUniCharMappingTables = NULL;
}
// cleanup memory allocated by __CFUniCharLoadCaseMappingTable()
if (__CFUniCharCaseMappingTableCounts != NULL) {
CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharCaseMappingTableCounts);
__CFUniCharCaseMappingTableCounts = NULL;
__CFUniCharCaseMappingTable = NULL;
__CFUniCharCaseMappingExtraTable = NULL;
}
__CFUnlock(&__CFUniCharMappingTableLock);
// cleanup memory allocated by CFUniCharGetUnicodePropertyDataForPlane()
__CFLock(&__CFUniCharPropTableLock);
if (__CFUniCharUnicodePropertyTable != NULL) {
for (idx = 0; idx < __CFUniCharUnicodePropertyTableCount; idx++) {
CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable[idx]._planes);
__CFUniCharUnicodePropertyTable[idx]._planes = NULL;
}
CFAllocatorDeallocate(kCFAllocatorSystemDefault, __CFUniCharUnicodePropertyTable);
__CFUniCharUnicodePropertyTable = NULL;
__CFUniCharUnicodePropertyTableCount = 0;
}
__CFUnlock(&__CFUniCharPropTableLock);
}
#endif
// USE_MACHO_SEGMENT is tested above when PROP_DB_FILE is chosen; the macro is removed
// from here on.
#undef USE_MACHO_SEGMENT