mirror of
https://github.com/darlinghq/darling-corefoundation.git
synced 2024-10-07 01:03:29 +00:00
208 lines
9.7 KiB
C
208 lines
9.7 KiB
C
//
|
|
// CFStringTransform.c
|
|
// CoreFoundation
|
|
//
|
|
// Copyright (c) 2014 Apportable. All rights reserved.
|
|
//
|
|
|
|
#include "CFString.h"
|
|
#include <unicode/utrans.h>
|
|
#include <dispatch/dispatch.h>
|
|
#include <stdio.h>
|
|
|
|
// Number of UniChar elements in the on-stack scratch buffer that
// _CFStringTransformCopy uses before it falls back to a heap allocation.
#define BUFFER_SIZE 256
|
|
|
|
// Transform identifiers. Each constant's string value is its own symbol name
// prefixed with ')'. The ')' is not a typo; these are the expected values.
// utrans_find() compares the caller's transform against these with CFEqual()
// to choose an ICU transliterator ID.
// NOTE(review): kCFStringTransformFullwidthHalfwidth and
// kCFStringTransformMandarinLatin are not referenced by the utrans_find()
// visible in this file, so they appear to have no ICU mapping here - confirm.
|
|
const CFStringRef kCFStringTransformStripCombiningMarks = CFSTR(")kCFStringTransformStripCombiningMarks");
|
|
const CFStringRef kCFStringTransformToLatin = CFSTR(")kCFStringTransformToLatin");
|
|
const CFStringRef kCFStringTransformFullwidthHalfwidth = CFSTR(")kCFStringTransformFullwidthHalfwidth");
|
|
const CFStringRef kCFStringTransformLatinKatakana = CFSTR(")kCFStringTransformLatinKatakana");
|
|
const CFStringRef kCFStringTransformLatinHiragana = CFSTR(")kCFStringTransformLatinHiragana");
|
|
const CFStringRef kCFStringTransformHiraganaKatakana = CFSTR(")kCFStringTransformHiraganaKatakana");
|
|
const CFStringRef kCFStringTransformMandarinLatin = CFSTR(")kCFStringTransformMandarinLatin");
|
|
const CFStringRef kCFStringTransformLatinHangul = CFSTR(")kCFStringTransformLatinHangul");
|
|
const CFStringRef kCFStringTransformLatinArabic = CFSTR(")kCFStringTransformLatinArabic");
|
|
const CFStringRef kCFStringTransformLatinHebrew = CFSTR(")kCFStringTransformLatinHebrew");
|
|
const CFStringRef kCFStringTransformLatinThai = CFSTR(")kCFStringTransformLatinThai");
|
|
const CFStringRef kCFStringTransformLatinCyrillic = CFSTR(")kCFStringTransformLatinCyrillic");
|
|
const CFStringRef kCFStringTransformLatinGreek = CFSTR(")kCFStringTransformLatinGreek");
|
|
const CFStringRef kCFStringTransformToXMLHex = CFSTR(")kCFStringTransformToXMLHex");
|
|
const CFStringRef kCFStringTransformToUnicodeName = CFSTR(")kCFStringTransformToUnicodeName");
|
|
const CFStringRef kCFStringTransformStripDiacritics = CFSTR(")kCFStringTransformStripDiacritics");
|
|
|
|
static int32_t _CFStringTransformLength(const UReplaceable *rep) {
|
|
CFMutableStringRef string = (CFMutableStringRef)rep;
|
|
return CFStringGetLength(string);
|
|
}
|
|
|
|
static UChar _CFStringTransformCharAt(const UReplaceable *rep, int32_t offset) {
|
|
CFMutableStringRef string = (CFMutableStringRef)rep;
|
|
return CFStringGetCharacterAtIndex(string, offset);
|
|
}
|
|
|
|
static UChar32 _CFStringTransformChar32At(const UReplaceable *rep, int32_t offset) {
|
|
CFMutableStringRef string = (CFMutableStringRef)rep;
|
|
UniChar ch = CFStringGetCharacterAtIndex(string, offset);
|
|
if (CFStringIsSurrogateHighCharacter(ch)) {
|
|
UniChar low = CFStringGetCharacterAtIndex(string, offset + 1);
|
|
return CFStringGetLongCharacterForSurrogatePair(ch, low);
|
|
} else {
|
|
return (UChar32)ch;
|
|
}
|
|
}
|
|
|
|
// UReplaceableCallbacks.replace: substitutes the code units in [start, limit)
// of the backing CFMutableString with the `textLength` units found at `text`.
static void _CFStringTransformReplace(UReplaceable *rep, int32_t start, int32_t limit, const UChar* text, int32_t textLength) {
    // Wrap ICU's buffer without copying it; kCFAllocatorNull means the
    // wrapper never tries to free memory it does not own.
    CFStringRef wrapped = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, text, textLength, kCFAllocatorNull);
    const CFRange target = CFRangeMake(start, limit - start);
    CFStringReplace((CFMutableStringRef)rep, target, wrapped);
    CFRelease(wrapped);
}
|
|
|
|
// UReplaceableCallbacks.extract: copies the code units in [start, limit) of
// the backing CFString into the caller-supplied buffer `dst`.
static void _CFStringTransformExtract(UReplaceable *rep, int32_t start, int32_t limit, UChar* dst) {
    const CFRange span = CFRangeMake(start, limit - start);
    CFStringGetCharacters((CFMutableStringRef)rep, span, dst);
}
|
|
|
|
// UReplaceableCallbacks.copy: duplicates the code units in [start, limit) of
// the backing CFMutableString and inserts the duplicate at index `dest`.
//
// Ranges of up to BUFFER_SIZE units are staged in a stack buffer; longer
// ranges use a temporary heap buffer. An empty or inverted range is a no-op.
// If the heap allocation fails, a message is written to stderr and the string
// is left unmodified.
static void _CFStringTransformCopy(UReplaceable *rep, int32_t start, int32_t limit, int32_t dest) {
    CFMutableStringRef string = (CFMutableStringRef)rep;
    int32_t count = limit - start;
    if (count <= 0) {
        return;
    }

    UniChar stack_text[BUFFER_SIZE];
    UniChar *text = stack_text;
    if (count > BUFFER_SIZE) {
        // The buffer holds `count` UniChar elements, so the allocation must be
        // sized in elements rather than bytes; otherwise CFStringGetCharacters
        // below would write past the end of it.
        text = malloc((size_t)count * sizeof(*text));
        if (text == NULL) {
            // we cant throw a NSException here, but return before anything blows up...
            fprintf(stderr, "ICU Internal failure occurred, we are out of memory: time to go cry in a corner now...\n");
            return;
        }
    }

    CFStringGetCharacters(string, CFRangeMake(start, count), text);
    // Wrap the staged units without copying; kCFAllocatorNull keeps the
    // wrapper from freeing a buffer that is released (or is on the stack) here.
    CFStringRef insert = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, text, count, kCFAllocatorNull);
    if (insert != NULL) {
        CFStringInsert(string, dest, insert);
        CFRelease(insert);
    }

    if (text != stack_text) {
        free(text);
    }
}
|
|
|
|
// Opens the ICU transliterator that corresponds to a kCFStringTransform*
// identifier.
//
// transform: one of the kCFStringTransform* constants (compared with CFEqual).
// dir:       direction passed through to utrans_openU.
// error:     ICU status; set by the ICU calls made here.
//
// Returns the opened transliterator (the caller closes it with utrans_close),
// or NULL when `transform` is NULL, has no entry in the table below, is not
// among the IDs ICU reports at runtime, or could not be opened. ICU failures
// are logged to stderr by error name. When a known transform cannot be
// opened, a one-time hint about the ICU data file is also logged.
static inline UTransliterator *utrans_find(CFStringRef transform, UTransDirection dir, UErrorCode *error) {
    // It would have been nice if these string constants were actually defined
    // somewhere in icu, but sadly they are runtime metadata, so the pairing is
    // maintained by hand.
    // NOTE(review): kCFStringTransformFullwidthHalfwidth and
    // kCFStringTransformMandarinLatin have no entry, so they always yield NULL.
    // NOTE(review): whether "Any-Remove" and "Accents-Any" produce the results
    // expected for StripCombiningMarks / StripDiacritics should be confirmed
    // against ICU's transliterator documentation.
    const struct {
        CFStringRef transform;
        CFStringRef icuID;
    } mappings[] = {
        { kCFStringTransformStripCombiningMarks, CFSTR("Any-Remove") },
        { kCFStringTransformToLatin, CFSTR("Any-Latin") },
        { kCFStringTransformLatinKatakana, CFSTR("Latin-Katakana") },
        { kCFStringTransformLatinHiragana, CFSTR("Latin-Hiragana") },
        { kCFStringTransformHiraganaKatakana, CFSTR("Hiragana-Katakana") },
        { kCFStringTransformLatinHangul, CFSTR("Latin-Hangul") },
        { kCFStringTransformLatinArabic, CFSTR("Latin-Arabic") },
        { kCFStringTransformLatinHebrew, CFSTR("Latin-Hebrew") },
        { kCFStringTransformLatinThai, CFSTR("Latin-Thai") },
        { kCFStringTransformLatinCyrillic, CFSTR("Latin-Cyrillic") },
        { kCFStringTransformLatinGreek, CFSTR("Latin-Greek") },
        { kCFStringTransformToXMLHex, CFSTR("Any-Hex/XML") },
        { kCFStringTransformToUnicodeName, CFSTR("Any-Name") },
        { kCFStringTransformStripDiacritics, CFSTR("Accents-Any") },
    };

    if (transform == NULL) {
        return NULL;
    }

    // Resolve the wanted ICU ID once instead of re-testing every pairing for
    // every ID that ICU enumerates.
    CFStringRef wanted = NULL;
    for (size_t i = 0; i < sizeof(mappings) / sizeof(mappings[0]); i++) {
        if (CFEqual(transform, mappings[i].transform)) {
            wanted = mappings[i].icuID;
            break;
        }
    }
    if (wanted == NULL) {
        return NULL;
    }

    UTransliterator *trans = NULL;
    UEnumeration *uenum = utrans_openIDs(error);
    if (U_FAILURE(*error)) {
        fprintf(stderr, "%s\n", u_errorName(*error));
    } else {
        int32_t count = uenum_count(uenum, error);
        if (U_FAILURE(*error)) {
            fprintf(stderr, "%s\n", u_errorName(*error));
            count = 0;
        }

        for (int32_t trans_idx = 0; trans_idx < count; trans_idx++) {
            int32_t idLen = 0;
            const UChar *uid = uenum_unext(uenum, &idLen, error);
            if (U_FAILURE(*error)) {
                fprintf(stderr, "%s\n", u_errorName(*error));
                break;
            }
            // this seems rather unlikely since we should have already stopped
            // by the trans_idx exceeding the count
            if (uid == NULL) {
                break;
            }

            // Wrap the enumerator-owned ID without copying so it can be
            // compared with CFEqual; the wrapper is released before `uid` is
            // used again.
            CFStringRef name = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, uid, idLen, kCFAllocatorNull);
            Boolean matches = CFEqual(name, wanted);
            CFRelease(name);

            if (matches) {
                trans = utrans_openU(uid, idLen, dir, NULL, 0, NULL, error);
                if (U_FAILURE(*error)) {
                    fprintf(stderr, "%s\n", u_errorName(*error));
                }
                break;
            }
        }
    }

    if (uenum != NULL) {
        uenum_close(uenum);
    }

    // Reaching this point means `transform` is a known identifier, so a NULL
    // result indicates the ICU side could not provide the transliterator.
    if (trans == NULL) {
        static dispatch_once_t once = 0L;
        dispatch_once(&once, ^{
            fprintf(stderr, "Unable to find transliterators in icu data: likely this is from not including the Transliterators section in building your icu.dat file\n");
        });
    }

    return trans;
}
|
|
|
|
// Applies the transform named by `transform` to `string` in place, using an
// ICU transliterator driven through the _CFStringTransform* callbacks.
//
// string:    the mutable string to transform.
// range:     optional out-parameter. It is reset to {kCFNotFound, 0} on entry
//            and, on success, set to the extent of the transformed text.
//            NOTE(review): the incoming range value is not used to limit the
//            transformation; the whole string is always transformed.
// transform: one of the kCFStringTransform* identifiers.
// reverse:   true selects UTRANS_REVERSE, false selects UTRANS_FORWARD.
//
// Returns true when a transliterator was found and ICU reported no error,
// false otherwise (including when `string` or `transform` is NULL).
Boolean CFStringTransform(CFMutableStringRef string, CFRange *range, CFStringRef transform, Boolean reverse) {
    static UReplaceableCallbacks callbacks = {
        .length = &_CFStringTransformLength,
        .charAt = &_CFStringTransformCharAt,
        .char32At = &_CFStringTransformChar32At,
        .replace = &_CFStringTransformReplace,
        .extract = &_CFStringTransformExtract,
        .copy = &_CFStringTransformCopy,
    };

    if (range != NULL) {
        range->location = kCFNotFound;
        range->length = 0;
    }
    if (string == NULL || transform == NULL) {
        return false;
    }

    // technically this data could potentially be cached, if it is used often,
    // we should consider doing so, until then this should work
    UErrorCode err = U_ZERO_ERROR;
    UTransliterator *trans = utrans_find(transform, reverse ? UTRANS_REVERSE : UTRANS_FORWARD, &err);
    if (trans == NULL) {
        return false;
    }

    int32_t start = 0;
    int32_t limit = (int32_t)CFStringGetLength(string);
    utrans_trans(trans, (UReplaceable *)string, &callbacks, start, &limit, &err);
    // Close on every path so a failed transliteration does not leak it.
    utrans_close(trans);

    if (U_FAILURE(err)) {
        return false;
    }

    // `range` is optional; only report the result when the caller asked for it.
    if (range != NULL) {
        range->location = start;
        range->length = limit - start;
    }
    return true;
}
|
|
|