mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-06 09:05:45 +00:00
013fc50cd5
Bug 924839 - Remove a patch already part of ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10283 but also note the relevant code was removed completely upstream. r=glandium * * * Bug 924839 - Remove another patch already part of ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10290 for that. r=gaston * * * Bug 924839 - Remove another patch already in ICU 52.1. See http://bugs.icu-project.org/trac/ticket/10045 for more. r=Norbert * * * Bug 924839 - Remove another patch already applied upstream. See http://bugs.icu-project.org/trac/changeset/32937 for more. r=gaston * * * Bug 924839 - Update the ICU update script to update to 52.1, *without* applying any of our local patches. r=glandium * * * Bug 924839 - Make the ICU update script only do updating within intl/icu/source and nowhere else. r=glandium * * * Bug 924839 - Implement the changes that would be made by |cd intl/; ./update-icu.sh http://source.icu-project.org/repos/icu/icu/tags/release-52-1/;|, run with the prior changesets' changes made (thus not applying any of our local patches). These changes don't actually work without subsequent adjustments, but this provides a codebase upon which those adjustments can be made, for the purpose of generating local patches to be kept in intl/icu-patches/. rs=the-usual-suspects * * * Bug 924839 - Update the bug 899722 local patch to make runConfigureICU not override CC/CXX on BSD systems. r=gaston * * * Bug 924839 - Update the bug 724533 patch that makes ICU builds with MozillaBuild on Windows. r=glandium * * * Bug 924839 - Import an upstream patch fixing the genrb tool to properly handle the -R (--omitCollationRules) option. See http://bugs.icu-project.org/trac/ticket/10043 for the original bug report and a link to the ultimate upstream landing. r=Norbert * * * Bug 924839 - Import the upstream fix for http://bugs.icu-project.org/trac/ticket/10486 so that ICU with -DU_USING_ICU_NAMESPACE=0 will compile on Windows. 
r=Norbert * * * Bug 924839 - Adjust the update script to update ICU, then to apply all local patches (rather than skipping the second step). Thus if the update script is properly run, now, the final result should be no changes at all to the tree. NOT REVIEWED YET * * * Bug 924839 - Update jstests that depend on CLDR locale data to match CLDR 24. r=Norbert
1949 lines
66 KiB
C++
1949 lines
66 KiB
C++
/*
|
|
*******************************************************************************
|
|
* Copyright (C) 2004-2013, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*******************************************************************************
|
|
* file name: uregex.cpp
|
|
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
|
|
|
#include "unicode/regex.h"
|
|
#include "unicode/uregex.h"
|
|
#include "unicode/unistr.h"
|
|
#include "unicode/ustring.h"
|
|
#include "unicode/uchar.h"
|
|
#include "unicode/uobject.h"
|
|
#include "unicode/utf16.h"
|
|
#include "umutex.h"
|
|
#include "uassert.h"
|
|
#include "cmemory.h"
|
|
|
|
#include "regextxt.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
// Evaluates to (len - idx) when that difference is positive, otherwise to 0.
// Note: both arguments are expanded more than once, so pass side-effect-free expressions.
#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
|
|
|
|
struct RegularExpression: public UMemory {
|
|
public:
|
|
RegularExpression();
|
|
~RegularExpression();
|
|
int32_t fMagic;
|
|
RegexPattern *fPat;
|
|
u_atomic_int32_t *fPatRefCount;
|
|
UChar *fPatString;
|
|
int32_t fPatStringLen;
|
|
RegexMatcher *fMatcher;
|
|
const UChar *fText; // Text from setText()
|
|
int32_t fTextLength; // Length provided by user with setText(), which
|
|
// may be -1.
|
|
UBool fOwnsText;
|
|
};
|
|
|
|
// Signature value stored in RegularExpression::fMagic by the constructor and
// compared against by validateRE() to recognize a live RegularExpression.
static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
|
|
|
|
// Creates an empty shell: the magic signature is set, everything else is
// null / zero / FALSE until one of the uregex_open* functions fills it in.
RegularExpression::RegularExpression()
    : fMagic(REXP_MAGIC),
      fPat(NULL),
      fPatRefCount(NULL),
      fPatString(NULL),
      fPatStringLen(0),
      fMatcher(NULL),
      fText(NULL),
      fTextLength(0),
      fOwnsText(FALSE)
{
}
|
|
|
|
// Releases everything this instance holds. The pattern, its source string and
// the counter itself are only freed when the shared reference count drops to zero.
RegularExpression::~RegularExpression() {
    // The matcher is never shared, so it always goes away with the instance.
    delete fMatcher;
    fMatcher = NULL;

    if (fPatRefCount != NULL) {
        const UBool lastReference = (umtx_atomic_dec(fPatRefCount) == 0);
        if (lastReference) {
            delete fPat;
            uprv_free(fPatString);
            uprv_free((void *)fPatRefCount);
        }
    }

    // Only text buffers flagged as owned are ours to free.
    if (fOwnsText && fText != NULL) {
        uprv_free((void *)fText);
    }

    // Clear the signature so validateRE() rejects a stale handle.
    fMagic = 0;
}
|
|
|
|
U_NAMESPACE_END
|
|
|
|
U_NAMESPACE_USE
|
|
|
|
//----------------------------------------------------------------------------------------
|
|
//
|
|
// validateRE Do boilerplate style checks on API function parameters.
|
|
// Return TRUE if they look OK.
|
|
//----------------------------------------------------------------------------------------
|
|
// Shared parameter check for the API functions below.
//   re            handle to check; must be non-NULL and carry REXP_MAGIC.
//   requiresText  when TRUE, additionally require that input text has been set.
//   status        in/out error code; an incoming failure short-circuits to FALSE.
// Returns TRUE when the call may proceed. On rejection *status is set to
// U_ILLEGAL_ARGUMENT_ERROR (bad handle) or U_REGEX_INVALID_STATE (no text).
static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return FALSE;
    }

    const UBool handleOk = (re != NULL) && (re->fMagic == REXP_MAGIC);
    if (!handleOk) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    if (requiresText) {
        // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
        // Text counts as missing only if there is no UChar pointer AND the
        // owns-text flag is clear (uregex_setUText() sets the flag with a NULL pointer).
        const UBool textMissing = (re->fText == NULL) && !re->fOwnsText;
        if (textMissing) {
            *status = U_REGEX_INVALID_STATE;
            return FALSE;
        }
    }
    return TRUE;
}
|
|
|
|
//----------------------------------------------------------------------------------------
|
|
//
|
|
// uregex_open
|
|
//
|
|
//----------------------------------------------------------------------------------------
|
|
// Compiles a UTF-16 pattern and returns a new regular expression handle.
//   pattern        pattern source; must not be NULL.
//   patternLength  length in UChars, or -1 for a NUL-terminated pattern; 0 is rejected.
//   flags          compile flags, passed through to RegexPattern::compile().
//   pe             optional parse-error receiver; may be NULL.
//   status         in/out error code.
// Returns NULL on any failure, with *status describing the problem.
U_CAPI URegularExpression * U_EXPORT2
uregex_open( const  UChar          *pattern,
                    int32_t         patternLength,
                    uint32_t        flags,
                    UParseError    *pe,
                    UErrorCode     *status) {

    if (U_FAILURE(*status)) {
        return NULL;
    }
    if (pattern == NULL || patternLength < -1 || patternLength == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    int32_t actualPatLen = patternLength;
    if (actualPatLen == -1) {
        actualPatLen = u_strlen(pattern);
    }

    RegularExpression *re     = new RegularExpression;
    // Size the counter by its real type; u_atomic_int32_t is not guaranteed to
    // have the same size as a plain int32_t on every configuration.
    u_atomic_int32_t  *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(u_atomic_int32_t));
    UChar             *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
    if (re == NULL || refC == NULL || patBuf == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        delete re;
        uprv_free((void *)refC);
        uprv_free(patBuf);
        return NULL;
    }
    re->fPatRefCount = refC;
    *re->fPatRefCount = 1;

    //
    // Make a copy of the pattern string, so we can return it later if asked.
    //    For compiling the pattern, we will use a UText wrapper around
    //    this local copy, to avoid making even more copies.
    //
    // Note: the caller's patternLength (possibly -1) is what gets recorded and
    //       what uregex_pattern() later reports.
    //
    re->fPatString    = patBuf;
    re->fPatStringLen = patternLength;
    u_memcpy(patBuf, pattern, actualPatLen);
    patBuf[actualPatLen] = 0;

    UText patText = UTEXT_INITIALIZER;
    utext_openUChars(&patText, patBuf, patternLength, status);

    //
    // Compile the pattern
    //
    if (pe != NULL) {
        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    } else {
        re->fPat = RegexPattern::compile(&patText, flags, *status);
    }
    utext_close(&patText);

    //
    // Create the matcher object, but only if compilation succeeded.
    //
    if (U_SUCCESS(*status)) {
        re->fMatcher = re->fPat->matcher(*status);
    }

    if (U_FAILURE(*status)) {
        // The destructor releases the pattern copy, the counter and any partial results.
        delete re;
        return NULL;
    }
    return (URegularExpression*)re;
}
|
|
|
|
//----------------------------------------------------------------------------------------
|
|
//
|
|
// uregex_openUText
|
|
//
|
|
//----------------------------------------------------------------------------------------
|
|
// Compiles a pattern supplied as a UText and returns a new regular expression handle.
//   pattern  pattern source; must not be NULL and must not be empty.
//   flags    compile flags, passed through to RegexPattern::compile().
//   pe       optional parse-error receiver; may be NULL.
//   status   in/out error code.
// Returns NULL on any failure, with *status describing the problem.
U_CAPI URegularExpression * U_EXPORT2
uregex_openUText(UText          *pattern,
                 uint32_t        flags,
                 UParseError    *pe,
                 UErrorCode     *status) {

    if (U_FAILURE(*status)) {
        return NULL;
    }
    if (pattern == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    int64_t patternNativeLength = utext_nativeLength(pattern);

    if (patternNativeLength == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    RegularExpression *re = new RegularExpression;

    // Preflight the extraction to learn the UTF-16 length. A separate status is
    // used because a preflight is expected to report an overflow.
    UErrorCode lengthStatus = U_ZERO_ERROR;
    int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);

    // Size the counter by its real type; u_atomic_int32_t is not guaranteed to
    // have the same size as a plain int32_t on every configuration.
    u_atomic_int32_t *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(u_atomic_int32_t));
    UChar            *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
    if (re == NULL || refC == NULL || patBuf == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        delete re;
        uprv_free((void *)refC);
        uprv_free(patBuf);
        return NULL;
    }
    re->fPatRefCount = refC;
    *re->fPatRefCount = 1;

    //
    // Make a copy of the pattern string, so we can return it later if asked.
    //    For compiling the pattern, we will use a read-only UText wrapper
    //    around this local copy, to avoid making even more copies.
    //
    re->fPatString    = patBuf;
    re->fPatStringLen = pattern16Length;
    utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);

    UText patText = UTEXT_INITIALIZER;
    utext_openUChars(&patText, patBuf, pattern16Length, status);

    //
    // Compile the pattern
    //
    if (pe != NULL) {
        re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
    } else {
        re->fPat = RegexPattern::compile(&patText, flags, *status);
    }
    utext_close(&patText);

    //
    // Create the matcher object, but only if compilation succeeded.
    //
    if (U_SUCCESS(*status)) {
        re->fMatcher = re->fPat->matcher(*status);
    }

    if (U_FAILURE(*status)) {
        // The destructor releases the pattern copy, the counter and any partial results.
        delete re;
        return NULL;
    }
    return (URegularExpression*)re;
}
|
|
|
|
//----------------------------------------------------------------------------------------
|
|
//
|
|
// uregex_close
|
|
//
|
|
//----------------------------------------------------------------------------------------
|
|
// Destroys a regular expression handle. Handles that fail validation
// (NULL, or not carrying the expected magic value) are silently ignored.
U_CAPI void U_EXPORT2
uregex_close(URegularExpression  *re2) {
    UErrorCode         localStatus = U_ZERO_ERROR;
    RegularExpression *target      = (RegularExpression*)re2;
    if (validateRE(target, FALSE, &localStatus)) {
        delete target;
    }
}
|
|
|
|
|
|
//----------------------------------------------------------------------------------------
|
|
//
|
|
// uregex_clone
|
|
//
|
|
//----------------------------------------------------------------------------------------
|
|
// Creates a second handle that shares the source's compiled pattern and
// pattern string (bumping their reference count) but has its own matcher.
// The input text of the source is not carried over.
// Returns NULL with *status set on failure.
U_CAPI URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
    RegularExpression *original = (RegularExpression*)source2;
    if (!validateRE(original, FALSE, status)) {
        return NULL;
    }

    RegularExpression *copy = new RegularExpression;
    if (copy == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    // Build the matcher first: if that fails, the copy does not yet reference
    // the shared pattern, so deleting it leaves the reference count untouched.
    copy->fMatcher = original->fPat->matcher(*status);
    if (U_FAILURE(*status)) {
        delete copy;
        return NULL;
    }

    // Share the pattern data and register the new reference.
    copy->fPat          = original->fPat;
    copy->fPatRefCount  = original->fPatRefCount;
    copy->fPatString    = original->fPatString;
    copy->fPatStringLen = original->fPatStringLen;
    umtx_atomic_inc(original->fPatRefCount);
    // Note:  fText is not cloned.

    return (URegularExpression*)copy;
}
|
|
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_pattern
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns the saved copy of the pattern source string.
//   patLength  optional out-parameter; when non-NULL it receives the recorded
//              pattern length (fPatStringLen).
// Returns NULL if the handle or the incoming status is invalid.
U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression *regexp2,
                     int32_t            *patLength,
                     UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return NULL;
    }

    const UBool wantsLength = (patLength != NULL);
    if (wantsLength) {
        *patLength = self->fPatStringLen;
    }
    return self->fPatString;
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_patternUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns the pattern source as a UText, as provided by RegexPattern::patternText().
// Returns NULL, with *status set by validateRE(), if the handle is NULL or
// invalid or if *status already indicates a failure on entry.
U_CAPI UText * U_EXPORT2
uregex_patternUText(const URegularExpression *regexp2,
                          UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    // Validate like every other entry point instead of dereferencing an
    // unchecked handle.
    if (validateRE(regexp, FALSE, status) == FALSE) {
        return NULL;
    }
    return regexp->fPat->patternText(*status);
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_flags
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns the flags reported by the compiled pattern, or 0 if validation fails.
U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return 0;
    }
    return self->fPat->flags();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Sets the UTF-16 text the matcher will operate on.
//   text        input string; must not be NULL. Only the pointer is stored.
//   textLength  length in UChars, or -1; values below -1 are rejected.
// Sets U_ILLEGAL_ARGUMENT_ERROR for bad arguments.
U_CAPI void U_EXPORT2
uregex_setText(URegularExpression *regexp2,
               const UChar        *text,
               int32_t             textLength,
               UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return;
    }
    const UBool badArgs = (text == NULL) || (textLength < -1);
    if (badArgs) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Drop a previously extracted buffer that this object owns.
    if (self->fOwnsText && self->fText != NULL) {
        uprv_free((void *)self->fText);
    }

    // The caller's buffer is referenced, not copied, so it is not ours to free.
    self->fText       = text;
    self->fTextLength = textLength;
    self->fOwnsText   = FALSE;

    UText wrapper = UTEXT_INITIALIZER;
    utext_openUChars(&wrapper, text, textLength, status);
    self->fMatcher->reset(&wrapper);
    utext_close(&wrapper); // reset() made a shallow clone, so we don't need this copy
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Sets the input text from a UText. No UChar copy is made here; fText stays
// NULL and is filled in lazily by uregex_getText() if requested.
// Sets U_ILLEGAL_ARGUMENT_ERROR when text is NULL.
U_CAPI void U_EXPORT2
uregex_setUText(URegularExpression *regexp2,
                UText              *text,
                UErrorCode         *status) {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return;
    }
    if (text == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Drop a previously extracted buffer that this object owns.
    const UBool hasOwnedBuffer = self->fOwnsText && (self->fText != NULL);
    if (hasOwnedBuffer) {
        uprv_free((void *)self->fText);
    }

    self->fText       = NULL; // only fill it in on request
    self->fTextLength = -1;
    self->fOwnsText   = TRUE; // together with the NULL fText this tells validateRE() that text is set
    self->fMatcher->reset(text);
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_getText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns the input text as UTF-16.
//   textLength  optional out-parameter receiving the stored text length.
// If no UChar pointer is cached yet (text was set through a UText), the text
// is either borrowed directly from the UText's chunk or extracted into a
// newly allocated buffer owned by this object.
// Returns NULL if validation fails, or if the extraction buffer cannot be
// allocated; in the latter case *status is set to U_MEMORY_ALLOCATION_ERROR,
// *textLength is left unmodified and the object's state is unchanged.
U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression *regexp2,
               int32_t            *textLength,
               UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, FALSE, status) == FALSE) {
        return NULL;
    }

    if (regexp->fText == NULL) {
        // need to fill in the text
        UText *inputText = regexp->fMatcher->inputText();
        int64_t inputNativeLength = utext_nativeLength(inputText);
        if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
            regexp->fText = inputText->chunkContents;
            regexp->fTextLength = (int32_t)inputNativeLength;
            regexp->fOwnsText = FALSE; // because the UText owns it
        } else {
            // Preflight to get the UTF-16 length; the overflow this reports is
            // expected, hence the throw-away status.
            UErrorCode lengthStatus = U_ZERO_ERROR;
            int32_t utf16Length = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus);

            UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(utf16Length+1));
            if (inputChars == NULL) {
                // Report the allocation failure explicitly instead of passing a
                // NULL destination on to utext_extract().
                *status = U_MEMORY_ALLOCATION_ERROR;
                return NULL;
            }

            utext_extract(inputText, 0, inputNativeLength, inputChars, utf16Length+1, status);
            regexp->fText = inputChars;
            regexp->fTextLength = utf16Length;
            regexp->fOwnsText = TRUE; // should already be set but just in case
        }
    }

    if (textLength != NULL) {
        *textLength = regexp->fTextLength;
    }
    return regexp->fText;
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_getUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns the matcher's input as a UText via RegexMatcher::getInput().
// If validation fails, dest is handed back untouched.
U_CAPI UText * U_EXPORT2
uregex_getUText(URegularExpression *regexp2,
                UText              *dest,
                UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (validateRE(self, FALSE, status)) {
        return self->fMatcher->getInput(dest, *status);
    }
    return dest;
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_refreshUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Forwards to RegexMatcher::refreshInputText() after validating the handle.
// Does nothing when validation fails.
U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression *regexp2,
                    UText              *text,
                    UErrorCode         *status) {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (validateRE(self, FALSE, status)) {
        self->fMatcher->refreshInputText(text, *status);
    }
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_matches
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit index convenience wrapper: widens startIndex and defers to uregex_matches64().
U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression *regexp2,
               int32_t             startIndex,
               UErrorCode         *status)  {
    const int64_t wideStart = (int64_t)startIndex;
    return uregex_matches64(regexp2, wideStart, status);
}
|
|
|
|
// Forwards to RegexMatcher::matches(). A startIndex of -1 selects the overload
// without an index; any other value is passed as the start position.
// Requires input text to be set; returns FALSE when validation fails.
U_CAPI UBool U_EXPORT2
uregex_matches64(URegularExpression *regexp2,
                 int64_t             startIndex,
                 UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, TRUE, status)) {
        return FALSE;
    }
    if (startIndex != -1) {
        return self->fMatcher->matches(startIndex, *status);
    }
    return self->fMatcher->matches(*status);
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_lookingAt
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit index convenience wrapper: widens startIndex and defers to uregex_lookingAt64().
U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression *regexp2,
                 int32_t             startIndex,
                 UErrorCode         *status)  {
    const int64_t wideStart = (int64_t)startIndex;
    return uregex_lookingAt64(regexp2, wideStart, status);
}
|
|
|
|
// Forwards to RegexMatcher::lookingAt(). A startIndex of -1 selects the overload
// without an index; any other value is passed as the start position.
// Requires input text to be set; returns FALSE when validation fails.
U_CAPI UBool U_EXPORT2
uregex_lookingAt64(URegularExpression *regexp2,
                   int64_t             startIndex,
                   UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, TRUE, status)) {
        return FALSE;
    }
    if (startIndex != -1) {
        return self->fMatcher->lookingAt(startIndex, *status);
    }
    return self->fMatcher->lookingAt(*status);
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_find
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit index convenience wrapper: widens startIndex and defers to uregex_find64().
U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression *regexp2,
            int32_t             startIndex,
            UErrorCode         *status)  {
    const int64_t wideStart = (int64_t)startIndex;
    return uregex_find64(regexp2, wideStart, status);
}
|
|
|
|
// Searches for a match.
//   startIndex == -1 : calls resetPreserveRegion() on the matcher, then find().
//   otherwise        : calls find(startIndex, status).
// Requires input text to be set; returns FALSE when validation fails.
U_CAPI UBool U_EXPORT2
uregex_find64(URegularExpression *regexp2,
              int64_t             startIndex,
              UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, TRUE, status)) {
        return FALSE;
    }
    if (startIndex != -1) {
        return self->fMatcher->find(startIndex, *status);
    }
    self->fMatcher->resetPreserveRegion();
    return self->fMatcher->find();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_findNext
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Forwards to RegexMatcher::find() with no arguments.
// Requires input text to be set; returns FALSE when validation fails.
U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression *regexp2,
                UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, TRUE, status)) {
        return FALSE;
    }
    return self->fMatcher->find();
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_groupCount
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns the matcher's group count, or 0 when validation fails.
U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression *regexp2,
                  UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return 0;
    }
    return self->fMatcher->groupCount();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_group
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Copies the text of a capture group into a caller-supplied UTF-16 buffer.
//   groupNum      capture group number, passed to the matcher's start()/end().
//   dest          destination buffer; may be NULL only when destCapacity is 0.
//   destCapacity  capacity of dest in UChars; 0 requests a length-only preflight.
//   status        in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for bad
//                 buffer arguments, U_STRING_NOT_TERMINATED_WARNING when the
//                 group exactly fills the buffer, and U_BUFFER_OVERFLOW_ERROR
//                 when the group is truncated.
// Returns the full length of the group on the direct path, or the result of
// utext_extract() on the UText path; 0 on error.
U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression *regexp2,
             int32_t             groupNum,
             UChar              *dest,
             int32_t             destCapacity,
             UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        return 0;
    }
    if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if (destCapacity == 0 || regexp->fText != NULL) {
        // If preflighting or if we already have the text as UChars,
        // this is a little cheaper than going through uregex_groupUTextDeep()

        //
        // Pick up the range of characters from the matcher
        //
        int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
        int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
        if (U_FAILURE(*status)) {
            return 0;
        }

        //
        // Trim length based on buffer capacity
        //
        int32_t fullLength = endIx - startIx;
        int32_t copyLength = fullLength;
        if (copyLength < destCapacity) {
            dest[copyLength] = 0;
        } else if (copyLength == destCapacity) {
            *status = U_STRING_NOT_TERMINATED_WARNING;
        } else {
            copyLength = destCapacity;
            *status = U_BUFFER_OVERFLOW_ERROR;
        }

        //
        // Copy capture group to user's buffer
        // (copyLength is 0 whenever we are only preflighting, so fText is not
        //  touched in that case.)
        //
        if (copyLength > 0) {
            u_memcpy(dest, &regexp->fText[startIx], copyLength);
        }
        return fullLength;
    } else {
        // No UChar copy of the input is cached: obtain the group as a UText
        // and extract from that.
        UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
        int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
        utext_close(groupText);
        return result;
    }
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_groupUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns a capture group as a UText by forwarding to
// RegexMatcher::group(groupNum, dest, length, status).
//   dest         optional UText passed through to the matcher.
//   groupLength  out-parameter receiving the group length. A NULL pointer is
//                tolerated, in which case the length is discarded.
// If validation fails, returns dest when it is non-NULL, otherwise a newly
// opened empty UText.
U_CAPI UText * U_EXPORT2
uregex_groupUText(URegularExpression *regexp2,
                  int32_t             groupNum,
                  UText              *dest,
                  int64_t            *groupLength,
                  UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        UErrorCode emptyTextStatus = U_ZERO_ERROR;
        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    }

    // Other length out-parameters in this file are optional; avoid
    // dereferencing a NULL pointer here as well.
    int64_t  discardedLength = 0;
    int64_t &lengthOut = (groupLength != NULL) ? *groupLength : discardedLength;

    return regexp->fMatcher->group(groupNum, dest, lengthOut, *status);
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_groupUTextDeep
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Returns a capture group as a UText.
//   dest  optional destination. When non-NULL its whole content is replaced
//         with the group text; when NULL a new UText is created.
// When a UChar copy of the input is cached (fText != NULL) the group is taken
// from that buffer; otherwise the request is forwarded to
// RegexMatcher::group(groupNum, dest, status).
// On a validation or group-lookup failure, returns dest when it is non-NULL,
// otherwise a newly opened empty UText.
U_CAPI UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression *regexp2,
                      int32_t             groupNum,
                      UText              *dest,
                      UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        UErrorCode emptyTextStatus = U_ZERO_ERROR;
        return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
    }

    if (regexp->fText != NULL) {
        //
        // Pick up the range of characters from the matcher
        // and use our already-extracted characters
        //
        int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
        int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
        if (U_FAILURE(*status)) {
            UErrorCode emptyTextStatus = U_ZERO_ERROR;
            return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
        }

        if (dest) {
            utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
        } else {
            // Wrap the group's slice of fText in a temporary UText, then clone
            // it with the "deep" argument set to TRUE.
            UText groupText = UTEXT_INITIALIZER;
            utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
            dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
            utext_close(&groupText);
        }

        return dest;
    } else {
        return regexp->fMatcher->group(groupNum, dest, *status);
    }
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_start
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit convenience wrapper: calls uregex_start64() and narrows the result to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression *regexp2,
             int32_t             groupNum,
             UErrorCode         *status)  {
    const int64_t wideStart = uregex_start64(regexp2, groupNum, status);
    return (int32_t)wideStart;
}
|
|
|
|
// Returns the start index of the given capture group as a 64-bit value.
// Requires input text to be set; returns 0 when validation fails.
U_CAPI int64_t U_EXPORT2
uregex_start64(URegularExpression *regexp2,
               int32_t             groupNum,
               UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        return 0;
    }
    // Use the matcher's 64-bit accessor; routing through the int32_t start()
    // would truncate indexes that do not fit in 32 bits.
    int64_t result = regexp->fMatcher->start64(groupNum, *status);
    return result;
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_end
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit convenience wrapper: calls uregex_end64() and narrows the result to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression *regexp2,
           int32_t             groupNum,
           UErrorCode         *status)  {
    const int64_t wideEnd = uregex_end64(regexp2, groupNum, status);
    return (int32_t)wideEnd;
}
|
|
|
|
// Returns the end index of the given capture group as a 64-bit value.
// Requires input text to be set; returns 0 when validation fails.
U_CAPI int64_t U_EXPORT2
uregex_end64(URegularExpression *regexp2,
             int32_t             groupNum,
             UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        return 0;
    }
    // Use the matcher's 64-bit accessor; routing through the int32_t end()
    // would truncate indexes that do not fit in 32 bits.
    int64_t result = regexp->fMatcher->end64(groupNum, *status);
    return result;
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_reset
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit index convenience wrapper: widens index and defers to uregex_reset64().
U_CAPI void U_EXPORT2
uregex_reset(URegularExpression *regexp2,
             int32_t             index,
             UErrorCode         *status)  {
    const int64_t wideIndex = (int64_t)index;
    uregex_reset64(regexp2, wideIndex, status);
}
|
|
|
|
// Forwards to RegexMatcher::reset(index, status).
// Requires input text to be set; does nothing when validation fails.
U_CAPI void U_EXPORT2
uregex_reset64(URegularExpression *regexp2,
               int64_t             index,
               UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (validateRE(self, TRUE, status)) {
        self->fMatcher->reset(index, *status);
    }
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setRegion
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit convenience wrapper: widens both bounds and defers to uregex_setRegion64().
U_CAPI void U_EXPORT2
uregex_setRegion(URegularExpression *regexp2,
                 int32_t             regionStart,
                 int32_t             regionLimit,
                 UErrorCode         *status)  {
    const int64_t wideStart = (int64_t)regionStart;
    const int64_t wideLimit = (int64_t)regionLimit;
    uregex_setRegion64(regexp2, wideStart, wideLimit, status);
}
|
|
|
|
// Forwards to RegexMatcher::region(regionStart, regionLimit, status).
// Requires input text to be set; does nothing when validation fails.
U_CAPI void U_EXPORT2
uregex_setRegion64(URegularExpression *regexp2,
                   int64_t             regionStart,
                   int64_t             regionLimit,
                   UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (validateRE(self, TRUE, status)) {
        self->fMatcher->region(regionStart, regionLimit, *status);
    }
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setRegionAndStart
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Forwards to RegexMatcher::region(regionStart, regionLimit, startIndex, status).
// Requires input text to be set; does nothing when validation fails.
U_CAPI void U_EXPORT2
uregex_setRegionAndStart(URegularExpression *regexp2,
                         int64_t             regionStart,
                         int64_t             regionLimit,
                         int64_t             startIndex,
                         UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (validateRE(self, TRUE, status)) {
        self->fMatcher->region(regionStart, regionLimit, startIndex, *status);
    }
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_regionStart
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit convenience wrapper: calls uregex_regionStart64() and narrows the result to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_regionStart(const URegularExpression *regexp2,
                         UErrorCode         *status)  {
    const int64_t wideStart = uregex_regionStart64(regexp2, status);
    return (int32_t)wideStart;
}
|
|
|
|
// Returns the start of the matcher's region as a 64-bit value.
// Requires input text to be set; returns 0 when validation fails.
U_CAPI int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression *regexp2,
                           UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        return 0;
    }
    // Use the matcher's 64-bit accessor so that large positions are not
    // narrowed to int32_t on the way out.
    return regexp->fMatcher->regionStart64();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_regionEnd
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// 32-bit convenience wrapper: calls uregex_regionEnd64() and narrows the result to int32_t.
U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression *regexp2,
                       UErrorCode         *status)  {
    const int64_t wideEnd = uregex_regionEnd64(regexp2, status);
    return (int32_t)wideEnd;
}
|
|
|
|
// Returns the end of the matcher's region as a 64-bit value.
// Requires input text to be set; returns 0 when validation fails.
U_CAPI int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression *regexp2,
                         UErrorCode         *status)  {
    RegularExpression *regexp = (RegularExpression*)regexp2;
    if (validateRE(regexp, TRUE, status) == FALSE) {
        return 0;
    }
    // Use the matcher's 64-bit accessor so that large positions are not
    // narrowed to int32_t on the way out.
    return regexp->fMatcher->regionEnd64();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_hasTransparentBounds
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Reports the matcher's hasTransparentBounds() setting; FALSE when validation fails.
U_CAPI UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression *regexp2,
                                  UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return FALSE;
    }
    return self->fMatcher->hasTransparentBounds();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_useTransparentBounds
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Passes b to the matcher's useTransparentBounds(); does nothing when validation fails.
U_CAPI void U_EXPORT2
uregex_useTransparentBounds(URegularExpression *regexp2,
                            UBool               b,
                            UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (validateRE(self, FALSE, status)) {
        self->fMatcher->useTransparentBounds(b);
    }
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_hasAnchoringBounds
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
// Reports the matcher's hasAnchoringBounds() setting; FALSE when validation fails.
U_CAPI UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression *regexp2,
                                UErrorCode         *status)  {
    RegularExpression *self = (RegularExpression*)regexp2;
    if (!validateRE(self, FALSE, status)) {
        return FALSE;
    }
    return self->fMatcher->hasAnchoringBounds();
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_useAnchoringBounds
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_useAnchoringBounds(URegularExpression *regexp2,
|
|
UBool b,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status) == FALSE) {
|
|
return;
|
|
}
|
|
regexp->fMatcher->useAnchoringBounds(b);
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_hitEnd
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI UBool U_EXPORT2
|
|
uregex_hitEnd(const URegularExpression *regexp2,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return FALSE;
|
|
}
|
|
return regexp->fMatcher->hitEnd();
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_requireEnd
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI UBool U_EXPORT2
|
|
uregex_requireEnd(const URegularExpression *regexp2,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return FALSE;
|
|
}
|
|
return regexp->fMatcher->requireEnd();
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setTimeLimit
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_setTimeLimit(URegularExpression *regexp2,
|
|
int32_t limit,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
regexp->fMatcher->setTimeLimit(limit, *status);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_getTimeLimit
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_getTimeLimit(const URegularExpression *regexp2,
|
|
UErrorCode *status) {
|
|
int32_t retVal = 0;
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
retVal = regexp->fMatcher->getTimeLimit();
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setStackLimit
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_setStackLimit(URegularExpression *regexp2,
|
|
int32_t limit,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
regexp->fMatcher->setStackLimit(limit, *status);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_getStackLimit
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_getStackLimit(const URegularExpression *regexp2,
|
|
UErrorCode *status) {
|
|
int32_t retVal = 0;
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
retVal = regexp->fMatcher->getStackLimit();
|
|
}
|
|
return retVal;
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setMatchCallback
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_setMatchCallback(URegularExpression *regexp2,
|
|
URegexMatchCallback *callback,
|
|
const void *context,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
regexp->fMatcher->setMatchCallback(callback, context, *status);
|
|
}
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_getMatchCallback
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_getMatchCallback(const URegularExpression *regexp2,
|
|
URegexMatchCallback **callback,
|
|
const void **context,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
regexp->fMatcher->getMatchCallback(*callback, *context, *status);
|
|
}
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_setMatchProgressCallback
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_setFindProgressCallback(URegularExpression *regexp2,
|
|
URegexFindProgressCallback *callback,
|
|
const void *context,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
regexp->fMatcher->setFindProgressCallback(callback, context, *status);
|
|
}
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_getMatchCallback
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI void U_EXPORT2
|
|
uregex_getFindProgressCallback(const URegularExpression *regexp2,
|
|
URegexFindProgressCallback **callback,
|
|
const void **context,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, FALSE, status)) {
|
|
regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
|
|
}
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_replaceAll
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_replaceAll(URegularExpression *regexp2,
|
|
const UChar *replacementText,
|
|
int32_t replacementLength,
|
|
UChar *destBuf,
|
|
int32_t destCapacity,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
if (replacementText == NULL || replacementLength < -1 ||
|
|
(destBuf == NULL && destCapacity > 0) ||
|
|
destCapacity < 0) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
int32_t len = 0;
|
|
|
|
uregex_reset(regexp2, 0, status);
|
|
|
|
// Note: Seperate error code variables for findNext() and appendReplacement()
|
|
// are used so that destination buffer overflow errors
|
|
// in appendReplacement won't stop findNext() from working.
|
|
// appendReplacement() and appendTail() special case incoming buffer
|
|
// overflow errors, continuing to return the correct length.
|
|
UErrorCode findStatus = *status;
|
|
while (uregex_findNext(regexp2, &findStatus)) {
|
|
len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
|
|
&destBuf, &destCapacity, status);
|
|
}
|
|
len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
|
|
|
|
if (U_FAILURE(findStatus)) {
|
|
// If anything went wrong with the findNext(), make that error trump
|
|
// whatever may have happened with the append() operations.
|
|
// Errors in findNext() are not expected.
|
|
*status = findStatus;
|
|
}
|
|
|
|
return len;
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_replaceAllUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI UText * U_EXPORT2
|
|
uregex_replaceAllUText(URegularExpression *regexp2,
|
|
UText *replacementText,
|
|
UText *dest,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
if (replacementText == NULL) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
|
|
return dest;
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_replaceFirst
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_replaceFirst(URegularExpression *regexp2,
|
|
const UChar *replacementText,
|
|
int32_t replacementLength,
|
|
UChar *destBuf,
|
|
int32_t destCapacity,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
if (replacementText == NULL || replacementLength < -1 ||
|
|
(destBuf == NULL && destCapacity > 0) ||
|
|
destCapacity < 0) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
int32_t len = 0;
|
|
UBool findSucceeded;
|
|
uregex_reset(regexp2, 0, status);
|
|
findSucceeded = uregex_find(regexp2, 0, status);
|
|
if (findSucceeded) {
|
|
len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
|
|
&destBuf, &destCapacity, status);
|
|
}
|
|
len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
|
|
|
|
return len;
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_replaceFirstUText
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
U_CAPI UText * U_EXPORT2
|
|
uregex_replaceFirstUText(URegularExpression *regexp2,
|
|
UText *replacementText,
|
|
UText *dest,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
if (replacementText == NULL) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
|
|
return dest;
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_appendReplacement
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
|
|
U_NAMESPACE_BEGIN
|
|
//
|
|
// Dummy class, because these functions need to be friends of class RegexMatcher,
|
|
// and stand-alone C functions don't work as friends
|
|
//
|
|
class RegexCImpl {
|
|
public:
|
|
inline static int32_t appendReplacement(RegularExpression *regexp,
|
|
const UChar *replacementText,
|
|
int32_t replacementLength,
|
|
UChar **destBuf,
|
|
int32_t *destCapacity,
|
|
UErrorCode *status);
|
|
|
|
inline static int32_t appendTail(RegularExpression *regexp,
|
|
UChar **destBuf,
|
|
int32_t *destCapacity,
|
|
UErrorCode *status);
|
|
|
|
inline static int32_t split(RegularExpression *regexp,
|
|
UChar *destBuf,
|
|
int32_t destCapacity,
|
|
int32_t *requiredCapacity,
|
|
UChar *destFields[],
|
|
int32_t destFieldsCapacity,
|
|
UErrorCode *status);
|
|
};
|
|
|
|
U_NAMESPACE_END
|
|
|
|
|
|
|
|
// Characters with special meaning in replacement text.
static const UChar BACKSLASH  = 0x005C;   // U+005C, introduces an escape
static const UChar DOLLARSIGN = 0x0024;   // U+0024, introduces a capture group reference
|
|
|
|
//
|
|
// Move a character to an output buffer, with bounds checking on the index.
|
|
// Index advances even if capacity is exceeded, for preflight size computations.
|
|
// This little sequence is used a LOT.
|
|
//
|
|
static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
|
|
if (*idx < bufCapacity) {
|
|
buf[*idx] = c;
|
|
}
|
|
(*idx)++;
|
|
}
|
|
|
|
|
|
//
|
|
// appendReplacement, the actual implementation.
|
|
//
|
|
int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
|
|
const UChar *replacementText,
|
|
int32_t replacementLength,
|
|
UChar **destBuf,
|
|
int32_t *destCapacity,
|
|
UErrorCode *status) {
|
|
|
|
// If we come in with a buffer overflow error, don't suppress the operation.
|
|
// A series of appendReplacements, appendTail need to correctly preflight
|
|
// the buffer size when an overflow happens somewhere in the middle.
|
|
UBool pendingBufferOverflow = FALSE;
|
|
if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
|
|
pendingBufferOverflow = TRUE;
|
|
*status = U_ZERO_ERROR;
|
|
}
|
|
|
|
//
|
|
// Validate all paramters
|
|
//
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
if (replacementText == NULL || replacementLength < -1 ||
|
|
destCapacity == NULL || destBuf == NULL ||
|
|
(*destBuf == NULL && *destCapacity > 0) ||
|
|
*destCapacity < 0) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
RegexMatcher *m = regexp->fMatcher;
|
|
if (m->fMatch == FALSE) {
|
|
*status = U_REGEX_INVALID_STATE;
|
|
return 0;
|
|
}
|
|
|
|
UChar *dest = *destBuf;
|
|
int32_t capacity = *destCapacity;
|
|
int32_t destIdx = 0;
|
|
int32_t i;
|
|
|
|
// If it wasn't supplied by the caller, get the length of the replacement text.
|
|
// TODO: slightly smarter logic in the copy loop could watch for the NUL on
|
|
// the fly and avoid this step.
|
|
if (replacementLength == -1) {
|
|
replacementLength = u_strlen(replacementText);
|
|
}
|
|
|
|
// Copy input string from the end of previous match to start of current match
|
|
if (regexp->fText != NULL) {
|
|
int32_t matchStart;
|
|
int32_t lastMatchEnd;
|
|
if (UTEXT_USES_U16(m->fInputText)) {
|
|
lastMatchEnd = (int32_t)m->fLastMatchEnd;
|
|
matchStart = (int32_t)m->fMatchStart;
|
|
} else {
|
|
// !!!: Would like a better way to do this!
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
|
|
status = U_ZERO_ERROR;
|
|
matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
|
|
}
|
|
for (i=lastMatchEnd; i<matchStart; i++) {
|
|
appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
|
|
}
|
|
} else {
|
|
UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
|
|
destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
|
|
dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
|
|
&possibleOverflowError);
|
|
}
|
|
U_ASSERT(destIdx >= 0);
|
|
|
|
// scan the replacement text, looking for substitutions ($n) and \escapes.
|
|
int32_t replIdx = 0;
|
|
while (replIdx < replacementLength) {
|
|
UChar c = replacementText[replIdx];
|
|
replIdx++;
|
|
if (c != DOLLARSIGN && c != BACKSLASH) {
|
|
// Common case, no substitution, no escaping,
|
|
// just copy the char to the dest buf.
|
|
appendToBuf(c, &destIdx, dest, capacity);
|
|
continue;
|
|
}
|
|
|
|
if (c == BACKSLASH) {
|
|
// Backslash Escape. Copy the following char out without further checks.
|
|
// Note: Surrogate pairs don't need any special handling
|
|
// The second half wont be a '$' or a '\', and
|
|
// will move to the dest normally on the next
|
|
// loop iteration.
|
|
if (replIdx >= replacementLength) {
|
|
break;
|
|
}
|
|
c = replacementText[replIdx];
|
|
|
|
if (c==0x55/*U*/ || c==0x75/*u*/) {
|
|
// We have a \udddd or \Udddddddd escape sequence.
|
|
UChar32 escapedChar =
|
|
u_unescapeAt(uregex_ucstr_unescape_charAt,
|
|
&replIdx, // Index is updated by unescapeAt
|
|
replacementLength, // Length of replacement text
|
|
(void *)replacementText);
|
|
|
|
if (escapedChar != (UChar32)0xFFFFFFFF) {
|
|
if (escapedChar <= 0xffff) {
|
|
appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
|
|
} else {
|
|
appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
|
|
appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
|
|
}
|
|
continue;
|
|
}
|
|
// Note: if the \u escape was invalid, just fall through and
|
|
// treat it as a plain \<anything> escape.
|
|
}
|
|
|
|
// Plain backslash escape. Just put out the escaped character.
|
|
appendToBuf(c, &destIdx, dest, capacity);
|
|
|
|
replIdx++;
|
|
continue;
|
|
}
|
|
|
|
|
|
|
|
// We've got a $. Pick up a capture group number if one follows.
|
|
// Consume at most the number of digits necessary for the largest capture
|
|
// number that is valid for this pattern.
|
|
|
|
int32_t numDigits = 0;
|
|
int32_t groupNum = 0;
|
|
UChar32 digitC;
|
|
for (;;) {
|
|
if (replIdx >= replacementLength) {
|
|
break;
|
|
}
|
|
U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
|
|
if (u_isdigit(digitC) == FALSE) {
|
|
break;
|
|
}
|
|
|
|
U16_FWD_1(replacementText, replIdx, replacementLength);
|
|
groupNum=groupNum*10 + u_charDigitValue(digitC);
|
|
numDigits++;
|
|
if (numDigits >= m->fPattern->fMaxCaptureDigits) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
if (numDigits == 0) {
|
|
// The $ didn't introduce a group number at all.
|
|
// Treat it as just part of the substitution text.
|
|
appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
|
|
continue;
|
|
}
|
|
|
|
// Finally, append the capture group data to the destination.
|
|
destIdx += uregex_group((URegularExpression*)regexp, groupNum,
|
|
dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
|
|
if (*status == U_BUFFER_OVERFLOW_ERROR) {
|
|
// Ignore buffer overflow when extracting the group. We need to
|
|
// continue on to get full size of the untruncated result. We will
|
|
// raise our own buffer overflow error at the end.
|
|
*status = U_ZERO_ERROR;
|
|
}
|
|
|
|
if (U_FAILURE(*status)) {
|
|
// Can fail if group number is out of range.
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
//
|
|
// Nul Terminate the dest buffer if possible.
|
|
// Set the appropriate buffer overflow or not terminated error, if needed.
|
|
//
|
|
if (destIdx < capacity) {
|
|
dest[destIdx] = 0;
|
|
} else if (destIdx == *destCapacity) {
|
|
*status = U_STRING_NOT_TERMINATED_WARNING;
|
|
} else {
|
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
|
|
//
|
|
// Return an updated dest buffer and capacity to the caller.
|
|
//
|
|
if (destIdx > 0 && *destCapacity > 0) {
|
|
if (destIdx < capacity) {
|
|
*destBuf += destIdx;
|
|
*destCapacity -= destIdx;
|
|
} else {
|
|
*destBuf += capacity;
|
|
*destCapacity = 0;
|
|
}
|
|
}
|
|
|
|
// If we came in with a buffer overflow, make sure we go out with one also.
|
|
// (A zero length match right at the end of the previous match could
|
|
// make this function succeed even though a previous call had overflowed the buf)
|
|
if (pendingBufferOverflow && U_SUCCESS(*status)) {
|
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
|
|
return destIdx;
|
|
}
|
|
|
|
//
|
|
// appendReplacement the actual API function,
|
|
//
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_appendReplacement(URegularExpression *regexp2,
|
|
const UChar *replacementText,
|
|
int32_t replacementLength,
|
|
UChar **destBuf,
|
|
int32_t *destCapacity,
|
|
UErrorCode *status) {
|
|
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
return RegexCImpl::appendReplacement(
|
|
regexp, replacementText, replacementLength,destBuf, destCapacity, status);
|
|
}
|
|
|
|
//
|
|
// uregex_appendReplacementUText...can just use the normal C++ method
|
|
//
|
|
U_CAPI void U_EXPORT2
|
|
uregex_appendReplacementUText(URegularExpression *regexp2,
|
|
UText *replText,
|
|
UText *dest,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
regexp->fMatcher->appendReplacement(dest, replText, *status);
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_appendTail
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
int32_t RegexCImpl::appendTail(RegularExpression *regexp,
|
|
UChar **destBuf,
|
|
int32_t *destCapacity,
|
|
UErrorCode *status)
|
|
{
|
|
|
|
// If we come in with a buffer overflow error, don't suppress the operation.
|
|
// A series of appendReplacements, appendTail need to correctly preflight
|
|
// the buffer size when an overflow happens somewhere in the middle.
|
|
UBool pendingBufferOverflow = FALSE;
|
|
if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
|
|
pendingBufferOverflow = TRUE;
|
|
*status = U_ZERO_ERROR;
|
|
}
|
|
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
|
|
if (destCapacity == NULL || destBuf == NULL ||
|
|
(*destBuf == NULL && *destCapacity > 0) ||
|
|
*destCapacity < 0)
|
|
{
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
RegexMatcher *m = regexp->fMatcher;
|
|
|
|
int32_t destIdx = 0;
|
|
int32_t destCap = *destCapacity;
|
|
UChar *dest = *destBuf;
|
|
|
|
if (regexp->fText != NULL) {
|
|
int32_t srcIdx;
|
|
int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
|
|
if (nativeIdx == -1) {
|
|
srcIdx = 0;
|
|
} else if (UTEXT_USES_U16(m->fInputText)) {
|
|
srcIdx = (int32_t)nativeIdx;
|
|
} else {
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
|
|
}
|
|
|
|
for (;;) {
|
|
U_ASSERT(destIdx >= 0);
|
|
|
|
if (srcIdx == regexp->fTextLength) {
|
|
break;
|
|
}
|
|
UChar c = regexp->fText[srcIdx];
|
|
if (c == 0 && regexp->fTextLength == -1) {
|
|
regexp->fTextLength = srcIdx;
|
|
break;
|
|
}
|
|
|
|
if (destIdx < destCap) {
|
|
dest[destIdx] = c;
|
|
} else {
|
|
// We've overflowed the dest buffer.
|
|
// If the total input string length is known, we can
|
|
// compute the total buffer size needed without scanning through the string.
|
|
if (regexp->fTextLength > 0) {
|
|
destIdx += (regexp->fTextLength - srcIdx);
|
|
break;
|
|
}
|
|
}
|
|
srcIdx++;
|
|
destIdx++;
|
|
}
|
|
} else {
|
|
int64_t srcIdx;
|
|
if (m->fMatch) {
|
|
// The most recent call to find() succeeded.
|
|
srcIdx = m->fMatchEnd;
|
|
} else {
|
|
// The last call to find() on this matcher failed().
|
|
// Look back to the end of the last find() that succeeded for src index.
|
|
srcIdx = m->fLastMatchEnd;
|
|
if (srcIdx == -1) {
|
|
// There has been no successful match with this matcher.
|
|
// We want to copy the whole string.
|
|
srcIdx = 0;
|
|
}
|
|
}
|
|
|
|
destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
|
|
}
|
|
|
|
//
|
|
// NUL terminate the output string, if possible, otherwise issue the
|
|
// appropriate error or warning.
|
|
//
|
|
if (destIdx < destCap) {
|
|
dest[destIdx] = 0;
|
|
} else if (destIdx == destCap) {
|
|
*status = U_STRING_NOT_TERMINATED_WARNING;
|
|
} else {
|
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
|
|
//
|
|
// Update the user's buffer ptr and capacity vars to reflect the
|
|
// amount used.
|
|
//
|
|
if (destIdx < destCap) {
|
|
*destBuf += destIdx;
|
|
*destCapacity -= destIdx;
|
|
} else if (*destBuf != NULL) {
|
|
*destBuf += destCap;
|
|
*destCapacity = 0;
|
|
}
|
|
|
|
if (pendingBufferOverflow && U_SUCCESS(*status)) {
|
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
|
|
return destIdx;
|
|
}
|
|
|
|
|
|
//
|
|
// appendTail the actual API function
|
|
//
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_appendTail(URegularExpression *regexp2,
|
|
UChar **destBuf,
|
|
int32_t *destCapacity,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
|
|
}
|
|
|
|
|
|
//
|
|
// uregex_appendTailUText...can just use the normal C++ method
|
|
//
|
|
U_CAPI UText * U_EXPORT2
|
|
uregex_appendTailUText(URegularExpression *regexp2,
|
|
UText *dest,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
return regexp->fMatcher->appendTail(dest, *status);
|
|
}
|
|
|
|
|
|
//------------------------------------------------------------------------------
//
//    copyString     Internal utility to copy a string to an output buffer,
//                   while managing buffer overflow and preflight size
//                   computation.  NUL termination is added to destination,
//                   and the NUL is counted in the output size.
//
//                   Currently compiled out (#if 0).
//
//------------------------------------------------------------------------------
#if 0
static void copyString(UChar        *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const UChar  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t out = *destIndex;
    int32_t in  = 0;

    // Copy for as long as there is both source left and room in the destination.
    while (in < srcLen && out < destCapacity) {
        destBuffer[out++] = srcPtr[in++];
    }
    // Ran out of room: account for the source that could not be copied.
    if (in < srcLen) {
        out += srcLen - in;
    }

    // Terminate if it fits; the NUL is counted either way.
    if (out < destCapacity) {
        destBuffer[out] = 0;
    }
    *destIndex = out + 1;
}
#endif
|
|
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// uregex_split
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
int32_t RegexCImpl::split(RegularExpression *regexp,
|
|
UChar *destBuf,
|
|
int32_t destCapacity,
|
|
int32_t *requiredCapacity,
|
|
UChar *destFields[],
|
|
int32_t destFieldsCapacity,
|
|
UErrorCode *status) {
|
|
//
|
|
// Reset for the input text
|
|
//
|
|
regexp->fMatcher->reset();
|
|
UText *inputText = regexp->fMatcher->fInputText;
|
|
int64_t nextOutputStringStart = 0;
|
|
int64_t inputLen = regexp->fMatcher->fInputLength;
|
|
if (inputLen == 0) {
|
|
return 0;
|
|
}
|
|
|
|
//
|
|
// Loop through the input text, searching for the delimiter pattern
|
|
//
|
|
int32_t i; // Index of the field being processed.
|
|
int32_t destIdx = 0; // Next available position in destBuf;
|
|
int32_t numCaptureGroups = regexp->fMatcher->groupCount();
|
|
UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted
|
|
for (i=0; ; i++) {
|
|
if (i>=destFieldsCapacity-1) {
|
|
// There are one or zero output strings left.
|
|
// Fill the last output string with whatever is left from the input, then exit the loop.
|
|
// ( i will be == destFieldsCapacity if we filled the output array while processing
|
|
// capture groups of the delimiter expression, in which case we will discard the
|
|
// last capture group saved in favor of the unprocessed remainder of the
|
|
// input string.)
|
|
if (inputLen > nextOutputStringStart) {
|
|
if (i != destFieldsCapacity-1) {
|
|
// No fields are left. Recycle the last one for holding the trailing part of
|
|
// the input string.
|
|
i = destFieldsCapacity-1;
|
|
destIdx = (int32_t)(destFields[i] - destFields[0]);
|
|
}
|
|
|
|
destFields[i] = &destBuf[destIdx];
|
|
destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
|
|
&destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (regexp->fMatcher->find()) {
|
|
// We found another delimiter. Move everything from where we started looking
|
|
// up until the start of the delimiter into the next output string.
|
|
destFields[i] = &destBuf[destIdx];
|
|
|
|
destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
|
|
&destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
|
|
if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
|
|
tStatus = U_ZERO_ERROR;
|
|
} else {
|
|
*status = tStatus;
|
|
}
|
|
nextOutputStringStart = regexp->fMatcher->fMatchEnd;
|
|
|
|
// If the delimiter pattern has capturing parentheses, the captured
|
|
// text goes out into the next n destination strings.
|
|
int32_t groupNum;
|
|
for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
|
|
// If we've run out of output string slots, bail out.
|
|
if (i==destFieldsCapacity-1) {
|
|
break;
|
|
}
|
|
i++;
|
|
|
|
// Set up to extract the capture group contents into the dest buffer.
|
|
destFields[i] = &destBuf[destIdx];
|
|
tStatus = U_ZERO_ERROR;
|
|
int32_t t = uregex_group((URegularExpression*)regexp,
|
|
groupNum,
|
|
destFields[i],
|
|
REMAINING_CAPACITY(destIdx, destCapacity),
|
|
&tStatus);
|
|
destIdx += t + 1; // Record the space used in the output string buffer.
|
|
// +1 for the NUL that terminates the string.
|
|
if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
|
|
tStatus = U_ZERO_ERROR;
|
|
} else {
|
|
*status = tStatus;
|
|
}
|
|
}
|
|
|
|
if (nextOutputStringStart == inputLen) {
|
|
// The delimiter was at the end of the string.
|
|
// Output an empty string, and then we are done.
|
|
if (destIdx < destCapacity) {
|
|
destBuf[destIdx] = 0;
|
|
}
|
|
if (i < destFieldsCapacity-1) {
|
|
++i;
|
|
}
|
|
if (destIdx < destCapacity) {
|
|
destFields[i] = destBuf + destIdx;
|
|
}
|
|
++destIdx;
|
|
break;
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
// We ran off the end of the input while looking for the next delimiter.
|
|
// All the remaining text goes into the current output string.
|
|
destFields[i] = &destBuf[destIdx];
|
|
destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
|
|
&destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Zero out any unused portion of the destFields array
|
|
int j;
|
|
for (j=i+1; j<destFieldsCapacity; j++) {
|
|
destFields[j] = NULL;
|
|
}
|
|
|
|
if (requiredCapacity != NULL) {
|
|
*requiredCapacity = destIdx;
|
|
}
|
|
if (destIdx > destCapacity) {
|
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
}
|
|
return i+1;
|
|
}
|
|
|
|
//
|
|
// uregex_split The actual API function
|
|
//
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_split(URegularExpression *regexp2,
|
|
UChar *destBuf,
|
|
int32_t destCapacity,
|
|
int32_t *requiredCapacity,
|
|
UChar *destFields[],
|
|
int32_t destFieldsCapacity,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
if (validateRE(regexp, TRUE, status) == FALSE) {
|
|
return 0;
|
|
}
|
|
if ((destBuf == NULL && destCapacity > 0) ||
|
|
destCapacity < 0 ||
|
|
destFields == NULL ||
|
|
destFieldsCapacity < 1 ) {
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
return 0;
|
|
}
|
|
|
|
return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
|
|
}
|
|
|
|
|
|
//
|
|
// uregex_splitUText...can just use the normal C++ method
|
|
//
|
|
U_CAPI int32_t U_EXPORT2
|
|
uregex_splitUText(URegularExpression *regexp2,
|
|
UText *destFields[],
|
|
int32_t destFieldsCapacity,
|
|
UErrorCode *status) {
|
|
RegularExpression *regexp = (RegularExpression*)regexp2;
|
|
return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
|
|
}
|
|
|
|
|
|
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
|
|
|