gecko-dev/xpcom/io/nsLinebreakConverter.cpp

487 lines
13 KiB
C++
Raw Normal View History

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
2012-05-21 12:12:37 +01:00
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
1999-12-01 22:21:30 +00:00
#include "nsLinebreakConverter.h"
#include "nsMemory.h"
1999-12-01 22:21:30 +00:00
#include "nsCRT.h"
/*----------------------------------------------------------------------------
GetLinebreakString
1999-12-01 22:21:30 +00:00
Could make this inline
----------------------------------------------------------------------------*/
// Returns the literal character sequence used for the given linebreak type.
//
// The table is indexed directly by the numeric value of aBreakType, so the
// entries must stay in the same order as the enumerators.
// NOTE(review): the enum is declared in nsLinebreakConverter.h, which is not
// visible here -- confirm the order matches. No range check is performed on
// aBreakType; the trailing nullptr entry is what an index one past the last
// real type would yield.
static const char*
GetLinebreakString(nsLinebreakConverter::ELinebreakType aBreakType)
{
  static const char* const sLinebreaks[] = {
    "",             // any
    NS_LINEBREAK,   // platform
    LFSTR,          // content
    CRLF,           // net
    CRSTR,          // Mac
    LFSTR,          // Unix
    CRLF,           // Windows
    " ",            // space
    nullptr
  };

  return sLinebreaks[aBreakType];
}
/*----------------------------------------------------------------------------
AppendLinebreak
1999-12-01 22:21:30 +00:00
Wee inline method to append a line break. Modifies ioDest.
----------------------------------------------------------------------------*/
// Appends every character of aLineBreakStr to the output position aIoDest and
// leaves aIoDest pointing just past the last character written.
//
// An empty break string writes nothing. This keeps the number of characters
// emitted equal to strlen(aLineBreakStr), which is what callers use when they
// size the destination buffer.
template<class T>
void
AppendLinebreak(T*& aIoDest, const char* aLineBreakStr)
{
  for (const char* cur = aLineBreakStr; *cur; ++cur) {
    *aIoDest++ = *cur;
  }
}
/*----------------------------------------------------------------------------
CountLinebreaks
1999-12-01 22:21:30 +00:00
Counts occurrences of breakStr in aSrc
----------------------------------------------------------------------------*/
// Counts how many times the linebreak aBreakStr occurs in the first aInLen
// elements of aSrc.
//
// aBreakStr is treated as one or two characters long. For a two-character
// break, a first character that is not immediately followed by the second one
// is skipped without being counted.
template<class T>
int32_t
CountLinebreaks(const T* aSrc, int32_t aInLen, const char* aBreakStr)
{
  const T* cur = aSrc;
  const T* const end = aSrc + aInLen;
  const bool twoCharBreak = aBreakStr[1] != '\0';
  int32_t found = 0;

  while (cur < end) {
    const bool firstMatches = (*cur == *aBreakStr);
    cur++;
    if (!firstMatches) {
      continue;
    }

    if (!twoCharBreak) {
      found++;
    } else if (cur < end && *cur == aBreakStr[1]) {
      // Consume the second character of the pair as well.
      cur++;
      found++;
    }
  }

  return found;
}
/*----------------------------------------------------------------------------
ConvertBreaks
1999-12-01 22:21:30 +00:00
ioLen *includes* a terminating null, if any
----------------------------------------------------------------------------*/
template<class T>
static T*
ConvertBreaks(const T* aInSrc, int32_t& aIoLen, const char* aSrcBreak,
const char* aDestBreak)
1999-12-01 22:21:30 +00:00
{
NS_ASSERTION(aInSrc && aSrcBreak && aDestBreak, "Got a null string");
T* resultString = nullptr;
1999-12-01 22:21:30 +00:00
// handle the no conversion case
if (nsCRT::strcmp(aSrcBreak, aDestBreak) == 0) {
resultString = (T*)nsMemory::Alloc(sizeof(T) * aIoLen);
if (!resultString) {
return nullptr;
}
memcpy(resultString, aInSrc, sizeof(T) * aIoLen); // includes the null, if any
1999-12-01 22:21:30 +00:00
return resultString;
}
int32_t srcBreakLen = strlen(aSrcBreak);
int32_t destBreakLen = strlen(aDestBreak);
1999-12-01 22:21:30 +00:00
// handle the easy case, where the string length does not change, and the
// breaks are only 1 char long, i.e. CR <-> LF
if (srcBreakLen == destBreakLen && srcBreakLen == 1) {
resultString = (T*)nsMemory::Alloc(sizeof(T) * aIoLen);
if (!resultString) {
return nullptr;
}
const T* src = aInSrc;
const T* srcEnd = aInSrc + aIoLen; // includes null, if any
T* dst = resultString;
char srcBreakChar = *aSrcBreak; // we know it's one char long already
char dstBreakChar = *aDestBreak;
while (src < srcEnd) {
if (*src == srcBreakChar) {
1999-12-01 22:21:30 +00:00
*dst++ = dstBreakChar;
src++;
} else {
1999-12-01 22:21:30 +00:00
*dst++ = *src++;
}
}
// aIoLen does not change
} else {
1999-12-01 22:21:30 +00:00
// src and dest termination is different length. Do it a slower way.
1999-12-01 22:21:30 +00:00
// count linebreaks in src. Assumes that chars in 2-char linebreaks are unique.
int32_t numLinebreaks = CountLinebreaks(aInSrc, aIoLen, aSrcBreak);
int32_t newBufLen =
aIoLen - (numLinebreaks * srcBreakLen) + (numLinebreaks * destBreakLen);
resultString = (T*)nsMemory::Alloc(sizeof(T) * newBufLen);
if (!resultString) {
return nullptr;
}
const T* src = aInSrc;
const T* srcEnd = aInSrc + aIoLen; // includes null, if any
T* dst = resultString;
while (src < srcEnd) {
if (*src == *aSrcBreak) {
*dst++ = *aDestBreak;
if (aDestBreak[1]) {
*dst++ = aDestBreak[1];
}
src++;
if (src < srcEnd && aSrcBreak[1] && *src == aSrcBreak[1]) {
src++;
}
} else {
1999-12-01 22:21:30 +00:00
*dst++ = *src++;
}
}
aIoLen = newBufLen;
1999-12-01 22:21:30 +00:00
}
1999-12-01 22:21:30 +00:00
return resultString;
}
/*----------------------------------------------------------------------------
ConvertBreaksInSitu
1999-12-01 22:21:30 +00:00
Convert breaks in situ. Can only do this if the linebreak length
does not change.
----------------------------------------------------------------------------*/
// Replaces, in place, every element of aInSrc[0 .. aInLen) that equals
// aSrcBreak with aDestBreak. Only usable for single-character linebreaks,
// since the buffer length never changes.
template<class T>
static void
ConvertBreaksInSitu(T* aInSrc, int32_t aInLen, char aSrcBreak, char aDestBreak)
{
  T* const end = aInSrc + aInLen;

  for (T* cur = aInSrc; cur < end; ++cur) {
    if (*cur == aSrcBreak) {
      *cur = aDestBreak;
    }
  }
}
/*----------------------------------------------------------------------------
ConvertUnknownBreaks
1999-12-01 22:21:30 +00:00
Convert unknown line breaks to the specified break.
1999-12-01 22:21:30 +00:00
This will convert CRLF pairs to one break, and single CR or LF to a break.
----------------------------------------------------------------------------*/
template<class T>
static T*
ConvertUnknownBreaks(const T* aInSrc, int32_t& aIoLen, const char* aDestBreak)
1999-12-01 22:21:30 +00:00
{
const T* src = aInSrc;
const T* srcEnd = aInSrc + aIoLen; // includes null, if any
int32_t destBreakLen = strlen(aDestBreak);
int32_t finalLen = 0;
1999-12-01 22:21:30 +00:00
while (src < srcEnd) {
if (*src == nsCRT::CR) {
if (src < srcEnd && src[1] == nsCRT::LF) {
1999-12-01 22:21:30 +00:00
// CRLF
finalLen += destBreakLen;
src++;
} else {
1999-12-01 22:21:30 +00:00
// Lone CR
finalLen += destBreakLen;
}
} else if (*src == nsCRT::LF) {
1999-12-01 22:21:30 +00:00
// Lone LF
finalLen += destBreakLen;
} else {
finalLen++;
1999-12-01 22:21:30 +00:00
}
src++;
1999-12-01 22:21:30 +00:00
}
T* resultString = (T*)nsMemory::Alloc(sizeof(T) * finalLen);
if (!resultString) {
return nullptr;
}
1999-12-01 22:21:30 +00:00
src = aInSrc;
srcEnd = aInSrc + aIoLen; // includes null, if any
1999-12-01 22:21:30 +00:00
T* dst = resultString;
while (src < srcEnd) {
if (*src == nsCRT::CR) {
if (src < srcEnd && src[1] == nsCRT::LF) {
1999-12-01 22:21:30 +00:00
// CRLF
AppendLinebreak(dst, aDestBreak);
src++;
} else {
1999-12-01 22:21:30 +00:00
// Lone CR
AppendLinebreak(dst, aDestBreak);
1999-12-01 22:21:30 +00:00
}
} else if (*src == nsCRT::LF) {
1999-12-01 22:21:30 +00:00
// Lone LF
AppendLinebreak(dst, aDestBreak);
} else {
1999-12-01 22:21:30 +00:00
*dst++ = *src;
}
src++;
1999-12-01 22:21:30 +00:00
}
aIoLen = finalLen;
1999-12-01 22:21:30 +00:00
return resultString;
}
/*----------------------------------------------------------------------------
ConvertLineBreaks
1999-12-01 22:21:30 +00:00
----------------------------------------------------------------------------*/
char*
nsLinebreakConverter::ConvertLineBreaks(const char* aSrc,
ELinebreakType aSrcBreaks,
ELinebreakType aDestBreaks,
int32_t aSrcLen, int32_t* aOutLen)
1999-12-01 22:21:30 +00:00
{
NS_ASSERTION(aDestBreaks != eLinebreakAny &&
aSrcBreaks != eLinebreakSpace, "Invalid parameter");
if (!aSrc) {
return nullptr;
}
int32_t sourceLen = (aSrcLen == kIgnoreLen) ? strlen(aSrc) + 1 : aSrcLen;
1999-12-01 22:21:30 +00:00
char* resultString;
if (aSrcBreaks == eLinebreakAny) {
resultString = ConvertUnknownBreaks(aSrc, sourceLen,
GetLinebreakString(aDestBreaks));
} else
resultString = ConvertBreaks(aSrc, sourceLen,
GetLinebreakString(aSrcBreaks),
GetLinebreakString(aDestBreaks));
if (aOutLen) {
*aOutLen = sourceLen;
}
1999-12-01 22:21:30 +00:00
return resultString;
}
/*----------------------------------------------------------------------------
ConvertLineBreaksInSitu
1999-12-01 22:21:30 +00:00
----------------------------------------------------------------------------*/
// Converts the linebreaks of the char buffer *aIoBuffer, in place when
// possible.
//
// When the source type is known and both the source and destination breaks
// are a single character, the buffer is rewritten in place. Otherwise a new
// buffer is allocated and *aIoBuffer is pointed at it.
// NOTE(review): the previous buffer is not freed here; presumably the caller
// keeps ownership of it -- confirm against the header's contract.
//
// aIoBuffer   in/out pointer to the buffer; must be non-null and point to a
//             non-null buffer.
// aSrcBreaks  linebreak type present in the buffer (eLinebreakAny allowed).
// aDestBreaks linebreak type to produce.
// aSrcLen     number of chars in the buffer, or kIgnoreLen to treat it as
//             null-terminated (the terminator is then included).
// aOutLen     if non-null, receives the resulting length.
//
// Returns NS_ERROR_NULL_POINTER for a null buffer, NS_ERROR_OUT_OF_MEMORY if
// a needed allocation fails (the buffer is then left untouched), else NS_OK.
nsresult
nsLinebreakConverter::ConvertLineBreaksInSitu(char** aIoBuffer,
                                              ELinebreakType aSrcBreaks,
                                              ELinebreakType aDestBreaks,
                                              int32_t aSrcLen, int32_t* aOutLen)
{
  NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed");
  if (!aIoBuffer || !*aIoBuffer) {
    return NS_ERROR_NULL_POINTER;
  }

  NS_ASSERTION(aDestBreaks != eLinebreakAny &&
               aSrcBreaks != eLinebreakSpace, "Invalid parameter");

  int32_t sourceLen =
    (aSrcLen == kIgnoreLen) ? strlen(*aIoBuffer) + 1 : aSrcLen;

  // can we convert in-place?
  const char* srcBreaks = GetLinebreakString(aSrcBreaks);
  const char* dstBreaks = GetLinebreakString(aDestBreaks);

  if (aSrcBreaks != eLinebreakAny &&
      strlen(srcBreaks) == 1 &&
      strlen(dstBreaks) == 1) {
    ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks);
    if (aOutLen) {
      *aOutLen = sourceLen;
    }
  } else {
    char* destBuffer;

    if (aSrcBreaks == eLinebreakAny) {
      destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks);
    } else {
      destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks);
    }

    if (!destBuffer) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    *aIoBuffer = destBuffer;
    if (aOutLen) {
      *aOutLen = sourceLen;
    }
  }

  return NS_OK;
}
/*----------------------------------------------------------------------------
ConvertUnicharLineBreaks
1999-12-01 22:21:30 +00:00
----------------------------------------------------------------------------*/
char16_t*
nsLinebreakConverter::ConvertUnicharLineBreaks(const char16_t* aSrc,
ELinebreakType aSrcBreaks,
ELinebreakType aDestBreaks,
int32_t aSrcLen,
int32_t* aOutLen)
1999-12-01 22:21:30 +00:00
{
NS_ASSERTION(aDestBreaks != eLinebreakAny &&
aSrcBreaks != eLinebreakSpace, "Invalid parameter");
if (!aSrc) {
return nullptr;
}
int32_t bufLen = (aSrcLen == kIgnoreLen) ? NS_strlen(aSrc) + 1 : aSrcLen;
1999-12-01 22:21:30 +00:00
char16_t* resultString;
if (aSrcBreaks == eLinebreakAny) {
resultString = ConvertUnknownBreaks(aSrc, bufLen,
GetLinebreakString(aDestBreaks));
} else
resultString = ConvertBreaks(aSrc, bufLen, GetLinebreakString(aSrcBreaks),
GetLinebreakString(aDestBreaks));
if (aOutLen) {
*aOutLen = bufLen;
}
1999-12-01 22:21:30 +00:00
return resultString;
}
/*----------------------------------------------------------------------------
ConvertUnicharLineBreaksInSitu
1999-12-01 22:21:30 +00:00
----------------------------------------------------------------------------*/
// Converts the linebreaks of the char16_t buffer *aIoBuffer, in place when
// possible.
//
// When the source type is known and both the source and destination breaks
// are a single character, the buffer is rewritten in place. Otherwise a new
// buffer is allocated and *aIoBuffer is pointed at it.
// NOTE(review): the previous buffer is not freed here; presumably the caller
// keeps ownership of it -- confirm against the header's contract.
//
// aIoBuffer   in/out pointer to the buffer; must be non-null and point to a
//             non-null buffer.
// aSrcBreaks  linebreak type present in the buffer (eLinebreakAny allowed).
// aDestBreaks linebreak type to produce.
// aSrcLen     number of char16_t units in the buffer, or kIgnoreLen to treat
//             it as null-terminated (the terminator is then included).
// aOutLen     if non-null, receives the resulting length.
//
// Returns NS_ERROR_NULL_POINTER for a null buffer, NS_ERROR_OUT_OF_MEMORY if
// a needed allocation fails (the buffer is then left untouched), else NS_OK.
nsresult
nsLinebreakConverter::ConvertUnicharLineBreaksInSitu(
    char16_t** aIoBuffer, ELinebreakType aSrcBreaks, ELinebreakType aDestBreaks,
    int32_t aSrcLen, int32_t* aOutLen)
{
  NS_ASSERTION(aIoBuffer && *aIoBuffer, "Null pointer passed");
  if (!aIoBuffer || !*aIoBuffer) {
    return NS_ERROR_NULL_POINTER;
  }
  NS_ASSERTION(aDestBreaks != eLinebreakAny &&
               aSrcBreaks != eLinebreakSpace, "Invalid parameter");

  int32_t sourceLen =
    (aSrcLen == kIgnoreLen) ? NS_strlen(*aIoBuffer) + 1 : aSrcLen;

  // can we convert in-place?
  const char* srcBreaks = GetLinebreakString(aSrcBreaks);
  const char* dstBreaks = GetLinebreakString(aDestBreaks);

  if ((aSrcBreaks != eLinebreakAny) &&
      (strlen(srcBreaks) == 1) &&
      (strlen(dstBreaks) == 1)) {
    ConvertBreaksInSitu(*aIoBuffer, sourceLen, *srcBreaks, *dstBreaks);
    if (aOutLen) {
      *aOutLen = sourceLen;
    }
  } else {
    char16_t* destBuffer;

    if (aSrcBreaks == eLinebreakAny) {
      destBuffer = ConvertUnknownBreaks(*aIoBuffer, sourceLen, dstBreaks);
    } else {
      destBuffer = ConvertBreaks(*aIoBuffer, sourceLen, srcBreaks, dstBreaks);
    }

    if (!destBuffer) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    *aIoBuffer = destBuffer;
    if (aOutLen) {
      *aOutLen = sourceLen;
    }
  }

  return NS_OK;
}
/*----------------------------------------------------------------------------
ConvertStringLineBreaks
1999-12-01 22:21:30 +00:00
----------------------------------------------------------------------------*/
// Converts the linebreaks of aIoString from aSrcBreaks to aDestBreaks,
// modifying the string itself.
//
// The string's own buffer is converted in place when possible. If the
// conversion had to allocate a new buffer, the string adopts it.
//
// Returns NS_OK on success (including for an empty string, which is left
// untouched), or the failure code from ConvertUnicharLineBreaksInSitu.
nsresult
nsLinebreakConverter::ConvertStringLineBreaks(nsString& aIoString,
                                              ELinebreakType aSrcBreaks,
                                              ELinebreakType aDestBreaks)
{
  NS_ASSERTION(aDestBreaks != eLinebreakAny &&
               aSrcBreaks != eLinebreakSpace, "Invalid parameter");

  // nothing to do
  if (aIoString.IsEmpty()) {
    return NS_OK;
  }

  // remember the old buffer in case
  // we blow it away later
  nsString::char_iterator stringBuf;
  aIoString.BeginWriting(stringBuf);

  // Length() + 1 so that the terminating null is carried through the
  // conversion; newLen therefore also counts that terminator.
  int32_t newLen;
  nsresult rv = ConvertUnicharLineBreaksInSitu(&stringBuf,
                                               aSrcBreaks, aDestBreaks,
                                               aIoString.Length() + 1,
                                               &newLen);
  if (NS_FAILED(rv)) {
    return rv;
  }

  if (stringBuf != aIoString.get()) {
    // Hand over the explicit length (without the terminator) rather than
    // letting Adopt scan for a null, so that a string containing embedded
    // nulls is not truncated.
    aIoString.Adopt(stringBuf, newLen - 1);
  }

  return NS_OK;
}