2000-08-05 00:51:37 +00:00
|
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
|
|
/*
|
|
|
|
* The contents of this file are subject to the Mozilla Public
|
|
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
|
|
* except in compliance with the License. You may obtain a copy of
|
|
|
|
* the License at http://www.mozilla.org/MPL/
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed on an "AS
|
|
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
|
|
* implied. See the License for the specific language governing
|
|
|
|
* rights and limitations under the License.
|
|
|
|
*
|
|
|
|
* The Original Code is mozilla.org code.
|
|
|
|
*
|
|
|
|
* The Initial Developer of the Original Code is Netscape
|
|
|
|
* Communications Corporation. Portions created by Netscape are
|
|
|
|
* Copyright (C) 2000 Netscape Communications Corporation. All
|
|
|
|
* Rights Reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s):
|
2000-09-02 04:10:44 +00:00
|
|
|
* Scott Collins <scc@mozilla.org> (original author)
|
2000-08-05 00:51:37 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include "nsReadableUtils.h"
|
2000-08-05 03:32:36 +00:00
|
|
|
#include "nsMemory.h"
|
2000-08-23 17:27:06 +00:00
|
|
|
#include "nsString.h"
|
2000-09-09 07:30:55 +00:00
|
|
|
#include "nsCRT.h"
|
2000-08-05 00:51:37 +00:00
|
|
|
|
2001-04-02 19:40:52 +00:00
|
|
|
#ifndef nsStringTraits_h___
|
|
|
|
#include "nsStringTraits.h"
|
|
|
|
#endif
|
2000-08-05 01:15:45 +00:00
|
|
|
|
2001-03-25 01:14:26 +00:00
|
|
|
/**
|
|
|
|
* this allocator definition, and the global functions to access it need to move
|
|
|
|
* to their own file
|
|
|
|
*/
|
|
|
|
|
|
|
|
template <class CharT>
|
|
|
|
class XPCOM_StringAllocator
|
|
|
|
: public nsStringAllocator<CharT>
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
virtual void Deallocate( CharT* ) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <class CharT>
|
|
|
|
void
|
|
|
|
XPCOM_StringAllocator<CharT>::Deallocate( CharT* aBuffer ) const
|
|
|
|
{
|
|
|
|
nsMemory::Free(aBuffer);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Accessor for the shared |char| string allocator.
 *
 * @return a reference to a function-local static |XPCOM_StringAllocator<char>|
 */
NS_COM
nsStringAllocator<char>&
StringAllocator_char()
  {
    static XPCOM_StringAllocator<char> sAllocator;
    return sAllocator;
  }
|
|
|
|
|
|
|
|
/**
 * Accessor for the shared |PRUnichar| string allocator.
 *
 * @return a reference to a function-local static |XPCOM_StringAllocator<PRUnichar>|
 */
NS_COM
nsStringAllocator<PRUnichar>&
StringAllocator_wchar_t()
  {
    static XPCOM_StringAllocator<PRUnichar> sAllocator;
    return sAllocator;
  }
|
|
|
|
|
2000-10-05 01:07:02 +00:00
|
|
|
template <class CharT> class CalculateLength
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef CharT value_type;
|
|
|
|
|
|
|
|
CalculateLength() : mDistance(0) { }
|
|
|
|
size_t GetDistance() const { return mDistance; }
|
|
|
|
|
|
|
|
PRUint32 write( const CharT*, PRUint32 N )
|
|
|
|
{ mDistance += N; return N; }
|
|
|
|
private:
|
|
|
|
size_t mDistance;
|
|
|
|
};
|
|
|
|
|
|
|
|
template <class CharT>
|
|
|
|
inline
|
|
|
|
size_t
|
|
|
|
Distance_Impl( const nsReadingIterator<CharT>& aStart,
|
|
|
|
const nsReadingIterator<CharT>& aEnd )
|
|
|
|
{
|
|
|
|
CalculateLength<CharT> sink;
|
|
|
|
nsReadingIterator<CharT> fromBegin(aStart);
|
|
|
|
copy_string(fromBegin, aEnd, sink);
|
|
|
|
return sink.GetDistance();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Number of |PRUnichar| characters between |aStart| and |aEnd|.
 * Forwards to |Distance_Impl|.
 */
NS_COM
size_t
Distance( const nsReadingIterator<PRUnichar>&aStart, const nsReadingIterator<PRUnichar>&aEnd )
  {
    return Distance_Impl<PRUnichar>(aStart, aEnd);
  }
|
|
|
|
|
|
|
|
/**
 * Number of |char| characters between |aStart| and |aEnd|.
 * Forwards to |Distance_Impl|.
 */
NS_COM
size_t
Distance( const nsReadingIterator<char>&aStart, const nsReadingIterator<char>&aEnd )
  {
    return Distance_Impl<char>(aStart, aEnd);
  }
|
2000-08-05 01:15:45 +00:00
|
|
|
|
2000-08-05 04:25:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* A character sink that performs a |reinterpret_cast| style conversion between character types.
|
|
|
|
*/
|
2000-08-05 03:32:36 +00:00
|
|
|
template <class FromCharT, class ToCharT>
|
|
|
|
class LossyConvertEncoding
|
2000-08-05 02:13:59 +00:00
|
|
|
{
|
|
|
|
public:
|
2000-08-05 03:32:36 +00:00
|
|
|
typedef FromCharT value_type;
|
|
|
|
|
|
|
|
typedef FromCharT input_type;
|
|
|
|
typedef ToCharT output_type;
|
2000-08-05 02:13:59 +00:00
|
|
|
|
2001-10-02 11:15:45 +00:00
|
|
|
typedef typename nsCharTraits<FromCharT>::unsigned_char_type unsigned_input_type;
|
2001-10-02 10:18:31 +00:00
|
|
|
|
2000-08-05 02:13:59 +00:00
|
|
|
public:
|
2000-08-05 03:32:36 +00:00
|
|
|
LossyConvertEncoding( output_type* aDestination ) : mDestination(aDestination) { }
|
2000-08-05 02:13:59 +00:00
|
|
|
|
|
|
|
PRUint32
|
|
|
|
write( const input_type* aSource, PRUint32 aSourceLength )
|
|
|
|
{
|
|
|
|
const input_type* done_writing = aSource + aSourceLength;
|
|
|
|
while ( aSource < done_writing )
|
2001-10-02 10:18:31 +00:00
|
|
|
*mDestination++ = (output_type)(unsigned_input_type)(*aSource++); // use old-style cast to mimic old |ns[C]String| behavior
|
2000-08-05 03:32:36 +00:00
|
|
|
return aSourceLength;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
write_terminator()
|
|
|
|
{
|
|
|
|
*mDestination = output_type(0);
|
2000-08-05 02:13:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2000-08-05 03:32:36 +00:00
|
|
|
output_type* mDestination;
|
2000-08-05 02:13:59 +00:00
|
|
|
};
|
|
|
|
|
2000-08-05 04:25:49 +00:00
|
|
|
|
2000-08-23 17:27:06 +00:00
|
|
|
/**
 * Replaces the contents of |aDest| with a lossy, cast-based narrowing of
 * |aSource| (see |LossyConvertEncoding|).
 *
 * @param aSource the wide string to convert
 * @param aDest   receives the converted characters; resized to |aSource.Length()|
 */
NS_COM
void
CopyUCS2toASCII( const nsAString& aSource, nsACString& aDest )
  {
      // right now, this won't work on multi-fragment destinations
    aDest.SetLength(aSource.Length());

    nsACString::iterator writeCursor;
    aDest.BeginWriting(writeCursor);
    LossyConvertEncoding<PRUnichar, char> narrower(writeCursor.get());

    nsAString::const_iterator readCursor, readEnd;
    aSource.BeginReading(readCursor);
    aSource.EndReading(readEnd);
    copy_string(readCursor, readEnd, narrower);
  }
|
|
|
|
|
|
|
|
/**
 * Replaces the contents of |aDest| with a cast-based widening of |aSource|
 * (see |LossyConvertEncoding|).
 *
 * @param aSource the narrow string to convert
 * @param aDest   receives the converted characters; resized to |aSource.Length()|
 */
NS_COM
void
CopyASCIItoUCS2( const nsACString& aSource, nsAString& aDest )
  {
      // right now, this won't work on multi-fragment destinations
    aDest.SetLength(aSource.Length());

    nsAString::iterator writeCursor;
    aDest.BeginWriting(writeCursor);
    LossyConvertEncoding<char, PRUnichar> widener(writeCursor.get());

    nsACString::const_iterator readCursor, readEnd;
    aSource.BeginReading(readCursor);
    aSource.EndReading(readEnd);
    copy_string(readCursor, readEnd, widener);
  }
|
|
|
|
|
2000-08-05 04:25:49 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
|
|
|
|
*
|
|
|
|
* @param aSource an string you will eventually be making a copy of
|
|
|
|
* @return a new buffer (of the type specified by the second parameter) which you must free with |nsMemory::Free|.
|
|
|
|
*
|
|
|
|
*/
|
2001-04-02 19:40:52 +00:00
|
|
|
template <class FromStringT, class ToCharT>
|
2000-08-05 03:32:36 +00:00
|
|
|
inline
|
|
|
|
ToCharT*
|
2001-04-02 19:40:52 +00:00
|
|
|
AllocateStringCopy( const FromStringT& aSource, ToCharT* )
|
2000-08-05 03:32:36 +00:00
|
|
|
{
|
|
|
|
return NS_STATIC_CAST(ToCharT*, nsMemory::Alloc((aSource.Length()+1) * sizeof(ToCharT)));
|
|
|
|
}
|
2000-08-05 02:13:59 +00:00
|
|
|
|
2000-08-05 01:15:45 +00:00
|
|
|
|
2000-08-05 07:44:08 +00:00
|
|
|
/**
 * Returns a new zero-terminated |char| buffer holding a lossy, cast-based
 * narrowing of |aSource| (see |LossyConvertEncoding|).
 *
 * @param aSource the wide string to copy
 * @return a buffer obtained from |AllocateStringCopy|, which the caller must
 *         release with |nsMemory::Free|; or 0 if the allocation failed
 */
NS_COM
char*
ToNewCString( const nsAString& aSource )
  {
    char* result = AllocateStringCopy(aSource, (char*)0);

      // Without this check an allocation failure would make the sink below
      // write through a null pointer.
    if ( !result )
      return 0;

    nsAString::const_iterator fromBegin, fromEnd;
    LossyConvertEncoding<PRUnichar, char> converter(result);
    copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
    return result;
  }
|
|
|
|
|
2000-08-23 17:27:06 +00:00
|
|
|
/**
 * Returns a new |char| buffer holding |aSource| converted by
 * |NS_ConvertUCS2toUTF8|.
 *
 * @param aSource the wide string to convert
 * @return either the conversion object's own buffer (taken over from it), or
 *         a copy of it made with |ToNewCString|
 */
NS_COM
char*
ToNewUTF8String( const nsAString& aSource )
  {
    NS_ConvertUCS2toUTF8 utf8(aSource);

      // The conversion did not allocate a buffer of its own, so the text has to
      // be copied out of the |nsCAutoString|'s storage.
    if ( !utf8.GetOwnsBuffer() )
      return ToNewCString(utf8);

      // The conversion allocated. Trick the string into not freeing its buffer,
      // which avoids an extra allocation: take the pointer, then make the string
      // forget about it.
    char* adopted = utf8.mStr;
    utf8.mStr = 0;
    utf8.SetOwnsBuffer(PR_FALSE);
    return adopted;
  }
|
|
|
|
|
2000-08-05 07:44:08 +00:00
|
|
|
/**
 * Returns a new zero-terminated |char| buffer holding a copy of |aSource|.
 * No conversion is needed; a buffer of the correct length is allocated and
 * the characters are copied into it.
 *
 * @param aSource the narrow string to copy
 * @return a buffer obtained from |AllocateStringCopy|, which the caller must
 *         release with |nsMemory::Free|; or 0 if the allocation failed
 */
NS_COM
char*
ToNewCString( const nsACString& aSource )
  {
    char* result = AllocateStringCopy(aSource, (char*)0);

      // Without this check an allocation failure would make |copy_string|
      // write through a null pointer.
    if ( !result )
      return 0;

    nsACString::const_iterator fromBegin, fromEnd;
    char* toBegin = result;
      // |copy_string| leaves |toBegin| just past the last character written
    *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = char(0);
    return result;
  }
|
|
|
|
|
2000-08-05 07:44:08 +00:00
|
|
|
/**
 * Returns a new zero-terminated |PRUnichar| buffer holding a copy of |aSource|.
 * No conversion is needed; a buffer of the correct length is allocated and
 * the characters are copied into it.
 *
 * @param aSource the wide string to copy
 * @return a buffer obtained from |AllocateStringCopy|, which the caller must
 *         release with |nsMemory::Free|; or 0 if the allocation failed
 */
NS_COM
PRUnichar*
ToNewUnicode( const nsAString& aSource )
  {
    PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);

      // Without this check an allocation failure would make |copy_string|
      // write through a null pointer.
    if ( !result )
      return 0;

    nsAString::const_iterator fromBegin, fromEnd;
    PRUnichar* toBegin = result;
      // |copy_string| leaves |toBegin| just past the last character written
    *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), toBegin) = PRUnichar(0);
    return result;
  }
|
|
|
|
|
2000-08-05 07:44:08 +00:00
|
|
|
/**
 * Returns a new zero-terminated |PRUnichar| buffer holding a cast-based
 * widening of |aSource| (see |LossyConvertEncoding|).
 *
 * @param aSource the narrow string to copy
 * @return a buffer obtained from |AllocateStringCopy|, which the caller must
 *         release with |nsMemory::Free|; or 0 if the allocation failed
 */
NS_COM
PRUnichar*
ToNewUnicode( const nsACString& aSource )
  {
    PRUnichar* result = AllocateStringCopy(aSource, (PRUnichar*)0);

      // Without this check an allocation failure would make the sink below
      // write through a null pointer.
    if ( !result )
      return 0;

    nsACString::const_iterator fromBegin, fromEnd;
    LossyConvertEncoding<char, PRUnichar> converter(result);
    copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), converter).write_terminator();
    return result;
  }
|
2000-08-05 01:15:45 +00:00
|
|
|
|
2000-08-23 17:27:06 +00:00
|
|
|
/**
 * Copies |aLength| characters of |aSource|, starting at |aSrcOffset|, into
 * the raw buffer |aDest|. No terminator is written and no bounds are checked
 * here, for either the source range or the destination.
 *
 * @param aSource    the string to copy from
 * @param aSrcOffset index of the first character to copy
 * @param aDest      destination buffer
 * @param aLength    number of characters to copy
 * @return |aDest|
 */
NS_COM
PRUnichar*
CopyUnicodeTo( const nsAString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength )
  {
    nsAString::const_iterator rangeBegin, rangeEnd;
    aSource.BeginReading(rangeBegin).advance( PRInt32(aSrcOffset) );
    aSource.BeginReading(rangeEnd).advance( PRInt32(aSrcOffset+aLength) );

      // |copy_string| moves the output pointer it is given, so hand it a copy
    PRUnichar* out = aDest;
    copy_string(rangeBegin, rangeEnd, out);
    return aDest;
  }
|
|
|
|
|
2000-12-12 21:58:14 +00:00
|
|
|
/**
 * Replaces the contents of |aDest| with the characters in the range
 * [|aSrcStart|, |aSrcEnd|).
 *
 * @param aSrcStart start of the range to copy
 * @param aSrcEnd   end of the range to copy
 * @param aDest     resized to the length of the range, then overwritten
 */
NS_COM
void
CopyUnicodeTo( const nsAString::const_iterator& aSrcStart,
               const nsAString::const_iterator& aSrcEnd,
               nsAString& aDest )
  {
    aDest.SetLength(Distance(aSrcStart, aSrcEnd));

    nsAString::iterator out;
    aDest.BeginWriting(out);

      // |copy_string| is given a copy of |aSrcStart|, since |aSrcStart| itself is const
    nsAString::const_iterator in(aSrcStart);
    copy_string(in, aSrcEnd, out);
  }
|
|
|
|
|
|
|
|
/**
 * Appends the characters in the range [|aSrcStart|, |aSrcEnd|) to |aDest|.
 *
 * @param aSrcStart start of the range to append
 * @param aSrcEnd   end of the range to append
 * @param aDest     grown by the length of the range; the new tail is overwritten
 */
NS_COM
void
AppendUnicodeTo( const nsAString::const_iterator& aSrcStart,
                 const nsAString::const_iterator& aSrcEnd,
                 nsAString& aDest )
  {
    const PRUint32 originalLength = aDest.Length();
    aDest.SetLength(originalLength + Distance(aSrcStart, aSrcEnd));

      // position the writer just past the existing contents
    nsAString::iterator out;
    aDest.BeginWriting(out).advance(originalLength);

      // |copy_string| is given a copy of |aSrcStart|, since |aSrcStart| itself is const
    nsAString::const_iterator in(aSrcStart);
    copy_string(in, aSrcEnd, out);
  }
|
|
|
|
|
2000-08-05 07:44:08 +00:00
|
|
|
/**
 * Tests whether every character of |aString| fits in 7 bits.
 *
 * @param aString the wide string to examine
 * @return |PR_FALSE| as soon as a character with any bit above 0x7F is seen,
 *         otherwise |PR_TRUE| (including for an empty string)
 */
NS_COM
PRBool
IsASCII( const nsAString& aString )
  {
      // every bit that must be clear in an ASCII character
    static const PRUnichar NOT_ASCII = PRUnichar(~0x007F);

      // |copy_string| is deliberately not used here: this loop can stop at the
      // first non-ASCII character

    nsAString::const_iterator cursor, end;
    aString.BeginReading(cursor);
    aString.EndReading(end);

      // walk |aString| one fragment at a time...
    while ( cursor != end )
      {
        const PRUint32 chunkLength = PRUint32(cursor.size_forward());
        const PRUnichar* chunk = cursor.get();

          // ...and each fragment one character at a time
        for ( PRUint32 i = 0; i < chunkLength; ++i )
          {
            if ( chunk[i] & NOT_ASCII )
              return PR_FALSE;
          }

        cursor.advance( PRInt32(chunkLength) );
      }

    return PR_TRUE;
  }
|
|
|
|
|
2002-03-06 07:48:55 +00:00
|
|
|
/**
 * Tests whether every character of |aString| fits in 7 bits.
 *
 * @param aString the narrow string to examine
 * @return |PR_FALSE| as soon as a character with any bit above 0x7F is seen,
 *         otherwise |PR_TRUE| (including for an empty string)
 */
NS_COM
PRBool
IsASCII( const nsACString& aString )
  {
      // every bit that must be clear in an ASCII character
    static const char NOT_ASCII = char(~0x7F);

      // |copy_string| is deliberately not used here: this loop can stop at the
      // first non-ASCII character

    nsACString::const_iterator cursor, end;
    aString.BeginReading(cursor);
    aString.EndReading(end);

      // walk |aString| one fragment at a time...
    while ( cursor != end )
      {
        const PRUint32 chunkLength = PRUint32(cursor.size_forward());
        const char* chunk = cursor.get();

          // ...and each fragment one character at a time
        for ( PRUint32 i = 0; i < chunkLength; ++i )
          {
            if ( chunk[i] & NOT_ASCII )
              return PR_FALSE;
          }

        cursor.advance( PRInt32(chunkLength) );
      }

    return PR_TRUE;
  }
|
|
|
|
|
2003-03-25 08:11:13 +00:00
|
|
|
/**
 * Tests whether |aString| is an acceptable UTF-8 byte sequence.
 *
 * Beyond checking lead/continuation structure, the scan rejects:
 *   - stray continuation bytes [80-BF] and the lead bytes C0/C1,
 *   - overlong forms E0[80-9F].. and F0[80-8F]..,
 *   - surrogate code points ED[A0-BF]..,
 *   - code points beyond 0x10FFFF (F4[90-BF].. and lead bytes above F4),
 *   - the non-characters EF BF [BE-BF] and F[0-4] [89AB]F BF [BE-BF],
 *   - a string that ends in the middle of a multi-byte sequence.
 *
 * The state is carried across fragment boundaries, so a sequence may be split
 * between two fragments of |aString|.
 *
 * @param aString the bytes to examine
 * @return |PR_TRUE| if the whole string passes, |PR_FALSE| otherwise
 */
NS_COM
PRBool
IsUTF8( const nsACString& aString )
  {
    nsReadingIterator<char> end;
    aString.EndReading(end);

    PRInt32  pending        = 0;        // continuation bytes still owed by the current sequence
    PRBool   checkOverlong  = PR_FALSE; // next continuation byte must be above |overlongMax|
    PRBool   checkTooHigh   = PR_FALSE; // next continuation byte must be below |tooHighMin|
    PRBool   maybeNonchar   = PR_FALSE; // sequence so far still matches a non-character
    PRUint16 overlongMax    = 0;        // overlong byte upper bound
    PRUint16 tooHighMin     = 0;        // surrogate (or beyond-0x10FFFF) byte lower bound

      // for each chunk of |aString|...
    nsReadingIterator<char> cursor;
    aString.BeginReading(cursor);
    while ( cursor != end )
      {
        const PRUint32 chunkLength = PRUint32(cursor.size_forward());
        const char* p = cursor.get();
        const char* const chunkEnd = p + chunkLength;

          // for each character in this chunk...
        while ( p < chunkEnd )
          {
            PRUint8 c;

            if ( pending == 0 )
              {
                  // expecting the first byte of a character
                c = *p++;

                if ( UTF8traits::isASCII(c) )
                  continue;

                  // [80-BF] where not expected, [C0-C1] for overlong
                if ( c <= 0xC1 )
                  return PR_FALSE;

                if ( UTF8traits::is2byte(c) )
                  {
                    pending = 1;
                  }
                else if ( UTF8traits::is3byte(c) )
                  {
                    pending = 2;
                    if ( c == 0xE0 )
                      {
                          // to exclude E0[80-9F][80-BF]
                        checkOverlong = PR_TRUE;
                        overlongMax = 0x9F;
                      }
                    else if ( c == 0xED )
                      {
                          // ED[A0-BF][80-BF] : surrogate codepoint
                        checkTooHigh = PR_TRUE;
                        tooHighMin = 0xA0;
                      }
                    else if ( c == 0xEF )
                      {
                          // EF BF [BE-BF] : non-character
                        maybeNonchar = PR_TRUE;
                      }
                  }
                else if ( c <= 0xF4 )
                  {
                      // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
                    pending = 3;
                    maybeNonchar = PR_TRUE;
                    if ( c == 0xF0 )
                      {
                          // to exclude F0[80-8F][80-BF]{2}
                        checkOverlong = PR_TRUE;
                        overlongMax = 0x8F;
                      }
                    else if ( c == 0xF4 )
                      {
                          // to exclude F4[90-BF][80-BF]
                          // actually not surrogates but codepoints beyond 0x10FFFF
                        checkTooHigh = PR_TRUE;
                        tooHighMin = 0x90;
                      }
                  }
                else
                  {
                    return PR_FALSE; // Not UTF8 string
                  }
              }

              // consume the continuation bytes available in this chunk
            while ( p < chunkEnd && pending )
              {
                c = *p++;
                --pending;

                  // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
                  // Drop the suspicion as soon as a byte departs from that pattern.
                if ( maybeNonchar &&
                     ( ( pending == 0 && c < 0xBE ) ||
                       ( pending == 1 && c != 0xBF ) ||
                       ( pending == 2 && 0x0F != (0x0F & c) ) ) )
                  maybeNonchar = PR_FALSE;

                if ( !UTF8traits::isInSeq(c) )
                  return PR_FALSE; // Not UTF8 string
                if ( checkOverlong && c <= overlongMax )
                  return PR_FALSE; // Not UTF8 string
                if ( checkTooHigh && tooHighMin <= c )
                  return PR_FALSE; // Not UTF8 string
                if ( maybeNonchar && pending == 0 )
                  return PR_FALSE; // Not UTF8 string

                  // the range restrictions apply to the first continuation byte only
                checkOverlong = PR_FALSE;
                checkTooHigh = PR_FALSE;
              }
          }

        cursor.advance( PRInt32(chunkLength) );
      }

    return !pending; // pending != 0 at the end indicates an invalid UTF-8 seq.
  }
|
2000-09-09 07:30:55 +00:00
|
|
|
|
|
|
|
/**
|
2001-12-08 07:08:38 +00:00
|
|
|
* A character sink for in-place case conversion.
|
2000-09-09 07:30:55 +00:00
|
|
|
*/
|
|
|
|
class ConvertToUpperCase
|
|
|
|
{
|
|
|
|
public:
|
2001-11-28 04:59:29 +00:00
|
|
|
typedef char value_type;
|
2000-09-09 07:30:55 +00:00
|
|
|
|
|
|
|
PRUint32
|
2001-11-28 04:59:29 +00:00
|
|
|
write( const char* aSource, PRUint32 aSourceLength )
|
2000-09-09 07:30:55 +00:00
|
|
|
{
|
2001-11-28 04:59:29 +00:00
|
|
|
char* cp = NS_CONST_CAST(char*,aSource);
|
|
|
|
const char* end = aSource + aSourceLength;
|
|
|
|
while (cp != end) {
|
|
|
|
char ch = *cp;
|
|
|
|
if ((ch >= 'a') && (ch <= 'z'))
|
|
|
|
*cp = ch - ('a' - 'A');
|
|
|
|
++cp;
|
|
|
|
}
|
2000-09-09 07:30:55 +00:00
|
|
|
return aSourceLength;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Upper-cases the characters 'a'..'z' of |aCString| in place, by running
 * |copy_string| over its writable range into a |ConvertToUpperCase| sink.
 */
NS_COM
void
ToUpperCase( nsACString& aCString )
  {
    nsACString::iterator cursor, end;
    aCString.BeginWriting(cursor);
    aCString.EndWriting(end);

    ConvertToUpperCase upcaser;
    copy_string(cursor, end, upcaser);
  }
|
|
|
|
|
2001-12-23 02:56:41 +00:00
|
|
|
/**
 * Upper-cases the characters 'a'..'z' of a single-fragment string in place,
 * handing its whole buffer to |ConvertToUpperCase::write| in one call.
 */
NS_COM
void
ToUpperCase( nsASingleFragmentCString& aCString )
  {
    char* start;
    const char* buffer = aCString.BeginWriting(start);

    ConvertToUpperCase upcaser;
    upcaser.write(buffer, aCString.Length());
  }
|
|
|
|
|
|
|
|
/**
 * Upper-cases the characters 'a'..'z' of an |nsCString| in place, operating
 * directly on its |mStr| buffer.
 */
NS_COM
void
ToUpperCase( nsCString& aCString )
  {
    ConvertToUpperCase upcaser;
    const char* buffer = aCString.mStr;
    upcaser.write(buffer, aCString.Length());
  }
|
2000-09-09 07:30:55 +00:00
|
|
|
|
2001-12-08 07:08:38 +00:00
|
|
|
/**
|
|
|
|
* A character sink for copying with case conversion.
|
|
|
|
*/
|
|
|
|
class CopyToUpperCase
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef char value_type;
|
|
|
|
|
|
|
|
CopyToUpperCase( nsACString::iterator& aDestIter )
|
|
|
|
: mIter(aDestIter)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
PRUint32
|
|
|
|
write( const char* aSource, PRUint32 aSourceLength )
|
|
|
|
{
|
|
|
|
PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
|
|
|
|
char* cp = mIter.get();
|
|
|
|
const char* end = aSource + len;
|
|
|
|
while (aSource != end) {
|
|
|
|
char ch = *aSource;
|
|
|
|
if ((ch >= 'a') && (ch <= 'z'))
|
|
|
|
*cp = ch - ('a' - 'A');
|
2002-05-01 14:02:11 +00:00
|
|
|
else
|
|
|
|
*cp = ch;
|
2001-12-08 07:08:38 +00:00
|
|
|
++aSource;
|
|
|
|
++cp;
|
|
|
|
}
|
|
|
|
mIter.advance(len);
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
nsACString::iterator& mIter;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Replaces the contents of |aDest| with a copy of |aSource| in which the
 * characters 'a'..'z' have been upper-cased.
 *
 * @param aSource the string to read
 * @param aDest   resized to |aSource.Length()|, then overwritten
 */
NS_COM
void
ToUpperCase( const nsACString& aSource, nsACString& aDest )
  {
    aDest.SetLength(aSource.Length());

    nsACString::iterator out;
    aDest.BeginWriting(out);
    CopyToUpperCase upcaser(out);

    nsACString::const_iterator in, inEnd;
    aSource.BeginReading(in);
    aSource.EndReading(inEnd);
    copy_string(in, inEnd, upcaser);
  }
|
|
|
|
|
2000-09-09 07:30:55 +00:00
|
|
|
/**
|
|
|
|
* A character sink for case conversion.
|
|
|
|
*/
|
|
|
|
class ConvertToLowerCase
|
|
|
|
{
|
|
|
|
public:
|
2001-11-28 04:59:29 +00:00
|
|
|
typedef char value_type;
|
2000-09-09 07:30:55 +00:00
|
|
|
|
|
|
|
PRUint32
|
2001-11-28 04:59:29 +00:00
|
|
|
write( const char* aSource, PRUint32 aSourceLength )
|
2000-09-09 07:30:55 +00:00
|
|
|
{
|
2001-11-28 04:59:29 +00:00
|
|
|
char* cp = NS_CONST_CAST(char*,aSource);
|
|
|
|
const char* end = aSource + aSourceLength;
|
|
|
|
while (cp != end) {
|
|
|
|
char ch = *cp;
|
|
|
|
if ((ch >= 'A') && (ch <= 'Z'))
|
|
|
|
*cp = ch + ('a' - 'A');
|
|
|
|
++cp;
|
|
|
|
}
|
2000-09-09 07:30:55 +00:00
|
|
|
return aSourceLength;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Lower-cases the characters 'A'..'Z' of |aCString| in place, by running
 * |copy_string| over its writable range into a |ConvertToLowerCase| sink.
 */
NS_COM
void
ToLowerCase( nsACString& aCString )
  {
    nsACString::iterator cursor, end;
    aCString.BeginWriting(cursor);
    aCString.EndWriting(end);

    ConvertToLowerCase downcaser;
    copy_string(cursor, end, downcaser);
  }
|
2000-12-12 21:58:14 +00:00
|
|
|
|
2001-12-23 02:56:41 +00:00
|
|
|
/**
 * Lower-cases the characters 'A'..'Z' of a single-fragment string in place,
 * handing its whole buffer to |ConvertToLowerCase::write| in one call.
 */
NS_COM
void
ToLowerCase( nsASingleFragmentCString& aCString )
  {
    char* start;
    const char* buffer = aCString.BeginWriting(start);

    ConvertToLowerCase downcaser;
    downcaser.write(buffer, aCString.Length());
  }
|
|
|
|
|
|
|
|
/**
 * Lower-cases the characters 'A'..'Z' of an |nsCString| in place, operating
 * directly on its |mStr| buffer.
 */
NS_COM
void
ToLowerCase( nsCString& aCString )
  {
    ConvertToLowerCase downcaser;
    const char* buffer = aCString.mStr;
    downcaser.write(buffer, aCString.Length());
  }
|
|
|
|
|
2001-12-08 07:08:38 +00:00
|
|
|
/**
|
|
|
|
* A character sink for copying with case conversion.
|
|
|
|
*/
|
|
|
|
class CopyToLowerCase
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef char value_type;
|
|
|
|
|
|
|
|
CopyToLowerCase( nsACString::iterator& aDestIter )
|
|
|
|
: mIter(aDestIter)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
PRUint32
|
|
|
|
write( const char* aSource, PRUint32 aSourceLength )
|
|
|
|
{
|
|
|
|
PRUint32 len = PR_MIN(PRUint32(mIter.size_forward()), aSourceLength);
|
|
|
|
char* cp = mIter.get();
|
|
|
|
const char* end = aSource + len;
|
|
|
|
while (aSource != end) {
|
|
|
|
char ch = *aSource;
|
|
|
|
if ((ch >= 'A') && (ch <= 'Z'))
|
|
|
|
*cp = ch + ('a' - 'A');
|
2002-05-01 14:02:11 +00:00
|
|
|
else
|
|
|
|
*cp = ch;
|
2001-12-08 07:08:38 +00:00
|
|
|
++aSource;
|
|
|
|
++cp;
|
|
|
|
}
|
|
|
|
mIter.advance(len);
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
nsACString::iterator& mIter;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Replaces the contents of |aDest| with a copy of |aSource| in which the
 * characters 'A'..'Z' have been lower-cased.
 *
 * @param aSource the string to read
 * @param aDest   resized to |aSource.Length()|, then overwritten
 */
NS_COM
void
ToLowerCase( const nsACString& aSource, nsACString& aDest )
  {
    aDest.SetLength(aSource.Length());

    nsACString::iterator out;
    aDest.BeginWriting(out);
    CopyToLowerCase downcaser(out);

    nsACString::const_iterator in, inEnd;
    aSource.BeginReading(in);
    aSource.EndReading(inEnd);
    copy_string(in, inEnd, downcaser);
  }
|
|
|
|
|
2001-05-13 07:03:29 +00:00
|
|
|
template <class StringT, class IteratorT, class Comparator>
|
2000-12-12 21:58:14 +00:00
|
|
|
PRBool
|
2001-05-13 07:03:29 +00:00
|
|
|
FindInReadable_Impl( const StringT& aPattern, IteratorT& aSearchStart, IteratorT& aSearchEnd, const Comparator& compare )
|
2000-12-12 21:58:14 +00:00
|
|
|
{
|
|
|
|
PRBool found_it = PR_FALSE;
|
|
|
|
|
|
|
|
// only bother searching at all if we're given a non-empty range to search
|
|
|
|
if ( aSearchStart != aSearchEnd )
|
|
|
|
{
|
2001-05-13 07:03:29 +00:00
|
|
|
IteratorT aPatternStart, aPatternEnd;
|
2000-12-12 21:58:14 +00:00
|
|
|
aPattern.BeginReading(aPatternStart);
|
|
|
|
aPattern.EndReading(aPatternEnd);
|
|
|
|
|
|
|
|
// outer loop keeps searching till we find it or run out of string to search
|
|
|
|
while ( !found_it )
|
|
|
|
{
|
|
|
|
// fast inner loop (that's what it's called, not what it is) looks for a potential match
|
2001-10-30 03:39:18 +00:00
|
|
|
while ( aSearchStart != aSearchEnd &&
|
|
|
|
compare(*aPatternStart, *aSearchStart) )
|
2000-12-12 21:58:14 +00:00
|
|
|
++aSearchStart;
|
|
|
|
|
|
|
|
// if we broke out of the `fast' loop because we're out of string ... we're done: no match
|
|
|
|
if ( aSearchStart == aSearchEnd )
|
|
|
|
break;
|
|
|
|
|
|
|
|
// otherwise, we're at a potential match, let's see if we really hit one
|
2001-05-13 07:03:29 +00:00
|
|
|
IteratorT testPattern(aPatternStart);
|
|
|
|
IteratorT testSearch(aSearchStart);
|
2000-12-12 21:58:14 +00:00
|
|
|
|
|
|
|
// slow inner loop verifies the potential match (found by the `fast' loop) at the current position
|
|
|
|
for(;;)
|
|
|
|
{
|
|
|
|
// we already compared the first character in the outer loop,
|
|
|
|
// so we'll advance before the next comparison
|
|
|
|
++testPattern;
|
|
|
|
++testSearch;
|
|
|
|
|
|
|
|
// if we verified all the way to the end of the pattern, then we found it!
|
|
|
|
if ( testPattern == aPatternEnd )
|
|
|
|
{
|
|
|
|
found_it = PR_TRUE;
|
|
|
|
aSearchEnd = testSearch; // return the exact found range through the parameters
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we got to end of the string we're searching before we hit the end of the
|
|
|
|
// pattern, we'll never find what we're looking for
|
|
|
|
if ( testSearch == aSearchEnd )
|
|
|
|
{
|
|
|
|
aSearchStart = aSearchEnd;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// else if we mismatched ... it's time to advance to the next search position
|
|
|
|
// and get back into the `fast' loop
|
2001-10-30 03:39:18 +00:00
|
|
|
if ( compare(*testPattern, *testSearch) )
|
2000-12-12 21:58:14 +00:00
|
|
|
{
|
|
|
|
++aSearchStart;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return found_it;
|
|
|
|
}
|
|
|
|
|
2001-04-02 22:00:34 +00:00
|
|
|
|
2001-05-13 05:16:10 +00:00
|
|
|
/**
 * Finds the first occurrence of |aPattern| in a range of wide characters,
 * comparing characters with |aComparator|. Forwards to |FindInReadable_Impl|,
 * which narrows the two iterators to the match on success.
 */
NS_COM
PRBool
FindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator )
  {
    const PRBool found = FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
    return found;
  }
|
2001-04-02 22:00:34 +00:00
|
|
|
|
2001-05-13 05:16:10 +00:00
|
|
|
/**
 * Finds the first occurrence of |aPattern| in a range of narrow characters,
 * comparing characters with |aComparator|. Forwards to |FindInReadable_Impl|,
 * which narrows the two iterators to the match on success.
 */
NS_COM
PRBool
FindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
  {
    const PRBool found = FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
    return found;
  }
|
2001-04-02 22:00:34 +00:00
|
|
|
|
2001-05-13 05:16:10 +00:00
|
|
|
/**
 * Finds the first occurrence of |aPattern| in a range of narrow characters,
 * comparing characters with an |nsCaseInsensitiveCStringComparator|. Forwards
 * to |FindInReadable_Impl|, which narrows the two iterators to the match on
 * success.
 */
NS_COM
PRBool
CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd )
  {
    const nsCaseInsensitiveCStringComparator ignoreCase = nsCaseInsensitiveCStringComparator();
    return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, ignoreCase);
  }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* This implementation is simple, but does too much work.
|
|
|
|
* It searches the entire string from left to right, and returns the last match found, if any.
|
|
|
|
* This implementation will be replaced when I get |reverse_iterator|s working.
|
|
|
|
*/
|
2001-04-02 22:00:34 +00:00
|
|
|
NS_COM
|
2000-12-12 21:58:14 +00:00
|
|
|
PRBool
|
2002-03-19 06:46:56 +00:00
|
|
|
RFindInReadable( const nsAString& aPattern, nsAString::const_iterator& aSearchStart, nsAString::const_iterator& aSearchEnd, const nsStringComparator& aComparator)
|
2000-12-12 21:58:14 +00:00
|
|
|
{
|
|
|
|
PRBool found_it = PR_FALSE;
|
|
|
|
|
2001-09-25 09:35:50 +00:00
|
|
|
nsAString::const_iterator savedSearchEnd(aSearchEnd);
|
|
|
|
nsAString::const_iterator searchStart(aSearchStart), searchEnd(aSearchEnd);
|
2000-12-12 21:58:14 +00:00
|
|
|
|
|
|
|
while ( searchStart != searchEnd )
|
|
|
|
{
|
2002-03-19 06:46:56 +00:00
|
|
|
if ( FindInReadable(aPattern, searchStart, searchEnd, aComparator) )
|
2000-12-12 21:58:14 +00:00
|
|
|
{
|
|
|
|
found_it = PR_TRUE;
|
|
|
|
|
|
|
|
// this is the best match so far, so remember it
|
|
|
|
aSearchStart = searchStart;
|
|
|
|
aSearchEnd = searchEnd;
|
|
|
|
|
|
|
|
// ...and get ready to search some more
|
|
|
|
// (it's tempting to set |searchStart=searchEnd| ... but that misses overlapping patterns)
|
|
|
|
++searchStart;
|
|
|
|
searchEnd = savedSearchEnd;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we never found it, return an empty range
|
|
|
|
if ( !found_it )
|
|
|
|
aSearchStart = aSearchEnd;
|
|
|
|
|
|
|
|
return found_it;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Finds the last occurrence of |aPattern| in a range of narrow characters, by
 * searching the entire range from left to right and keeping the last match.
 *
 * @param aPattern     the string to look for
 * @param aSearchStart in: start of the range to search; out: start of the last match
 * @param aSearchEnd   in: end of the range to search; out: end of the last match
 * @param aComparator  character comparator handed on to |FindInReadable|
 * @return |PR_TRUE| if a match was found; otherwise |PR_FALSE|, with
 *         |aSearchStart| set equal to |aSearchEnd| (an empty range)
 */
NS_COM
PRBool
RFindInReadable( const nsACString& aPattern, nsACString::const_iterator& aSearchStart, nsACString::const_iterator& aSearchEnd, const nsCStringComparator& aComparator)
  {
    const nsACString::const_iterator originalEnd(aSearchEnd);
    nsACString::const_iterator candidateStart(aSearchStart);
    nsACString::const_iterator candidateEnd(aSearchEnd);

    PRBool foundAny = PR_FALSE;

    while ( candidateStart != candidateEnd )
      {
        if ( !FindInReadable(aPattern, candidateStart, candidateEnd, aComparator) )
          continue; // nothing (more) found; the loop condition decides whether we are done

        foundAny = PR_TRUE;

          // this is the best match so far, so remember it
        aSearchStart = candidateStart;
        aSearchEnd = candidateEnd;

          // ...and get ready to search some more, starting one character further on
          // (restarting at the end of the match instead would miss overlapping patterns)
        ++candidateStart;
        candidateEnd = originalEnd;
      }

      // if we never found it, return an empty range
    if ( !foundAny )
      aSearchStart = aSearchEnd;

    return foundAny;
  }
|
|
|
|
|
2001-05-23 06:49:51 +00:00
|
|
|
/**
 * Reports whether any of the three strings this object refers to -- the text,
 * the pattern, or the replacement -- says it is dependent on |aString|.
 * The members are asked in that order and the first positive answer wins.
 */
PRBool
nsSubstituteString::IsDependentOn( const nsAString& aString ) const
  {
    if ( mText.IsDependentOn(aString) )
      return PR_TRUE;

    if ( mPattern.IsDependentOn(aString) )
      return PR_TRUE;

    return mReplacement.IsDependentOn(aString) ? PR_TRUE : PR_FALSE;
  }
|
|
|
|
|
|
|
|
/**
 * Returns the length of the substituted result, or an upper bound on it.
 *
 * If the number of matches has already been counted (|mNumberOfMatches| is
 * non-negative) the returned value is computed from that count.  Otherwise
 * the worst case is assumed:
 *   - when the replacement is no longer than the pattern, substitutions can
 *     only shrink (or preserve) the text, so the worst case is no matches;
 *   - otherwise, the worst case is the largest number of times the pattern
 *     could fit into the text.
 */
PRUint32
nsSubstituteString::MaxLength() const
  {
    PRInt32 numberOfMatches = mNumberOfMatches;

      // if we don't know exactly how long the result will be,
      //  calculate the longest possible result
    if ( numberOfMatches < 0 )
      {
          // An empty pattern must be handled before the division below, or we
          //  would divide by zero whenever the replacement is non-empty.
          // NOTE(review): an empty pattern is treated here as matching nowhere;
          //  confirm that this agrees with what |FindInReadable| does.
        if ( mPattern.Length() == 0 || mReplacement.Length() <= mPattern.Length() )
          {
              // substitutions shrink the result, so worst case is none
            numberOfMatches = 0;
          }
        else
          {
              // substitutions grow the result, so worst case is the maximum
              //  number of times |mPattern| can be found
            numberOfMatches = PRInt32(mText.Length() / mPattern.Length());
          }
      }

      // (possibly negative) change in length caused by each substitution
    PRInt32 costPerMatch = PRInt32(mReplacement.Length()) - PRInt32(mPattern.Length());
    return mText.Length() + (numberOfMatches * costPerMatch);
  }
|
|
|
|
|
|
|
|
void
|
|
|
|
nsSubstituteString::CountMatches() const
|
|
|
|
{
|
2001-09-25 09:35:50 +00:00
|
|
|
nsAString::const_iterator textEnd;
|
|
|
|
nsAString::const_iterator searchEnd = mText.EndReading(textEnd);
|
2001-05-23 06:49:51 +00:00
|
|
|
|
2001-09-25 09:35:50 +00:00
|
|
|
nsAString::const_iterator searchStart;
|
2001-05-23 06:49:51 +00:00
|
|
|
mText.BeginReading(searchStart);
|
|
|
|
|
|
|
|
PRInt32 numberOfMatches = 0;
|
|
|
|
while ( FindInReadable(mPattern, searchStart, searchEnd) )
|
|
|
|
{
|
|
|
|
++numberOfMatches;
|
|
|
|
searchStart = searchEnd;
|
|
|
|
searchEnd = textEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
NS_CONST_CAST(nsSubstituteString*, this)->mNumberOfMatches = numberOfMatches;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Returns the length of the substituted result.
 *
 * A negative |mNumberOfMatches| means the matches have not been counted yet;
 * in that case they are counted first, and |MaxLength| is then used to
 * compute the length.
 */
PRUint32
nsSubstituteString::Length() const
  {
    const PRBool matchesAlreadyCounted = ( mNumberOfMatches >= 0 );

    if ( !matchesAlreadyCounted )
      CountMatches();

    return MaxLength();
  }
|
|
|
|
|
|
|
|
/**
 * Writes |mText| into |aDestBuffer| with each non-overlapping occurrence of
 * |mPattern| replaced by |mReplacement|.
 *
 * @param aDestBuffer  destination to write into
 * @return the value of |aDestBuffer| after all the copies have been made
 *         (the copies update it, as noted below)
 */
PRUnichar*
nsSubstituteString::operator()( PRUnichar* aDestBuffer ) const
  {
    nsAString::const_iterator replacementEnd;
    mReplacement.EndReading(replacementEnd);

    nsAString::const_iterator textEnd;
    nsAString::const_iterator matchEnd = mText.EndReading(textEnd);

      // |pendingStart| marks the first character of the text not yet written out
    nsAString::const_iterator pendingStart;
    nsAString::const_iterator matchStart = mText.BeginReading(pendingStart);

    while ( FindInReadable(mPattern, matchStart, matchEnd) )
      {
          // [matchStart, matchEnd) is now the occurrence that was found

          // emit the untouched text that precedes it (this updates |aDestBuffer|)
        copy_string(pendingStart, matchStart, aDestBuffer);

          // emit the replacement; its start iterator is fetched afresh for every match
        nsAString::const_iterator replacementStart;
        mReplacement.BeginReading(replacementStart);
        copy_string(replacementStart, replacementEnd, aDestBuffer);

          // continue both the search and the copying from the end of this match
        matchStart   = matchEnd;
        pendingStart = matchStart;
        matchEnd     = textEnd;
      }

      // emit whatever follows the last match (or the whole text if there was none)
    copy_string(pendingStart, textEnd, aDestBuffer);
    return aDestBuffer;
  }
|
2000-12-12 21:58:14 +00:00
|
|
|
|
2001-05-23 06:49:51 +00:00
|
|
|
/**
 * Reports whether any of the three strings this object refers to -- the text,
 * the pattern, or the replacement -- says it is dependent on |aString|.
 * The members are asked in that order and the first positive answer wins.
 */
PRBool
nsSubstituteCString::IsDependentOn( const nsACString& aString ) const
  {
    if ( mText.IsDependentOn(aString) )
      return PR_TRUE;

    if ( mPattern.IsDependentOn(aString) )
      return PR_TRUE;

    return mReplacement.IsDependentOn(aString) ? PR_TRUE : PR_FALSE;
  }
|
|
|
|
|
|
|
|
/**
 * Returns the length of the substituted result, or an upper bound on it.
 *
 * If the number of matches has already been counted (|mNumberOfMatches| is
 * non-negative) the returned value is computed from that count.  Otherwise
 * the worst case is assumed:
 *   - when the replacement is no longer than the pattern, substitutions can
 *     only shrink (or preserve) the text, so the worst case is no matches;
 *   - otherwise, the worst case is the largest number of times the pattern
 *     could fit into the text.
 */
PRUint32
nsSubstituteCString::MaxLength() const
  {
    PRInt32 numberOfMatches = mNumberOfMatches;

      // if we don't know exactly how long the result will be,
      //  calculate the longest possible result
    if ( numberOfMatches < 0 )
      {
          // An empty pattern must be handled before the division below, or we
          //  would divide by zero whenever the replacement is non-empty.
          // NOTE(review): an empty pattern is treated here as matching nowhere;
          //  confirm that this agrees with what |FindInReadable| does.
        if ( mPattern.Length() == 0 || mReplacement.Length() <= mPattern.Length() )
          {
              // substitutions shrink the result, so worst case is none
            numberOfMatches = 0;
          }
        else
          {
              // substitutions grow the result, so worst case is the maximum
              //  number of times |mPattern| can be found
            numberOfMatches = PRInt32(mText.Length() / mPattern.Length());
          }
      }

      // (possibly negative) change in length caused by each substitution
    PRInt32 costPerMatch = PRInt32(mReplacement.Length()) - PRInt32(mPattern.Length());
    return mText.Length() + (numberOfMatches * costPerMatch);
  }
|
|
|
|
|
|
|
|
void
|
|
|
|
nsSubstituteCString::CountMatches() const
|
|
|
|
{
|
2001-09-25 09:35:50 +00:00
|
|
|
nsACString::const_iterator textEnd;
|
|
|
|
nsACString::const_iterator searchEnd = mText.EndReading(textEnd);
|
2001-05-23 06:49:51 +00:00
|
|
|
|
2001-09-25 09:35:50 +00:00
|
|
|
nsACString::const_iterator searchStart;
|
2001-05-23 06:49:51 +00:00
|
|
|
mText.BeginReading(searchStart);
|
|
|
|
|
|
|
|
PRInt32 numberOfMatches = 0;
|
|
|
|
while ( FindInReadable(mPattern, searchStart, searchEnd) )
|
|
|
|
{
|
|
|
|
++numberOfMatches;
|
|
|
|
searchStart = searchEnd;
|
|
|
|
searchEnd = textEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
NS_CONST_CAST(nsSubstituteCString*, this)->mNumberOfMatches = numberOfMatches;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Returns the length of the substituted result.
 *
 * A negative |mNumberOfMatches| means the matches have not been counted yet;
 * in that case they are counted first, and |MaxLength| is then used to
 * compute the length.
 */
PRUint32
nsSubstituteCString::Length() const
  {
    const PRBool matchesAlreadyCounted = ( mNumberOfMatches >= 0 );

    if ( !matchesAlreadyCounted )
      CountMatches();

    return MaxLength();
  }
|
|
|
|
|
|
|
|
char*
|
|
|
|
nsSubstituteCString::operator()( char* aDestBuffer ) const
|
|
|
|
{
|
2001-09-25 09:35:50 +00:00
|
|
|
nsACString::const_iterator replacementEnd;
|
2001-05-23 06:49:51 +00:00
|
|
|
mReplacement.EndReading(replacementEnd);
|
|
|
|
|
2001-09-25 09:35:50 +00:00
|
|
|
nsACString::const_iterator textEnd;
|
|
|
|
nsACString::const_iterator searchEnd = mText.EndReading(textEnd);
|
2001-05-23 06:49:51 +00:00
|
|
|
|
2001-09-25 09:35:50 +00:00
|
|
|
nsACString::const_iterator uncopiedStart;
|
|
|
|
nsACString::const_iterator searchStart = mText.BeginReading(uncopiedStart);
|
2001-05-23 06:49:51 +00:00
|
|
|
|
|
|
|
while ( FindInReadable(mPattern, searchStart, searchEnd) )
|
|
|
|
{
|
|
|
|
// |searchStart| and |searchEnd| now bracket the match
|
|
|
|
|
|
|
|
// copy everything up to this match
|
|
|
|
copy_string(uncopiedStart, searchStart, aDestBuffer); // updates |aDestBuffer|
|
|
|
|
|
|
|
|
// copy the replacement
|
2001-09-25 09:35:50 +00:00
|
|
|
nsACString::const_iterator replacementStart;
|
2001-05-23 06:49:51 +00:00
|
|
|
copy_string(mReplacement.BeginReading(replacementStart), replacementEnd, aDestBuffer);
|
|
|
|
|
|
|
|
// start searching from where the current match ends
|
|
|
|
uncopiedStart = searchStart = searchEnd;
|
|
|
|
searchEnd = textEnd;
|
|
|
|
}
|
|
|
|
|
|
|
|
// copy everything after the final (if any) match
|
|
|
|
copy_string(uncopiedStart, textEnd, aDestBuffer);
|
|
|
|
return aDestBuffer;
|
|
|
|
}
|
2000-12-12 21:58:14 +00:00
|
|
|
|
2001-04-02 22:00:34 +00:00
|
|
|
/**
 * Looks for the first occurrence of |aChar| in [aSearchStart, aSearchEnd).
 *
 * The range is examined one contiguous run at a time: when |SameFragment|
 * says both iterators are in the same fragment the run stops at |aSearchEnd|,
 * otherwise it covers whatever |aSearchStart.size_forward()| reports.
 *
 * @return PR_TRUE with |aSearchStart| positioned on the character found;
 *         PR_FALSE, with |aSearchStart| advanced to |aSearchEnd|, otherwise.
 */
NS_COM
PRBool
FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd )
  {
    while ( aSearchStart != aSearchEnd )
      {
          // how many characters can be scanned in one go from the current position
        const PRInt32 runLength = SameFragment(aSearchStart, aSearchEnd)
                                    ? PRInt32(aSearchEnd.get() - aSearchStart.get())
                                    : PRInt32(aSearchStart.size_forward());

        const PRUnichar* const hit = nsCharTraits<PRUnichar>::find(aSearchStart.get(), runLength, aChar);

        if ( !hit )
          {
              // not in this run; skip it entirely and try again from there
            aSearchStart.advance(runLength);
            continue;
          }

          // move the caller's iterator onto the character that was found
        aSearchStart.advance( hit - aSearchStart.get() );
        return PR_TRUE;
      }

    return PR_FALSE;
  }
|
|
|
|
|
|
|
|
/**
 * Looks for the first occurrence of |aChar| in [aSearchStart, aSearchEnd).
 *
 * The range is examined one contiguous run at a time: when |SameFragment|
 * says both iterators are in the same fragment the run stops at |aSearchEnd|,
 * otherwise it covers whatever |aSearchStart.size_forward()| reports.
 *
 * @return PR_TRUE with |aSearchStart| positioned on the character found;
 *         PR_FALSE, with |aSearchStart| advanced to |aSearchEnd|, otherwise.
 */
NS_COM
PRBool
FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd )
  {
    while ( aSearchStart != aSearchEnd )
      {
          // how many characters can be scanned in one go from the current position
        const PRInt32 runLength = SameFragment(aSearchStart, aSearchEnd)
                                    ? PRInt32(aSearchEnd.get() - aSearchStart.get())
                                    : PRInt32(aSearchStart.size_forward());

        const char* const hit = nsCharTraits<char>::find(aSearchStart.get(), runLength, aChar);

        if ( !hit )
          {
              // not in this run; skip it entirely and try again from there
            aSearchStart.advance(runLength);
            continue;
          }

          // move the caller's iterator onto the character that was found
        aSearchStart.advance( hit - aSearchStart.get() );
        return PR_TRUE;
      }

    return PR_FALSE;
  }
|
|
|
|
|
2001-04-02 22:00:34 +00:00
|
|
|
/**
 * Returns how many characters of |aStr| compare equal to |aChar|.
 * The string is walked one character at a time from its beginning to its end.
 */
NS_COM
PRUint32
CountCharInReadable( const nsAString& aStr,
                     PRUnichar aChar )
  {
    nsAString::const_iterator cursor, limit;
    aStr.BeginReading(cursor);
    aStr.EndReading(limit);

    PRUint32 occurrences = 0;
    for ( ; cursor != limit; ++cursor )
      {
        if ( *cursor == aChar )
          ++occurrences;
      }

    return occurrences;
  }
|
|
|
|
|
|
|
|
/**
 * Returns how many characters of |aStr| compare equal to |aChar|.
 * The string is walked one character at a time from its beginning to its end.
 */
NS_COM
PRUint32
CountCharInReadable( const nsACString& aStr,
                     char aChar )
  {
    nsACString::const_iterator cursor, limit;
    aStr.BeginReading(cursor);
    aStr.EndReading(limit);

    PRUint32 occurrences = 0;
    for ( ; cursor != limit; ++cursor )
      {
        if ( *cursor == aChar )
          ++occurrences;
      }

    return occurrences;
  }
|
2002-01-17 04:08:14 +00:00
|
|
|
|
|
|
|
template <class CharT>
|
|
|
|
class CalculateHashCode
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
typedef CharT char_type;
|
|
|
|
typedef PRUint32 hashcode_type;
|
|
|
|
typedef CharT value_type;
|
|
|
|
|
|
|
|
CalculateHashCode() : mHashCode(0) { }
|
|
|
|
hashcode_type GetHashCode() const { return mHashCode; }
|
|
|
|
|
|
|
|
PRUint32 write( const CharT* chars, PRUint32 N )
|
|
|
|
{
|
|
|
|
for ( const CharT *end = chars + N; chars < end; ++chars)
|
|
|
|
mHashCode = (mHashCode>>28) ^ (mHashCode<<4) ^ PRUint32(*chars);
|
|
|
|
return N;
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
hashcode_type mHashCode;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Computes a hash code for |aStr| by streaming all of its characters, from
 * beginning to end, through a |CalculateHashCode| sink.
 */
NS_COM PRUint32 HashString( const nsAString& aStr )
  {
    nsAString::const_iterator first, last;
    aStr.BeginReading(first);
    aStr.EndReading(last);

      // |copy_string| feeds the characters to the sink, which accumulates the hash
    CalculateHashCode<nsAString::char_type> hasher;
    copy_string(first, last, hasher);

    return hasher.GetHashCode();
  }
|
|
|
|
|
|
|
|
/**
 * Computes a hash code for |aStr| by streaming all of its characters, from
 * beginning to end, through a |CalculateHashCode| sink.
 */
NS_COM PRUint32 HashString( const nsACString& aStr )
  {
    nsACString::const_iterator first, last;
    aStr.BeginReading(first);
    aStr.EndReading(last);

      // |copy_string| feeds the characters to the sink, which accumulates the hash
    CalculateHashCode<nsACString::char_type> hasher;
    copy_string(first, last, hasher);

    return hasher.GetHashCode();
  }
|
|
|
|
|