/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2000
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Scott Collins (original author)
 *   Johnny Stenbeck
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

// Include guard: each directive must sit on its own line, otherwise the
// |#define| is swallowed as trailing tokens of the |#ifndef|.
#ifndef nsReadableUtils_h___
#define nsReadableUtils_h___

  /**
   * I guess all the routines in this file are all mis-named.
   * According to our conventions, they should be |NS_xxx|.
*/ #ifndef nsAString_h___ #include "nsAString.h" #endif #include "nsTArray.h" inline size_t Distance( const nsReadingIterator& start, const nsReadingIterator& end ) { return end.get() - start.get(); } inline size_t Distance( const nsReadingIterator& start, const nsReadingIterator& end ) { return end.get() - start.get(); } void LossyCopyUTF16toASCII( const nsAString& aSource, nsACString& aDest NS_OUTPARAM ); void CopyASCIItoUTF16( const nsACString& aSource, nsAString& aDest NS_OUTPARAM ); void LossyCopyUTF16toASCII( const PRUnichar* aSource, nsACString& aDest NS_OUTPARAM ); void CopyASCIItoUTF16( const char* aSource, nsAString& aDest NS_OUTPARAM ); void CopyUTF16toUTF8( const nsAString& aSource, nsACString& aDest NS_OUTPARAM ); void CopyUTF8toUTF16( const nsACString& aSource, nsAString& aDest NS_OUTPARAM ); void CopyUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest NS_OUTPARAM ); void CopyUTF8toUTF16( const char* aSource, nsAString& aDest NS_OUTPARAM ); void LossyAppendUTF16toASCII( const nsAString& aSource, nsACString& aDest ); void AppendASCIItoUTF16( const nsACString& aSource, nsAString& aDest ); void LossyAppendUTF16toASCII( const PRUnichar* aSource, nsACString& aDest ); void AppendASCIItoUTF16( const char* aSource, nsAString& aDest ); void AppendUTF16toUTF8( const nsAString& aSource, nsACString& aDest ); void AppendUTF8toUTF16( const nsACString& aSource, nsAString& aDest ); void AppendUTF16toUTF8( const PRUnichar* aSource, nsACString& aDest ); void AppendUTF8toUTF16( const char* aSource, nsAString& aDest ); /** * Returns a new |char| buffer containing a zero-terminated copy of |aSource|. * * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|. * Performs a lossy encoding conversion by chopping 16-bit wide characters down to 8-bits wide while copying |aSource| to your new buffer. * This conversion is not well defined; but it reproduces legacy string behavior. 
* The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
   *
   * @param aSource a 16-bit wide string
   * @return a new |char| buffer you must free with |nsMemory::Free|.
   */
char* ToNewCString( const nsAString& aSource );

  /**
   * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
   *
   * Allocates and returns a new |char| buffer which you must free with |nsMemory::Free|.
   * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
   *
   * @param aSource an 8-bit wide string
   * @return a new |char| buffer you must free with |nsMemory::Free|.
   */
char* ToNewCString( const nsACString& aSource );

  /**
   * Returns a new |char| buffer containing a zero-terminated copy of |aSource|.
   *
   * Allocates and returns a new |char| buffer which you must free with
   * |nsMemory::Free|.
   * Performs an encoding conversion from a UTF-16 string to a UTF-8 string
   * copying |aSource| to your new buffer.
   * The new buffer is zero-terminated, but that may not help you if |aSource|
   * contains embedded nulls.
   *
   * @param aSource a UTF-16 string (made of PRUnichar's)
   * @param aUTF8Count the number of 8-bit units that were returned
   *                   (optional; defaults to |nsnull|)
   * @return a new |char| buffer you must free with |nsMemory::Free|.
   */
char* ToNewUTF8String( const nsAString& aSource, PRUint32 *aUTF8Count = nsnull );

  /**
   * Returns a new |PRUnichar| buffer containing a zero-terminated copy of
   * |aSource|.
   *
   * Allocates and returns a new |PRUnichar| buffer which you must free with
   * |nsMemory::Free|.
   * The new buffer is zero-terminated, but that may not help you if |aSource|
   * contains embedded nulls.
   *
   * @param aSource a UTF-16 string
   * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
   */
PRUnichar* ToNewUnicode( const nsAString& aSource );

  /**
   * Returns a new |PRUnichar| buffer containing a zero-terminated copy of |aSource|.
   *
   * Allocates and returns a new |PRUnichar| buffer which you must free with |nsMemory::Free|.
* Performs an encoding conversion by 0-padding 8-bit wide characters up to 16-bits wide while copying |aSource| to your new buffer.
   * This conversion is not well defined; but it reproduces legacy string behavior.
   * The new buffer is zero-terminated, but that may not help you if |aSource| contains embedded nulls.
   *
   * @param aSource an 8-bit wide string (a C-string, NOT UTF-8)
   * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
   */
PRUnichar* ToNewUnicode( const nsACString& aSource );

  /**
   * Returns a new |PRUnichar| buffer containing a zero-terminated copy
   * of |aSource|.
   *
   * Allocates and returns a new |PRUnichar| buffer which you must free with
   * |nsMemory::Free|.  Performs an encoding conversion from UTF-8 to UTF-16
   * while copying |aSource| to your new buffer.  This conversion is well defined
   * for a valid UTF-8 string.  The new buffer is zero-terminated, but that
   * may not help you if |aSource| contains embedded nulls.
   *
   * @param aSource an 8-bit wide string, UTF-8 encoded
   * @param aUTF16Count the number of 16-bit units that were returned
   *                    (optional; defaults to |nsnull|)
   * @return a new |PRUnichar| buffer you must free with |nsMemory::Free|.
   *         (UTF-16 encoded)
   */
PRUnichar* UTF8ToNewUnicode( const nsACString& aSource, PRUint32 *aUTF16Count = nsnull );

  /**
   * Copies |aLength| 16-bit code units from |aSource|, starting at offset
   * |aSrcOffset|, to the |PRUnichar| buffer |aDest|.
   *
   * After this operation |aDest| is not null terminated.
   *
   * @param aSource a UTF-16 string
   * @param aSrcOffset start offset in the source string
   * @param aDest a |PRUnichar| buffer
   * @param aLength the number of 16-bit code units to copy
   * @return pointer to destination buffer - identical to |aDest|
   */
PRUnichar* CopyUnicodeTo( const nsAString& aSource, PRUint32 aSrcOffset, PRUnichar* aDest, PRUint32 aLength );

  /**
   * Copies 16-bit characters between iterators |aSrcStart| and
   * |aSrcEnd| to the writable string |aDest|. Similar to the
   * |nsString::Mid| method.
   *
   * After this operation |aDest| is not null terminated.
*
   * @param aSrcStart start source iterator
   * @param aSrcEnd end source iterator
   * @param aDest destination for the copy
   */
void CopyUnicodeTo( const nsAString::const_iterator& aSrcStart, const nsAString::const_iterator& aSrcEnd, nsAString& aDest );

  /**
   * Appends 16-bit characters between iterators |aSrcStart| and
   * |aSrcEnd| to the writable string |aDest|.
   *
   * After this operation |aDest| is not null terminated.
   *
   * @param aSrcStart start source iterator
   * @param aSrcEnd end source iterator
   * @param aDest destination for the copy
   */
void AppendUnicodeTo( const nsAString::const_iterator& aSrcStart, const nsAString::const_iterator& aSrcEnd, nsAString& aDest );

  /**
   * Returns |PR_TRUE| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
   *
   * @param aString a 16-bit wide string to scan
   */
PRBool IsASCII( const nsAString& aString );

  /**
   * Returns |PR_TRUE| if |aString| contains only ASCII characters, that is, characters in the range (0x00, 0x7F).
   *
   * @param aString an 8-bit wide string to scan
   */
PRBool IsASCII( const nsACString& aString );

  /**
   * Returns |PR_TRUE| if |aString| is a valid UTF-8 string.
   * XXX This is neither bullet-proof nor an all-purpose UTF-8 validator.
   * It is mainly written to replace, and is roughly equivalent to,
   *
   *    str.Equals(NS_ConvertUTF16toUTF8(NS_ConvertUTF8toUTF16(str)))
   *
   * (see bug 191541)
   * As such, it does not check for non-UTF-8 7bit encodings such as
   * ISO-2022-JP and HZ.
* * It rejects sequences with the following errors: * * byte sequences that cannot be decoded into characters according to * UTF-8's rules (including cases where the input is part of a valid * UTF-8 sequence but starts or ends mid-character) * overlong sequences (i.e., cases where a character was encoded * non-canonically by using more bytes than necessary) * surrogate codepoints (i.e., the codepoints reserved for representing astral characters in UTF-16) * codepoints above the unicode range (i.e., outside the first 17 * planes; higher than U+10FFFF), in accordance with * http://tools.ietf.org/html/rfc3629 * when aRejectNonChar is true (the default), any codepoint whose low * 16 bits are 0xFFFE or 0xFFFF * * @param aString an 8-bit wide string to scan * @param aRejectNonChar a boolean to control the rejection of utf-8 * non characters */ PRBool IsUTF8( const nsACString& aString, PRBool aRejectNonChar = PR_TRUE ); PRBool ParseString(const nsACString& aAstring, char aDelimiter, nsTArray& aArray); /** * Converts case in place in the argument string. */ void ToUpperCase( nsACString& ); void ToLowerCase( nsACString& ); void ToUpperCase( nsCSubstring& ); void ToLowerCase( nsCSubstring& ); /** * Converts case from string aSource to aDest. */ void ToUpperCase( const nsACString& aSource, nsACString& aDest ); void ToLowerCase( const nsACString& aSource, nsACString& aDest ); /** * Finds the leftmost occurrence of |aPattern|, if any in the range |aSearchStart|..|aSearchEnd|. * * Returns |PR_TRUE| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to * point to the match. If no match was found, returns |PR_FALSE| and makes |aSearchStart == aSearchEnd|. * * Currently, this is equivalent to the O(m*n) implementation previously on |ns[C]String|. * If we need something faster, then we can implement that later. 
*/
PRBool FindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
PRBool FindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );

/*
 * Convenience overloads for callers that only want to know whether
 * |aPattern| occurs in |aSource|, not where.  They search all of
 * |aSource| (from BeginReading to EndReading) with the iterator-based
 * overload above and return its result.
 */
inline PRBool FindInReadable( const nsAString& aPattern, const nsAString& aSource, const nsStringComparator& compare = nsDefaultStringComparator() )
{
  nsAString::const_iterator searchBegin;
  nsAString::const_iterator searchEnd;

  aSource.BeginReading(searchBegin);
  aSource.EndReading(searchEnd);

  return FindInReadable(aPattern, searchBegin, searchEnd, compare);
}

inline PRBool FindInReadable( const nsACString& aPattern, const nsACString& aSource, const nsCStringComparator& compare = nsDefaultCStringComparator() )
{
  nsACString::const_iterator searchBegin;
  nsACString::const_iterator searchEnd;

  aSource.BeginReading(searchBegin);
  aSource.EndReading(searchEnd);

  return FindInReadable(aPattern, searchBegin, searchEnd, compare);
}

PRBool CaseInsensitiveFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator& );

  /**
   * Finds the rightmost occurrence of |aPattern|
   * Returns |PR_TRUE| if a match was found, and adjusts |aSearchStart| and |aSearchEnd| to
   * point to the match.  If no match was found, returns |PR_FALSE| and makes |aSearchStart == aSearchEnd|.
   *
   */
PRBool RFindInReadable( const nsAString& aPattern, nsAString::const_iterator&, nsAString::const_iterator&, const nsStringComparator& = nsDefaultStringComparator() );
PRBool RFindInReadable( const nsACString& aPattern, nsACString::const_iterator&, nsACString::const_iterator&, const nsCStringComparator& = nsDefaultCStringComparator() );

   /**
   * Finds the leftmost occurrence of |aChar|, if any in the range
   * |aSearchStart|..|aSearchEnd|.
   *
   * Returns |PR_TRUE| if a match was found, and adjusts |aSearchStart| to
   * point to the match.
If no match was found, returns |PR_FALSE| and * makes |aSearchStart == aSearchEnd|. */ PRBool FindCharInReadable( PRUnichar aChar, nsAString::const_iterator& aSearchStart, const nsAString::const_iterator& aSearchEnd ); PRBool FindCharInReadable( char aChar, nsACString::const_iterator& aSearchStart, const nsACString::const_iterator& aSearchEnd ); /** * Finds the number of occurences of |aChar| in the string |aStr| */ PRUint32 CountCharInReadable( const nsAString& aStr, PRUnichar aChar ); PRUint32 CountCharInReadable( const nsACString& aStr, char aChar ); PRBool StringBeginsWith( const nsAString& aSource, const nsAString& aSubstring, const nsStringComparator& aComparator = nsDefaultStringComparator() ); PRBool StringBeginsWith( const nsACString& aSource, const nsACString& aSubstring, const nsCStringComparator& aComparator = nsDefaultCStringComparator() ); PRBool StringEndsWith( const nsAString& aSource, const nsAString& aSubstring, const nsStringComparator& aComparator = nsDefaultStringComparator() ); PRBool StringEndsWith( const nsACString& aSource, const nsACString& aSubstring, const nsCStringComparator& aComparator = nsDefaultCStringComparator() ); const nsAFlatString& EmptyString(); const nsAFlatCString& EmptyCString(); /** * Compare a UTF-8 string to an UTF-16 string. * * Returns 0 if the strings are equal, -1 if aUTF8String is less * than aUTF16Count, and 1 in the reverse case. In case of fatal * error (eg the strings are not valid UTF8 and UTF16 respectively), * this method will return PR_INT32_MIN. */ PRInt32 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, const nsASingleFragmentString& aUTF16String); void AppendUCS4ToUTF16(const PRUint32 aSource, nsAString& aDest); template inline PRBool EnsureStringLength(T& aStr, PRUint32 aLen) { aStr.SetLength(aLen); return (aStr.Length() == aLen); } #endif // !defined(nsReadableUtils_h___)