Bug 421576 - Unpaired surrogate handled wrongly (Acid3 #68). acid3++ r=dbaron

This commit is contained in:
Jeff Walden 2008-06-02 21:29:00 -04:00
parent b1b8f07670
commit becc8f7cf3
5 changed files with 298 additions and 96 deletions

View File

@ -244,10 +244,9 @@ nsMenuBarFrame::FindMenuWithShortcut(nsIDOMKeyEvent* aKeyEvent)
current->GetAttr(kNameSpaceID_None, nsGkAtoms::accesskey, shortcutKey);
if (!shortcutKey.IsEmpty()) {
ToLowerCase(shortcutKey);
nsAutoString::const_iterator start, end;
shortcutKey.BeginReading(start);
shortcutKey.EndReading(end);
PRUint32 ch = UTF16CharEnumerator::NextChar(start, end);
const PRUnichar* start = shortcutKey.BeginReading();
const PRUnichar* end = shortcutKey.EndReading();
PRUint32 ch = UTF16CharEnumerator::NextChar(&start, end);
PRUint32 index = accessKeys.IndexOf(ch);
if (index != accessKeys.NoIndex &&
(foundIndex == kNotFound || index < foundIndex)) {

View File

@ -335,13 +335,18 @@ public:
{
// Found a high surrogate followed by something other than
// a low surrogate. Flag this as an error and return the
// Unicode replacement character 0xFFFD.
// Unicode replacement character 0xFFFD. Note that the
// pointer to the next character points to the second 16-bit
// value, not beyond it: as per Unicode 5.0.0 Chapter 3 C10,
// only the first code unit of an illegal sequence must be
// treated as an illegally terminated code unit sequence
// (see also Chapter 3 D91, "isolated [not paired and ill-formed]
// UTF-16 code units in the range D800..DFFF are ill-formed").
NS_WARNING("got a High Surrogate but no low surrogate");
if (err)
*err = PR_TRUE;
*buffer = p;
*buffer = p - 1;
return 0xFFFD;
}
}
@ -364,91 +369,6 @@ public:
*err = PR_TRUE;
return 0;
}
#ifdef MOZILLA_INTERNAL_API
// Decodes one character from the UTF-16 range [iter, end) and returns it as
// a UCS-4 value, advancing |iter| past the code units that were read.
//
// If |err| is non-null it is set to PR_TRUE when the input was empty or
// malformed and to PR_FALSE when a character was decoded successfully.
//
// Returns 0 when |iter| is already at |end|, and the Unicode replacement
// character 0xFFFD for any unpaired surrogate.
static PRUint32 NextChar(nsAString::const_iterator& iter,
const nsAString::const_iterator& end,
PRBool *err = nsnull)
{
// Nothing left to read: report an error and return 0.
if (iter == end)
{
if (err)
*err = PR_TRUE;
return 0;
}
PRUnichar c = *iter++;
if (!IS_SURROGATE(c)) // U+0000 - U+D7FF,U+E000 - U+FFFF
{
// A single code unit that is its own code point.
if (err)
*err = PR_FALSE;
return c;
}
else if (NS_IS_HIGH_SURROGATE(c)) // U+D800 - U+DBFF
{
if (iter == end)
{
// Found a high surrogate at the end of the buffer. Flag this
// as an error and return the Unicode replacement
// character 0xFFFD.
NS_WARNING("Unexpected end of buffer after high surrogate");
if (err)
*err = PR_TRUE;
return 0xFFFD;
}
// D800- DBFF - High Surrogate
PRUnichar h = c;
// Read the code unit that follows the high surrogate; note that |iter|
// is advanced past it whether or not it turns out to be a low surrogate.
c = *iter++;
if (NS_IS_LOW_SURROGATE(c))
{
// DC00- DFFF - Low Surrogate
// N = (H - D800) *400 + 10000 + ( L - DC00 )  (all constants in hex)
PRUint32 ucs4 = SURROGATE_TO_UCS4(h, c);
if (err)
*err = PR_FALSE;
return ucs4;
}
else
{
// Found a high surrogate followed by something other than
// a low surrogate. Flag this as an error and return the
// Unicode replacement character 0xFFFD.
// NOTE(review): |iter| is not moved back here, so the code unit that
// followed the high surrogate has been consumed together with it and
// will not be seen by the next call.
NS_WARNING("got a High Surrogate but no low surrogate");
if (err)
*err = PR_TRUE;
return 0xFFFD;
}
}
else // U+DC00 - U+DFFF
{
// DC00- DFFF - Low Surrogate
// Found a low surrogate w/o a preceding high surrogate. Flag
// this as an error and return the Unicode replacement
// character 0xFFFD.
NS_WARNING("got a low Surrogate but no high surrogate");
if (err)
*err = PR_TRUE;
return 0xFFFD;
}
// Not reached: every branch of the if/else chain above returns.
if (err)
*err = PR_TRUE;
return 0;
}
#endif // MOZILLA_INTERNAL_API
};
@ -687,6 +607,15 @@ class ConvertUTF16toUTF8
*out++ = 0xBF;
*out++ = 0xBD;
// The pointer to the next character points to the second
// 16-bit value, not beyond it: as per Unicode 5.0.0
// Chapter 3 C10, only the first code unit of an illegal
// sequence must be treated as an illegally terminated
// code unit sequence (see also Chapter 3 D91, "isolated [not
// paired and ill-formed] UTF-16 code units in the range
// D800..DFFF are ill-formed").
p--;
NS_WARNING("got a High Surrogate but no low surrogate");
}
}
@ -768,6 +697,15 @@ class CalculateUTF8Size
// UTF-8)
mSize += 3;
// The next code unit is the second 16-bit value, not
// the one beyond it: as per Unicode 5.0.0 Chapter 3 C10,
// only the first code unit of an illegal sequence must
// be treated as an illegally terminated code unit
// sequence (see also Chapter 3 D91, "isolated [not paired and
// ill-formed] UTF-16 code units in the range D800..DFFF
// are ill-formed").
p--;
NS_WARNING("got a high Surrogate but no low surrogate");
}
}

View File

@ -88,6 +88,7 @@ CPPSRCS += \
TestAtoms.cpp \
TestAutoLock.cpp \
TestCRT.cpp \
TestEncoding.cpp \
TestPermanentAtoms.cpp \
TestPipes.cpp \
TestThreads.cpp \
@ -146,6 +147,7 @@ CPP_UNIT_TESTS += \
TestArray \
TestAutoLock \
TestCRT \
TestEncoding \
TestExpirationTracker \
TestPipes \
TestProxies \

View File

@ -0,0 +1,232 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Jeff Walden <jwalden+code@mit.edu>.
* Portions created by the Initial Developer are Copyright (C) 2008
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
#include "TestHarness.h"
/**
 * Checks that a well-formed surrogate pair is converted to the four-byte
 * UTF-8 encoding of its code point, and that CompareUTF8toUTF16 considers
 * the UTF-8 result equal to the UTF-16 input.
 *
 * Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the conversion buffer
 * could not be allocated, and NS_ERROR_FAILURE if any check fails.
 */
nsresult TestGoodSurrogatePair()
{
  // When this string is decoded, the surrogate pair is U+10302 and the rest of
  // the string is specified by indexes 2 onward.
  const PRUnichar goodPairData[] = { 0xD800, 0xDF02, 0x65, 0x78, 0x0 };
  nsDependentString goodPair16(goodPairData);

  PRUint32 byteCount = 0;
  char* goodPair8 = ToNewUTF8String(goodPair16, &byteCount);
  if (!goodPair8)
  {
    fail("out of memory creating goodPair8");
    return NS_ERROR_OUT_OF_MEMORY;
  }

  if (byteCount != 6)
  {
    // byteCount is a PRUint32, which need not be the same type as the
    // unsigned long that %lu reads from the varargs; convert explicitly.
    fail("wrong number of bytes; expected 6, got %lu",
         static_cast<unsigned long>(byteCount));
    NS_Free(goodPair8);
    return NS_ERROR_FAILURE;
  }

  // Bytes >= 0x80 are cast explicitly: they do not fit in a signed char, so
  // listing them bare in a brace initializer is a narrowing conversion.
  const char expected8[] =
    { static_cast<char>(0xF0), static_cast<char>(0x90),
      static_cast<char>(0x8C), static_cast<char>(0x82), 0x65, 0x78, 0x0 };
  // sizeof includes the terminator, so this also checks NUL-termination.
  if (0 != memcmp(expected8, goodPair8, sizeof(expected8)))
  {
    fail("wrong translation to UTF8");
    NS_Free(goodPair8);
    return NS_ERROR_FAILURE;
  }

  // This takes a different code path from the above, so test it to make sure
  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
  nsDependentCString expected(expected8);
  if (0 != CompareUTF8toUTF16(expected, goodPair16))
  {
    fail("bad comparison between UTF-8 and equivalent UTF-16");
    NS_Free(goodPair8);
    return NS_ERROR_FAILURE;
  }

  NS_Free(goodPair8);

  passed("TestGoodSurrogatePair");
  return NS_OK;
}
/**
 * Checks that a low surrogate followed by a high surrogate (i.e. a pair in
 * the wrong order) is converted to two U+FFFD replacement characters, and
 * that CompareUTF8toUTF16 considers that UTF-8 result equal to the malformed
 * UTF-16 input.
 *
 * Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the conversion buffer
 * could not be allocated, and NS_ERROR_FAILURE if any check fails.
 */
nsresult TestBackwardsSurrogatePair()
{
  // When this string is decoded, the two surrogates are wrongly ordered and
  // must each be interpreted as U+FFFD.
  const PRUnichar backwardsPairData[] = { 0xDDDD, 0xD863, 0x65, 0x78, 0x0 };
  nsDependentString backwardsPair16(backwardsPairData);

  PRUint32 byteCount = 0;
  char* backwardsPair8 = ToNewUTF8String(backwardsPair16, &byteCount);
  if (!backwardsPair8)
  {
    fail("out of memory creating backwardsPair8");
    return NS_ERROR_OUT_OF_MEMORY;
  }

  if (byteCount != 8)
  {
    // byteCount is a PRUint32, which need not be the same type as the
    // unsigned long that %lu reads from the varargs; convert explicitly.
    fail("wrong number of bytes; expected 8, got %lu",
         static_cast<unsigned long>(byteCount));
    NS_Free(backwardsPair8);
    return NS_ERROR_FAILURE;
  }

  // Bytes >= 0x80 are cast explicitly: they do not fit in a signed char, so
  // listing them bare in a brace initializer is a narrowing conversion.
  const char expected8[] =
    { static_cast<char>(0xEF), static_cast<char>(0xBF),
      static_cast<char>(0xBD), static_cast<char>(0xEF),
      static_cast<char>(0xBF), static_cast<char>(0xBD), 0x65, 0x78, 0x0 };
  // sizeof includes the terminator, so this also checks NUL-termination.
  if (0 != memcmp(expected8, backwardsPair8, sizeof(expected8)))
  {
    fail("wrong translation to UTF8");
    NS_Free(backwardsPair8);
    return NS_ERROR_FAILURE;
  }

  // This takes a different code path from the above, so test it to make sure
  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
  nsDependentCString expected(expected8);
  if (0 != CompareUTF8toUTF16(expected, backwardsPair16))
  {
    fail("bad comparison between UTF-8 and malformed but equivalent UTF-16");
    NS_Free(backwardsPair8);
    return NS_ERROR_FAILURE;
  }

  NS_Free(backwardsPair8);

  passed("TestBackwardsSurrogatePair");
  return NS_OK;
}
/**
 * Checks that a high surrogate not followed by a low surrogate is converted
 * to a single U+FFFD replacement character without swallowing the code unit
 * after it, and that CompareUTF8toUTF16 considers that UTF-8 result equal to
 * the malformed UTF-16 input.
 *
 * Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the conversion buffer
 * could not be allocated, and NS_ERROR_FAILURE if any check fails.
 */
nsresult TestMalformedUTF16OrphanHighSurrogate()
{
  // When this string is decoded, the high surrogate should be replaced and the
  // rest of the string is specified by indexes 1 onward.
  const PRUnichar highSurrogateData[] = { 0xD863, 0x74, 0x65, 0x78, 0x74, 0x0 };
  nsDependentString highSurrogate16(highSurrogateData);

  PRUint32 byteCount = 0;
  char* highSurrogate8 = ToNewUTF8String(highSurrogate16, &byteCount);
  if (!highSurrogate8)
  {
    fail("out of memory creating highSurrogate8");
    return NS_ERROR_OUT_OF_MEMORY;
  }

  if (byteCount != 7)
  {
    // byteCount is a PRUint32, which need not be the same type as the
    // unsigned long that %lu reads from the varargs; convert explicitly.
    fail("wrong number of bytes; expected 7, got %lu",
         static_cast<unsigned long>(byteCount));
    NS_Free(highSurrogate8);
    return NS_ERROR_FAILURE;
  }

  // Bytes >= 0x80 are cast explicitly: they do not fit in a signed char, so
  // listing them bare in a brace initializer is a narrowing conversion.
  const char expected8[] =
    { static_cast<char>(0xEF), static_cast<char>(0xBF),
      static_cast<char>(0xBD), 0x74, 0x65, 0x78, 0x74, 0x0 };
  // sizeof includes the terminator, so this also checks NUL-termination.
  if (0 != memcmp(expected8, highSurrogate8, sizeof(expected8)))
  {
    fail("wrong translation to UTF8");
    NS_Free(highSurrogate8);
    return NS_ERROR_FAILURE;
  }

  // This takes a different code path from the above, so test it to make sure
  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
  nsDependentCString expected(expected8);
  if (0 != CompareUTF8toUTF16(expected, highSurrogate16))
  {
    fail("bad comparison between UTF-8 and malformed but equivalent UTF-16");
    NS_Free(highSurrogate8);
    return NS_ERROR_FAILURE;
  }

  NS_Free(highSurrogate8);

  passed("TestMalformedUTF16OrphanHighSurrogate");
  return NS_OK;
}
/**
 * Checks that a low surrogate with no preceding high surrogate is converted
 * to a single U+FFFD replacement character, and that CompareUTF8toUTF16
 * considers that UTF-8 result equal to the malformed UTF-16 input.
 *
 * Returns NS_OK on success, NS_ERROR_OUT_OF_MEMORY if the conversion buffer
 * could not be allocated, and NS_ERROR_FAILURE if any check fails.
 */
nsresult TestMalformedUTF16OrphanLowSurrogate()
{
  // When this string is decoded, the low surrogate should be replaced and the
  // rest of the string is specified by indexes 1 onward.
  const PRUnichar lowSurrogateData[] = { 0xDDDD, 0x74, 0x65, 0x78, 0x74, 0x0 };
  nsDependentString lowSurrogate16(lowSurrogateData);

  PRUint32 byteCount = 0;
  char* lowSurrogate8 = ToNewUTF8String(lowSurrogate16, &byteCount);
  if (!lowSurrogate8)
  {
    fail("out of memory creating lowSurrogate8");
    return NS_ERROR_OUT_OF_MEMORY;
  }

  if (byteCount != 7)
  {
    // byteCount is a PRUint32, which need not be the same type as the
    // unsigned long that %lu reads from the varargs; convert explicitly.
    fail("wrong number of bytes; expected 7, got %lu",
         static_cast<unsigned long>(byteCount));
    NS_Free(lowSurrogate8);
    return NS_ERROR_FAILURE;
  }

  // Bytes >= 0x80 are cast explicitly: they do not fit in a signed char, so
  // listing them bare in a brace initializer is a narrowing conversion.
  const char expected8[] =
    { static_cast<char>(0xEF), static_cast<char>(0xBF),
      static_cast<char>(0xBD), 0x74, 0x65, 0x78, 0x74, 0x0 };
  // sizeof includes the terminator, so this also checks NUL-termination.
  if (0 != memcmp(expected8, lowSurrogate8, sizeof(expected8)))
  {
    fail("wrong translation to UTF8");
    NS_Free(lowSurrogate8);
    return NS_ERROR_FAILURE;
  }

  // This takes a different code path from the above, so test it to make sure
  // the UTF-16 enumeration remains in sync with the UTF-8 enumeration.
  nsDependentCString expected(expected8);
  if (0 != CompareUTF8toUTF16(expected, lowSurrogate16))
  {
    fail("bad comparison between UTF-8 and malformed but equivalent UTF-16");
    NS_Free(lowSurrogate8);
    return NS_ERROR_FAILURE;
  }

  NS_Free(lowSurrogate8);

  passed("TestMalformedUTF16OrphanLowSurrogate");
  return NS_OK;
}
int main(int argc, char** argv)
{
ScopedXPCOM xpcom("TestEncoding");
if (xpcom.failed())
return 1;
int rv = 0;
if (NS_FAILED(TestGoodSurrogatePair()))
rv = 1;
if (NS_FAILED(TestBackwardsSurrogatePair()))
rv = 1;
if (NS_FAILED(TestMalformedUTF16OrphanHighSurrogate()))
rv = 1;
if (NS_FAILED(TestMalformedUTF16OrphanLowSurrogate()))
rv = 1;
return rv;
}

View File

@ -37,18 +37,49 @@
/*
* Test harness for XPCOM objects, providing a scoped XPCOM initializer,
* nsCOMPtr, nsRefPtr, do_CreateInstance, and stdio.h/stdlib.h.
* nsCOMPtr, nsRefPtr, do_CreateInstance, do_GetService, ns(Auto|C|)String,
* and stdio.h/stdlib.h.
*/
#ifndef TestHarness_h__
#define TestHarness_h__
#include "nsIServiceManager.h"
#include "nsComponentManagerUtils.h"
#include "nsServiceManagerUtils.h"
#include "nsCOMPtr.h"
#include "nsAutoPtr.h"
#include "nsStringGlue.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
/**
* Prints the given failure message and arguments using printf, prepending
* "FAIL " for the benefit of the test harness and appending "\n" to eliminate
* having to type it at each call site.
*/
// Declared inline because this function is defined in a header: without it,
// two translation units that include the header would each emit a definition
// and fail to link.
//
// msg is a printf-style format string; the remaining arguments must match
// its conversion specifiers.
inline void fail(const char* msg, ...)
{
  va_list ap;

  printf("FAIL ");

  va_start(ap, msg);
  vprintf(msg, ap);
  va_end(ap);

  putchar('\n');
}
/**
* Prints the given string followed by " PASSED!\n", to be used at the end
* of a successful test function.
*/
// Declared inline because this function is defined in a header: without it,
// two translation units that include the header would each emit a definition
// and fail to link.
//
// test is the name of the test function that succeeded.
inline void passed(const char* test)
{
  printf("%s PASSED!\n", test);
}
class ScopedXPCOM
{
@ -62,7 +93,7 @@ class ScopedXPCOM
nsresult rv = NS_InitXPCOM2(&mServMgr, NULL, dirSvcProvider);
if (NS_FAILED(rv))
{
printf("FAIL NS_InitXPCOM2 returned failure code %x\n", rv);
fail("NS_InitXPCOM2 returned failure code 0x%x", rv);
mServMgr = NULL;
}
}
@ -75,7 +106,7 @@ class ScopedXPCOM
nsresult rv = NS_ShutdownXPCOM(NULL);
if (NS_FAILED(rv))
{
printf("FAIL XPCOM shutdown failed with code %x\n", rv);
fail("XPCOM shutdown failed with code 0x%x", rv);
exit(1);
}
}