Added an ASCII check to the US-ASCII optimization code in order to avoid generating illegal UTF-8.

bug 82591, r=ducarroz, sr=sspitzer.
2025-02-16 22:04:36 +00:00 · 2001-07-24 22:35:55 +00:00 · 2001-07-24 22:35:55 +00:00 · 2889456445
commit 2889456445
parent 775cb01c2d
1 changed files with 35 additions and 10 deletions
--- a/mailnews/mime/src/comi18n.cpp
+++ b/mailnews/mime/src/comi18n.cpp
@ -1564,23 +1564,48 @@ PRInt32 MIME_ConvertCharset(const PRBool autoDetection, const char* from_charset
      (!nsCRT::strcasecmp(from_charset,to_charset) ||
       (!nsCRT::strcasecmp(from_charset,"us-ascii") && !nsCRT::strcasecmp(to_charset,"UTF-8")) ||
       (!nsCRT::strcasecmp(from_charset,"UTF-8")    && !nsCRT::strcasecmp(to_charset,"us-ascii")))) {
-    if (NULL != numUnConverted) 
-      *numUnConverted = 0;

-    *outBuffer = (char *) PR_Malloc(inLength+1);
-    if (NULL != *outBuffer) {
-      nsCRT::memcpy(*outBuffer, inBuffer, inLength);
-      *outLength = inLength;
-      (*outBuffer)[inLength] = '\0';
-      return 0;
+    // verify that the data is really ASCII in order to avoid generating illegal UTF-8
+    PRInt32 len = inLength;
+    PRBool bASCII = PR_TRUE;
+    // check 4 bytes at a time
+    for (PRUint32* p = (PRUint32 *) inBuffer; len > 3; p++) {
+      if (*p & (PRUint32)0x80808080) {
+        bASCII = PR_FALSE;
+        break;
+      }
+      len -= 4;
+    }
+    // check rest of the data
+    if (bASCII) {
+      for (PRUint8* p = (PRUint8 *) (inBuffer + inLength - len); len; p++) {
+        if (*p & (PRUint8)0x80) {
+          bASCII = PR_FALSE;
+          break;
+        }
+        len--;
+      }
+    }
+    if (bASCII) {
+      if (NULL != numUnConverted) 
+        *numUnConverted = 0;
+
+      *outBuffer = (char *) PR_Malloc(inLength+1);
+      if (NULL != *outBuffer) {
+        nsCRT::memcpy(*outBuffer, inBuffer, inLength);
+        *outLength = inLength;
+        (*outBuffer)[inLength] = '\0';
+        return 0;
+      }
+      return -1;
    }
-    return -1;
  } 
  
  MimeCharsetConverterClass aMimeCharsetConverterClass;
  PRInt32 res;

-  res = aMimeCharsetConverterClass.Initialize(from_charset, to_charset, autoDetection, -1);
+  res = aMimeCharsetConverterClass.Initialize(nsCRT::strcasecmp(from_charset,"us-ascii") ? from_charset : "ISO-8859-1", 
+                                              to_charset, autoDetection, -1);

  if (res != -1) {
    res = aMimeCharsetConverterClass.Convert(inBuffer, inLength, outBuffer, outLength, NULL);