bug 241739 reuse HTML/SVG's <script> loading code for XUL <script>, thus

correctly handling non-ASCII characters r+sr=bz a=asa
2024-12-04 19:33:18 +00:00 · 2005-06-12 12:32:05 +00:00 · 2005-06-12 12:32:05 +00:00 · 55fdadbd11
commit 55fdadbd11
parent 24710753a4
3 changed files with 122 additions and 91 deletions
--- a/content/base/src/nsScriptLoader.cpp
+++ b/content/base/src/nsScriptLoader.cpp
@@ -808,6 +808,94 @@ DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsCString& oChars
  return !oCharset.IsEmpty();
 }

+// Decodes aData into aString as UTF-16 (bad bytes become U+FFFD).  Charset:
+// channel, then aHintCharset, BOM, aDocument's charset, ISO-8859-1.
+/* static */ nsresult
+nsScriptLoader::ConvertToUTF16(nsIChannel* aChannel, const PRUint8* aData,
+                               PRUint32 aLength, const nsString& aHintCharset,
+                               nsIDocument* aDocument, nsString& aString)
+{
+  if (!aLength) {
+    aString.Truncate();
+    return NS_OK;
+  }
+
+  nsCAutoString characterSet;
+  nsresult rv = NS_OK;
+  if (aChannel) {
+    rv = aChannel->GetContentCharset(characterSet);
+  }
+
+  if (!aHintCharset.IsEmpty() && (NS_FAILED(rv) || characterSet.IsEmpty())) {
+    // charset name is always ASCII.
+    LossyCopyUTF16toASCII(aHintCharset, characterSet);
+  }
+
+  // NB: also runs if the channel query failed, even with a hint charset.
+  if (NS_FAILED(rv) || characterSet.IsEmpty()) {
+    DetectByteOrderMark(aData, aLength, characterSet);
+  }
+
+  if (characterSet.IsEmpty()) {
+    // charset from document default
+    characterSet = aDocument->GetDocumentCharacterSet();
+  }
+
+  if (characterSet.IsEmpty()) {
+    // fall back to ISO-8859-1, see bug 118404
+    characterSet.AssignLiteral("ISO-8859-1");
+  }
+
+  nsCOMPtr<nsICharsetConverterManager> charsetConv =
+    do_GetService(kCharsetConverterManagerCID, &rv);
+  nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
+  if (NS_SUCCEEDED(rv) && charsetConv) {
+    rv = charsetConv->GetUnicodeDecoder(characterSet.get(),
+                                        getter_AddRefs(unicodeDecoder));
+    if (NS_FAILED(rv)) {
+      // fall back to ISO-8859-1 if charset is not supported. (bug 230104)
+      rv = charsetConv->GetUnicodeDecoderRaw("ISO-8859-1",
+                                             getter_AddRefs(unicodeDecoder));
+    }
+  }
+
+  // converts from the charset to unicode
+  if (NS_SUCCEEDED(rv)) {
+    PRInt32 unicodeLength = 0;
+    rv = unicodeDecoder->GetMaxLength(NS_REINTERPRET_CAST(const char*, aData),
+                                      aLength, &unicodeLength);
+    if (NS_SUCCEEDED(rv)) {
+      aString.SetLength(unicodeLength);
+      PRUnichar *ustr = aString.BeginWriting();
+      PRInt32 consumedLength = 0;
+      PRInt32 originalLength = aLength;
+      PRInt32 convertedLength = 0;
+      PRInt32 bufferLength = unicodeLength;
+      do {
+        rv = unicodeDecoder->Convert(NS_REINTERPRET_CAST(const char*, aData),
+                                     (PRInt32 *) &aLength, ustr,
+                                     &unicodeLength);
+        if (NS_FAILED(rv)) {
+          if (unicodeLength >= bufferLength - convertedLength) {
+            convertedLength += unicodeLength;
+            break;  // buffer full: writing U+FFFD would run past its end
+          }
+          // consume one byte, replace it with U+FFFD and try again.
+          ustr[unicodeLength++] = (PRUnichar)0xFFFD;
+          ustr += unicodeLength;
+          unicodeDecoder->Reset();
+        }
+        aData += ++aLength;
+        consumedLength += aLength;
+        aLength = originalLength - consumedLength;
+        convertedLength += unicodeLength;
+        unicodeLength = bufferLength - convertedLength;
+      } while (NS_FAILED(rv) && (originalLength > consumedLength) && (bufferLength > convertedLength));
+      aString.SetLength(convertedLength);
+    }
+  }
+  return rv;
+}

 NS_IMETHODIMP
 nsScriptLoader::OnStreamComplete(nsIStreamLoader* aLoader,
@@ -858,90 +946,13 @@ nsScriptLoader::OnStreamComplete(nsIStreamLoader* aLoader,
    }
  }

+  nsCOMPtr<nsIChannel> channel = do_QueryInterface(req);
  if (stringLen) {
-    nsCAutoString characterSet;
-    nsCOMPtr<nsIChannel> channel;
-
-    channel = do_QueryInterface(req);
-    if (channel) {
-      rv = channel->GetContentCharset(characterSet);
-    }
-
-    if (NS_FAILED(rv) || characterSet.IsEmpty()) {
-      // Check the charset attribute to determine script charset.
-      nsAutoString charset;
-      request->mElement->GetScriptCharset(charset);
-      if (!charset.IsEmpty()) {
-        // charset name is always ASCII.
-        LossyCopyUTF16toASCII(charset, characterSet);
-      }
-    }
-
-    if (NS_FAILED(rv) || characterSet.IsEmpty()) {
-      DetectByteOrderMark(string, stringLen, characterSet);
-    }
-
-    if (characterSet.IsEmpty()) {
-      // charset from document default
-      characterSet = mDocument->GetDocumentCharacterSet();
-    }
-
-    if (characterSet.IsEmpty()) {
-      // fall back to ISO-8859-1, see bug 118404
-      characterSet.AssignLiteral("ISO-8859-1");
-    }
-
-    nsCOMPtr<nsICharsetConverterManager> charsetConv =
-      do_GetService(kCharsetConverterManagerCID, &rv);
-
-    nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
-
-    if (NS_SUCCEEDED(rv) && charsetConv) {
-      rv = charsetConv->GetUnicodeDecoder(characterSet.get(),
-                                          getter_AddRefs(unicodeDecoder));
-      if (NS_FAILED(rv)) {
-        // fall back to ISO-8859-1 if charset is not supported. (bug 230104)
-        rv = charsetConv->GetUnicodeDecoderRaw("ISO-8859-1",
-                                               getter_AddRefs(unicodeDecoder));
-      }
-    }
-
-    // converts from the charset to unicode
-    if (NS_SUCCEEDED(rv)) {
-      PRInt32 unicodeLength = 0;
-
-      rv = unicodeDecoder->GetMaxLength(NS_REINTERPRET_CAST(const char*, string), stringLen, &unicodeLength);
-      if (NS_SUCCEEDED(rv)) {
-        nsString tempStr;
-        tempStr.SetLength(unicodeLength);
-        PRUnichar *ustr;
-        tempStr.BeginWriting(ustr);
-
-        PRInt32 consumedLength = 0;
-        PRInt32 originalLength = stringLen;
-        PRInt32 convertedLength = 0;
-        PRInt32 bufferLength = unicodeLength;
-        do {
-          rv = unicodeDecoder->Convert(NS_REINTERPRET_CAST(const char*, string), (PRInt32 *) &stringLen, ustr,
-                                       &unicodeLength);
-          if (NS_FAILED(rv)) {
-            // if we failed, we consume one byte, replace it with U+FFFD
-            // and try the conversion again.
-            ustr[unicodeLength++] = (PRUnichar)0xFFFD;
-            ustr += unicodeLength;
-
-            unicodeDecoder->Reset();
-          }
-          string += ++stringLen;
-          consumedLength += stringLen;
-          stringLen = originalLength - consumedLength;
-          convertedLength += unicodeLength;
-          unicodeLength = bufferLength - convertedLength;
-        } while (NS_FAILED(rv) && (originalLength > consumedLength) && (bufferLength > convertedLength));
-        tempStr.SetLength(convertedLength);
-        request->mScriptText = tempStr;
-      }
-    }
+    // Check the charset attribute to determine script charset.
+    nsAutoString hintCharset;
+    request->mElement->GetScriptCharset(hintCharset);
+    rv = ConvertToUTF16(channel, string, stringLen, hintCharset, mDocument,
+                        request->mScriptText);

    NS_ASSERTION(NS_SUCCEEDED(rv),
                 "Could not convert external JavaScript to Unicode!");
--- a/content/base/src/nsScriptLoader.h
+++ b/content/base/src/nsScriptLoader.h
@@ -65,6 +65,22 @@ public:
  NS_DECL_NSISCRIPTLOADER
  NS_DECL_NSISTREAMLOADEROBSERVER

+  /**
+   * Convert the given buffer to a UTF-16 string.
+   * @param aChannel     Channel corresponding to the data. May be null.
+   * @param aData        The data to convert
+   * @param aLength      Length of the data, in bytes
+   * @param aHintCharset Hint for the character set (e.g., from a charset
+   *                     attribute). May be the empty string.
+   * @param aDocument    Document the data is loaded for. Must not be null.
+   * @param aString      [out] Data as converted to UTF-16
+   * @return NS_OK on success, else the charset service/decoder failure code
+   */
+  static nsresult ConvertToUTF16(nsIChannel* aChannel, const PRUint8* aData,
+                                 PRUint32 aLength,
+                                 const nsString& aHintCharset,
+                                 nsIDocument* aDocument, nsString& aString);
+
 protected:
  PRBool InNonScriptingContainer(nsIScriptElement* aScriptElement);
  PRBool IsScriptEventHandler(nsIScriptElement* aScriptElement);
--- a/content/xul/document/src/nsXULDocument.cpp
+++ b/content/xul/document/src/nsXULDocument.cpp
@@ -3332,13 +3332,13 @@ nsXULDocument::OnStreamComplete(nsIStreamLoader* aLoader,
                                PRUint32 stringLen,
                                const PRUint8* string)
 {
+    nsCOMPtr<nsIRequest> request;
+    aLoader->GetRequest(getter_AddRefs(request));
+    nsCOMPtr<nsIChannel> channel = do_QueryInterface(request);
+
 #ifdef DEBUG
    // print a load error on bad status
    if (NS_FAILED(aStatus)) {
-        nsCOMPtr<nsIRequest> request;
-        aLoader->GetRequest(getter_AddRefs(request));
-        nsCOMPtr<nsIChannel> channel;
-        channel = do_QueryInterface(request);
        if (channel) {
            nsCOMPtr<nsIURI> uri;
            channel->GetURI(getter_AddRefs(uri));
@@ -3381,10 +3381,14 @@ nsXULDocument::OnStreamComplete(nsIStreamLoader* aLoader,
        // not to reach here.
        nsCOMPtr<nsIURI> uri = scriptProto->mSrcURI;

-        // XXX this seems broken - what if the script is non-ascii? (bug 241739)
-        nsString stringStr; stringStr.AssignWithConversion(NS_REINTERPRET_CAST(const char*, string), stringLen);
-        rv = scriptProto->Compile(stringStr.get(), stringLen, uri, 1, this,
-                                  mCurrentPrototype);
+        // XXX should also check nsIHttpChannel::requestSucceeded
+
+        nsString stringStr;
+        rv = nsScriptLoader::ConvertToUTF16(channel, string, stringLen,
+                                            EmptyString(), this, stringStr);
+        if (NS_SUCCEEDED(rv))
+          rv = scriptProto->Compile(stringStr.get(), stringStr.Length(), uri,
+                                    1, this, mCurrentPrototype);

        aStatus = rv;
        if (NS_SUCCEEDED(rv) && scriptProto->mJSObject) {