Bug 638379 - Part 3: Remove workaround for unreliable inputErrorBehavior. r=hsivonen

2024-10-09 11:25:00 +00:00 · 2012-12-10 09:11:15 -05:00 · 2012-12-10 09:11:15 -05:00 · ca51d6a175
commit ca51d6a175
parent f11dbe9a73
8 changed files with 48 additions and 213 deletions
--- a/content/base/src/nsContentUtils.cpp
+++ b/content/base/src/nsContentUtils.cpp
@ -3594,27 +3594,10 @@ nsContentUtils::ConvertStringFromCharset(const nsACString& aCharset,

  const char* data = flatInput.get();
  aOutput.Truncate();
-  for (;;) {
-    int32_t srcLen = length;
-    int32_t dstLen = outLen;
-    rv = decoder->Convert(data, &srcLen, ustr, &dstLen);
-    // Convert will convert the input partially even if the status
-    // indicates a failure.
-    ustr[dstLen] = 0;
-    aOutput.Append(ustr, dstLen);
-    if (rv != NS_ERROR_ILLEGAL_INPUT) {
-      break;
-    }
-    // Emit a decode error manually because some decoders
-    // do not support kOnError_Recover (bug 638379)
-    if (srcLen == -1) {
-      decoder->Reset();
-    } else {
-      data += srcLen + 1;
-      length -= srcLen + 1;
-      aOutput.Append(static_cast<PRUnichar>(0xFFFD));
-    }
-  }
+  rv = decoder->Convert(data, &length, ustr, &outLen);
+  MOZ_ASSERT(rv != NS_ERROR_ILLEGAL_INPUT);
+  ustr[outLen] = 0;
+  aOutput.Append(ustr, outLen);

  nsMemory::Free(ustr);
  return rv;
--- a/content/base/src/nsEventSource.cpp
+++ b/content/base/src/nsEventSource.cpp
@ -301,7 +301,6 @@ nsEventSource::Init(nsIPrincipal* aPrincipal,

  rv = convManager->GetUnicodeDecoder("UTF-8", getter_AddRefs(mUnicodeDecoder));
  NS_ENSURE_SUCCESS(rv, rv);
-  mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);

  // the constructor should throw a SYNTAX_ERROR only if it fails resolving the
  // url parameter, so we don't care about the InitChannelAndRequestEventSource
@ -503,32 +502,17 @@ nsEventSource::StreamReaderFunc(nsIInputStream *aInputStream,

    thisObject->mLastConvertionResult =
      thisObject->mUnicodeDecoder->Convert(p, &srcCount, out, &outCount);
+    MOZ_ASSERT(thisObject->mLastConvertionResult != NS_ERROR_ILLEGAL_INPUT);

-    if (thisObject->mLastConvertionResult == NS_ERROR_ILLEGAL_INPUT) {
-      // There's an illegal byte in the input. It's now the responsibility
-      // of this calling code to output a U+FFFD REPLACEMENT CHARACTER, advance
-      // over the bad byte and reset the decoder.
-      rv = thisObject->ParseCharacter(REPLACEMENT_CHAR);
+    for (int32_t i = 0; i < outCount; ++i) {
+      rv = thisObject->ParseCharacter(out[i]);
      NS_ENSURE_SUCCESS(rv, rv);
-      p = p + srcCount + 1;
-      thisObject->mUnicodeDecoder->Reset();
-    } else {
-      for (int32_t i = 0; i < outCount; ++i) {
-        rv = thisObject->ParseCharacter(out[i]);
-        NS_ENSURE_SUCCESS(rv, rv);
-      }
-      p = p + srcCount;
    }
+    p = p + srcCount;
  } while (p < end &&
           thisObject->mLastConvertionResult != NS_PARTIAL_MORE_INPUT &&
           thisObject->mLastConvertionResult != NS_OK);

-  // check if the last byte was a bad one and
-  // clear the state since it was handled above.
-  if (thisObject->mLastConvertionResult == NS_ERROR_ILLEGAL_INPUT) {
-    thisObject->mLastConvertionResult = NS_OK;
-  }
-
  *aWriteCount = aCount;
  return NS_OK;
 }
--- a/content/base/src/nsScriptLoader.cpp
+++ b/content/base/src/nsScriptLoader.cpp
@ -1073,31 +1073,11 @@ nsScriptLoader::ConvertToUTF16(nsIChannel* aChannel, const uint8_t* aData,

  PRUnichar *ustr = aString.BeginWriting();

-  int32_t consumedLength = 0;
-  int32_t originalLength = aLength;
-  int32_t convertedLength = 0;
-  int32_t bufferLength = unicodeLength;
-  do {
-    rv = unicodeDecoder->Convert(reinterpret_cast<const char*>(aData),
-                                 (int32_t *) &aLength, ustr,
-                                 &unicodeLength);
-    if (NS_FAILED(rv)) {
-      // if we failed, we consume one byte, replace it with U+FFFD
-      // and try the conversion again.
-      ustr[unicodeLength++] = (PRUnichar)0xFFFD;
-      ustr += unicodeLength;
-
-      unicodeDecoder->Reset();
-    }
-    aData += ++aLength;
-    consumedLength += aLength;
-    aLength = originalLength - consumedLength;
-    convertedLength += unicodeLength;
-    unicodeLength = bufferLength - convertedLength;
-  } while (NS_FAILED(rv) &&
-           (originalLength > consumedLength) &&
-           (bufferLength > convertedLength));
-  aString.SetLength(convertedLength);
+  rv = unicodeDecoder->Convert(reinterpret_cast<const char*>(aData),
+                               (int32_t *) &aLength, ustr,
+                               &unicodeLength);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  aString.SetLength(unicodeLength);
  return rv;
 }

--- a/content/base/src/nsXMLHttpRequest.cpp
+++ b/content/base/src/nsXMLHttpRequest.cpp
@ -788,36 +788,15 @@ nsXMLHttpRequest::AppendToResponseText(const char * aSrcBuffer,

  // This code here is basically a copy of a similar thing in
  // nsScanner::Append(const char* aBuffer, uint32_t aLen).
-  // If we get illegal characters in the input we replace
-  // them and don't just fail.
-  do {
-    int32_t srclen = (int32_t)aSrcBufferLen;
-    int32_t destlen = (int32_t)destBufferLen;
-    rv = mDecoder->Convert(aSrcBuffer,
-                           &srclen,
-                           destBuffer,
-                           &destlen);
-    if (NS_FAILED(rv)) {
-      // We consume one byte, replace it with U+FFFD
-      // and try the conversion again.
+  int32_t srclen = (int32_t)aSrcBufferLen;
+  int32_t destlen = (int32_t)destBufferLen;
+  rv = mDecoder->Convert(aSrcBuffer,
+                         &srclen,
+                         destBuffer,
+                         &destlen);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));

-      destBuffer[destlen] = (PRUnichar)0xFFFD; // add replacement character
-      destlen++; // skip written replacement character
-      destBuffer += destlen;
-      destBufferLen -= destlen;
-
-      if (srclen < (int32_t)aSrcBufferLen) {
-        srclen++; // Consume the invalid character
-      }
-      aSrcBuffer += srclen;
-      aSrcBufferLen -= srclen;
-
-      mDecoder->Reset();
-    }
-
-    totalChars += destlen;
-
-  } while (NS_FAILED(rv) && aSrcBufferLen > 0);
+  totalChars += destlen;

  mResponseText.SetLength(totalChars);

--- a/dom/encoding/TextDecoder.cpp
+++ b/dom/encoding/TextDecoder.cpp
@ -59,7 +59,7 @@ TextDecoder::Decode(const ArrayBufferView* aView,
                    ErrorResult& aRv)
 {
  const char* data;
-  uint32_t length;
+  int32_t length;
  // If view is not specified, let view be a Uint8Array of length 0.
  if (!aView) {
    data = EmptyCString().BeginReading();
@ -87,27 +87,10 @@ TextDecoder::Decode(const ArrayBufferView* aView,
    return;
  }

-  for (;;) {
-    int32_t srcLen = length;
-    int32_t dstLen = outLen;
-    rv = mDecoder->Convert(data, &srcLen, buf, &dstLen);
-    // Convert will convert the input partially even if the status
-    // indicates a failure.
-    buf[dstLen] = 0;
-    aOutDecodedString.Append(buf, dstLen);
-    if (mFatal || rv != NS_ERROR_ILLEGAL_INPUT) {
-      break;
-    }
-    // Emit a decode error manually because some decoders
-    // do not support kOnError_Recover (bug 638379)
-    if (srcLen == -1) {
-      mDecoder->Reset();
-    } else {
-      data += srcLen + 1;
-      length -= srcLen + 1;
-      aOutDecodedString.Append(kReplacementChar);
-    }
-  }
+  rv = mDecoder->Convert(data, &length, buf, &outLen);
+  MOZ_ASSERT(mFatal || rv != NS_ERROR_ILLEGAL_INPUT);
+  buf[outLen] = 0;
+  aOutDecodedString.Append(buf, outLen);

  // If the internal streaming flag of the decoder object is not set,
  // then reset the encoding algorithm state to the default values
--- a/intl/uconv/src/nsConverterInputStream.cpp
+++ b/intl/uconv/src/nsConverterInputStream.cpp
@ -46,7 +46,11 @@ nsConverterInputStream::Init(nsIInputStream* aStream,

    mInput = aStream;
    mReplacementChar = aReplacementChar;
-    
+    if (!aReplacementChar ||
+        aReplacementChar != mConverter->GetCharacterForUnMapped()) {
+        mConverter->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
+    }
+
    return NS_OK;
 }

--- a/netwerk/base/src/nsUnicharStreamLoader.cpp
+++ b/netwerk/base/src/nsUnicharStreamLoader.cpp
@ -198,42 +198,23 @@ nsUnicharStreamLoader::WriteSegmentFun(nsIInputStream *,
  nsUnicharStreamLoader* self = static_cast<nsUnicharStreamLoader*>(aClosure);

  uint32_t haveRead = self->mBuffer.Length();
-  uint32_t consumed = 0;
  nsresult rv;
-  do {
-    int32_t srcLen = aCount - consumed;
-    int32_t dstLen;
-    self->mDecoder->GetMaxLength(aSegment + consumed, srcLen, &dstLen);
+  int32_t srcLen = aCount;
+  int32_t dstLen;
+  self->mDecoder->GetMaxLength(aSegment, srcLen, &dstLen);

-    uint32_t capacity = haveRead + dstLen;
-    if (!self->mBuffer.SetCapacity(capacity, fallible_t())) {
-      return NS_ERROR_OUT_OF_MEMORY;
-    }
+  uint32_t capacity = haveRead + dstLen;
+  if (!self->mBuffer.SetCapacity(capacity, fallible_t())) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }

-    rv = self->mDecoder->Convert(aSegment + consumed,
-                                 &srcLen,
-                                 self->mBuffer.BeginWriting() + haveRead,
-                                 &dstLen);
-    haveRead += dstLen;
-    // XXX if srcLen is negative, we want to drop the _first_ byte in
-    // the erroneous byte sequence and try again.  This is not quite
-    // possible right now -- see bug 160784
-    consumed += srcLen;
-    if (NS_FAILED(rv)) {
-      if (haveRead >= capacity) {
-        // Make room for writing the 0xFFFD below (bug 785753).
-        if (!self->mBuffer.SetCapacity(haveRead + 1, fallible_t())) {
-          return NS_ERROR_OUT_OF_MEMORY;
-        }
-      }
-      self->mBuffer.BeginWriting()[haveRead++] = 0xFFFD;
-      ++consumed;
-      // XXX this is needed to make sure we don't underrun our buffer;
-      // bug 160784 again
-      consumed = NS_MAX<uint32_t>(consumed, 0);
-      self->mDecoder->Reset();
-    }
-  } while (consumed < aCount);
+  rv = self->mDecoder->Convert(aSegment,
+                               &srcLen,
+                               self->mBuffer.BeginWriting() + haveRead,
+                               &dstLen);
+  MOZ_ASSERT(NS_SUCCEEDED(rv));
+  MOZ_ASSERT(srcLen == static_cast<int32_t>(aCount));
+  haveRead += dstLen;

  self->mBuffer.SetLength(haveRead);
  *aWriteCount = aCount;
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@ -302,7 +302,6 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
  }
  NS_ENSURE_SUCCESS(rv, rv);
-  mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
  return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
 }

@ -335,7 +334,6 @@ nsHtml5StreamParser::SetupDecodingFromBom(const char* aCharsetName, const char*
  NS_ENSURE_SUCCESS(rv, rv);
  rv = convManager->GetUnicodeDecoderRaw(aDecoderCharsetName, getter_AddRefs(mUnicodeDecoder));
  NS_ENSURE_SUCCESS(rv, rv);
-  mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
  mCharset.Assign(aCharsetName);
  mCharsetSource = kCharsetFromByteOrderMark;
  mFeedChardet = false;
@ -718,8 +716,6 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
    convManager->GetUnicodeDecoder(mCharset.get(),
                                   getter_AddRefs(mUnicodeDecoder));
    if (mUnicodeDecoder) {
-      mUnicodeDecoder->SetInputErrorBehavior(
-          nsIUnicodeDecoder::kOnError_Recover);
      mFeedChardet = false;
      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
      mMetaScanner = nullptr;
@ -749,8 +745,6 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
          countToSniffingLimit);
      mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
      if (mUnicodeDecoder) {
-        mUnicodeDecoder->SetInputErrorBehavior(
-            nsIUnicodeDecoder::kOnError_Recover);
        // meta scan successful
        mCharsetSource = kCharsetFromMetaPrescan;
        mFeedChardet = false;
@ -770,8 +764,6 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
    mMetaScanner->sniff(&readable, getter_AddRefs(mUnicodeDecoder), mCharset);
    if (mUnicodeDecoder) {
      // meta scan successful
-      mUnicodeDecoder->SetInputErrorBehavior(
-          nsIUnicodeDecoder::kOnError_Recover);
      mCharsetSource = kCharsetFromMetaPrescan;
      mFeedChardet = false;
      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
@ -824,6 +816,7 @@ nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment,
    // pair.

    nsresult convResult = mUnicodeDecoder->Convert((const char*)aFromSegment, &byteCount, mLastBuffer->getBuffer() + end, &utf16Count);
+    MOZ_ASSERT(NS_SUCCEEDED(convResult));

    end += utf16Count;
    mLastBuffer->setEnd(end);
@ -834,57 +827,7 @@ nsHtml5StreamParser::WriteStreamBytes(const uint8_t* aFromSegment,
        "The Unicode decoder wrote too much data.");
    NS_ASSERTION(byteCount >= -1, "The decoder consumed fewer than -1 bytes.");

-    if (NS_FAILED(convResult)) {
-      // Using the more generic NS_FAILED test above in case there are still
-      // decoders around that don't use NS_ERROR_ILLEGAL_INPUT properly.
-      NS_ASSERTION(convResult == NS_ERROR_ILLEGAL_INPUT,
-          "The decoder signaled an error other than NS_ERROR_ILLEGAL_INPUT.");
-
-      // There's an illegal byte in the input. It's now the responsibility
-      // of this calling code to output a U+FFFD REPLACEMENT CHARACTER and
-      // reset the decoder.
-
-      if (totalByteCount < (int32_t)aCount) {
-        // advance over the bad byte
-        ++totalByteCount;
-        ++aFromSegment;
-      } else {
-        NS_NOTREACHED("The decoder signaled an error but consumed all input.");
-        // Recovering from this situation in case there are still broken
-        // decoders, since nsScanner had recovery code, too.
-        totalByteCount = (int32_t)aCount;
-      }
-
-      // Emit the REPLACEMENT CHARACTER
-      if (end >= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
-        nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
-          nsHtml5OwningUTF16Buffer::FalliblyCreate(
-            NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
-        if (!newBuf) {
-          return NS_ERROR_OUT_OF_MEMORY;
-        }
-        mLastBuffer = (mLastBuffer->next = newBuf.forget());
-        end = 0;
-      }
-      mLastBuffer->getBuffer()[end] = 0xFFFD;
-      ++end;
-      mLastBuffer->setEnd(end);
-      if (end >= NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE) {
-        nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
-          nsHtml5OwningUTF16Buffer::FalliblyCreate(
-            NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
-        if (!newBuf) {
-          return NS_ERROR_OUT_OF_MEMORY;
-        }
-        mLastBuffer = (mLastBuffer->next = newBuf.forget());
-      }
-
-      mUnicodeDecoder->Reset();
-      if (totalByteCount == (int32_t)aCount) {
-        *aWriteCount = (uint32_t)totalByteCount;
-        return NS_OK;
-      }
-    } else if (convResult == NS_PARTIAL_MORE_OUTPUT) {
+    if (convResult == NS_PARTIAL_MORE_OUTPUT) {
      nsRefPtr<nsHtml5OwningUTF16Buffer> newBuf =
        nsHtml5OwningUTF16Buffer::FalliblyCreate(
          NS_HTML5_STREAM_PARSER_READ_BUFFER_SIZE);
@ -1011,9 +954,7 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
  rv = convManager->GetUnicodeDecoder(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
  // if we failed to get a decoder, there will be fallback, so don't propagate
  //  the error.
-  if (NS_SUCCEEDED(rv)) {
-    mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
-  } else {
+  if (NS_FAILED(rv)) {
    mCharsetSource = kCharsetFromWeakDocTypeDefault;
  }
  return NS_OK;