Bug 741776 - Treat JSON, WebVTT and AppCache manifests as UTF-8 when loaded as plain text. r=Ehsan

MozReview-Commit-ID: 5UvYqJVvX0r

--HG--
extra : rebase_source : 5a6f3dfd97fb06810fde9a4b8b650a7a922a7c20
This commit is contained in:
Henri Sivonen 2016-06-09 14:29:30 +03:00
parent a4563a219e
commit a36fff43c5
9 changed files with 39 additions and 6 deletions

View File

@ -3757,6 +3757,16 @@ nsContentUtils::IsPlainTextType(const nsACString& aContentType)
IsScriptType(aContentType);
}
bool
nsContentUtils::IsUtf8OnlyPlainTextType(const nsACString& aContentType)
{
  // Reports whether aContentType is one of the plain-text-rendered types
  // that must always be treated as UTF-8.
  //
  // NOTE: Every type accepted here must also be accepted by
  // IsPlainTextType(); keep this list a subset of that one.

  // AppCache manifest.
  if (aContentType.EqualsLiteral(TEXT_CACHE_MANIFEST)) {
    return true;
  }

  // JSON, under either of its MIME types.
  if (aContentType.EqualsLiteral(APPLICATION_JSON) ||
      aContentType.EqualsLiteral(TEXT_JSON)) {
    return true;
  }

  // WebVTT is the last remaining candidate.
  return aContentType.EqualsLiteral(TEXT_VTT);
}
bool
nsContentUtils::GetWrapperSafeScriptFilename(nsIDocument* aDocument,
nsIURI* aURI,

View File

@ -1026,15 +1026,21 @@ public:
static bool IsChildOfSameType(nsIDocument* aDoc);
/**
'* Returns true if the content-type is any of the supported script types.
* Returns true if the content-type is any of the supported script types.
*/
static bool IsScriptType(const nsACString& aContentType);
/**
'* Returns true if the content-type will be rendered as plain-text.
* Returns true if the content-type will be rendered as plain-text.
*/
static bool IsPlainTextType(const nsACString& aContentType);
/**
* Returns true iff the type is rendered as plain text and doesn't support
* non-UTF-8 encodings.
*
* The implementation matches the content type against TEXT_CACHE_MANIFEST,
* APPLICATION_JSON, TEXT_JSON and TEXT_VTT. That list must remain a subset
* of the types accepted by IsPlainTextType().
*
* @param aContentType the content type to test.
* @return true if aContentType equals one of the types listed above.
*/
static bool IsUtf8OnlyPlainTextType(const nsACString& aContentType);
/**
* Get the script file name to use when compiling the script
* referenced by aURI. In cases where there's no need for any extra

View File

@ -550,6 +550,9 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
return NS_ERROR_INVALID_ARG;
}
bool forceUtf8 = plainText &&
nsContentUtils::IsUtf8OnlyPlainTextType(contentType);
bool loadAsHtml5 = true;
if (!viewSource && xhtml) {
@ -669,7 +672,12 @@ nsHTMLDocument::StartDocumentLoad(const char* aCommand,
}
}
if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
if (forceUtf8) {
charsetSource = kCharsetFromUtf8OnlyMime;
charset.AssignLiteral("UTF-8");
parserCharsetSource = charsetSource;
parserCharset = charset;
} else if (!IsHTMLDocument() || !docShell) { // no docshell for text/html XHR
charsetSource = IsHTMLDocument() ? kCharsetFromFallback
: kCharsetFromDocTypeDefault;
charset.AssignLiteral("UTF-8");
@ -3618,7 +3626,7 @@ nsHTMLDocument::WillIgnoreCharsetOverride()
MOZ_ASSERT(mType == eXHTML);
return true;
}
if (mCharacterSetSource == kCharsetFromByteOrderMark) {
if (mCharacterSetSource >= kCharsetFromByteOrderMark) {
return true;
}
if (!EncodingUtils::IsAsciiCompatible(mCharacterSet)) {

View File

@ -0,0 +1 @@
<meta charset=utf-8><pre>ää

View File

@ -0,0 +1 @@
ää

View File

@ -27,6 +27,7 @@ include toblob-todataurl/reftest.list
== 610935.html 610935-ref.html
== 649134-1.html 649134-ref.html
skip-if(Android) == 649134-2.html 649134-2-ref.html
== 741776-1.vtt 741776-1-ref.html
== bug448564-1_malformed.html bug448564-1_well-formed.html
== bug448564-1_malformed.html bug448564-1_ideal.html

View File

@ -981,13 +981,15 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
}
nsCOMPtr<nsIWyciwygChannel> wyciwygChannel(do_QueryInterface(mRequest));
if (!wyciwygChannel) {
if (mCharsetSource < kCharsetFromUtf8OnlyMime && !wyciwygChannel) {
// we aren't ready to commit to an encoding yet
// leave converter uninstantiated for now
return NS_OK;
}
// We are reloading a document.open()ed doc.
// We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
// a browsing context. In the latter case, there's no need to remove the
// BOM manually here, because the UTF-8 decoder removes it.
mReparseForbidden = true;
mFeedChardet = false;

View File

@ -22,5 +22,6 @@
#define kCharsetFromParentForced 13 // propagates to child frames
#define kCharsetFromUserForced 14 // propagates to child frames
// nsHTMLDocument::WillIgnoreCharsetOverride() returns true when the
// document's charset source is kCharsetFromByteOrderMark or higher.
#define kCharsetFromByteOrderMark 15
// nsHtml5StreamParser::OnStartRequest() only defers committing to an
// encoding when the source is below kCharsetFromUtf8OnlyMime (and the
// request is not a wyciwyg channel).
#define kCharsetFromUtf8OnlyMime 16 // For JSON, WebVTT and such
#endif /* nsCharsetSource_h_ */

View File

@ -591,6 +591,9 @@ static const nsExtraMimeTypeEntry extraMimeEntries[] =
{ IMAGE_SVG_XML, "svg", "Scalable Vector Graphics" },
{ MESSAGE_RFC822, "eml", "RFC-822 data" },
{ TEXT_PLAIN, "txt,text", "Text File" },
{ APPLICATION_JSON, "json", "JavaScript Object Notation" },
{ TEXT_VTT, "vtt", "Web Video Text Tracks" },
{ TEXT_CACHE_MANIFEST, "appcache", "Application Cache Manifest" },
{ TEXT_HTML, "html,htm,shtml,ehtml", "HyperText Markup Language" },
{ "application/xhtml+xml", "xhtml,xht", "Extensible HyperText Markup Language" },
{ APPLICATION_MATHML_XML, "mml", "Mathematical Markup Language" },