mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-10 11:55:49 +00:00
prettify HTML-detection a bit; make it a little less likely to detect HTML.
Bug 144672, r=bbaetz, sr=darin
This commit is contained in:
parent
36a26eca9c
commit
327a0205c8
@ -48,6 +48,8 @@
|
||||
#include "nsIPref.h"
|
||||
#include "imgILoader.h"
|
||||
|
||||
#include "nsCRT.h"
|
||||
|
||||
#include "nsIMIMEService.h"
|
||||
|
||||
#include "nsIViewSourceChannel.h"
|
||||
@ -349,53 +351,10 @@ void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* To prevent a possible attack, we will not consider this to be
|
||||
* html content if it comes from the local file system and our prefs
|
||||
* are set right
|
||||
*/
|
||||
if (AllowSniffing(aRequest)) {
|
||||
// Now look for HTML
|
||||
CBufDescriptor bufDesc((const char*)mBuffer, PR_TRUE, mBufferLen, mBufferLen);
|
||||
nsCAutoString str(bufDesc);
|
||||
|
||||
PRInt32 offset;
|
||||
|
||||
offset = str.Find("<HTML", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<TITLE", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<FRAMESET", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<SCRIPT", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<BODY", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<TABLE", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<DIV", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<A HREF", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<APPLET", PR_TRUE);
|
||||
if (offset < 0) {
|
||||
offset = str.Find("<META", PR_TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (offset >= 0) {
|
||||
mContentType = TEXT_HTML;
|
||||
return;
|
||||
}
|
||||
if (SniffForHTML(aRequest)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// We don't know what this is yet. Before we just give up, try
|
||||
// the URI from the request.
|
||||
if (SniffURI(aRequest)) {
|
||||
@ -419,6 +378,73 @@ PRBool nsUnknownDecoder::SniffForImageMimeType(nsIRequest* aRequest)
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
PRBool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest)
{
  /*
   * HTML sniffing is a possible attack vector for content that comes from
   * the local file system, so defer to AllowSniffing() (which consults the
   * request and our prefs) and give up right away when it says no.
   */
  if (!AllowSniffing(aRequest)) {
    return PR_FALSE;
  }

  // Expose the buffered bytes as a string we only intend to read from.
  const CBufDescriptor descriptor((const char*)mBuffer, PR_TRUE, mBufferLen, mBufferLen);
  const nsCAutoString data(descriptor);

  nsCAutoString::const_iterator cursor, limit;
  data.BeginReading(cursor);
  data.EndReading(limit);

  // |tagOffset| shadows |cursor| as a plain index, because Substring()
  // below wants an offset rather than an iterator.
  PRUint32 tagOffset = 0;

  // Leading whitespace is not interesting; step over it.
  for (; cursor != limit && nsCRT::IsAsciiSpace(*cursor); ++cursor) {
    ++tagOffset;
  }

  // The first non-whitespace character has to open a tag...
  if (cursor == limit || *cursor != '<') {
    return PR_FALSE;
  }

  // ...and there has to be at least one more character after the '<'.
  ++cursor;
  ++tagOffset;
  if (cursor == limit) {
    return PR_FALSE;
  }

  PRBool looksLikeHTML = PR_FALSE;

  if (*cursor == '!' || *cursor == '?') {
    // "<!" or "<?": this smells like SGML or XML.  Having got this far,
    // we simply treat it as HTML.
    looksLikeHTML = PR_TRUE;
  } else {
    nsCaseInsensitiveCStringComparator ignoreCase;

    // True when the text right after the '<' starts with |_name|,
    // compared case-insensitively.
#define TAG_FOLLOWS(_name) \
    Substring(data, tagOffset, sizeof(_name) - 1).Equals(_name, ignoreCase)

    looksLikeHTML = TAG_FOLLOWS("html") ||
                    TAG_FOLLOWS("frameset") ||
                    TAG_FOLLOWS("body") ||
                    TAG_FOLLOWS("script") ||
                    TAG_FOLLOWS("a href") ||
                    TAG_FOLLOWS("img") ||
                    TAG_FOLLOWS("table") ||
                    TAG_FOLLOWS("title") ||
                    TAG_FOLLOWS("div") ||
                    TAG_FOLLOWS("applet") ||
                    TAG_FOLLOWS("meta");

#undef TAG_FOLLOWS
  }

  if (!looksLikeHTML) {
    return PR_FALSE;
  }

  mContentType = TEXT_HTML;
  return PR_TRUE;
}
|
||||
|
||||
PRBool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest)
|
||||
{
|
||||
// Just like HTML, this should be able to be shut off.
|
||||
|
@ -89,8 +89,17 @@ protected:
|
||||
// Various sniffer functions. Returning PR_TRUE means that a type
|
||||
// was determined; PR_FALSE means no luck.
|
||||
PRBool SniffForImageMimeType(nsIRequest* aRequest);
|
||||
PRBool SniffForHTML(nsIRequest* aRequest);
|
||||
PRBool SniffForXML(nsIRequest* aRequest);
|
||||
|
||||
// SniffURI guesses at the content type based on the URI (typically
|
||||
// using the extension)
|
||||
PRBool SniffURI(nsIRequest* aRequest);
|
||||
|
||||
// LastDitchSniff guesses at text/plain vs. application/octet-stream
|
||||
// by just looking at whether the data contains null bytes, and
|
||||
// maybe at the fraction of chars with high bit set. Use this only
|
||||
// as a last-ditch attempt to decide a content type!
|
||||
PRBool LastDitchSniff(nsIRequest* aRequest);
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user