prettify HTML-detection a bit; make it a little less likely to detect HTML.

Bug 144672, r=bbaetz, sr=darin
This commit is contained in:
bzbarsky%mit.edu 2003-04-04 22:46:50 +00:00
parent 36a26eca9c
commit 327a0205c8
2 changed files with 81 additions and 46 deletions

View File

@ -48,6 +48,8 @@
#include "nsIPref.h"
#include "imgILoader.h"
#include "nsCRT.h"
#include "nsIMIMEService.h"
#include "nsIViewSourceChannel.h"
@ -349,53 +351,10 @@ void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest)
return;
}
/*
* To prevent a possible attack, we will not consider this to be
* html content if it comes from the local file system and our prefs
* are set right
*/
if (AllowSniffing(aRequest)) {
// Now look for HTML
CBufDescriptor bufDesc((const char*)mBuffer, PR_TRUE, mBufferLen, mBufferLen);
nsCAutoString str(bufDesc);
PRInt32 offset;
offset = str.Find("<HTML", PR_TRUE);
if (offset < 0) {
offset = str.Find("<TITLE", PR_TRUE);
if (offset < 0) {
offset = str.Find("<FRAMESET", PR_TRUE);
if (offset < 0) {
offset = str.Find("<SCRIPT", PR_TRUE);
if (offset < 0) {
offset = str.Find("<BODY", PR_TRUE);
if (offset < 0) {
offset = str.Find("<TABLE", PR_TRUE);
if (offset < 0) {
offset = str.Find("<DIV", PR_TRUE);
if (offset < 0) {
offset = str.Find("<A HREF", PR_TRUE);
if (offset < 0) {
offset = str.Find("<APPLET", PR_TRUE);
if (offset < 0) {
offset = str.Find("<META", PR_TRUE);
}
}
}
}
}
}
}
}
}
if (offset >= 0) {
mContentType = TEXT_HTML;
return;
}
if (SniffForHTML(aRequest)) {
return;
}
// We don't know what this is yet. Before we just give up, try
// the URI from the request.
if (SniffURI(aRequest)) {
@ -419,6 +378,73 @@ PRBool nsUnknownDecoder::SniffForImageMimeType(nsIRequest* aRequest)
return PR_TRUE;
}
PRBool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest)
{
  // Security guard: content coming from the local file system must not
  // be treated as HTML when the prefs say so.  AllowSniffing() makes
  // that decision for us.
  if (!AllowSniffing(aRequest)) {
    return PR_FALSE;
  }

  // View the buffered data as an nsCAutoString; we only ever read from it.
  const CBufDescriptor bufDesc((const char*)mBuffer, PR_TRUE, mBufferLen, mBufferLen);
  const nsCAutoString str(bufDesc);

  nsCAutoString::const_iterator cur, last;
  str.BeginReading(cur);
  str.EndReading(last);

  // |tagOffset| always mirrors the position of |cur| as a plain index,
  // because Substring() wants an offset rather than an iterator.
  PRUint32 tagOffset = 0;

  // Step over any leading whitespace.
  for (; cur != last && nsCRT::IsAsciiSpace(*cur); ++cur) {
    ++tagOffset;
  }

  // The first non-whitespace character has to open a tag...
  if (cur == last) {
    return PR_FALSE;
  }
  if (*cur != '<') {
    return PR_FALSE;
  }

  // ...and there has to be at least one character after the '<'.
  ++cur;
  ++tagOffset;
  if (cur == last) {
    return PR_FALSE;
  }

  // "<!" or "<?" looks like SGML or XML; having got this far, we just
  // call it HTML.
  const char firstTagChar = *cur;
  if (firstTagChar == '!' || firstTagChar == '?') {
    mContentType = TEXT_HTML;
    return PR_TRUE;
  }

  // Tag-name prefixes that mark the data as HTML.  Each entry carries
  // its length (without the terminating null) so it can be compared
  // against an equally long slice of the data.
  struct TagName {
    const char* mName;
    PRUint32    mLength;
  };
#define HTML_TAG_ENTRY(_name) { _name, sizeof(_name) - 1 }
  static const TagName kHTMLTags[] = {
    HTML_TAG_ENTRY("html"),
    HTML_TAG_ENTRY("frameset"),
    HTML_TAG_ENTRY("body"),
    HTML_TAG_ENTRY("script"),
    HTML_TAG_ENTRY("a href"),
    HTML_TAG_ENTRY("img"),
    HTML_TAG_ENTRY("table"),
    HTML_TAG_ENTRY("title"),
    HTML_TAG_ENTRY("div"),
    HTML_TAG_ENTRY("applet"),
    HTML_TAG_ENTRY("meta")
  };
#undef HTML_TAG_ENTRY

  // Case-insensitive comparison of the text right after '<' against
  // every known tag name; the first hit settles the content type.
  nsCaseInsensitiveCStringComparator comparator;
  const PRUint32 tagCount = sizeof(kHTMLTags) / sizeof(kHTMLTags[0]);
  for (PRUint32 i = 0; i < tagCount; ++i) {
    const TagName& tag = kHTMLTags[i];
    if (Substring(str, tagOffset, tag.mLength).Equals(tag.mName, comparator)) {
      mContentType = TEXT_HTML;
      return PR_TRUE;
    }
  }

  return PR_FALSE;
}
PRBool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest)
{
// Just like HTML, this should be able to be shut off.

View File

@ -89,8 +89,17 @@ protected:
// Various sniffer functions. Returning PR_TRUE means that a type
// was determined; PR_FALSE means no luck.
PRBool SniffForImageMimeType(nsIRequest* aRequest);
// SniffForHTML skips leading whitespace in the buffered data and then
// looks for a '<' followed by '!', '?' or one of a fixed list of HTML
// tag names (compared case-insensitively).  On a match the content
// type is set to TEXT_HTML.  It reports PR_FALSE without looking at
// the data when AllowSniffing() refuses the request.
PRBool SniffForHTML(nsIRequest* aRequest);
PRBool SniffForXML(nsIRequest* aRequest);
// SniffURI guesses at the content type based on the URI (typically
// using the extension)
PRBool SniffURI(nsIRequest* aRequest);
// LastDitchSniff guesses at text/plain vs. application/octet-stream
// by just looking at whether the data contains null bytes, and
// maybe at the fraction of chars with high bit set. Use this only
// as a last-ditch attempt to decide a content type!
PRBool LastDitchSniff(nsIRequest* aRequest);
/**