stream decoder for determining content-type by sniffing the incoming data...

This commit is contained in:
rpotts%netscape.com 2000-01-08 06:17:40 +00:00
parent b301f353f5
commit 37a760ac57
2 changed files with 422 additions and 0 deletions

View File

@ -0,0 +1,347 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1999 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*/
#include "nsUnknownDecoder.h"
#include "nsIServiceManager.h"
#include "nsIStreamConverterService.h"
#include "nsIPipe.h"
#include "nsIBufferInputStream.h"
#include "nsIBufferOutputStream.h"
#include "nsMimeTypes.h"
// Size, in bytes, of the sniffer buffer allocated in OnStartRequest(). It is
// also the upper bound on how much of the stream is buffered before the
// content type is determined, and the size passed to NS_NewPipe() when the
// buffered data is replayed.
#define MAX_BUFFER_SIZE 1024
// Class ID of the stream converter service.
// NOTE(review): not referenced by any code visible in this file -- confirm
// whether it is still needed.
static NS_DEFINE_CID(kStreamConverterServiceCID, NS_STREAMCONVERTERSERVICE_CID);
// Construct a decoder with no sniffer buffer. The buffer itself is allocated
// later, in OnStartRequest().
nsUnknownDecoder::nsUnknownDecoder()
  : mBuffer(nsnull),
    mBufferLen(0)
{
  NS_INIT_ISUPPORTS();
}
// Release the sniffer buffer if it is still owned by this object (it is
// normally freed by FireListenerNotifications()).
nsUnknownDecoder::~nsUnknownDecoder()
{
  // delete [] on a null pointer is a no-op, so no explicit check is needed.
  delete [] mBuffer;
  mBuffer = nsnull;
}
// ----
//
// nsISupports implementation...
//
// ----
// Reference counting for nsUnknownDecoder is generated by these macros.
NS_IMPL_ADDREF(nsUnknownDecoder);
NS_IMPL_RELEASE(nsUnknownDecoder);
// Interface map: the interfaces listed for this object. Presumably this
// expands to the QueryInterface implementation -- see the macro definitions
// to confirm.
NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
NS_INTERFACE_MAP_ENTRY(nsIStreamObserver)
NS_INTERFACE_MAP_ENTRY(nsISupports)
NS_INTERFACE_MAP_END
// ----
//
// nsIStreamConverter methods...
//
// ----
// Synchronous conversion is not supported by this decoder: every argument is
// ignored and NS_ERROR_NOT_IMPLEMENTED is always returned. Use
// AsyncConvertData() and the stream listener callbacks instead.
NS_IMETHODIMP
nsUnknownDecoder::Convert(nsIInputStream *aFromStream,
const PRUnichar *aFromType,
const PRUnichar *aToType,
nsISupports *aCtxt,
nsIInputStream **aResultStream)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
// Register the listener that will receive the converted stream.
//
// aFromType / aToType - only checked for null by the assertion; otherwise
//                       unused here.
// aListener           - the downstream listener. It is stored in
//                       mNextListener and later receives the OnStartRequest,
//                       OnDataAvailable and OnStopRequest notifications.
// aCtxt               - unused here.
//
// Returns NS_OK when a listener was supplied, NS_ERROR_FAILURE otherwise.
NS_IMETHODIMP
nsUnknownDecoder::AsyncConvertData(const PRUnichar *aFromType,
                                   const PRUnichar *aToType,
                                   nsIStreamListener *aListener,
                                   nsISupports *aCtxt)
{
  // The message names this converter so a failing assertion points at the
  // right component.
  NS_ASSERTION(aListener && aFromType && aToType,
               "null pointer passed into unknown decoder");

  // Hook up our final listener. This is the object that the various On*()
  // calls are forwarded to once the content type is known.
  mNextListener = aListener;

  return (aListener) ? NS_OK : NS_ERROR_FAILURE;
}
// ----
//
// nsIStreamListener methods...
//
// ----
// Receive a chunk of stream data.
//
// While the content type is unknown, data is copied into the sniffer buffer
// (at most MAX_BUFFER_SIZE bytes in total). As soon as a chunk carries more
// data than fits in the buffer, the content type is determined, the buffered
// data is replayed to the next listener via FireListenerNotifications(), and
// the rest of the chunk is forwarded directly. Once the content type is
// known, every chunk is forwarded unchanged.
//
// aChannel      - channel the data belongs to; passed through.
// aCtxt         - caller context; passed through.
// aStream       - stream holding the new data.
// aSourceOffset - offset of the new data as reported by the caller.
// aCount        - number of bytes available in aStream.
//
// Returns NS_ERROR_FAILURE if no listener is registered,
// NS_ERROR_OUT_OF_MEMORY if the sniffer buffer was never allocated, the
// failure code of the stream read or of FireListenerNotifications(), or
// otherwise the result of the forwarded OnDataAvailable() call (NS_OK when
// nothing had to be forwarded).
NS_IMETHODIMP
nsUnknownDecoder::OnDataAvailable(nsIChannel *aChannel,
                                  nsISupports *aCtxt,
                                  nsIInputStream *aStream,
                                  PRUint32 aSourceOffset,
                                  PRUint32 aCount)
{
  // Initialized so that a call which neither buffers nor forwards anything
  // (content type known, aCount == 0) returns a defined value.
  nsresult rv = NS_OK;

  if (!mNextListener) return NS_ERROR_FAILURE;

  if (mContentType.IsEmpty()) {
    PRUint32 count;
    PRUint32 len = 0;

    // If the buffer has not been allocated by now, just fail...
    if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;

    //
    // Determine how much of the stream should be read to fill up the
    // sniffer buffer...
    //
    if (mBufferLen + aCount >= MAX_BUFFER_SIZE) {
      count = MAX_BUFFER_SIZE-mBufferLen;
    } else {
      count = aCount;
    }

    // Read the data into the buffer...
    rv = aStream->Read((mBuffer+mBufferLen), count, &len);
    if (NS_FAILED(rv)) return rv;

    mBufferLen += len;
    aCount     -= len;

    if (aCount) {
      //
      // FireListenerNotifications(...) replays the sniffer buffer to the
      // next listener as mBufferLen bytes starting at offset 0. The data
      // left in aStream follows directly after it, so it must be reported
      // at offset mBufferLen. This has to be captured here because
      // FireListenerNotifications(...) resets mBufferLen to 0.
      //
      aSourceOffset = mBufferLen;

      DetermineContentType();

      NS_ASSERTION(!mContentType.IsEmpty(),
                   "Content type should be known by now.");
      rv = FireListenerNotifications(aChannel, aCtxt);

      // Do not forward more data if the replay failed; the failure would
      // otherwise be overwritten by the result of the call below.
      if (NS_FAILED(rv)) return rv;
    }
  }

  if (aCount) {
    NS_ASSERTION(!mContentType.IsEmpty(),
                 "Content type should be known by now.");

    rv = mNextListener->OnDataAvailable(aChannel, aCtxt, aStream,
                                        aSourceOffset, aCount);
  }

  return rv;
}
// ----
//
// nsIStreamObserver methods...
//
// ----
// Start of the request: make sure the sniffer buffer exists.
//
// The notification is deliberately NOT forwarded to the next listener here;
// FireListenerNotifications() sends it once the content type is known.
//
// Returns NS_ERROR_FAILURE if no listener is registered,
// NS_ERROR_OUT_OF_MEMORY if the buffer could not be allocated, and NS_OK
// otherwise.
NS_IMETHODIMP
nsUnknownDecoder::OnStartRequest(nsIChannel *aChannel, nsISupports *aCtxt)
{
  if (!mNextListener) return NS_ERROR_FAILURE;

  // A buffer that already exists is kept as is.
  if (mBuffer) return NS_OK;

  // Allocate the sniffer buffer...
  mBuffer = new char[MAX_BUFFER_SIZE];
  if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;

  return NS_OK;
}
// End of the request.
//
// If the content type is still unknown at this point, the whole stream fit
// into the sniffer buffer: the type is determined now and the buffered data
// is replayed to the next listener. A failure while replaying replaces
// aStatus. OnStopRequest is then forwarded and the listener is released.
//
// Returns NS_ERROR_FAILURE if no listener is registered, otherwise the
// result of the next listener's OnStopRequest().
NS_IMETHODIMP
nsUnknownDecoder::OnStopRequest(nsIChannel *aChannel,
                                nsISupports *aCtxt,
                                nsresult aStatus,
                                const PRUnichar *aErrorMsg)
{
  if (!mNextListener) return NS_ERROR_FAILURE;

  if (mContentType.IsEmpty()) {
    // The total amount of data was less than the size of the sniffer
    // buffer, so the buffer has not been analyzed yet.
    DetermineContentType();

    NS_ASSERTION(!mContentType.IsEmpty(),
                 "Content type should be known by now.");

    const nsresult replayStatus = FireListenerNotifications(aChannel, aCtxt);
    if (NS_FAILED(replayStatus)) {
      aStatus = replayStatus;
    }
  }

  const nsresult stopStatus =
    mNextListener->OnStopRequest(aChannel, aCtxt, aStatus, aErrorMsg);

  // Drop the reference to the listener; the request is finished.
  mNextListener = 0;

  return stopStatus;
}
void nsUnknownDecoder::DetermineContentType()
{
PRUint32 i;
NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
if (!mContentType.IsEmpty()) return;
// First make sure that the buffer is ASCII...
for (i=0; i<mBufferLen; i++) {
char ch = mBuffer[i];
// Found a non-ASCII character...
if (!ch || (0x80 & ch)) {
break;
}
}
//
// The content is at least text/plain
//
if (i == mBufferLen) {
CBufDescriptor bufDesc((const char*)mBuffer, PR_TRUE, mBufferLen, mBufferLen);
nsCAutoString str(bufDesc);
PRInt32 offset;
//
// If the buffer begins with "#!" or "%!" then it is a script of some
// sort...
//
// This false match happened all the time... For example, CGI scripts
// written in sh or perl that emit HTML.
//
if (str.Equals("#!", PR_FALSE, 2) ||
str.Equals("%!", PR_FALSE, 2)) {
mContentType = TEXT_PLAIN;
}
//
// If the buffer begins with a mailbox delimiter then it is not HTML
//
else if (str.Equals("From ", PR_TRUE, 5) ||
str.Equals(">From ", PR_TRUE, 6)) {
mContentType = TEXT_PLAIN;
}
//
// If the buffer contains "common" HTML tags then lets call it HTML :-)
//
else {
offset = str.Find("<HTML", PR_TRUE);
if (offset < 0) {
offset = str.Find("<TITLE", PR_TRUE);
if (offset < 0) {
offset = str.Find("<FRAMESET", PR_TRUE);
}
}
if (offset >= 0) {
mContentType = TEXT_HTML;
} else {
mContentType = TEXT_PLAIN;
}
}
}
//
// If the buffer is not text, then just call it application/octet-stream
//
if (mContentType.IsEmpty()) {
mContentType = APPLICATION_OCTET_STREAM;
}
}
// Deliver the notifications that were held back while sniffing.
//
// Sets the detected content type on aChannel, sends OnStartRequest to the
// next listener and then replays the contents of the sniffer buffer through
// a pipe as a single OnDataAvailable call at offset 0. The sniffer buffer is
// released afterwards whether or not the replay succeeded.
//
// Returns NS_ERROR_FAILURE if no listener is registered or the pipe did not
// accept all of the buffered data, NS_ERROR_OUT_OF_MEMORY if there is no
// sniffer buffer, or otherwise the result of the last call that was made.
nsresult nsUnknownDecoder::FireListenerNotifications(nsIChannel *aChannel,
                                                     nsISupports *aCtxt)
{
  if (!mNextListener) return NS_ERROR_FAILURE;
  if (!mBuffer)       return NS_ERROR_OUT_OF_MEMORY;

  // Set the new content type on the channel...
  aChannel->SetContentType(mContentType);

  // Fire the OnStartRequest(...)
  nsresult status = mNextListener->OnStartRequest(aChannel, aCtxt);

  // Fire the first OnDataAvailable for the data that was read from the
  // stream into the sniffer buffer...
  if (NS_SUCCEEDED(status)) {
    nsCOMPtr<nsIBufferInputStream> pipeIn;
    nsCOMPtr<nsIBufferOutputStream> pipeOut;
    PRUint32 written = 0;

    // Create a pipe and fill it with the data from the sniffer buffer.
    status = NS_NewPipe(getter_AddRefs(pipeIn), getter_AddRefs(pipeOut), nsnull,
                        MAX_BUFFER_SIZE, MAX_BUFFER_SIZE);

    if (NS_SUCCEEDED(status)) {
      status = pipeOut->Write(mBuffer, mBufferLen, &written);
    }

    if (NS_SUCCEEDED(status)) {
      if (written != mBufferLen) {
        NS_ASSERTION(0, "Unable to write all the data into the pipe.");
        status = NS_ERROR_FAILURE;
      } else {
        status = mNextListener->OnDataAvailable(aChannel, aCtxt, pipeIn, 0,
                                                written);
      }
    }
  }

  // The buffered data is no longer needed, whatever happened above.
  delete [] mBuffer;
  mBuffer = nsnull;
  mBufferLen = 0;

  return status;
}

View File

@ -0,0 +1,75 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1999 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*/
#ifndef nsUnknownDecoder_h__
#define nsUnknownDecoder_h__
#include "nsIStreamConverter.h"
#include "nsIChannel.h"
#include "nsCOMPtr.h"
#include "nsString.h"
// Class ID initializer for nsUnknownDecoder
// (7d7008a0-c49a-11d3-9b22-0080c7cb1080).
#define NS_UNKNOWNDECODER_CID \
{ /* 7d7008a0-c49a-11d3-9b22-0080c7cb1080 */ \
0x7d7008a0, \
0xc49a, \
0x11d3, \
{0x9b, 0x22, 0x00, 0x80, 0xc7, 0xcb, 0x10, 0x80} \
}
// Stream converter that determines the content type of a stream by sniffing
// the incoming data.
//
// The first bytes of the stream are collected in a buffer. Once enough data
// has arrived (or the stream ends) the content type is guessed, set on the
// channel, and the held-back OnStartRequest / OnDataAvailable notifications
// are delivered to the listener registered through AsyncConvertData().
class nsUnknownDecoder : public nsIStreamConverter
{
public:
// nsISupports methods
NS_DECL_ISUPPORTS
// nsIStreamConverter methods
NS_DECL_NSISTREAMCONVERTER
// nsIStreamListener methods
NS_DECL_NSISTREAMLISTENER
// nsIStreamObserver methods
NS_DECL_NSISTREAMOBSERVER
nsUnknownDecoder();
protected:
virtual ~nsUnknownDecoder();
// Guesses the content type from the sniffer buffer and stores it in
// mContentType.
void DetermineContentType();
// Sets the content type on the channel, then sends OnStartRequest and the
// buffered data to mNextListener, and frees the sniffer buffer.
nsresult FireListenerNotifications(nsIChannel *aChannel, nsISupports *aCtxt);
protected:
// Listener that receives the stream once the content type is known; set by
// AsyncConvertData() and cleared in OnStopRequest().
nsCOMPtr<nsIStreamListener> mNextListener;
// Sniffer buffer; allocated in OnStartRequest(), freed by
// FireListenerNotifications() or the destructor. Null when not allocated.
char *mBuffer;
// Number of bytes currently stored in mBuffer.
PRUint32 mBufferLen;
// Detected content type; empty until DetermineContentType() has run.
nsCString mContentType;
};
#endif /* nsUnknownDecoder_h__ */