gecko-dev/webshell/tests/viewer/nsWebCrawler.h
1999-08-05 01:28:30 +00:00

180 lines
5.6 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public License
* Version 1.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is Mozilla Communicator client code.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1998
* Netscape Communications Corporation. All Rights Reserved.
*/
#ifndef nsWebCrawler_h___
#define nsWebCrawler_h___
#include "nsIBrowserWindow.h"
#include "nsIDocumentLoaderObserver.h"
#include "nsVoidArray.h"
#include "nsString.h"
class nsIAtom;
class nsIContent;
class nsIDocument;
class nsITimer;
class nsIURI;
class nsIPresShell;
class nsViewerApp;
class AtomHashTable;
class nsWebCrawler : public nsIDocumentLoaderObserver {
public:
// Make a new web-crawler for the given viewer. Note: the web
// crawler does not addref the viewer.
nsWebCrawler(nsViewerApp* aViewer);
// nsISupports
NS_DECL_ISUPPORTS
// nsIDocumentLoaderObserver
#ifdef NECKO
NS_IMETHOD OnStartDocumentLoad(nsIDocumentLoader* loader, nsIURI* aURL, const char* aCommand);
NS_IMETHOD OnEndDocumentLoad(nsIDocumentLoader* loader, nsIChannel* channel, nsresult aStatus, nsIDocumentLoaderObserver* aObserver);
NS_IMETHOD OnStartURLLoad(nsIDocumentLoader* loader, nsIChannel* channel, nsIContentViewer* aViewer);
NS_IMETHOD OnProgressURLLoad(nsIDocumentLoader* loader, nsIChannel* channel, PRUint32 aProgress, PRUint32 aProgressMax);
NS_IMETHOD OnStatusURLLoad(nsIDocumentLoader* loader, nsIChannel* channel, nsString& aMsg);
NS_IMETHOD OnEndURLLoad(nsIDocumentLoader* loader, nsIChannel* channel, nsresult aStatus);
NS_IMETHOD HandleUnknownContentType(nsIDocumentLoader* loader, nsIChannel* channel, const char *aContentType,const char *aCommand );
#else
NS_IMETHOD OnStartDocumentLoad(nsIDocumentLoader* loader, nsIURI* aURL,
const char* aCommand);
NS_IMETHOD OnEndDocumentLoad(nsIDocumentLoader* loader, nsIURI *aURL,
PRInt32 aStatus,
nsIDocumentLoaderObserver * aObserver);
NS_IMETHOD OnStartURLLoad(nsIDocumentLoader* loader, nsIURI* aURL,
const char* aContentType,
nsIContentViewer* aViewer);
NS_IMETHOD OnProgressURLLoad(nsIDocumentLoader* loader,
nsIURI* aURL, PRUint32 aProgress,
PRUint32 aProgressMax);
NS_IMETHOD OnStatusURLLoad(nsIDocumentLoader* loader, nsIURI* aURL,
nsString& aMsg);
NS_IMETHOD OnEndURLLoad(nsIDocumentLoader* loader, nsIURI* aURL,
PRInt32 aStatus);
NS_IMETHOD HandleUnknownContentType(nsIDocumentLoader* loader,
nsIURI *aURL,
const char *aContentType,
const char *aCommand);
#endif
// Add a url to load
void AddURL(const nsString& aURL);
// Add a domain that is safe to load url's from
void AddSafeDomain(const nsString& aDomain);
// Add a domain that must be avoided
void AddAvoidDomain(const nsString& aDomain);
void SetBrowserWindow(nsIBrowserWindow* aWindow);
void GetBrowserWindow(nsIBrowserWindow** aWindow);
// Set the delay (by default, the timer is set to one second)
void SetDelay(PRInt32 aSeconds) {
mDelay = aSeconds;
}
void EnableJiggleLayout() {
mJiggleLayout = PR_TRUE;
}
// If set to TRUE the loader will post an exit message on exit
void SetExitOnDone(PRBool aPostExit) {
mPostExit = aPostExit;
}
// Start loading documents
void Start();
// Enable the crawler; when a document contains links to other
// documents the crawler will go to them subject to the limitations
// on the total crawl count and the domain name checks.
void EnableCrawler();
void SetRecordFile(FILE* aFile) {
mRecord = aFile;
}
void SetMaxPages(PRInt32 aMax) {
mMaxPages = aMax;
}
void SetOutputDir(const nsString& aOutputDir);
void SetRegressionDir(const nsString& aOutputDir);
void SetEnableRegression(PRBool aSetting) {
mRegressing = aSetting;
}
void LoadNextURL();
void SetVerbose(PRBool aSetting) {
mVerbose = aSetting;
}
protected:
virtual ~nsWebCrawler();
void FindURLsIn(nsIDocument* aDocument, nsIContent* aNode);
void FindMoreURLs();
PRBool OkToLoad(const nsString& aURLSpec);
void RecordLoadedURL(const nsString& aURLSpec);
/** generate an output name from a URL */
FILE* GetOutputFile(nsIURI *aURL, nsString& aOutputName);
nsIPresShell* GetPresShell();
void PerformRegressionTest(const nsString& aOutputName);
nsIBrowserWindow* mBrowser;
nsViewerApp* mViewer;
nsITimer* mTimer;
FILE* mRecord;
nsIAtom* mLinkTag;
nsIAtom* mFrameTag;
nsIAtom* mIFrameTag;
nsIAtom* mHrefAttr;
nsIAtom* mSrcAttr;
nsIAtom* mBaseHrefAttr;
AtomHashTable* mVisited;
nsString mOutputDir;
PRBool mCrawl;
PRBool mJiggleLayout;
PRBool mPostExit;
PRInt32 mDelay;
PRInt32 mMaxPages;
nsString mCurrentURL;
PRBool mVerbose;
PRBool mRegressing;
nsString mRegressionDir;
nsVoidArray mPendingURLs;
nsVoidArray mSafeDomains;
nsVoidArray mAvoidDomains;
};
#endif /* nsWebCrawler_h___ */