diff --git a/htmlparser/src/nsHTMLContentSinkStream.cpp b/htmlparser/src/nsHTMLContentSinkStream.cpp index 275da8af11f6..f8d4f5e33ce4 100644 --- a/htmlparser/src/nsHTMLContentSinkStream.cpp +++ b/htmlparser/src/nsHTMLContentSinkStream.cpp @@ -42,6 +42,8 @@ #include "nsICharsetAlias.h" #include "nsIServiceManager.h" #include "nsICharsetConverterManager.h" +#include "nsIOutputStream.h" +#include "nsFileStream.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); @@ -51,7 +53,6 @@ static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID); static char* gHeaderComment = ""; static char* gDocTypeHeader = ""; const int gTabSize=2; -static char gBuffer[1024]; static const char* UnicodeToEntity(PRInt32 aCode); @@ -332,18 +333,23 @@ NS_IMPL_RELEASE(nsHTMLContentSinkStream) /** - * This method is defined in nsIParser. It is used to - * cause the COM-like construction of an nsParser. + * Create an new sink * - * @update gess 4/8/98 - * @param nsIParser** ptr to newly instantiated parser + * @update gpk 05/01/99 * @return NS_xxx error result */ NS_HTMLPARS nsresult NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, - PRBool aDoFormat, - PRBool aDoHeader) { - nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(aDoFormat,aDoHeader); + nsIOutputStream* aOutStream, + const nsString* aCharsetOverride, + PRBool aDoFormat, + PRBool aDoHeader) +{ + nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(aOutStream, + nsnull, + aCharsetOverride, + aDoFormat, + aDoHeader); if (nsnull == it) { return NS_ERROR_OUT_OF_MEMORY; } @@ -352,6 +358,32 @@ NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, } +/** + * Create an new sink + * + * @update gpk 05/01/99 + * @return NS_xxx error result + */ +NS_HTMLPARS nsresult +NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsString* aOutString, + PRBool aDoFormat, + PRBool aDoHeader) +{ + nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(nsnull, + aOutString, + nsnull, + aDoFormat, + aDoHeader); + if (nsnull == it) { + return NS_ERROR_OUT_OF_MEMORY; + } + + return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult); +} + + + /** * Inits the encoder instance variable for the sink based on the charset @@ -409,30 +441,12 @@ nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset) * @param * @return */ -nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHeader) { +nsHTMLContentSinkStream::nsHTMLContentSinkStream(nsIOutputStream* aOutStream, + nsString* aOutString, + const nsString* aCharsetOverride, + PRBool aDoFormat, + PRBool aDoHeader) { NS_INIT_REFCNT(); - mOutput=&cout; - mLowerCaseTags = PR_TRUE; - memset(mHTMLTagStack,0,sizeof(mHTMLTagStack)); - mHTMLStackPos = 0; - mColPos = 0; - mIndent = 0; - mDoFormat = aDoFormat; - mDoHeader = aDoHeader; - mBuffer = nsnull; - mBufferSize = 0; - mUnicodeEncoder = nsnull; -} - -/** - * Construct a content sink stream. - * @update gess7/7/98 - * @param - * @return - */ -nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoFormat,PRBool aDoHeader) { - NS_INIT_REFCNT(); - mOutput = &aStream; mLowerCaseTags = PR_TRUE; memset(mHTMLTagStack,0,sizeof(mHTMLTagStack)); mHTMLStackPos = 0; @@ -443,6 +457,10 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoForm mBuffer = nsnull; mBufferSize = 0; mUnicodeEncoder = nsnull; + mStream = aOutStream; + mString = aOutString; + if (aCharsetOverride != nsnull) + mCharsetOverride = *aCharsetOverride; } @@ -498,82 +516,119 @@ void nsHTMLContentSinkStream::EnsureBufferSize(PRInt32 aNewSize) } } -void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc) + + +void nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc) { - PRInt32 length = aSrc.Length(); - PRUnichar ch; - const char* entity = nsnull; - PRUint32 offset = 0; - PRUint32 addedLength = 0; - nsAutoString data; - - + + NS_ASSERTION(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized"); if (mUnicodeEncoder == nsnull) - InitEncoder(""); + return; - if (length > 0) +#define CH_NBSP 160 + + PRInt32 length = aSrc.Length(); + nsresult result; + + if (mUnicodeEncoder != nsnull && length > 0) { - // Step 1. Convert anything that maps to character entity to - // the entity value EnsureBufferSize(length); - for (PRInt32 i = 0; i < length; i++) - { - ch = aSrc.CharAt(i); - - entity = UnicodeToEntity(ch); - if (entity) - { - nsAutoString temp(entity); - - temp.ToLowerCase(); - data.Append('&'); - data.Append(temp); - data.Append(';'); - } - else - { - data.Append(ch); - } - } - - // Step 2. Run the result through the converter - length = data.Length(); - EnsureBufferSize(length); - PRInt32 bufferLength = mBufferSize; + mBufferLength = mBufferSize; mUnicodeEncoder->Reset(); - nsresult result = mUnicodeEncoder->Convert(data.GetUnicode(), &length, mBuffer, &bufferLength); - mBuffer[bufferLength] = 0; - PRInt32 temp = bufferLength; + result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &mBufferLength); + mBuffer[mBufferLength] = 0; + PRInt32 temp = mBufferLength; if (NS_SUCCEEDED(result)) result = mUnicodeEncoder->Finish(mBuffer,&temp); - } + + + for (PRInt32 i = 0; i < mBufferLength; i++) + { + if (mBuffer[i] == char(CH_NBSP)) + mBuffer[i] = ' '; + } + } + } +void nsHTMLContentSinkStream::Write(const nsString& aString) +{ + + // If a encoder is being used then convert first convert the input string + if (mUnicodeEncoder != nsnull) + { + EncodeToBuffer(aString); + if (mStream != nsnull) + { + nsOutputStream out(mStream); + out.write(mBuffer,mBufferLength); + } + if (mString != nsnull) + { + mString->Append(mBuffer); + } + } + else + { + if (mStream != nsnull) + { + nsOutputStream out(mStream); + const PRUnichar* unicode = aString.GetUnicode(); + PRUint32 length = aString.Length(); + out.write(unicode,length); + } + else + { + mString->Append(aString); + } + } +} + + +void nsHTMLContentSinkStream::Write(const char* aData) +{ + if (mStream) + { + nsOutputStream out(mStream); + out << aData; + } + if (mString) + { + mString->Append(aData); + } +} + +void nsHTMLContentSinkStream::Write(char aData) +{ + if (mStream) + { + nsOutputStream out(mStream); + out << aData; + } + if (mString) + { + mString->Append(aData); + } +} + + + + + + /** * - * @update gess7/7/98 + * @update 04/30/99 gpk * @param * @return */ nsHTMLContentSinkStream::~nsHTMLContentSinkStream() { NS_IF_RELEASE(mUnicodeEncoder); - mOutput=0; //we don't own the stream we're given; just forget it. } -/** - * - * @update gess7/22/98 - * @param - * @return - */ -NS_IMETHODIMP_(void) -nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){ - mOutput=&aStream; -} - /** * @@ -581,7 +636,7 @@ nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){ * @param * @return */ -void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream& aStream) { +void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode) { int theCount=aNode.GetAttributeCount(); if(theCount) { int i=0; @@ -600,14 +655,20 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream EnsureBufferSize(key.Length()); key.ToCString(mBuffer,mBufferSize); - - aStream << " " << mBuffer << char(kEqual); + + // send to ouput " [KEY]=" + Write(' '); + Write(mBuffer); + Write(char(kEqual)); mColPos += 1 + strlen(mBuffer) + 1; const nsString& value=aNode.GetValueAt(i); - UnicodeToHTMLString(value); + + // send to ouput "\"[VALUE]\"" + Write('\"'); + Write(value); + Write('\"'); - aStream << "\"" << mBuffer << "\""; mColPos += 1 + strlen(mBuffer) + 1; } } @@ -615,85 +676,31 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream } -/** - * - * @update gess7/5/98 - * @param - * @return - */ -static -void OpenTagWithAttributes(const char* theTag,const nsIParserNode& aNode,int tab,ostream& aStream,PRBool aNewline) { - int i=0; - for(i=0;iflush(); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_html) + AddEndTag(aNode); return NS_OK; } @@ -738,17 +740,15 @@ nsHTMLContentSinkStream::CloseHTML(const nsIParserNode& aNode){ /** * This method is used to open the only HEAD container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenHead(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_head) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_head) + AddStartTag(aNode); return NS_OK; } @@ -756,17 +756,15 @@ nsHTMLContentSinkStream::OpenHead(const nsIParserNode& aNode){ /** * This method is used to close the only HEAD container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseHead(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_head) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_head) + AddEndTag(aNode); return NS_OK; } @@ -774,17 +772,15 @@ nsHTMLContentSinkStream::CloseHead(const nsIParserNode& aNode){ /** * This method is used to open the main BODY container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenBody(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_body) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_body) + AddStartTag(aNode); return NS_OK; } @@ -792,17 +788,15 @@ nsHTMLContentSinkStream::OpenBody(const nsIParserNode& aNode){ /** * This method is used to close the main BODY container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseBody(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_body) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_body) + AddEndTag(aNode); return NS_OK; } @@ -816,11 +810,9 @@ nsHTMLContentSinkStream::CloseBody(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenForm(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_form) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_form) + AddStartTag(aNode); return NS_OK; } @@ -834,11 +826,9 @@ nsHTMLContentSinkStream::OpenForm(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseForm(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_form) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_form) + AddEndTag(aNode); return NS_OK; } @@ -851,11 +841,9 @@ nsHTMLContentSinkStream::CloseForm(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenMap(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_map) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_map) + AddStartTag(aNode); return NS_OK; } @@ -869,12 +857,10 @@ nsHTMLContentSinkStream::OpenMap(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseMap(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_map) - AddEndTag(aNode,*mOutput); - } - return NS_OK; + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_map) + AddEndTag(aNode); +return NS_OK; } @@ -887,11 +873,9 @@ nsHTMLContentSinkStream::CloseMap(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenFrameset(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_frameset) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_frameset) + AddStartTag(aNode); return NS_OK; } @@ -905,27 +889,26 @@ nsHTMLContentSinkStream::OpenFrameset(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseFrameset(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_frameset) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_frameset) + AddEndTag(aNode); return NS_OK; } -void nsHTMLContentSinkStream::AddIndent(ostream& aStream) +void nsHTMLContentSinkStream::AddIndent() { + nsString padding(" "); for (PRInt32 i = mIndent; --i >= 0; ) { - aStream << " "; + Write(padding); mColPos += 2; } } -void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& aStream) +void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); const nsString& name = aNode.GetText(); @@ -942,37 +925,41 @@ void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& a if (mColPos != 0 && BreakBeforeOpen(tag)) { - aStream << endl; + Write('\n'); mColPos = 0; } if (PermitWSBeforeOpen(tag)) - AddIndent(aStream); + AddIndent(); + + EnsureBufferSize(tagName.Length()); + tagName.ToCString(mBuffer,mBufferSize); + + Write(kLessThan); + Write(mBuffer); - tagName.ToCString(gBuffer,sizeof(gBuffer)-1); - aStream << (char)kLessThan << gBuffer; mColPos += 1 + tagName.Length(); if (tag == eHTMLTag_style) { - aStream << (char)kGreaterThan << endl; + Write(">\n"); const nsString& data = aNode.GetSkippedContent(); PRInt32 size = data.Length(); char* buffer = new char[size+1]; data.ToCString(buffer,size+1); - aStream << buffer; + Write(buffer); delete[] buffer; } else { - WriteAttributes(aNode,aStream); - aStream << (char)kGreaterThan; + WriteAttributes(aNode); + Write(kGreaterThan); mColPos += 1; } if (BreakAfterOpen(tag)) { - aStream << endl; + Write('\n'); mColPos = 0; } @@ -983,7 +970,7 @@ void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& a -void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aStream) +void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); // const nsString& name = aNode.GetText(); @@ -1010,19 +997,25 @@ void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aSt { if (mColPos != 0) { - aStream << endl; + Write('\n'); mColPos = 0; } - AddIndent(aStream); + AddIndent(); } - tagName.ToCString(gBuffer,sizeof(gBuffer)-1); - aStream << (char)kLessThan << (char)kForwardSlash << gBuffer << (char)kGreaterThan; - mColPos += 1 + 1 + strlen(gBuffer) + 1; + EnsureBufferSize(tagName.Length()); + tagName.ToCString(mBuffer,mBufferSize); + + Write(kLessThan); + Write(kForwardSlash); + Write(mBuffer); + Write(kGreaterThan); + + mColPos += 1 + 1 + strlen(mBuffer) + 1; if (BreakAfterClose(tag)) { - aStream << endl; + Write('\n'); mColPos = 0; } mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown; @@ -1040,7 +1033,7 @@ void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aSt * @return */ nsresult -nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ +nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode){ eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); eHTMLTags tag = eHTMLTag_unknown; if (mHTMLStackPos > 0) @@ -1062,14 +1055,16 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ type == eHTMLTag_meta || type == eHTMLTag_style) { - AddStartTag(aNode,aStream); + AddStartTag(aNode); mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown; } else if (type == eHTMLTag_entity) { const nsString& entity = aNode.GetText(); - UnicodeToHTMLString(entity); - aStream << '&' << mBuffer << ';'; + EncodeToBuffer(entity); + Write('&'); + Write(mBuffer); + Write(';'); mColPos += entity.Length() + 2; } else if (type == eHTMLTag_text) @@ -1077,8 +1072,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ const nsString& text = aNode.GetText(); if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE) { - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos += text.Length(); } else @@ -1092,8 +1086,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ // than the max then just add it if (mColPos + length < mMaxColumn) { - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos += text.Length(); } else @@ -1115,8 +1108,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ // if there is no break than just add it if (index == kNotFound) { - UnicodeToHTMLString(str); - aStream << mBuffer; + Write(str); mColPos += str.Length(); done = PR_TRUE; } @@ -1128,8 +1120,8 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ first.Truncate(index); - UnicodeToHTMLString(first); - aStream << mBuffer << endl; + Write(first); + Write('\n'); mColPos = 0; // cut the string from the beginning to the index @@ -1145,8 +1137,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ if ((mDoFormat == PR_FALSE) || preformatted || IgnoreWS(tag) == PR_FALSE) { const nsString& text = aNode.GetText(); - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos += text.Length(); } } @@ -1155,8 +1146,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ if ((mDoFormat == PR_FALSE) || preformatted) { const nsString& text = aNode.GetText(); - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos = 0; } } @@ -1215,25 +1205,26 @@ nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){ - if(mOutput) + + const nsString& name = aNode.GetText(); + if (name.Equals("XIF_DOC_INFO")) { - const nsString& name = aNode.GetText(); - if (name.Equals("XIF_DOC_INFO")) + PRInt32 count=aNode.GetAttributeCount(); + for(PRInt32 i=0;iSetCharsetOverride(aCharsetOverride); return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult); } + /** - * Construct a content sink stream. - * @update gpk02/03/99 - * @param - * @return + * This method creates a new sink, it sets the stream used + * for the sink to aStream + * + * @update gpk 04/30/99 */ -nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() { - NS_INIT_REFCNT(); - mOutput=&cout; - mColPos = 0; - mIndent = 0; - mDoOutput = PR_FALSE; - mBufferSize = 0; - mBuffer = nsnull; - mUnicodeEncoder = nsnull; +NS_HTMLPARS nsresult +NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsString* aString) { + + NS_ASSERTION(aString != nsnull, "a valid stream is required"); + nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull,aString); + if (nsnull == it) { + return NS_ERROR_OUT_OF_MEMORY; + } + nsString ucs2("ucs2"); + it->SetCharsetOverride(&ucs2); + return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult); } + + /** * Construct a content sink stream. * @update gpk02/03/99 * @param * @return */ -nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) { +nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString* aString) { NS_INIT_REFCNT(); - mOutput = &aStream; + mStream = aStream; mColPos = 0; mIndent = 0; mDoOutput = PR_FALSE; mBufferSize = 0; + mBufferLength = 0; mBuffer = nsnull; mUnicodeEncoder = nsnull; + mStream = aStream; + mString = aString; } + /** * * @update gpk02/03/99 @@ -200,7 +227,6 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) { * @return */ nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() { - mOutput=0; //we don't own the stream we're given; just forget it. delete [] mBuffer; NS_IF_RELEASE(mUnicodeEncoder); } @@ -208,50 +234,24 @@ nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() { /** * - * @update gpk02/03/99 + * @update gpk04/30/99 * @param * @return */ -NS_IMETHODIMP_(void) -nsHTMLToTXTSinkStream::SetOutputStream(ostream& aStream){ - mOutput=&aStream; + +NS_IMETHODIMP +nsHTMLToTXTSinkStream::SetCharsetOverride(const nsString* aCharset) +{ + if (aCharset) + { + mCharsetOverride = *aCharset; + InitEncoder(mCharsetOverride); + } + return NS_OK; } -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -static -void OpenTagWithAttributes(const char* theTag,const nsIParserNode& aNode,int tab,ostream& aStream,PRBool aNewline) { -} - - -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -static -void OpenTag(const char* theTag,int tab,ostream& aStream,PRBool aNewline) { -} - - -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -static -void CloseTag(const char* theTag,int tab,ostream& aStream) { -} - - /** * This method gets called by the parser when it encounters * a title tag and wants to set the document title in the sink. @@ -463,38 +463,38 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize) mBufferSize = 2*aNewSize+1; // make the twice as large mBuffer = new char[mBufferSize]; mBuffer[0] = 0; + mBufferLength = 0; } } -void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc) +void nsHTMLToTXTSinkStream::EncodeToBuffer(const nsString& aSrc) { - + + NS_ASSERTION(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized"); + if (mUnicodeEncoder == nsnull) + return; #define CH_NBSP 160 PRInt32 length = aSrc.Length(); nsresult result; - PRInt32 bufferLength; - if (mUnicodeEncoder == nsnull) - InitEncoder(""); - - if (length > 0) + if (mUnicodeEncoder != nsnull && length > 0) { EnsureBufferSize(length); - bufferLength = mBufferSize; + mBufferLength = mBufferSize; mUnicodeEncoder->Reset(); - result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &bufferLength); - mBuffer[bufferLength] = 0; - PRInt32 temp = bufferLength; + result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &mBufferLength); + mBuffer[mBufferLength] = 0; + PRInt32 temp = mBufferLength; if (NS_SUCCEEDED(result)) result = mUnicodeEncoder->Finish(mBuffer,&temp); - for (PRInt32 i = 0; i < bufferLength; i++) + for (PRInt32 i = 0; i < mBufferLength; i++) { if (mBuffer[i] == char(CH_NBSP)) mBuffer[i] = ' '; @@ -504,76 +504,47 @@ void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc) } -NS_IMETHODIMP -nsHTMLToTXTSinkStream::GetStringBuffer(nsString & aStrBuffer) -{ - aStrBuffer = mStrBuffer; - return NS_OK; -} - - /** - * This gets called by the parser when you want to add - * a leaf node to the current container in the content - * model. + * Write places the contents of aString into either the output stream + * or the output string. + * When going to the stream, all data is run through the encoder * - * @updated gpk 06/18/98 + * @updated gpk02/03/99 * @param * @return */ -nsresult -nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream) +void nsHTMLToTXTSinkStream::Write(const nsString& aString) { - eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); - - const nsString& text = aNode.GetText(); - if (mDoOutput == PR_FALSE) - return NS_OK; - - if (type == eHTMLTag_text) { - - UnicodeToTXTString(text); - aStream << mBuffer; - mStrBuffer.Append(mBuffer); - mColPos += text.Length(); - } - else if (type == eHTMLTag_entity) + // If a encoder is being used then convert first convert the input string + if (mUnicodeEncoder != nsnull) { - const nsString& text = aNode.GetText(); - UnicodeToTXTString(text); - PRInt32 entity = NS_EntityToUnicode(mBuffer); - if (entity < 256) + EncodeToBuffer(aString); + if (mStream != nsnull) { - char ch = (char)entity; - aStream << ch; - mColPos++; + nsOutputStream out(mStream); + out.write(mBuffer,mBufferLength); + } + if (mString != nsnull) + { + mString->Append(mBuffer); } } - else if (type == eHTMLTag_whitespace) + else { - if (PR_TRUE) + if (mStream != nsnull) { - const nsString& text = aNode.GetText(); - UnicodeToTXTString(text); - aStream << mBuffer; - mStrBuffer.Append(mBuffer); - mColPos += text.Length(); + nsOutputStream out(mStream); + const PRUnichar* unicode = aString.GetUnicode(); + PRUint32 length = aString.Length(); + out.write(unicode,length); + } + else + { + mString->Append(aString); } } - else if (type == eHTMLTag_br) - { - if (PR_TRUE) - { - aStream << endl; - mStrBuffer.Append("\n"); - mColPos += 1; - } - } - - - return NS_OK; } @@ -628,7 +599,10 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){ const nsString& value=aNode.GetValueAt(i); if (key.Equals("charset")) - InitEncoder(value); + { + if (mCharsetOverride.Length() == 0) + InitEncoder(value); + } } } @@ -657,9 +631,8 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){ { if (mColPos != 0) { - if (mOutput) - *mOutput << endl; - mStrBuffer.Append("\n"); + nsString temp("\n"); + Write(temp); mColPos = 0; } } @@ -677,11 +650,53 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){ - nsresult result = NS_OK; - if(mOutput) { - result = AddLeaf(aNode,*mOutput); + eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); + + const nsString& text = aNode.GetText(); + + if (mDoOutput == PR_FALSE) + return NS_OK; + + if (type == eHTMLTag_text) { + Write(text); + mColPos += text.Length(); + } + else if (type == eHTMLTag_entity) + { + const nsString& text = aNode.GetText(); + EncodeToBuffer(text); + PRUnichar entity = NS_EntityToUnicode(mBuffer); + nsString temp; + + temp.Append(entity); + Write(temp); + + mColPos++; } - return result; + else if (type == eHTMLTag_whitespace) + { + if (PR_TRUE) + { + const nsString& text = aNode.GetText(); + Write(text); + mColPos += text.Length(); + } + } + else if (type == eHTMLTag_br) + { + nsString temp("\n"); + Write(text); + mColPos++; + } + else if (type == eHTMLTag_newline) + { + nsString temp("\n"); + Write(text); + mColPos++; + } + + + return NS_OK; } diff --git a/htmlparser/src/nsHTMLToTXTSinkStream.h b/htmlparser/src/nsHTMLToTXTSinkStream.h index b3408af6250b..72c3331b3299 100644 --- a/htmlparser/src/nsHTMLToTXTSinkStream.h +++ b/htmlparser/src/nsHTMLToTXTSinkStream.h @@ -47,12 +47,9 @@ {0xa39c6bff, 0x15f0, 0x11d2, \ {0x80, 0x41, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4}} -#ifndef XP_MAC -class ostream; -#endif - class nsIUnicodeEncoder; +class nsIOutputStream; class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { public: @@ -61,8 +58,7 @@ class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { * Standard constructor * @update gpk02/03/99 */ - nsHTMLToTXTSinkStream(); - nsHTMLToTXTSinkStream(ostream& aStream); + nsHTMLToTXTSinkStream(nsIOutputStream* aOutStream, nsString* aOutString); /** * virtual destructor @@ -70,8 +66,8 @@ class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { */ virtual ~nsHTMLToTXTSinkStream(); - NS_IMETHOD_(void) SetOutputStream(ostream& aStream); - NS_IMETHOD GetStringBuffer(nsString & aStrBuffer); + NS_IMETHOD SetCharsetOverride(const nsString* aCharset); + // nsISupports NS_DECL_ISUPPORTS @@ -113,32 +109,47 @@ class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { NS_IMETHOD BeginContext(PRInt32 aPosition); NS_IMETHOD EndContext(PRInt32 aPosition); + + protected: - nsresult AddLeaf(const nsIParserNode& aNode, ostream& aStream); - void WriteAttributes(const nsIParserNode& aNode,ostream& aStream); void EnsureBufferSize(PRInt32 aNewSize); - void UnicodeToTXTString(const nsString& aSrc); + nsresult InitEncoder(const nsString& aCharset); + void Write(const nsString& aString); + void EncodeToBuffer(const nsString& aString); + + + protected: - ostream* mOutput; + nsIOutputStream* mStream; + nsString* mString; + PRInt32 mIndent; PRInt32 mColPos; PRBool mDoOutput; - char* mBuffer; - PRInt32 mBufferSize; - nsString mStrBuffer; + char* mBuffer; + PRInt32 mBufferLength; // The length of the data in the buffer + PRInt32 mBufferSize; // The actual size of the buffer, regardless of the data + nsIUnicodeEncoder* mUnicodeEncoder; + nsString mCharsetOverride; + }; extern NS_HTMLPARS nsresult -NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult); +NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsIOutputStream* aOutStream, + const nsString* aCharsetOverride=nsnull); +extern NS_HTMLPARS nsresult +NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsString* aOutString); #endif diff --git a/parser/htmlparser/src/nsHTMLContentSinkStream.cpp b/parser/htmlparser/src/nsHTMLContentSinkStream.cpp index 275da8af11f6..f8d4f5e33ce4 100644 --- a/parser/htmlparser/src/nsHTMLContentSinkStream.cpp +++ b/parser/htmlparser/src/nsHTMLContentSinkStream.cpp @@ -42,6 +42,8 @@ #include "nsICharsetAlias.h" #include "nsIServiceManager.h" #include "nsICharsetConverterManager.h" +#include "nsIOutputStream.h" +#include "nsFileStream.h" static NS_DEFINE_IID(kISupportsIID, NS_ISUPPORTS_IID); @@ -51,7 +53,6 @@ static NS_DEFINE_IID(kIHTMLContentSinkIID, NS_IHTML_CONTENT_SINK_IID); static char* gHeaderComment = ""; static char* gDocTypeHeader = ""; const int gTabSize=2; -static char gBuffer[1024]; static const char* UnicodeToEntity(PRInt32 aCode); @@ -332,18 +333,23 @@ NS_IMPL_RELEASE(nsHTMLContentSinkStream) /** - * This method is defined in nsIParser. It is used to - * cause the COM-like construction of an nsParser. + * Create an new sink * - * @update gess 4/8/98 - * @param nsIParser** ptr to newly instantiated parser + * @update gpk 05/01/99 * @return NS_xxx error result */ NS_HTMLPARS nsresult NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, - PRBool aDoFormat, - PRBool aDoHeader) { - nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(aDoFormat,aDoHeader); + nsIOutputStream* aOutStream, + const nsString* aCharsetOverride, + PRBool aDoFormat, + PRBool aDoHeader) +{ + nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(aOutStream, + nsnull, + aCharsetOverride, + aDoFormat, + aDoHeader); if (nsnull == it) { return NS_ERROR_OUT_OF_MEMORY; } @@ -352,6 +358,32 @@ NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, } +/** + * Create an new sink + * + * @update gpk 05/01/99 + * @return NS_xxx error result + */ +NS_HTMLPARS nsresult +NS_New_HTML_ContentSinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsString* aOutString, + PRBool aDoFormat, + PRBool aDoHeader) +{ + nsHTMLContentSinkStream* it = new nsHTMLContentSinkStream(nsnull, + aOutString, + nsnull, + aDoFormat, + aDoHeader); + if (nsnull == it) { + return NS_ERROR_OUT_OF_MEMORY; + } + + return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult); +} + + + /** * Inits the encoder instance variable for the sink based on the charset @@ -409,30 +441,12 @@ nsresult nsHTMLContentSinkStream::InitEncoder(const nsString& aCharset) * @param * @return */ -nsHTMLContentSinkStream::nsHTMLContentSinkStream(PRBool aDoFormat,PRBool aDoHeader) { +nsHTMLContentSinkStream::nsHTMLContentSinkStream(nsIOutputStream* aOutStream, + nsString* aOutString, + const nsString* aCharsetOverride, + PRBool aDoFormat, + PRBool aDoHeader) { NS_INIT_REFCNT(); - mOutput=&cout; - mLowerCaseTags = PR_TRUE; - memset(mHTMLTagStack,0,sizeof(mHTMLTagStack)); - mHTMLStackPos = 0; - mColPos = 0; - mIndent = 0; - mDoFormat = aDoFormat; - mDoHeader = aDoHeader; - mBuffer = nsnull; - mBufferSize = 0; - mUnicodeEncoder = nsnull; -} - -/** - * Construct a content sink stream. - * @update gess7/7/98 - * @param - * @return - */ -nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoFormat,PRBool aDoHeader) { - NS_INIT_REFCNT(); - mOutput = &aStream; mLowerCaseTags = PR_TRUE; memset(mHTMLTagStack,0,sizeof(mHTMLTagStack)); mHTMLStackPos = 0; @@ -443,6 +457,10 @@ nsHTMLContentSinkStream::nsHTMLContentSinkStream(ostream& aStream,PRBool aDoForm mBuffer = nsnull; mBufferSize = 0; mUnicodeEncoder = nsnull; + mStream = aOutStream; + mString = aOutString; + if (aCharsetOverride != nsnull) + mCharsetOverride = *aCharsetOverride; } @@ -498,82 +516,119 @@ void nsHTMLContentSinkStream::EnsureBufferSize(PRInt32 aNewSize) } } -void nsHTMLContentSinkStream::UnicodeToHTMLString(const nsString& aSrc) + + +void nsHTMLContentSinkStream::EncodeToBuffer(const nsString& aSrc) { - PRInt32 length = aSrc.Length(); - PRUnichar ch; - const char* entity = nsnull; - PRUint32 offset = 0; - PRUint32 addedLength = 0; - nsAutoString data; - - + + NS_ASSERTION(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized"); if (mUnicodeEncoder == nsnull) - InitEncoder(""); + return; - if (length > 0) +#define CH_NBSP 160 + + PRInt32 length = aSrc.Length(); + nsresult result; + + if (mUnicodeEncoder != nsnull && length > 0) { - // Step 1. Convert anything that maps to character entity to - // the entity value EnsureBufferSize(length); - for (PRInt32 i = 0; i < length; i++) - { - ch = aSrc.CharAt(i); - - entity = UnicodeToEntity(ch); - if (entity) - { - nsAutoString temp(entity); - - temp.ToLowerCase(); - data.Append('&'); - data.Append(temp); - data.Append(';'); - } - else - { - data.Append(ch); - } - } - - // Step 2. Run the result through the converter - length = data.Length(); - EnsureBufferSize(length); - PRInt32 bufferLength = mBufferSize; + mBufferLength = mBufferSize; mUnicodeEncoder->Reset(); - nsresult result = mUnicodeEncoder->Convert(data.GetUnicode(), &length, mBuffer, &bufferLength); - mBuffer[bufferLength] = 0; - PRInt32 temp = bufferLength; + result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &mBufferLength); + mBuffer[mBufferLength] = 0; + PRInt32 temp = mBufferLength; if (NS_SUCCEEDED(result)) result = mUnicodeEncoder->Finish(mBuffer,&temp); - } + + + for (PRInt32 i = 0; i < mBufferLength; i++) + { + if (mBuffer[i] == char(CH_NBSP)) + mBuffer[i] = ' '; + } + } + } +void nsHTMLContentSinkStream::Write(const nsString& aString) +{ + + // If a encoder is being used then convert first convert the input string + if (mUnicodeEncoder != nsnull) + { + EncodeToBuffer(aString); + if (mStream != nsnull) + { + nsOutputStream out(mStream); + out.write(mBuffer,mBufferLength); + } + if (mString != nsnull) + { + mString->Append(mBuffer); + } + } + else + { + if (mStream != nsnull) + { + nsOutputStream out(mStream); + const PRUnichar* unicode = aString.GetUnicode(); + PRUint32 length = aString.Length(); + out.write(unicode,length); + } + else + { + mString->Append(aString); + } + } +} + + +void nsHTMLContentSinkStream::Write(const char* aData) +{ + if (mStream) + { + nsOutputStream out(mStream); + out << aData; + } + if (mString) + { + mString->Append(aData); + } +} + +void nsHTMLContentSinkStream::Write(char aData) +{ + if (mStream) + { + nsOutputStream out(mStream); + out << aData; + } + if (mString) + { + mString->Append(aData); + } +} + + + + + + /** * - * @update gess7/7/98 + * @update 04/30/99 gpk * @param * @return */ nsHTMLContentSinkStream::~nsHTMLContentSinkStream() { NS_IF_RELEASE(mUnicodeEncoder); - mOutput=0; //we don't own the stream we're given; just forget it. } -/** - * - * @update gess7/22/98 - * @param - * @return - */ -NS_IMETHODIMP_(void) -nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){ - mOutput=&aStream; -} - /** * @@ -581,7 +636,7 @@ nsHTMLContentSinkStream::SetOutputStream(ostream& aStream){ * @param * @return */ -void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream& aStream) { +void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode) { int theCount=aNode.GetAttributeCount(); if(theCount) { int i=0; @@ -600,14 +655,20 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream EnsureBufferSize(key.Length()); key.ToCString(mBuffer,mBufferSize); - - aStream << " " << mBuffer << char(kEqual); + + // send to ouput " [KEY]=" + Write(' '); + Write(mBuffer); + Write(char(kEqual)); mColPos += 1 + strlen(mBuffer) + 1; const nsString& value=aNode.GetValueAt(i); - UnicodeToHTMLString(value); + + // send to ouput "\"[VALUE]\"" + Write('\"'); + Write(value); + Write('\"'); - aStream << "\"" << mBuffer << "\""; mColPos += 1 + strlen(mBuffer) + 1; } } @@ -615,85 +676,31 @@ void nsHTMLContentSinkStream::WriteAttributes(const nsIParserNode& aNode,ostream } -/** - * - * @update gess7/5/98 - * @param - * @return - */ -static -void OpenTagWithAttributes(const char* theTag,const nsIParserNode& aNode,int tab,ostream& aStream,PRBool aNewline) { - int i=0; - for(i=0;iflush(); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_html) + AddEndTag(aNode); return NS_OK; } @@ -738,17 +740,15 @@ nsHTMLContentSinkStream::CloseHTML(const nsIParserNode& aNode){ /** * This method is used to open the only HEAD container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenHead(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_head) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_head) + AddStartTag(aNode); return NS_OK; } @@ -756,17 +756,15 @@ nsHTMLContentSinkStream::OpenHead(const nsIParserNode& aNode){ /** * This method is used to close the only HEAD container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseHead(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_head) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_head) + AddEndTag(aNode); return NS_OK; } @@ -774,17 +772,15 @@ nsHTMLContentSinkStream::CloseHead(const nsIParserNode& aNode){ /** * This method is used to open the main BODY container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenBody(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_body) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_body) + AddStartTag(aNode); return NS_OK; } @@ -792,17 +788,15 @@ nsHTMLContentSinkStream::OpenBody(const nsIParserNode& aNode){ /** * This method is used to close the main BODY container. * - * @update 07/12/98 gpk + * @update 04/30/99 gpk * @param nsIParserNode reference to parser node interface * @return PR_TRUE if successful. */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseBody(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_body) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_body) + AddEndTag(aNode); return NS_OK; } @@ -816,11 +810,9 @@ nsHTMLContentSinkStream::CloseBody(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenForm(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_form) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_form) + AddStartTag(aNode); return NS_OK; } @@ -834,11 +826,9 @@ nsHTMLContentSinkStream::OpenForm(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseForm(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_form) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_form) + AddEndTag(aNode); return NS_OK; } @@ -851,11 +841,9 @@ nsHTMLContentSinkStream::CloseForm(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenMap(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_map) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_map) + AddStartTag(aNode); return NS_OK; } @@ -869,12 +857,10 @@ nsHTMLContentSinkStream::OpenMap(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseMap(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_map) - AddEndTag(aNode,*mOutput); - } - return NS_OK; + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_map) + AddEndTag(aNode); +return NS_OK; } @@ -887,11 +873,9 @@ nsHTMLContentSinkStream::CloseMap(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenFrameset(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_frameset) - AddStartTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_frameset) + AddStartTag(aNode); return NS_OK; } @@ -905,27 +889,26 @@ nsHTMLContentSinkStream::OpenFrameset(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::CloseFrameset(const nsIParserNode& aNode){ - if(mOutput) { - eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); - if (tag == eHTMLTag_frameset) - AddEndTag(aNode,*mOutput); - } + eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); + if (tag == eHTMLTag_frameset) + AddEndTag(aNode); return NS_OK; } -void nsHTMLContentSinkStream::AddIndent(ostream& aStream) +void nsHTMLContentSinkStream::AddIndent() { + nsString padding(" "); for (PRInt32 i = mIndent; --i >= 0; ) { - aStream << " "; + Write(padding); mColPos += 2; } } -void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& aStream) +void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); const nsString& name = aNode.GetText(); @@ -942,37 +925,41 @@ void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& a if (mColPos != 0 && BreakBeforeOpen(tag)) { - aStream << endl; + Write('\n'); mColPos = 0; } if (PermitWSBeforeOpen(tag)) - AddIndent(aStream); + AddIndent(); + + EnsureBufferSize(tagName.Length()); + tagName.ToCString(mBuffer,mBufferSize); + + Write(kLessThan); + Write(mBuffer); - tagName.ToCString(gBuffer,sizeof(gBuffer)-1); - aStream << (char)kLessThan << gBuffer; mColPos += 1 + tagName.Length(); if (tag == eHTMLTag_style) { - aStream << (char)kGreaterThan << endl; + Write(">\n"); const nsString& data = aNode.GetSkippedContent(); PRInt32 size = data.Length(); char* buffer = new char[size+1]; data.ToCString(buffer,size+1); - aStream << buffer; + Write(buffer); delete[] buffer; } else { - WriteAttributes(aNode,aStream); - aStream << (char)kGreaterThan; + WriteAttributes(aNode); + Write(kGreaterThan); mColPos += 1; } if (BreakAfterOpen(tag)) { - aStream << endl; + Write('\n'); mColPos = 0; } @@ -983,7 +970,7 @@ void nsHTMLContentSinkStream::AddStartTag(const nsIParserNode& aNode, ostream& a -void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aStream) +void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode) { eHTMLTags tag = (eHTMLTags)aNode.GetNodeType(); // const nsString& name = aNode.GetText(); @@ -1010,19 +997,25 @@ void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aSt { if (mColPos != 0) { - aStream << endl; + Write('\n'); mColPos = 0; } - AddIndent(aStream); + AddIndent(); } - tagName.ToCString(gBuffer,sizeof(gBuffer)-1); - aStream << (char)kLessThan << (char)kForwardSlash << gBuffer << (char)kGreaterThan; - mColPos += 1 + 1 + strlen(gBuffer) + 1; + EnsureBufferSize(tagName.Length()); + tagName.ToCString(mBuffer,mBufferSize); + + Write(kLessThan); + Write(kForwardSlash); + Write(mBuffer); + Write(kGreaterThan); + + mColPos += 1 + 1 + strlen(mBuffer) + 1; if (BreakAfterClose(tag)) { - aStream << endl; + Write('\n'); mColPos = 0; } mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown; @@ -1040,7 +1033,7 @@ void nsHTMLContentSinkStream::AddEndTag(const nsIParserNode& aNode, ostream& aSt * @return */ nsresult -nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ +nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode){ eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); eHTMLTags tag = eHTMLTag_unknown; if (mHTMLStackPos > 0) @@ -1062,14 +1055,16 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ type == eHTMLTag_meta || type == eHTMLTag_style) { - AddStartTag(aNode,aStream); + AddStartTag(aNode); mHTMLTagStack[--mHTMLStackPos] = eHTMLTag_unknown; } else if (type == eHTMLTag_entity) { const nsString& entity = aNode.GetText(); - UnicodeToHTMLString(entity); - aStream << '&' << mBuffer << ';'; + EncodeToBuffer(entity); + Write('&'); + Write(mBuffer); + Write(';'); mColPos += entity.Length() + 2; } else if (type == eHTMLTag_text) @@ -1077,8 +1072,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ const nsString& text = aNode.GetText(); if ((mDoFormat == PR_FALSE) || preformatted == PR_TRUE) { - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos += text.Length(); } else @@ -1092,8 +1086,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ // than the max then just add it if (mColPos + length < mMaxColumn) { - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos += text.Length(); } else @@ -1115,8 +1108,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ // if there is no break than just add it if (index == kNotFound) { - UnicodeToHTMLString(str); - aStream << mBuffer; + Write(str); mColPos += str.Length(); done = PR_TRUE; } @@ -1128,8 +1120,8 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ first.Truncate(index); - UnicodeToHTMLString(first); - aStream << mBuffer << endl; + Write(first); + Write('\n'); mColPos = 0; // cut the string from the beginning to the index @@ -1145,8 +1137,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ if ((mDoFormat == PR_FALSE) || preformatted || IgnoreWS(tag) == PR_FALSE) { const nsString& text = aNode.GetText(); - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos += text.Length(); } } @@ -1155,8 +1146,7 @@ nsHTMLContentSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream){ if ((mDoFormat == PR_FALSE) || preformatted) { const nsString& text = aNode.GetText(); - UnicodeToHTMLString(text); - aStream << mBuffer; + Write(text); mColPos = 0; } } @@ -1215,25 +1205,26 @@ nsHTMLContentSinkStream::AddComment(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLContentSinkStream::OpenContainer(const nsIParserNode& aNode){ - if(mOutput) + + const nsString& name = aNode.GetText(); + if (name.Equals("XIF_DOC_INFO")) { - const nsString& name = aNode.GetText(); - if (name.Equals("XIF_DOC_INFO")) + PRInt32 count=aNode.GetAttributeCount(); + for(PRInt32 i=0;iSetCharsetOverride(aCharsetOverride); return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult); } + /** - * Construct a content sink stream. - * @update gpk02/03/99 - * @param - * @return + * This method creates a new sink, it sets the stream used + * for the sink to aStream + * + * @update gpk 04/30/99 */ -nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream() { - NS_INIT_REFCNT(); - mOutput=&cout; - mColPos = 0; - mIndent = 0; - mDoOutput = PR_FALSE; - mBufferSize = 0; - mBuffer = nsnull; - mUnicodeEncoder = nsnull; +NS_HTMLPARS nsresult +NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsString* aString) { + + NS_ASSERTION(aString != nsnull, "a valid stream is required"); + nsHTMLToTXTSinkStream* it = new nsHTMLToTXTSinkStream(nsnull,aString); + if (nsnull == it) { + return NS_ERROR_OUT_OF_MEMORY; + } + nsString ucs2("ucs2"); + it->SetCharsetOverride(&ucs2); + return it->QueryInterface(kIHTMLContentSinkIID, (void **)aInstancePtrResult); } + + /** * Construct a content sink stream. * @update gpk02/03/99 * @param * @return */ -nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) { +nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(nsIOutputStream* aStream, nsString* aString) { NS_INIT_REFCNT(); - mOutput = &aStream; + mStream = aStream; mColPos = 0; mIndent = 0; mDoOutput = PR_FALSE; mBufferSize = 0; + mBufferLength = 0; mBuffer = nsnull; mUnicodeEncoder = nsnull; + mStream = aStream; + mString = aString; } + /** * * @update gpk02/03/99 @@ -200,7 +227,6 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream(ostream& aStream) { * @return */ nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() { - mOutput=0; //we don't own the stream we're given; just forget it. delete [] mBuffer; NS_IF_RELEASE(mUnicodeEncoder); } @@ -208,50 +234,24 @@ nsHTMLToTXTSinkStream::~nsHTMLToTXTSinkStream() { /** * - * @update gpk02/03/99 + * @update gpk04/30/99 * @param * @return */ -NS_IMETHODIMP_(void) -nsHTMLToTXTSinkStream::SetOutputStream(ostream& aStream){ - mOutput=&aStream; + +NS_IMETHODIMP +nsHTMLToTXTSinkStream::SetCharsetOverride(const nsString* aCharset) +{ + if (aCharset) + { + mCharsetOverride = *aCharset; + InitEncoder(mCharsetOverride); + } + return NS_OK; } -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -static -void OpenTagWithAttributes(const char* theTag,const nsIParserNode& aNode,int tab,ostream& aStream,PRBool aNewline) { -} - - -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -static -void OpenTag(const char* theTag,int tab,ostream& aStream,PRBool aNewline) { -} - - -/** - * - * @update gpk02/03/99 - * @param - * @return - */ -static -void CloseTag(const char* theTag,int tab,ostream& aStream) { -} - - /** * This method gets called by the parser when it encounters * a title tag and wants to set the document title in the sink. @@ -463,38 +463,38 @@ void nsHTMLToTXTSinkStream::EnsureBufferSize(PRInt32 aNewSize) mBufferSize = 2*aNewSize+1; // make the twice as large mBuffer = new char[mBufferSize]; mBuffer[0] = 0; + mBufferLength = 0; } } -void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc) +void nsHTMLToTXTSinkStream::EncodeToBuffer(const nsString& aSrc) { - + + NS_ASSERTION(mUnicodeEncoder != nsnull,"The unicode encoder needs to be initialized"); + if (mUnicodeEncoder == nsnull) + return; #define CH_NBSP 160 PRInt32 length = aSrc.Length(); nsresult result; - PRInt32 bufferLength; - if (mUnicodeEncoder == nsnull) - InitEncoder(""); - - if (length > 0) + if (mUnicodeEncoder != nsnull && length > 0) { EnsureBufferSize(length); - bufferLength = mBufferSize; + mBufferLength = mBufferSize; mUnicodeEncoder->Reset(); - result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &bufferLength); - mBuffer[bufferLength] = 0; - PRInt32 temp = bufferLength; + result = mUnicodeEncoder->Convert(aSrc.GetUnicode(), &length, mBuffer, &mBufferLength); + mBuffer[mBufferLength] = 0; + PRInt32 temp = mBufferLength; if (NS_SUCCEEDED(result)) result = mUnicodeEncoder->Finish(mBuffer,&temp); - for (PRInt32 i = 0; i < bufferLength; i++) + for (PRInt32 i = 0; i < mBufferLength; i++) { if (mBuffer[i] == char(CH_NBSP)) mBuffer[i] = ' '; @@ -504,76 +504,47 @@ void nsHTMLToTXTSinkStream::UnicodeToTXTString(const nsString& aSrc) } -NS_IMETHODIMP -nsHTMLToTXTSinkStream::GetStringBuffer(nsString & aStrBuffer) -{ - aStrBuffer = mStrBuffer; - return NS_OK; -} - - /** - * This gets called by the parser when you want to add - * a leaf node to the current container in the content - * model. + * Write places the contents of aString into either the output stream + * or the output string. + * When going to the stream, all data is run through the encoder * - * @updated gpk 06/18/98 + * @updated gpk02/03/99 * @param * @return */ -nsresult -nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode, ostream& aStream) +void nsHTMLToTXTSinkStream::Write(const nsString& aString) { - eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); - - const nsString& text = aNode.GetText(); - if (mDoOutput == PR_FALSE) - return NS_OK; - - if (type == eHTMLTag_text) { - - UnicodeToTXTString(text); - aStream << mBuffer; - mStrBuffer.Append(mBuffer); - mColPos += text.Length(); - } - else if (type == eHTMLTag_entity) + // If a encoder is being used then convert first convert the input string + if (mUnicodeEncoder != nsnull) { - const nsString& text = aNode.GetText(); - UnicodeToTXTString(text); - PRInt32 entity = NS_EntityToUnicode(mBuffer); - if (entity < 256) + EncodeToBuffer(aString); + if (mStream != nsnull) { - char ch = (char)entity; - aStream << ch; - mColPos++; + nsOutputStream out(mStream); + out.write(mBuffer,mBufferLength); + } + if (mString != nsnull) + { + mString->Append(mBuffer); } } - else if (type == eHTMLTag_whitespace) + else { - if (PR_TRUE) + if (mStream != nsnull) { - const nsString& text = aNode.GetText(); - UnicodeToTXTString(text); - aStream << mBuffer; - mStrBuffer.Append(mBuffer); - mColPos += text.Length(); + nsOutputStream out(mStream); + const PRUnichar* unicode = aString.GetUnicode(); + PRUint32 length = aString.Length(); + out.write(unicode,length); + } + else + { + mString->Append(aString); } } - else if (type == eHTMLTag_br) - { - if (PR_TRUE) - { - aStream << endl; - mStrBuffer.Append("\n"); - mColPos += 1; - } - } - - - return NS_OK; } @@ -628,7 +599,10 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode){ const nsString& value=aNode.GetValueAt(i); if (key.Equals("charset")) - InitEncoder(value); + { + if (mCharsetOverride.Length() == 0) + InitEncoder(value); + } } } @@ -657,9 +631,8 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){ { if (mColPos != 0) { - if (mOutput) - *mOutput << endl; - mStrBuffer.Append("\n"); + nsString temp("\n"); + Write(temp); mColPos = 0; } } @@ -677,11 +650,53 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode){ */ NS_IMETHODIMP nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode){ - nsresult result = NS_OK; - if(mOutput) { - result = AddLeaf(aNode,*mOutput); + eHTMLTags type = (eHTMLTags)aNode.GetNodeType(); + + const nsString& text = aNode.GetText(); + + if (mDoOutput == PR_FALSE) + return NS_OK; + + if (type == eHTMLTag_text) { + Write(text); + mColPos += text.Length(); + } + else if (type == eHTMLTag_entity) + { + const nsString& text = aNode.GetText(); + EncodeToBuffer(text); + PRUnichar entity = NS_EntityToUnicode(mBuffer); + nsString temp; + + temp.Append(entity); + Write(temp); + + mColPos++; } - return result; + else if (type == eHTMLTag_whitespace) + { + if (PR_TRUE) + { + const nsString& text = aNode.GetText(); + Write(text); + mColPos += text.Length(); + } + } + else if (type == eHTMLTag_br) + { + nsString temp("\n"); + Write(text); + mColPos++; + } + else if (type == eHTMLTag_newline) + { + nsString temp("\n"); + Write(text); + mColPos++; + } + + + return NS_OK; } diff --git a/parser/htmlparser/src/nsHTMLToTXTSinkStream.h b/parser/htmlparser/src/nsHTMLToTXTSinkStream.h index b3408af6250b..72c3331b3299 100644 --- a/parser/htmlparser/src/nsHTMLToTXTSinkStream.h +++ b/parser/htmlparser/src/nsHTMLToTXTSinkStream.h @@ -47,12 +47,9 @@ {0xa39c6bff, 0x15f0, 0x11d2, \ {0x80, 0x41, 0x0, 0x10, 0x4b, 0x98, 0x3f, 0xd4}} -#ifndef XP_MAC -class ostream; -#endif - class nsIUnicodeEncoder; +class nsIOutputStream; class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { public: @@ -61,8 +58,7 @@ class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { * Standard constructor * @update gpk02/03/99 */ - nsHTMLToTXTSinkStream(); - nsHTMLToTXTSinkStream(ostream& aStream); + nsHTMLToTXTSinkStream(nsIOutputStream* aOutStream, nsString* aOutString); /** * virtual destructor @@ -70,8 +66,8 @@ class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { */ virtual ~nsHTMLToTXTSinkStream(); - NS_IMETHOD_(void) SetOutputStream(ostream& aStream); - NS_IMETHOD GetStringBuffer(nsString & aStrBuffer); + NS_IMETHOD SetCharsetOverride(const nsString* aCharset); + // nsISupports NS_DECL_ISUPPORTS @@ -113,32 +109,47 @@ class nsHTMLToTXTSinkStream : public nsIHTMLContentSink { NS_IMETHOD BeginContext(PRInt32 aPosition); NS_IMETHOD EndContext(PRInt32 aPosition); + + protected: - nsresult AddLeaf(const nsIParserNode& aNode, ostream& aStream); - void WriteAttributes(const nsIParserNode& aNode,ostream& aStream); void EnsureBufferSize(PRInt32 aNewSize); - void UnicodeToTXTString(const nsString& aSrc); + nsresult InitEncoder(const nsString& aCharset); + void Write(const nsString& aString); + void EncodeToBuffer(const nsString& aString); + + + protected: - ostream* mOutput; + nsIOutputStream* mStream; + nsString* mString; + PRInt32 mIndent; PRInt32 mColPos; PRBool mDoOutput; - char* mBuffer; - PRInt32 mBufferSize; - nsString mStrBuffer; + char* mBuffer; + PRInt32 mBufferLength; // The length of the data in the buffer + PRInt32 mBufferSize; // The actual size of the buffer, regardless of the data + nsIUnicodeEncoder* mUnicodeEncoder; + nsString mCharsetOverride; + }; extern NS_HTMLPARS nsresult -NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult); +NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsIOutputStream* aOutStream, + const nsString* aCharsetOverride=nsnull); +extern NS_HTMLPARS nsresult +NS_New_HTMLToTXT_SinkStream(nsIHTMLContentSink** aInstancePtrResult, + nsString* aOutString); #endif