mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-10 11:55:49 +00:00
13750,13753,14075,15293,13873,12551:
- Rewrite wrapping logic for plaintext output sink, and run all output through the wrapping/formatting Write method (which now has logic to determine when we should be wrapping). - Handle blockquote type=cite mail quotes. - Write several new regression tests for plaintext output bugs, and include those tests and their expected output into the automated output test. r=sfraser,harishd
This commit is contained in:
parent
73d733e8c2
commit
d1731dbe4e
@ -164,6 +164,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream()
|
||||
NS_INIT_REFCNT();
|
||||
mColPos = 0;
|
||||
mIndent = 0;
|
||||
mCiteQuote = PR_FALSE;
|
||||
mDoOutput = PR_FALSE;
|
||||
mBufferSize = 0;
|
||||
mBufferLength = 0;
|
||||
@ -355,6 +356,24 @@ nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode)
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/**
 * Look up an attribute of a parser node by name.
 *
 * Scans the node's attributes in index order and copies the value of the
 * first attribute whose key compares equal to aMatchKey into aValueRet.
 *
 * @param aNode      parser node whose attributes are searched
 * @param aMatchKey  name of the attribute to look for
 * @param aValueRet  receives the attribute's value on a match; it is not
 *                   assigned when no attribute matches
 * @return NS_OK if a matching attribute was found,
 *         NS_ERROR_NOT_AVAILABLE otherwise
 */
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::GetValueOfAttribute(const nsIParserNode& aNode,
|
||||
char* aMatchKey,
|
||||
|
||||
nsString& aValueRet)
|
||||
{
|
||||
nsAutoString matchKey (aMatchKey);
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for (PRInt32 i=0;i<count;i++)
|
||||
{
|
||||
const nsString& key = aNode.GetKeyAt(i);
|
||||
// NOTE(review): relies on nsString's operator==; whether that comparison
|
||||
// is case-sensitive is not visible here -- confirm.
if (key == matchKey)
|
||||
{
|
||||
// The value is copied as returned by the node, with no further processing.
aValueRet = aNode.GetValueAt(i);
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
// No attribute with the requested name on this node.
return NS_ERROR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used to open a general container.
|
||||
@ -371,19 +390,13 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
{
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for(PRInt32 i=0;i<count;i++)
|
||||
nsString value;
|
||||
if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "charset", value)))
|
||||
{
|
||||
const nsString& key=aNode.GetKeyAt(i);
|
||||
const nsString& value=aNode.GetValueAt(i);
|
||||
|
||||
if (key.Equals("charset"))
|
||||
{
|
||||
if (mCharsetOverride.Length() == 0)
|
||||
InitEncoder(value);
|
||||
else
|
||||
InitEncoder(mCharsetOverride);
|
||||
}
|
||||
if (mCharsetOverride.Length() == 0)
|
||||
InitEncoder(value);
|
||||
else
|
||||
InitEncoder(mCharsetOverride);
|
||||
}
|
||||
}
|
||||
|
||||
@ -427,7 +440,17 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
|
||||
mColPos++;
|
||||
}
|
||||
else if (type == eHTMLTag_blockquote)
|
||||
mIndent += gTabSize;
|
||||
{
|
||||
// Find out whether it's a type=cite, and insert "> " instead.
|
||||
// Eventually we should get the value of the pref controlling citations,
|
||||
// and handle AOL-style citations as well.
|
||||
nsString value;
|
||||
if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "type", value))
|
||||
&& value.StripChars("\"").Equals("cite", PR_TRUE))
|
||||
mCiteQuote = PR_TRUE;
|
||||
else
|
||||
mIndent += gTabSize;
|
||||
}
|
||||
else if (type == eHTMLTag_pre)
|
||||
{
|
||||
nsAutoString temp(NS_LINEBREAK);
|
||||
@ -489,14 +512,19 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode)
|
||||
--mOLStackIndex;
|
||||
|
||||
else if (type == eHTMLTag_blockquote)
|
||||
mIndent -= gTabSize;
|
||||
{
|
||||
if (mCiteQuote)
|
||||
mCiteQuote = PR_FALSE;
|
||||
else
|
||||
mIndent -= gTabSize;
|
||||
}
|
||||
|
||||
// End current line if we're ending a block level tag
|
||||
if (IsBlockLevel(type))
|
||||
{
|
||||
if (mColPos != 0)
|
||||
{
|
||||
if (mFlags & nsIDocumentEncoder::OutputFormatted)
|
||||
//if (mFlags & nsIDocumentEncoder::OutputFormatted)
|
||||
{
|
||||
nsAutoString temp(NS_LINEBREAK);
|
||||
Write(temp);
|
||||
@ -538,15 +566,7 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode)
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((mFlags & nsIDocumentEncoder::OutputFormatted
|
||||
|| mFlags & nsIDocumentEncoder::OutputWrap)
|
||||
&& mWrapColumn > 0)
|
||||
WriteWrapped(text);
|
||||
else
|
||||
{
|
||||
Write(text);
|
||||
mColPos += text.Length();
|
||||
}
|
||||
Write(text);
|
||||
}
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
@ -650,15 +670,16 @@ void nsHTMLToTXTSinkStream::EncodeToBuffer(const nsString& aSrc)
|
||||
|
||||
|
||||
/**
|
||||
* Write places the contents of aString into either the output stream
|
||||
* WriteSimple places the contents of aString into either the output stream
|
||||
* or the output string.
|
||||
* When going to the stream, all data is run through the encoder
|
||||
* When going to the stream, all data is run through the encoder.
|
||||
* No formatting or wrapping is done here; that happens in ::Write.
|
||||
*
|
||||
* @updated gpk02/03/99
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void nsHTMLToTXTSinkStream::Write(const nsString& aString)
|
||||
void nsHTMLToTXTSinkStream::WriteSimple(const nsString& aString)
|
||||
{
|
||||
// If an encoder is being used, first convert the input string
|
||||
if (mUnicodeEncoder != nsnull)
|
||||
@ -696,51 +717,86 @@ void nsHTMLToTXTSinkStream::Write(const nsString& aString)
|
||||
|
||||
//
|
||||
// Write a string, wrapping appropriately to mWrapColumn.
|
||||
// This routine also handles indentation and mail-quoting,
|
||||
// and so should be used for formatted output even if we're not wrapping.
|
||||
//
|
||||
void
|
||||
nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
nsHTMLToTXTSinkStream::Write(const nsString& aString)
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
char* foo = aString.ToNewCString();
|
||||
printf("WriteWrapped(%s): wrap col = %d\n", foo, mWrapColumn);
|
||||
printf("Write(%s): wrap col = %d, mColPos = %d\n", foo, mWrapColumn, mColPos);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
PRInt32 bol = 0;
|
||||
int totLen = aString.Length();
|
||||
while (bol < totLen) // Loop over lines
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
nsString remaining;
|
||||
aString.Right(remaining, totLen - bol);
|
||||
foo = remaining.ToNewCString();
|
||||
printf("Next line: bol = %d, totLen = %d, string = '%s'\n",
|
||||
bol, totLen, foo);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
// Indent at the beginning of the line, if necessary
|
||||
if (mColPos == 0 && mIndent > 0)
|
||||
// Put the mail quote "> " chars in, if appropriate:
|
||||
if (mColPos == 0)
|
||||
{
|
||||
if (mCiteQuote)
|
||||
{
|
||||
nsAutoString temp("> ");
|
||||
WriteSimple(temp);
|
||||
mColPos += 2;
|
||||
}
|
||||
// Indent if necessary
|
||||
if (mIndent > 0)
|
||||
{
|
||||
char* spaces = NS_STATIC_CAST(char*, nsAllocator::Alloc(mIndent+1));
|
||||
for (int i=0; i<mIndent; ++i)
|
||||
spaces[i] = ' ';
|
||||
spaces[mIndent] = '\0';
|
||||
nsAutoString temp(spaces);
|
||||
Write (temp);
|
||||
WriteSimple(temp);
|
||||
mColPos += mIndent;
|
||||
nsAllocator::Free(spaces);
|
||||
}
|
||||
}
|
||||
|
||||
// See if there's a newline in the string:
|
||||
PRInt32 newline = aString.FindCharInSet("\n\r", bol);
|
||||
// Don't wrap mail-quoted text
|
||||
PRUint32 wrapcol = (mCiteQuote ? 0 : mWrapColumn);
|
||||
|
||||
// See if there's a newline in the string:
|
||||
PRInt32 newline = aString.FindCharInSet("\n\r", bol);
|
||||
|
||||
if ((!(mFlags & nsIDocumentEncoder::OutputFormatted)
|
||||
&& !(mFlags & nsIDocumentEncoder::OutputWrap))
|
||||
|| wrapcol == 0)
|
||||
{
|
||||
WriteSimple(aString);
|
||||
|
||||
// Simple attempt to be smart about col pos:
|
||||
if (newline >= 0)
|
||||
mColPos = totLen - newline - 1;
|
||||
else
|
||||
mColPos += totLen;
|
||||
#ifdef DEBUG_wrapping
|
||||
printf("No wrapping: newline is %d, totLen is %d; leaving mColPos = %d\n",
|
||||
newline, totLen, mColPos);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
while (bol < totLen) // Loop over lines
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
nsString remaining;
|
||||
aString.Right(remaining, totLen - bol);
|
||||
foo = remaining.ToNewCString();
|
||||
printf("Next line: bol = %d, newline = %d, totLen = %d, string = '%s'\n",
|
||||
bol, newline, totLen, foo);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
// Set eol to the end of the string or the first newline,
|
||||
// whichever comes first:
|
||||
int eol = bol + mWrapColumn - mColPos;
|
||||
int eol = bol + wrapcol - mColPos;
|
||||
|
||||
if (eol > totLen || wrapcol == 0)
|
||||
eol = bol + totLen;
|
||||
|
||||
if (eol > totLen)
|
||||
eol = totLen;
|
||||
else if (newline > 0 && eol > newline)
|
||||
eol = newline;
|
||||
// else we have to wrap
|
||||
@ -773,11 +829,11 @@ nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
if (mColPos > mIndent)
|
||||
{
|
||||
nsAutoString linebreak(NS_LINEBREAK);
|
||||
Write(linebreak);
|
||||
WriteSimple(linebreak);
|
||||
mColPos = 0;
|
||||
continue;
|
||||
}
|
||||
#endif /* CONFUSED */
|
||||
#endif /* NOTSURE */
|
||||
|
||||
// Else apparently we really can't break this line at whitespace --
|
||||
// so scan forward to the next space or newline, and dump a long line.
|
||||
@ -786,9 +842,8 @@ nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
&& (newline < 0 || eol < newline))
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
nsString linestr;
|
||||
aString.Mid(linestr, bol, lastSpace - bol);
|
||||
foo = linestr.ToNewCString();
|
||||
aString.Mid(remaining, bol, lastSpace - bol);
|
||||
foo = remaining.ToNewCString();
|
||||
printf("Searching foreward: '%c' is not a space\n line = '%s'\n",
|
||||
(char)aString[lastSpace], foo);
|
||||
nsAllocator::Free(foo);
|
||||
@ -817,8 +872,16 @@ nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
}
|
||||
else // Not wrapping and not writing a newline
|
||||
mColPos += lineStr.Length();
|
||||
Write(lineStr);
|
||||
WriteSimple(lineStr);
|
||||
#ifdef DEBUG_wrapping
|
||||
foo = lineStr.ToNewCString();
|
||||
printf("Calling WriteSimple(%s), leaving mColPos = %d\n", foo, mColPos);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
// Reset bol and newline:
|
||||
bol = eol+1;
|
||||
newline = aString.FindCharInSet("\n\r", bol);
|
||||
} // Continue looping over the string
|
||||
}
|
||||
|
||||
|
@ -137,15 +137,19 @@ protected:
|
||||
|
||||
nsresult InitEncoder(const nsString& aCharset);
|
||||
|
||||
void WriteSimple(const nsString& aString);
|
||||
void Write(const nsString& aString);
|
||||
void WriteWrapped(const nsString& aString);
|
||||
void EncodeToBuffer(const nsString& aString);
|
||||
NS_IMETHOD GetValueOfAttribute(const nsIParserNode& aNode,
|
||||
char* aMatchKey,
|
||||
nsString& aValueRet);
|
||||
|
||||
protected:
|
||||
nsIOutputStream* mStream;
|
||||
nsString* mString;
|
||||
|
||||
PRInt32 mIndent;
|
||||
PRBool mCiteQuote;
|
||||
PRInt32 mColPos;
|
||||
PRBool mDoOutput;
|
||||
PRInt32 mFlags;
|
||||
|
@ -44,6 +44,8 @@ TEST_FILES = \
|
||||
entityxif.out \
|
||||
mailquote.html \
|
||||
mailquote.out \
|
||||
xifstuff.xif \
|
||||
xifstuff.out \
|
||||
$(NULL)
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
@ -27,6 +27,13 @@
|
||||
|
||||
set errmsg = ""
|
||||
|
||||
echo "Testing simple html to html ..."
|
||||
TestOutput -i text/html -o text/html -c OutTestData/simple.html OutTestData/simple.html
|
||||
if ($status != 0) then
|
||||
echo "Simple html to html failed.\n"
|
||||
set errmsg = ($errmsg "simple.html")
|
||||
endif
|
||||
|
||||
echo "Testing simple copy cases ..."
|
||||
TestOutput -i text/html -o text/plain -f 0 -w 0 -c OutTestData/simplecopy.out OutTestData/simple.html
|
||||
if ($status != 0) then
|
||||
@ -62,6 +69,13 @@ if ($status != 0) then
|
||||
set errmsg = ($errmsg "entityxif.out")
|
||||
endif
|
||||
|
||||
# XIF -> HTML round trip: convert xifstuff.xif and compare the result with
# the expected output in xifstuff.out. On failure, record this test's own
# expected-output file so the final summary points at the right test.
echo "Testing XIF to HTML ..."
|
||||
TestOutput -i text/xif -o text/html -c OutTestData/xifstuff.out OutTestData/xifstuff.xif
|
||||
if ($status != 0) then
|
||||
echo "XIF to HTML conversion test failed."
|
||||
set errmsg = ($errmsg "xifstuff.out")
|
||||
endif
|
||||
|
||||
if (errmsg != "") then
|
||||
echo " "
|
||||
echo TESTS FAILED: $errmsg
|
||||
|
@ -26,13 +26,26 @@ OBJS = \
|
||||
.\$(OBJDIR)\nsSetupRegistry.obj \
|
||||
$(NULL)
|
||||
|
||||
TEST_FILES = \
|
||||
plain.html \
|
||||
plainwrap.out \
|
||||
plainnowrap.out \
|
||||
simple.html \
|
||||
simplecopy.out \
|
||||
entityxif.xif \
|
||||
entityxif.out \
|
||||
mailquote.html \
|
||||
mailquote.out \
|
||||
xifstuff.xif \
|
||||
xifstuff.out \
|
||||
$(NULL)
|
||||
|
||||
LINCS= \
|
||||
-I$(PUBLIC)\raptor \
|
||||
-I$(PUBLIC)\xpcom \
|
||||
-I$(PUBLIC)\netlib
|
||||
|
||||
LLIBS= \
|
||||
$(DIST)\lib\raptorhtmlpars.lib \
|
||||
$(DIST)\lib\xpcom.lib \
|
||||
$(LIBNSPR) \
|
||||
$(NULL)
|
||||
@ -47,6 +60,7 @@ include <$(DEPTH)\config\rules.mak>
|
||||
|
||||
install:: $(PROGRAM)
|
||||
$(MAKE_INSTALL) $(PROGRAM) $(DIST)\bin
|
||||
$(MAKE_INSTALL) $(TEST_FILES) $(DIST)/bin/OutTestData
|
||||
|
||||
clobber::
|
||||
rm -f $(OBJS) $(DIST)\bin\Convert.exe
|
||||
|
@ -1,5 +1,6 @@
|
||||
<html>
|
||||
<head><!--
|
||||
<head>
|
||||
<!--
|
||||
-- The contents of this file are subject to the Netscape Public
|
||||
-- License Version 1.1 (the "License"); you may not use this file
|
||||
-- except in compliance with the License. You may obtain a copy of
|
||||
@ -32,7 +33,8 @@ Here is some <u>underlined and <b>bold</b>ened</u>ified text.
|
||||
<p>
|
||||
Here is a line ending with a space
|
||||
followed by a line break.
|
||||
The output should contain only one space between "space" and "followed".
|
||||
The output should contain only one space (and no line breaks) between "space" and "followed".
|
||||
</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,3 +1,3 @@
|
||||
Simple html page Here is a link to mozilla.org. Here is some underlined and boldenedified text.
|
||||
Here is a line ending with a space followed by a line break. The output should contain only one space between "space" and "followed".
|
||||
Here is a line ending with a space followed by a line break. The output should contain only one space (and no line breaks) between "space" and "followed".
|
||||
|
||||
|
42
htmlparser/tests/outsinks/xifstuff.out
Normal file
42
htmlparser/tests/outsinks/xifstuff.out
Normal file
@ -0,0 +1,42 @@
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<!--
|
||||
-- The contents of this file are subject to the Netscape Public
|
||||
-- License Version 1.1 (the "License"); you may not use this file
|
||||
-- except in compliance with the License. You may obtain a copy of
|
||||
-- the License at http://www.mozilla.org/NPL/
|
||||
--
|
||||
-- Software distributed under the License is distributed on an "AS
|
||||
-- IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
-- implied. See the License for the specific language governing
|
||||
-- rights and limitations under the License.
|
||||
--
|
||||
-- The Original Code is Mozilla Communicator client code, released
|
||||
-- March 31, 1998.
|
||||
--
|
||||
-- The Initial Developer of the Original Code is Netscape
|
||||
-- Communications Corporation. Portions created by Netscape are
|
||||
-- Copyright (C) 1998-1999 Netscape Communications Corporation. All
|
||||
-- Rights Reserved.
|
||||
--
|
||||
-- Contributor(s):
|
||||
-->
|
||||
|
||||
<title>XIF Test Page</title>
|
||||
|
||||
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<h2>Here's the deal...</h2>
|
||||
|
||||
<p>This is a good place to add in <b>html</b> to aid in testing features
|
||||
under development. It's also a great place to not use latin.
|
||||
<!-- This is a comment;
|
||||
Here is more of the comment.
|
||||
-->
|
||||
|
||||
</p></body>
|
84
htmlparser/tests/outsinks/xifstuff.xif
Normal file
84
htmlparser/tests/outsinks/xifstuff.xif
Normal file
@ -0,0 +1,84 @@
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE xif>
|
||||
<encode selection="0"/>
|
||||
<section>
|
||||
<section_head>
|
||||
<document_info charset="UTF-8"/>
|
||||
</section_head>
|
||||
<section_body>
|
||||
<container isa="html">
|
||||
<container isa="head">
|
||||
<content>
|
||||
</content>
|
||||
<comment><content>
|
||||
-- The contents of this file are subject to the Netscape Public
|
||||
-- License Version 1.1 (the "License"); you may not use this file
|
||||
-- except in compliance with the License. You may obtain a copy of
|
||||
-- the License at http://www.mozilla.org/NPL/
|
||||
--
|
||||
-- Software distributed under the License is distributed on an "AS
|
||||
-- IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
-- implied. See the License for the specific language governing
|
||||
-- rights and limitations under the License.
|
||||
--
|
||||
-- The Original Code is Mozilla Communicator client code, released
|
||||
-- March 31, 1998.
|
||||
--
|
||||
-- The Initial Developer of the Original Code is Netscape
|
||||
-- Communications Corporation. Portions created by Netscape are
|
||||
-- Copyright (C) 1998-1999 Netscape Communications Corporation. All
|
||||
-- Rights Reserved.
|
||||
--
|
||||
-- Contributor(s):
|
||||
</content>
|
||||
</comment>
|
||||
<content>
|
||||
|
||||
</content>
|
||||
<container isa="title">
|
||||
<content>XIF Test Page</content>
|
||||
</container><!--title-->
|
||||
<content>
|
||||
|
||||
</content>
|
||||
<leaf isa="meta">
|
||||
<attr name="http-equiv" value="Content-Type"/>
|
||||
<attr name="content" value="text/html; charset=utf-8"/>
|
||||
</leaf><!--meta-->
|
||||
|
||||
</container><!--head-->
|
||||
|
||||
<content>
|
||||
|
||||
</content>
|
||||
|
||||
<container isa="body">
|
||||
|
||||
<container isa="h2">
|
||||
<content>Here's the deal...</content>
|
||||
</container><!--h2-->
|
||||
<content>
|
||||
|
||||
</content>
|
||||
|
||||
<container isa="p">
|
||||
<content>This is a good place to add in </content>
|
||||
<container isa="b">
|
||||
<content>html</content>
|
||||
</container><!--b-->
|
||||
<content> to aid in testing features
|
||||
under development. It's also a great place to not use latin.
|
||||
</content>
|
||||
|
||||
<comment><content> This is a comment;
|
||||
Here is more of the comment.
|
||||
</content>
|
||||
</comment>
|
||||
<content>
|
||||
|
||||
</content>
|
||||
|
||||
</container><!--body-->
|
||||
</container><!--html-->
|
||||
</section_body>
|
||||
</section>
|
@ -164,6 +164,7 @@ nsHTMLToTXTSinkStream::nsHTMLToTXTSinkStream()
|
||||
NS_INIT_REFCNT();
|
||||
mColPos = 0;
|
||||
mIndent = 0;
|
||||
mCiteQuote = PR_FALSE;
|
||||
mDoOutput = PR_FALSE;
|
||||
mBufferSize = 0;
|
||||
mBufferLength = 0;
|
||||
@ -355,6 +356,24 @@ nsHTMLToTXTSinkStream::AddComment(const nsIParserNode& aNode)
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/**
 * Look up an attribute of a parser node by name.
 *
 * Scans the node's attributes in index order and copies the value of the
 * first attribute whose key compares equal to aMatchKey into aValueRet.
 *
 * @param aNode      parser node whose attributes are searched
 * @param aMatchKey  name of the attribute to look for
 * @param aValueRet  receives the attribute's value on a match; it is not
 *                   assigned when no attribute matches
 * @return NS_OK if a matching attribute was found,
 *         NS_ERROR_NOT_AVAILABLE otherwise
 */
NS_IMETHODIMP
|
||||
nsHTMLToTXTSinkStream::GetValueOfAttribute(const nsIParserNode& aNode,
|
||||
char* aMatchKey,
|
||||
|
||||
nsString& aValueRet)
|
||||
{
|
||||
nsAutoString matchKey (aMatchKey);
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for (PRInt32 i=0;i<count;i++)
|
||||
{
|
||||
const nsString& key = aNode.GetKeyAt(i);
|
||||
// NOTE(review): relies on nsString's operator==; whether that comparison
|
||||
// is case-sensitive is not visible here -- confirm.
if (key == matchKey)
|
||||
{
|
||||
// The value is copied as returned by the node, with no further processing.
aValueRet = aNode.GetValueAt(i);
|
||||
return NS_OK;
|
||||
}
|
||||
}
|
||||
// No attribute with the requested name on this node.
return NS_ERROR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used to open a general container.
|
||||
@ -371,19 +390,13 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
|
||||
const nsString& name = aNode.GetText();
|
||||
if (name.Equals("XIF_DOC_INFO"))
|
||||
{
|
||||
PRInt32 count=aNode.GetAttributeCount();
|
||||
for(PRInt32 i=0;i<count;i++)
|
||||
nsString value;
|
||||
if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "charset", value)))
|
||||
{
|
||||
const nsString& key=aNode.GetKeyAt(i);
|
||||
const nsString& value=aNode.GetValueAt(i);
|
||||
|
||||
if (key.Equals("charset"))
|
||||
{
|
||||
if (mCharsetOverride.Length() == 0)
|
||||
InitEncoder(value);
|
||||
else
|
||||
InitEncoder(mCharsetOverride);
|
||||
}
|
||||
if (mCharsetOverride.Length() == 0)
|
||||
InitEncoder(value);
|
||||
else
|
||||
InitEncoder(mCharsetOverride);
|
||||
}
|
||||
}
|
||||
|
||||
@ -427,7 +440,17 @@ nsHTMLToTXTSinkStream::OpenContainer(const nsIParserNode& aNode)
|
||||
mColPos++;
|
||||
}
|
||||
else if (type == eHTMLTag_blockquote)
|
||||
mIndent += gTabSize;
|
||||
{
|
||||
// Find out whether it's a type=cite, and insert "> " instead.
|
||||
// Eventually we should get the value of the pref controlling citations,
|
||||
// and handle AOL-style citations as well.
|
||||
nsString value;
|
||||
if (NS_SUCCEEDED(GetValueOfAttribute(aNode, "type", value))
|
||||
&& value.StripChars("\"").Equals("cite", PR_TRUE))
|
||||
mCiteQuote = PR_TRUE;
|
||||
else
|
||||
mIndent += gTabSize;
|
||||
}
|
||||
else if (type == eHTMLTag_pre)
|
||||
{
|
||||
nsAutoString temp(NS_LINEBREAK);
|
||||
@ -489,14 +512,19 @@ nsHTMLToTXTSinkStream::CloseContainer(const nsIParserNode& aNode)
|
||||
--mOLStackIndex;
|
||||
|
||||
else if (type == eHTMLTag_blockquote)
|
||||
mIndent -= gTabSize;
|
||||
{
|
||||
if (mCiteQuote)
|
||||
mCiteQuote = PR_FALSE;
|
||||
else
|
||||
mIndent -= gTabSize;
|
||||
}
|
||||
|
||||
// End current line if we're ending a block level tag
|
||||
if (IsBlockLevel(type))
|
||||
{
|
||||
if (mColPos != 0)
|
||||
{
|
||||
if (mFlags & nsIDocumentEncoder::OutputFormatted)
|
||||
//if (mFlags & nsIDocumentEncoder::OutputFormatted)
|
||||
{
|
||||
nsAutoString temp(NS_LINEBREAK);
|
||||
Write(temp);
|
||||
@ -538,15 +566,7 @@ nsHTMLToTXTSinkStream::AddLeaf(const nsIParserNode& aNode)
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((mFlags & nsIDocumentEncoder::OutputFormatted
|
||||
|| mFlags & nsIDocumentEncoder::OutputWrap)
|
||||
&& mWrapColumn > 0)
|
||||
WriteWrapped(text);
|
||||
else
|
||||
{
|
||||
Write(text);
|
||||
mColPos += text.Length();
|
||||
}
|
||||
Write(text);
|
||||
}
|
||||
else if (type == eHTMLTag_entity)
|
||||
{
|
||||
@ -650,15 +670,16 @@ void nsHTMLToTXTSinkStream::EncodeToBuffer(const nsString& aSrc)
|
||||
|
||||
|
||||
/**
|
||||
* Write places the contents of aString into either the output stream
|
||||
* WriteSimple places the contents of aString into either the output stream
|
||||
* or the output string.
|
||||
* When going to the stream, all data is run through the encoder
|
||||
* When going to the stream, all data is run through the encoder.
|
||||
* No formatting or wrapping is done here; that happens in ::Write.
|
||||
*
|
||||
* @updated gpk02/03/99
|
||||
* @param
|
||||
* @return
|
||||
*/
|
||||
void nsHTMLToTXTSinkStream::Write(const nsString& aString)
|
||||
void nsHTMLToTXTSinkStream::WriteSimple(const nsString& aString)
|
||||
{
|
||||
// If an encoder is being used, first convert the input string
|
||||
if (mUnicodeEncoder != nsnull)
|
||||
@ -696,51 +717,86 @@ void nsHTMLToTXTSinkStream::Write(const nsString& aString)
|
||||
|
||||
//
|
||||
// Write a string, wrapping appropriately to mWrapColumn.
|
||||
// This routine also handles indentation and mail-quoting,
|
||||
// and so should be used for formatted output even if we're not wrapping.
|
||||
//
|
||||
void
|
||||
nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
nsHTMLToTXTSinkStream::Write(const nsString& aString)
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
char* foo = aString.ToNewCString();
|
||||
printf("WriteWrapped(%s): wrap col = %d\n", foo, mWrapColumn);
|
||||
printf("Write(%s): wrap col = %d, mColPos = %d\n", foo, mWrapColumn, mColPos);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
PRInt32 bol = 0;
|
||||
int totLen = aString.Length();
|
||||
while (bol < totLen) // Loop over lines
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
nsString remaining;
|
||||
aString.Right(remaining, totLen - bol);
|
||||
foo = remaining.ToNewCString();
|
||||
printf("Next line: bol = %d, totLen = %d, string = '%s'\n",
|
||||
bol, totLen, foo);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
// Indent at the beginning of the line, if necessary
|
||||
if (mColPos == 0 && mIndent > 0)
|
||||
// Put the mail quote "> " chars in, if appropriate:
|
||||
if (mColPos == 0)
|
||||
{
|
||||
if (mCiteQuote)
|
||||
{
|
||||
nsAutoString temp("> ");
|
||||
WriteSimple(temp);
|
||||
mColPos += 2;
|
||||
}
|
||||
// Indent if necessary
|
||||
if (mIndent > 0)
|
||||
{
|
||||
char* spaces = NS_STATIC_CAST(char*, nsAllocator::Alloc(mIndent+1));
|
||||
for (int i=0; i<mIndent; ++i)
|
||||
spaces[i] = ' ';
|
||||
spaces[mIndent] = '\0';
|
||||
nsAutoString temp(spaces);
|
||||
Write (temp);
|
||||
WriteSimple(temp);
|
||||
mColPos += mIndent;
|
||||
nsAllocator::Free(spaces);
|
||||
}
|
||||
}
|
||||
|
||||
// See if there's a newline in the string:
|
||||
PRInt32 newline = aString.FindCharInSet("\n\r", bol);
|
||||
// Don't wrap mail-quoted text
|
||||
PRUint32 wrapcol = (mCiteQuote ? 0 : mWrapColumn);
|
||||
|
||||
// See if there's a newline in the string:
|
||||
PRInt32 newline = aString.FindCharInSet("\n\r", bol);
|
||||
|
||||
if ((!(mFlags & nsIDocumentEncoder::OutputFormatted)
|
||||
&& !(mFlags & nsIDocumentEncoder::OutputWrap))
|
||||
|| wrapcol == 0)
|
||||
{
|
||||
WriteSimple(aString);
|
||||
|
||||
// Simple attempt to be smart about col pos:
|
||||
if (newline >= 0)
|
||||
mColPos = totLen - newline - 1;
|
||||
else
|
||||
mColPos += totLen;
|
||||
#ifdef DEBUG_wrapping
|
||||
printf("No wrapping: newline is %d, totLen is %d; leaving mColPos = %d\n",
|
||||
newline, totLen, mColPos);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
while (bol < totLen) // Loop over lines
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
nsString remaining;
|
||||
aString.Right(remaining, totLen - bol);
|
||||
foo = remaining.ToNewCString();
|
||||
printf("Next line: bol = %d, newline = %d, totLen = %d, string = '%s'\n",
|
||||
bol, newline, totLen, foo);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
// Set eol to the end of the string or the first newline,
|
||||
// whichever comes first:
|
||||
int eol = bol + mWrapColumn - mColPos;
|
||||
int eol = bol + wrapcol - mColPos;
|
||||
|
||||
if (eol > totLen || wrapcol == 0)
|
||||
eol = bol + totLen;
|
||||
|
||||
if (eol > totLen)
|
||||
eol = totLen;
|
||||
else if (newline > 0 && eol > newline)
|
||||
eol = newline;
|
||||
// else we have to wrap
|
||||
@ -773,11 +829,11 @@ nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
if (mColPos > mIndent)
|
||||
{
|
||||
nsAutoString linebreak(NS_LINEBREAK);
|
||||
Write(linebreak);
|
||||
WriteSimple(linebreak);
|
||||
mColPos = 0;
|
||||
continue;
|
||||
}
|
||||
#endif /* CONFUSED */
|
||||
#endif /* NOTSURE */
|
||||
|
||||
// Else apparently we really can't break this line at whitespace --
|
||||
// so scan forward to the next space or newline, and dump a long line.
|
||||
@ -786,9 +842,8 @@ nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
&& (newline < 0 || eol < newline))
|
||||
{
|
||||
#ifdef DEBUG_wrapping
|
||||
nsString linestr;
|
||||
aString.Mid(linestr, bol, lastSpace - bol);
|
||||
foo = linestr.ToNewCString();
|
||||
aString.Mid(remaining, bol, lastSpace - bol);
|
||||
foo = remaining.ToNewCString();
|
||||
printf("Searching foreward: '%c' is not a space\n line = '%s'\n",
|
||||
(char)aString[lastSpace], foo);
|
||||
nsAllocator::Free(foo);
|
||||
@ -817,8 +872,16 @@ nsHTMLToTXTSinkStream::WriteWrapped(const nsString& aString)
|
||||
}
|
||||
else // Not wrapping and not writing a newline
|
||||
mColPos += lineStr.Length();
|
||||
Write(lineStr);
|
||||
WriteSimple(lineStr);
|
||||
#ifdef DEBUG_wrapping
|
||||
foo = lineStr.ToNewCString();
|
||||
printf("Calling WriteSimple(%s), leaving mColPos = %d\n", foo, mColPos);
|
||||
nsAllocator::Free(foo);
|
||||
#endif
|
||||
|
||||
// Reset bol and newline:
|
||||
bol = eol+1;
|
||||
newline = aString.FindCharInSet("\n\r", bol);
|
||||
} // Continue looping over the string
|
||||
}
|
||||
|
||||
|
@ -137,15 +137,19 @@ protected:
|
||||
|
||||
nsresult InitEncoder(const nsString& aCharset);
|
||||
|
||||
void WriteSimple(const nsString& aString);
|
||||
void Write(const nsString& aString);
|
||||
void WriteWrapped(const nsString& aString);
|
||||
void EncodeToBuffer(const nsString& aString);
|
||||
NS_IMETHOD GetValueOfAttribute(const nsIParserNode& aNode,
|
||||
char* aMatchKey,
|
||||
nsString& aValueRet);
|
||||
|
||||
protected:
|
||||
nsIOutputStream* mStream;
|
||||
nsString* mString;
|
||||
|
||||
PRInt32 mIndent;
|
||||
PRBool mCiteQuote;
|
||||
PRInt32 mColPos;
|
||||
PRBool mDoOutput;
|
||||
PRInt32 mFlags;
|
||||
|
@ -44,6 +44,8 @@ TEST_FILES = \
|
||||
entityxif.out \
|
||||
mailquote.html \
|
||||
mailquote.out \
|
||||
xifstuff.xif \
|
||||
xifstuff.out \
|
||||
$(NULL)
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
@ -27,6 +27,13 @@
|
||||
|
||||
set errmsg = ""
|
||||
|
||||
echo "Testing simple html to html ..."
|
||||
TestOutput -i text/html -o text/html -c OutTestData/simple.html OutTestData/simple.html
|
||||
if ($status != 0) then
|
||||
echo "Simple html to html failed.\n"
|
||||
set errmsg = ($errmsg "simple.html")
|
||||
endif
|
||||
|
||||
echo "Testing simple copy cases ..."
|
||||
TestOutput -i text/html -o text/plain -f 0 -w 0 -c OutTestData/simplecopy.out OutTestData/simple.html
|
||||
if ($status != 0) then
|
||||
@ -62,6 +69,13 @@ if ($status != 0) then
|
||||
set errmsg = ($errmsg "entityxif.out")
|
||||
endif
|
||||
|
||||
# XIF -> HTML round trip: convert xifstuff.xif and compare the result with
# the expected output in xifstuff.out. On failure, record this test's own
# expected-output file so the final summary points at the right test.
echo "Testing XIF to HTML ..."
|
||||
TestOutput -i text/xif -o text/html -c OutTestData/xifstuff.out OutTestData/xifstuff.xif
|
||||
if ($status != 0) then
|
||||
echo "XIF to HTML conversion test failed."
|
||||
set errmsg = ($errmsg "xifstuff.out")
|
||||
endif
|
||||
|
||||
if (errmsg != "") then
|
||||
echo " "
|
||||
echo TESTS FAILED: $errmsg
|
||||
|
@ -26,13 +26,26 @@ OBJS = \
|
||||
.\$(OBJDIR)\nsSetupRegistry.obj \
|
||||
$(NULL)
|
||||
|
||||
TEST_FILES = \
|
||||
plain.html \
|
||||
plainwrap.out \
|
||||
plainnowrap.out \
|
||||
simple.html \
|
||||
simplecopy.out \
|
||||
entityxif.xif \
|
||||
entityxif.out \
|
||||
mailquote.html \
|
||||
mailquote.out \
|
||||
xifstuff.xif \
|
||||
xifstuff.out \
|
||||
$(NULL)
|
||||
|
||||
LINCS= \
|
||||
-I$(PUBLIC)\raptor \
|
||||
-I$(PUBLIC)\xpcom \
|
||||
-I$(PUBLIC)\netlib
|
||||
|
||||
LLIBS= \
|
||||
$(DIST)\lib\raptorhtmlpars.lib \
|
||||
$(DIST)\lib\xpcom.lib \
|
||||
$(LIBNSPR) \
|
||||
$(NULL)
|
||||
@ -47,6 +60,7 @@ include <$(DEPTH)\config\rules.mak>
|
||||
|
||||
install:: $(PROGRAM)
|
||||
$(MAKE_INSTALL) $(PROGRAM) $(DIST)\bin
|
||||
$(MAKE_INSTALL) $(TEST_FILES) $(DIST)/bin/OutTestData
|
||||
|
||||
clobber::
|
||||
rm -f $(OBJS) $(DIST)\bin\Convert.exe
|
||||
|
@ -1,5 +1,6 @@
|
||||
<html>
|
||||
<head><!--
|
||||
<head>
|
||||
<!--
|
||||
-- The contents of this file are subject to the Netscape Public
|
||||
-- License Version 1.1 (the "License"); you may not use this file
|
||||
-- except in compliance with the License. You may obtain a copy of
|
||||
@ -32,7 +33,8 @@ Here is some <u>underlined and <b>bold</b>ened</u>ified text.
|
||||
<p>
|
||||
Here is a line ending with a space
|
||||
followed by a line break.
|
||||
The output should contain only one space between "space" and "followed".
|
||||
The output should contain only one space (and no line breaks) between "space" and "followed".
|
||||
</p>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
@ -1,3 +1,3 @@
|
||||
Simple html page Here is a link to mozilla.org. Here is some underlined and boldenedified text.
|
||||
Here is a line ending with a space followed by a line break. The output should contain only one space between "space" and "followed".
|
||||
Here is a line ending with a space followed by a line break. The output should contain only one space (and no line breaks) between "space" and "followed".
|
||||
|
||||
|
42
parser/htmlparser/tests/outsinks/xifstuff.out
Normal file
42
parser/htmlparser/tests/outsinks/xifstuff.out
Normal file
@ -0,0 +1,42 @@
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<!--
|
||||
-- The contents of this file are subject to the Netscape Public
|
||||
-- License Version 1.1 (the "License"); you may not use this file
|
||||
-- except in compliance with the License. You may obtain a copy of
|
||||
-- the License at http://www.mozilla.org/NPL/
|
||||
--
|
||||
-- Software distributed under the License is distributed on an "AS
|
||||
-- IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
-- implied. See the License for the specific language governing
|
||||
-- rights and limitations under the License.
|
||||
--
|
||||
-- The Original Code is Mozilla Communicator client code, released
|
||||
-- March 31, 1998.
|
||||
--
|
||||
-- The Initial Developer of the Original Code is Netscape
|
||||
-- Communications Corporation. Portions created by Netscape are
|
||||
-- Copyright (C) 1998-1999 Netscape Communications Corporation. All
|
||||
-- Rights Reserved.
|
||||
--
|
||||
-- Contributor(s):
|
||||
-->
|
||||
|
||||
<title>XIF Test Page</title>
|
||||
|
||||
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
<h2>Here's the deal...</h2>
|
||||
|
||||
<p>This is a good place to add in <b>html</b> to aid in testing features
|
||||
under development. It's also a great place to not use latin.
|
||||
<!-- This is a comment;
|
||||
Here is more of the comment.
|
||||
-->
|
||||
|
||||
</p></body>
|
84
parser/htmlparser/tests/outsinks/xifstuff.xif
Normal file
84
parser/htmlparser/tests/outsinks/xifstuff.xif
Normal file
@ -0,0 +1,84 @@
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE xif>
|
||||
<encode selection="0"/>
|
||||
<section>
|
||||
<section_head>
|
||||
<document_info charset="UTF-8"/>
|
||||
</section_head>
|
||||
<section_body>
|
||||
<container isa="html">
|
||||
<container isa="head">
|
||||
<content>
|
||||
</content>
|
||||
<comment><content>
|
||||
-- The contents of this file are subject to the Netscape Public
|
||||
-- License Version 1.1 (the "License"); you may not use this file
|
||||
-- except in compliance with the License. You may obtain a copy of
|
||||
-- the License at http://www.mozilla.org/NPL/
|
||||
--
|
||||
-- Software distributed under the License is distributed on an "AS
|
||||
-- IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
||||
-- implied. See the License for the specific language governing
|
||||
-- rights and limitations under the License.
|
||||
--
|
||||
-- The Original Code is Mozilla Communicator client code, released
|
||||
-- March 31, 1998.
|
||||
--
|
||||
-- The Initial Developer of the Original Code is Netscape
|
||||
-- Communications Corporation. Portions created by Netscape are
|
||||
-- Copyright (C) 1998-1999 Netscape Communications Corporation. All
|
||||
-- Rights Reserved.
|
||||
--
|
||||
-- Contributor(s):
|
||||
</content>
|
||||
</comment>
|
||||
<content>
|
||||
|
||||
</content>
|
||||
<container isa="title">
|
||||
<content>XIF Test Page</content>
|
||||
</container><!--title-->
|
||||
<content>
|
||||
|
||||
</content>
|
||||
<leaf isa="meta">
|
||||
<attr name="http-equiv" value="Content-Type"/>
|
||||
<attr name="content" value="text/html; charset=utf-8"/>
|
||||
</leaf><!--meta-->
|
||||
|
||||
</container><!--head-->
|
||||
|
||||
<content>
|
||||
|
||||
</content>
|
||||
|
||||
<container isa="body">
|
||||
|
||||
<container isa="h2">
|
||||
<content>Here's the deal...</content>
|
||||
</container><!--h2-->
|
||||
<content>
|
||||
|
||||
</content>
|
||||
|
||||
<container isa="p">
|
||||
<content>This is a good place to add in </content>
|
||||
<container isa="b">
|
||||
<content>html</content>
|
||||
</container><!--b-->
|
||||
<content> to aid in testing features
|
||||
under development. It's also a great place to not use latin.
|
||||
</content>
|
||||
|
||||
<comment><content> This is a comment;
|
||||
Here is more of the comment.
|
||||
</content>
|
||||
</comment>
|
||||
<content>
|
||||
|
||||
</content>
|
||||
|
||||
</container><!--body-->
|
||||
</container><!--html-->
|
||||
</section_body>
|
||||
</section>
|
Loading…
Reference in New Issue
Block a user