gecko-dev/content/base/src/nsPlainTextSerializer.cpp

1747 lines
51 KiB
C++
Raw Normal View History

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
* Daniel Bratell <bratell@lysator.liu.se>
* Ben Bucksch <mozilla@bucksch.org>
*/
#include "nsPlainTextSerializer.h"
#include "nsILineBreakerFactory.h"
#include "nsLWBrkCIID.h"
#include "nsIPref.h"
#include "nsIServiceManager.h"
#include "nsHTMLAtoms.h"
#include "nsIDocumentEncoder.h"
#include "nsIDOMText.h"
#include "nsIDOMElement.h"
#include "nsINameSpaceManager.h"
#include "nsIHTMLContent.h"
#include "nsITextContent.h"
#include "nsTextFragment.h"
#include "nsParserCIID.h"
static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);
static NS_DEFINE_CID(kPrefServiceCID, NS_PREF_CID);
static NS_DEFINE_CID(kParserServiceCID, NS_PARSERSERVICE_CID);
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"
static const PRInt32 kTabSize=4;
static const PRInt32 kOLNumberWidth = 3;
static const PRInt32 kIndentSizeHeaders = 2; /* Indention of h1, if
mHeaderStrategy = 1 or = 2.
Indention of other headers
is derived from that.
XXX center h1? */
static const PRInt32 kIndentIncrementHeaders = 2; /* If mHeaderStrategy = 1,
indent h(x+1) this many
columns more than h(x) */
static const PRInt32 kIndentSizeList = (kTabSize > kOLNumberWidth+3) ? kTabSize: kOLNumberWidth+3;
// Indention of non-first lines of ul and ol
static const PRInt32 kIndentSizeDD = kTabSize; // Indention of <dd>
static PRInt32 HeaderLevel(eHTMLTags aTag);
static PRInt32 unicharwidth(PRUnichar ucs);
static PRInt32 unicharwidth(const PRUnichar* pwcs, PRInt32 n);
// Someday may want to make this non-const:
static const PRUint32 TagStackSize = 500;
static const PRUint32 OLStackSize = 100;
nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer)
{
nsPlainTextSerializer* it = new nsPlainTextSerializer();
if (!it) {
return NS_ERROR_OUT_OF_MEMORY;
}
return it->QueryInterface(NS_GET_IID(nsIContentSerializer),
(void**)aSerializer);
}
nsPlainTextSerializer::nsPlainTextSerializer()
{
NS_INIT_ISUPPORTS();
mOutputString = nsnull;
mInHead = PR_FALSE;
mColPos = 0;
mIndent = 0;
mCiteQuoteLevel = 0;
mStructs = PR_TRUE; // will be read from prefs later
mHeaderStrategy = 1 /*indent increasingly*/; // ditto
for (PRInt32 i = 0; i <= 6; i++) {
mHeaderCounter[i] = 0;
}
// Line breaker
mWrapColumn = 72; // XXX magic number, we expect someone to reset this
mCurrentLineWidth = 0;
// Flow
mEmptyLines = 1; // The start of the document is an "empty line" in itself,
mInWhitespace = PR_TRUE;
mPreFormatted = PR_FALSE;
mCacheLine = PR_FALSE;
mStartedOutput = PR_FALSE;
// initialize the tag stack to zero:
mTagStack = new nsHTMLTag[TagStackSize];
mTagStackIndex = 0;
// initialize the OL stack, where numbers for ordered lists are kept:
mOLStack = new PRInt32[OLStackSize];
mOLStackIndex = 0;
}
nsPlainTextSerializer::~nsPlainTextSerializer()
{
delete[] mTagStack;
delete[] mOLStack;
}
NS_IMPL_ISUPPORTS4(nsPlainTextSerializer,
nsIContentSerializer,
nsIContentSink,
nsIHTMLContentSink,
nsIHTMLToTextSink)
NS_IMETHODIMP
nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn)
{
mFlags = aFlags;
mWrapColumn = aWrapColumn;
nsresult rv;
NS_WITH_SERVICE(nsILineBreakerFactory, lf, kLWBrkCID, &rv);
if (NS_SUCCEEDED(rv)) {
nsAutoString lbarg;
rv = lf->GetBreaker(lbarg, getter_AddRefs(mLineBreaker));
if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
}
// Turn on caching if we are wrapping or we want formatting.
// We need this even when flags indicate preformatted,
// in order to wrap textareas with wrap=hard.
if((mFlags & nsIDocumentEncoder::OutputFormatted) ||
(mFlags & nsIDocumentEncoder::OutputWrap))
{
mCacheLine = PR_TRUE;
}
// Set the line break character:
if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak)
&& (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) // Windows/mail
mLineBreak.AssignWithConversion("\r\n");
else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) // Mac
mLineBreak.AssignWithConversion("\r");
else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) // Unix/DOM
mLineBreak.AssignWithConversion("\n");
else
mLineBreak.AssignWithConversion(NS_LINEBREAK); // Platform/default
// Get some prefs
NS_WITH_SERVICE(nsIPref, prefs, NS_PREF_CONTRACTID, &rv);
if (NS_SUCCEEDED(rv) && prefs)
{
prefs->GetBoolPref(PREF_STRUCTS, &mStructs);
prefs->GetIntPref(PREF_HEADER_STRATEGY, &mHeaderStrategy);
}
return NS_OK;
}
NS_IMETHODIMP
nsPlainTextSerializer::Initialize(nsAWritableString* aOutString,
PRUint32 aFlags, PRUint32 aWrapCol)
{
nsresult rv = Init(aFlags, aWrapCol);
NS_ENSURE_SUCCESS(rv, rv);
// XXX This is wrong. It violates XPCOM string ownership rules.
// We're only getting away with this because instances of this
// class are restricted to single function scope.
mOutputString = aOutString;
return NS_OK;
}
NS_IMETHODIMP
nsPlainTextSerializer::AppendText(nsIDOMText* aText,
PRInt32 aStartOffset,
PRInt32 aEndOffset,
nsAWritableString& aStr)
{
NS_ENSURE_ARG(aText);
nsresult rv = NS_OK;
PRInt32 length = 0;
nsAutoString textstr;
nsCOMPtr<nsITextContent> content = do_QueryInterface(aText);
if (!content) return NS_ERROR_FAILURE;
const nsTextFragment* frag;
content->GetText(&frag);
if (frag) {
length = ((aEndOffset == -1) ? frag->GetLength() : aEndOffset) - aStartOffset;
if (frag->Is2b()) {
textstr.Assign(frag->Get2b() + aStartOffset, length);
}
else {
textstr.AssignWithConversion(frag->Get1b()+aStartOffset, length);
}
}
mOutputString = &aStr;
nsAutoString linebuffer;
// We have to split the string across newlines
// to match parser behavior
PRInt32 start = 0;
PRInt32 offset = textstr.FindCharInSet("\n\r");
while (offset != kNotFound) {
// Pass in the line
textstr.Mid(linebuffer, start, offset-start);
rv = DoAddLeaf(eHTMLTag_text, linebuffer);
if (NS_FAILED(rv)) break;
// Pass in a newline
rv = DoAddLeaf(eHTMLTag_newline, mLineBreak);
if (NS_FAILED(rv)) break;
start = offset+1;
offset = textstr.FindCharInSet("\n\r", start);
}
// Consume the last bit of the string if there's any left
if (NS_SUCCEEDED(rv) & (start < length)) {
if (start) {
textstr.Mid(linebuffer, start, offset-start);
rv = DoAddLeaf(eHTMLTag_text, linebuffer);
}
else {
rv = DoAddLeaf(eHTMLTag_text, textstr);
}
}
mOutputString = nsnull;
return rv;
}
NS_IMETHODIMP
nsPlainTextSerializer::AppendElementStart(nsIDOMElement *aElement,
nsAWritableString& aStr)
{
NS_ENSURE_ARG(aElement);
mContent = do_QueryInterface(aElement);
if (!mContent) return NS_ERROR_FAILURE;
nsresult rv;
PRInt32 id;
rv = GetIdForContent(mContent, &id);
if (NS_FAILED(rv)) return rv;
PRBool isContainer = IsContainer(id);
mOutputString = &aStr;
if (isContainer) {
rv = DoOpenContainer(id);
}
else {
nsAutoString empty;
rv = DoAddLeaf(id, empty);
}
mContent = 0;
mOutputString = nsnull;
if (!mInHead && id == eHTMLTag_head)
mInHead = PR_TRUE;
return rv;
}
NS_IMETHODIMP
nsPlainTextSerializer::AppendElementEnd(nsIDOMElement *aElement,
nsAWritableString& aStr)
{
NS_ENSURE_ARG(aElement);
mContent = do_QueryInterface(aElement);
if (!mContent) return NS_ERROR_FAILURE;
nsresult rv;
PRInt32 id;
rv = GetIdForContent(mContent, &id);
if (NS_FAILED(rv)) return rv;
PRBool isContainer = IsContainer(id);
mOutputString = &aStr;
rv = NS_OK;
if (isContainer) {
rv = DoCloseContainer(id);
}
mContent = 0;
mOutputString = nsnull;
if (mInHead && id == eHTMLTag_head)
mInHead = PR_FALSE;
return rv;
}
NS_IMETHODIMP
nsPlainTextSerializer::Flush(nsAWritableString& aStr)
{
mOutputString = &aStr;
FlushLine();
mOutputString = nsnull;
return NS_OK;
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenContainer(const nsIParserNode& aNode)
{
PRInt32 type = aNode.GetNodeType();
mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode);
return DoOpenContainer(type);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseContainer(const nsIParserNode& aNode)
{
PRInt32 type = aNode.GetNodeType();
const nsAReadableString& namestr = aNode.GetText();
nsCOMPtr<nsIAtom> name = dont_AddRef(NS_NewAtom(namestr));
mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode);
return DoCloseContainer(type);
}
NS_IMETHODIMP
nsPlainTextSerializer::AddLeaf(const nsIParserNode& aNode)
{
PRInt32 type = aNode.GetNodeType();
const nsAReadableString& text = aNode.GetText();
mParserNode = NS_CONST_CAST(nsIParserNode *, &aNode);
return DoAddLeaf(type, text);
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenHTML(const nsIParserNode& aNode)
{
return OpenContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseHTML(const nsIParserNode& aNode)
{
return CloseContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenHead(const nsIParserNode& aNode)
{
mInHead = PR_TRUE;
return NS_OK;
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseHead(const nsIParserNode& aNode)
{
mInHead = PR_FALSE;
return NS_OK;
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenBody(const nsIParserNode& aNode)
{
return OpenContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseBody(const nsIParserNode& aNode)
{
return CloseContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenForm(const nsIParserNode& aNode)
{
return OpenContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseForm(const nsIParserNode& aNode)
{
return CloseContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenMap(const nsIParserNode& aNode)
{
return OpenContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseMap(const nsIParserNode& aNode)
{
return CloseContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenFrameset(const nsIParserNode& aNode)
{
return OpenContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseFrameset(const nsIParserNode& aNode)
{
return CloseContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::OpenNoscript(const nsIParserNode& aNode)
{
return OpenContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::CloseNoscript(const nsIParserNode& aNode)
{
return CloseContainer(aNode);
}
NS_IMETHODIMP
nsPlainTextSerializer::DoFragment(PRBool aFlag)
{
return NS_OK;
}
nsresult
nsPlainTextSerializer::DoOpenContainer(PRInt32 aTag)
{
eHTMLTags type = (eHTMLTags)aTag;
if (mTagStackIndex < TagStackSize) {
mTagStack[mTagStackIndex++] = type;
}
if (type == eHTMLTag_body) {
// body -> can turn on cacheing unless it's already preformatted
if(!(mFlags & nsIDocumentEncoder::OutputPreformatted) &&
((mFlags & nsIDocumentEncoder::OutputFormatted) ||
(mFlags & nsIDocumentEncoder::OutputWrap))) {
mCacheLine = PR_TRUE;
}
// Try to figure out here whether we have a
// preformatted style attribute.
//
// Trigger on the presence of a "-moz-pre-wrap" in the
// style attribute. That's a very simplistic way to do
// it, but better than nothing.
// Also set mWrapColumn to the value given there
// (which arguably we should only do if told to do so).
nsAutoString style;
PRInt32 whitespace;
if(NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::style, style)) &&
(-1 != (whitespace = style.Find("white-space:")))) {
if (-1 != style.Find("-moz-pre-wrap", PR_TRUE, whitespace)) {
#ifdef DEBUG_preformatted
printf("Set mPreFormatted based on style moz-pre-wrap\n");
#endif
mPreFormatted = PR_TRUE;
mCacheLine = PR_TRUE;
PRInt32 widthOffset = style.Find("width:");
if (widthOffset >= 0) {
// We have to search for the ch before the semicolon,
// not for the semicolon itself, because nsString::ToInteger()
// considers 'c' to be a valid numeric char (even if radix=10)
// but then gets confused if it sees it next to the number
// when the radix specified was 10, and returns an error code.
PRInt32 semiOffset = style.Find("ch", widthOffset+6);
PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
: style.Length() - widthOffset);
nsAutoString widthstr;
style.Mid(widthstr, widthOffset+6, length);
PRInt32 err;
PRInt32 col = widthstr.ToInteger(&err);
if (NS_SUCCEEDED(err)) {
mWrapColumn = (PRUint32)col;
#ifdef DEBUG_preformatted
printf("Set wrap column to %d based on style\n", mWrapColumn);
#endif
}
}
}
else if (-1 != style.Find("pre", PR_TRUE, whitespace)) {
#ifdef DEBUG_preformatted
printf("Set mPreFormatted based on style pre\n");
#endif
mPreFormatted = PR_TRUE;
mCacheLine = PR_TRUE;
mWrapColumn = 0;
}
}
else {
mPreFormatted = PR_FALSE;
mCacheLine = PR_TRUE; // Cache lines unless something else tells us not to
}
return NS_OK;
}
if (!DoOutput()) {
return NS_OK;
}
if (type == eHTMLTag_p || type == eHTMLTag_pre) {
EnsureVerticalSpace(1); // Should this be 0 in unformatted case?
}
else if (type == eHTMLTag_td || type == eHTMLTag_th) {
// We must make sure that the content of two table cells get a
// space between them.
// Fow now, I will only add a SPACE. Could be a TAB or something
// else but I'm not sure everything can handle the TAB so SPACE
// seems like a better solution.
if(!mInWhitespace) {
// Maybe add something else? Several spaces? A TAB? SPACE+TAB?
if(mCacheLine) {
AddToLine(NS_ConvertASCIItoUCS2(" "), 1);
}
else {
nsAutoString space;
space.AssignWithConversion(" ");
WriteSimple(space);
}
mInWhitespace = PR_TRUE;
}
}
else if (type == eHTMLTag_ul) {
// Indent here to support nested list, which aren't included in li :-(
EnsureVerticalSpace(1); // Must end the current line before we change indent.
mIndent += kIndentSizeList;
}
else if (type == eHTMLTag_ol) {
EnsureVerticalSpace(1); // Must end the current line before we change indent.
if (mOLStackIndex < OLStackSize) {
mOLStack[mOLStackIndex++] = 1; // XXX should get it from the node!
}
mIndent += kIndentSizeList; // see ul
}
else if (type == eHTMLTag_li) {
if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_ol) {
if (mOLStackIndex > 0) {
// This is what nsBulletFrame does for OLs:
mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
}
else {
mInIndentString.AppendWithConversion("#");
}
mInIndentString.AppendWithConversion('.');
}
else {
mInIndentString.AppendWithConversion('*');
}
mInIndentString.AppendWithConversion(' ');
}
else if (type == eHTMLTag_dl) {
EnsureVerticalSpace(1);
}
else if (type == eHTMLTag_dd) {
mIndent += kIndentSizeDD;
}
// Else make sure we'll separate block level tags,
// even if we're about to leave, before doing any other formatting.
else if (IsBlockLevel(aTag)) {
EnsureVerticalSpace(0);
}
//////////////////////////////////////////////////////////////
if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
return NS_OK;
}
//////////////////////////////////////////////////////////////
// The rest of this routine is formatted output stuff,
// which we should skip if we're not formatted:
//////////////////////////////////////////////////////////////
if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
type == eHTMLTag_h5 || type == eHTMLTag_h6)
{
EnsureVerticalSpace(2);
if (mHeaderStrategy == 2) { // numbered
mIndent += kIndentSizeHeaders;
// Caching
nsCAutoString leadup;
PRInt32 level = HeaderLevel(type);
// Increase counter for current level
mHeaderCounter[level]++;
// Reset all lower levels
PRInt32 i;
for (i = level + 1; i <= 6; i++) {
mHeaderCounter[i] = 0;
}
// Construct numbers
for (i = 1; i <= level; i++) {
leadup.AppendInt(mHeaderCounter[i]);
leadup += ".";
}
leadup += " ";
Write(NS_ConvertASCIItoUCS2(leadup.GetBuffer()));
}
else if (mHeaderStrategy == 1) { // indent increasingly
mIndent += kIndentSizeHeaders;
for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
// for h(x), run x-1 times
mIndent += kIndentIncrementHeaders;
}
}
}
else if (type == eHTMLTag_blockquote) {
EnsureVerticalSpace(1);
// Find out whether it's a type=cite, and insert "> " instead.
// Eventually we should get the value of the pref controlling citations,
// and handle AOL-style citations as well.
// If we want to support RFC 2646 (and we do!) we have to have:
// >>>> text
// >>> fdfd
// when a mail is sent.
nsAutoString value;
nsresult rv = GetAttributeValue(nsHTMLAtoms::type, value);
if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE)) {
mCiteQuoteLevel++;
}
else {
mIndent += kTabSize; // Check for some maximum value?
}
}
else if (type == eHTMLTag_a && !IsCurrentNodeConverted()) {
nsAutoString url;
if (NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::href, url))
&& !url.IsEmpty()) {
mURL = url;
}
}
else if (type == eHTMLTag_q) {
Write(NS_ConvertASCIItoUCS2("\""));
}
else if (type == eHTMLTag_sup && mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("^"));
}
else if (type == eHTMLTag_sub && mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("_"));
}
else if (type == eHTMLTag_code && mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("|"));
}
else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
&& mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("*"));
}
else if ((type == eHTMLTag_em || type == eHTMLTag_i)
&& mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("/"));
}
else if (type == eHTMLTag_u && mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("_"));
}
return NS_OK;
}
nsresult
nsPlainTextSerializer::DoCloseContainer(PRInt32 aTag)
{
eHTMLTags type = (eHTMLTags)aTag;
if (mTagStackIndex > 0) {
--mTagStackIndex;
}
// End current line if we're ending a block level tag
if((type == eHTMLTag_body) || (type == eHTMLTag_html)) {
// We want the output to end with a new line,
// but in preformatted areas like text fields,
// we can't emit newlines that weren't there.
// So add the newline only in the case of formatted output.
if (mFlags & nsIDocumentEncoder::OutputFormatted) {
EnsureVerticalSpace(0);
}
else {
FlushLine();
}
// We won't want to do anything with these in formatted mode either,
// so just return now:
return NS_OK;
}
else if ((type == eHTMLTag_tr) ||
(type == eHTMLTag_li) ||
(type == eHTMLTag_pre) ||
(type == eHTMLTag_dt)) {
// Items that should always end a line, but get no more whitespace
EnsureVerticalSpace(0);
}
else if (type == eHTMLTag_ul) {
mIndent -= kIndentSizeList;
}
else if (type == eHTMLTag_ol) {
FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
--mOLStackIndex;
mIndent -= kIndentSizeList;
}
else if (type == eHTMLTag_dd) {
mIndent -= kIndentSizeDD;
}
else if (IsBlockLevel(aTag)
&& type != eHTMLTag_blockquote
&& type != eHTMLTag_script
&& type != eHTMLTag_markupDecl) {
// All other blocks get 1 vertical space after them
// in formatted mode, otherwise 0.
// This is hard. Sometimes 0 is a better number, but
// how to know?
EnsureVerticalSpace((mFlags & nsIDocumentEncoder::OutputFormatted)
? 1 : 0);
}
//////////////////////////////////////////////////////////////
if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
return NS_OK;
}
//////////////////////////////////////////////////////////////
// The rest of this routine is formatted output stuff,
// which we should skip if we're not formatted:
//////////////////////////////////////////////////////////////
if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
type == eHTMLTag_h5 || type == eHTMLTag_h6) {
if (mHeaderStrategy) { /*numbered or indent increasingly*/
mIndent -= kIndentSizeHeaders;
}
if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
// for h(x), run x-1 times
mIndent -= kIndentIncrementHeaders;
}
}
EnsureVerticalSpace(1);
}
else if (type == eHTMLTag_blockquote) {
FlushLine(); // Is this needed?
nsAutoString value;
nsresult rv = GetAttributeValue(nsHTMLAtoms::type, value);
if (NS_SUCCEEDED(rv) && value.EqualsWithConversion("cite", PR_TRUE)) {
mCiteQuoteLevel--;
}
else {
mIndent -= kTabSize;
}
EnsureVerticalSpace(1);
}
else if (type == eHTMLTag_a && !IsCurrentNodeConverted() && !mURL.IsEmpty()) {
nsAutoString temp;
temp.AssignWithConversion(" <");
temp += mURL;
temp.AppendWithConversion(">");
Write(temp);
mURL.Truncate();
}
else if (type == eHTMLTag_q) {
Write(NS_ConvertASCIItoUCS2("\""));
}
else if ((type == eHTMLTag_sup || type == eHTMLTag_sub)
&& mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2(" "));
}
else if (type == eHTMLTag_code && mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("|"));
}
else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
&& mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("*"));
}
else if ((type == eHTMLTag_em || type == eHTMLTag_i)
&& mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("/"));
}
else if (type == eHTMLTag_u && mStructs && !IsCurrentNodeConverted()) {
Write(NS_ConvertASCIItoUCS2("_"));
}
return NS_OK;
}
nsresult
nsPlainTextSerializer::DoAddLeaf(PRInt32 aTag,
const nsAReadableString& aText)
{
// If we don't want any output, just return
if (!DoOutput()) {
return NS_OK;
}
eHTMLTags type = (eHTMLTags)aTag;
if (mTagStackIndex > 1 && mTagStack[mTagStackIndex-2] == eHTMLTag_select) {
// Don't output the contents of SELECT elements;
// Might be nice, eventually, to output just the selected element.
return NS_OK;
}
else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) {
// Don't output the contents of <script> tags;
return NS_OK;
}
else if (type == eHTMLTag_text) {
/* Check, if some other MUA (e.g. 4.x) recognized the URL in
plain text and inserted an <a> element. If yes, output only once. */
if (!mURL.IsEmpty() && mURL.Equals(aText)) {
mURL.Truncate();
}
if (
// Bug 31994 says we shouldn't output the contents of SELECT elements.
mTagStackIndex <= 0 ||
mTagStack[mTagStackIndex-1] != eHTMLTag_select
) {
Write(aText);
}
}
else if (type == eHTMLTag_entity) {
nsCOMPtr<nsIParserService> parserService;
GetParserService(getter_AddRefs(parserService));
if (parserService) {
nsAutoString str(aText);
PRInt32 entity;
parserService->HTMLConvertEntityToUnicode(str, &entity);
nsAutoString temp(entity);
Write(temp);
}
}
else if (type == eHTMLTag_br) {
// Another egregious editor workaround, see bug 38194:
// ignore the bogus br tags that the editor sticks here and there.
nsAutoString typeAttr;
if (NS_FAILED(GetAttributeValue(nsHTMLAtoms::type, typeAttr))
|| !typeAttr.EqualsWithConversion("_moz")) {
EnsureVerticalSpace(mEmptyLines+1);
}
}
else if (type == eHTMLTag_whitespace) {
// The only times we want to pass along whitespace from the original
// html source are if we're forced into preformatted mode via flags,
// or if we're prettyprinting and we're inside a <pre>.
// Otherwise, either we're collapsing to minimal text, or we're
// prettyprinting to mimic the html format, and in neither case
// does the formatting of the html source help us.
// One exception: at the very beginning of a selection,
// we want to preserve whitespace.
if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
(mPreFormatted && !mWrapColumn) ||
((mFlags & nsIDocumentEncoder::OutputFormatted)
&& IsInPre())) {
Write(aText); // XXX: spacestuffing (maybe call AddToLine if mCacheLine==true)
}
else if(!mInWhitespace ||
(!mStartedOutput
&& mFlags | nsIDocumentEncoder::OutputSelectionOnly)) {
mInWhitespace = PR_FALSE;
Write( NS_ConvertToString(" ") );
mInWhitespace = PR_TRUE;
}
}
else if (type == eHTMLTag_newline) {
if (mFlags & nsIDocumentEncoder::OutputPreformatted ||
(mPreFormatted && !mWrapColumn) ||
IsInPre()) {
EnsureVerticalSpace(mEmptyLines+1);
}
else {
Write(NS_ConvertASCIItoUCS2(" "));
}
}
else if (type == eHTMLTag_hr &&
(mFlags & nsIDocumentEncoder::OutputFormatted)) {
EnsureVerticalSpace(0);
// Make a line of dashes as wide as the wrap width
// XXX honoring percentage would be nice
nsAutoString line;
PRUint32 width = (mWrapColumn > 0 ? mWrapColumn : 25);
while (line.Length() < width) {
line.AppendWithConversion('-');
}
Write(line);
EnsureVerticalSpace(0);
}
else if (type == eHTMLTag_img) {
/* Output (in decreasing order of preference)
alt, title or src (URI) attribute */
// See <http://www.w3.org/TR/REC-html40/struct/objects.html#edef-IMG>
nsAutoString desc, temp;
if (NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::alt, desc))) {
if (!desc.IsEmpty()) {
temp.AppendWithConversion(" ["); // Should we output chars at all here?
desc.StripChars("\"");
temp += desc;
temp.AppendWithConversion("] ");
}
// If the alt attribute has an empty value (|alt=""|), output nothing
}
else if (NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::title, desc))
&& !desc.IsEmpty()) {
temp.AppendWithConversion(" [");
desc.StripChars("\"");
temp += desc;
temp.AppendWithConversion("] ");
}
else if (NS_SUCCEEDED(GetAttributeValue(nsHTMLAtoms::src, desc))
&& !desc.IsEmpty()) {
temp.AppendWithConversion(" <");
desc.StripChars("\"");
temp += desc;
temp.AppendWithConversion("> ");
}
if (!temp.IsEmpty()) {
Write(temp);
}
}
return NS_OK;
}
void
nsPlainTextSerializer::EnsureVerticalSpace(PRInt32 noOfRows)
{
while(mEmptyLines < noOfRows) {
EndLine(PR_FALSE);
}
}
// This empties the current line cache without adding a NEWLINE.
// Should not be used if line wrapping is of importance since
// this function destroys the cache information.
void
nsPlainTextSerializer::FlushLine()
{
if(mCurrentLine.Length()>0) {
if(0 == mColPos) {
WriteQuotesAndIndent();
}
WriteSimple(mCurrentLine);
mColPos += mCurrentLine.Length();
mCurrentLine.Truncate();
mCurrentLineWidth = 0;
}
}
void
nsPlainTextSerializer::WriteSimple(nsString& aString)
{
if (aString.Length() > 0) {
mStartedOutput = PR_TRUE;
}
// First, replace all nbsp characters with spaces,
// which the unicode encoder won't do for us.
static PRUnichar nbsp = 160;
static PRUnichar space = ' ';
aString.ReplaceChar(nbsp, space);
mOutputString->Append(aString);
}
void
nsPlainTextSerializer::AddToLine(const PRUnichar * aLineFragment,
PRInt32 aLineFragmentLength)
{
PRUint32 prefixwidth = (mCiteQuoteLevel > 0 ? mCiteQuoteLevel + 1:0)+mIndent;
PRInt32 linelength = mCurrentLine.Length();
if(0 == linelength) {
if(0 == aLineFragmentLength) {
// Nothing at all. Are you kidding me?
return;
}
if(mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
if(('>' == aLineFragment[0]) ||
(' ' == aLineFragment[0]) ||
(!nsCRT::strncmp(aLineFragment, "From ", 5))) {
// Space stuffing a la RFC 2646 (format=flowed).
mCurrentLine.AppendWithConversion(' ');
if(MayWrap()) {
mCurrentLineWidth += unicharwidth(' ');
#ifdef DEBUG_wrapping
NS_ASSERTION(unicharwidth(mCurrentLine.GetUnicode(),
mCurrentLine.Length()) ==
(PRInt32)mCurrentLineWidth,
"mCurrentLineWidth and reality out of sync!");
#endif
}
}
}
mEmptyLines=-1;
}
mCurrentLine.Append(aLineFragment, aLineFragmentLength);
if(MayWrap()) {
mCurrentLineWidth += unicharwidth(aLineFragment, aLineFragmentLength);
#ifdef DEBUG_wrapping
NS_ASSERTION(unicharwidth(mCurrentLine.GetUnicode(),
mCurrentLine.Length()) ==
(PRInt32)mCurrentLineWidth,
"mCurrentLineWidth and reality out of sync!");
#endif
}
linelength = mCurrentLine.Length();
// Wrap?
if(MayWrap())
{
#ifdef DEBUG_wrapping
NS_ASSERTION(unicharwidth(mCurrentLine.GetUnicode(),
mCurrentLine.Length()) ==
(PRInt32)mCurrentLineWidth,
"mCurrentLineWidth and reality out of sync!");
#endif
// Yes, wrap!
// The "+4" is to avoid wrap lines that only would be a couple
// of letters too long. We give this bonus only if the
// wrapcolumn is more than 20.
PRUint32 bonuswidth = (mWrapColumn > 20) ? 4 : 0;
// XXX: Should calculate prefixwidth with unicharwidth
while(mCurrentLineWidth+prefixwidth > mWrapColumn+bonuswidth) {
// Must wrap. Let's find a good place to do that.
nsresult result = NS_OK;
// We go from the end removing one letter at a time until
// we have a reasonable width
PRInt32 goodSpace = mCurrentLine.Length();
PRUint32 width = mCurrentLineWidth;
while(goodSpace > 0 && (width+prefixwidth > mWrapColumn)) {
goodSpace--;
width -= unicharwidth(mCurrentLine[goodSpace]);
}
goodSpace++;
PRBool oNeedMoreText;
if (nsnull != mLineBreaker) {
result = mLineBreaker->Prev(mCurrentLine.GetUnicode(),
mCurrentLine.Length(), goodSpace,
(PRUint32 *) &goodSpace, &oNeedMoreText);
if (oNeedMoreText) {
goodSpace = -1;
}
else if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace-1))) {
--goodSpace; // adjust the position since line breaker returns a position next to space
}
}
// fallback if the line breaker is unavailable or failed
if (nsnull == mLineBreaker || NS_FAILED(result)) {
goodSpace = mWrapColumn-prefixwidth;
while (goodSpace >= 0 &&
!nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
goodSpace--;
}
}
nsAutoString restOfLine;
if (goodSpace < 0) {
// If we don't found a good place to break, accept long line and
// try to find another place to break
goodSpace=(prefixwidth>mWrapColumn+1)?1:mWrapColumn-prefixwidth+1;
result = NS_OK;
if (nsnull != mLineBreaker) {
result = mLineBreaker->Next(mCurrentLine.GetUnicode(),
mCurrentLine.Length(), goodSpace,
(PRUint32 *) &goodSpace, &oNeedMoreText);
}
// fallback if the line breaker is unavailable or failed
if (nsnull == mLineBreaker || NS_FAILED(result)) {
goodSpace=(prefixwidth>mWrapColumn)?1:mWrapColumn-prefixwidth;
while (goodSpace < linelength &&
!nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
goodSpace++;
}
}
}
if((goodSpace < linelength) && (goodSpace > 0)) {
// Found a place to break
// -1 (trim a char at the break position)
// only if the line break was a space.
if (nsCRT::IsAsciiSpace(mCurrentLine.CharAt(goodSpace))) {
mCurrentLine.Right(restOfLine, linelength-goodSpace-1);
}
else {
mCurrentLine.Right(restOfLine, linelength-goodSpace);
}
mCurrentLine.Truncate(goodSpace);
EndLine(PR_TRUE);
mCurrentLine.Truncate();
// Space stuff new line?
if(mFlags & nsIDocumentEncoder::OutputFormatFlowed) {
if((restOfLine.Length()>0) &&
((restOfLine[0] == '>') ||
(restOfLine[0] == ' ') ||
(!restOfLine.CompareWithConversion("From ",PR_FALSE,5)))) {
// Space stuffing a la RFC 2646 (format=flowed).
mCurrentLine.AppendWithConversion(' ');
}
}
mCurrentLine.Append(restOfLine);
mCurrentLineWidth = unicharwidth(mCurrentLine.GetUnicode(),
mCurrentLine.Length());
linelength = mCurrentLine.Length();
mEmptyLines = -1;
}
else {
// Nothing to do. Hopefully we get more data later
// to use for a place to break line
break;
}
}
}
else {
// No wrapping.
}
}
void
nsPlainTextSerializer::EndLine(PRBool softlinebreak)
{
if(softlinebreak) {
if(0 == mCurrentLine.Length()) {
// No meaning
return;
}
WriteQuotesAndIndent();
// Remove SPACE from the end of the line.
PRUint32 linelength = mCurrentLine.Length();
while(linelength > 0 &&
' ' == mCurrentLine[--linelength]) {
mCurrentLine.SetLength(linelength);
}
if((mFlags & nsIDocumentEncoder::OutputFormatFlowed) &&
(mIndent == 0)) {
// Add the soft part of the soft linebreak (RFC 2646 4.1)
// We only do this when there is no indentation since format=flowed
// lines and indentation doesn't work well together.
mCurrentLine.AppendWithConversion(' ');
}
mCurrentLine.Append(mLineBreak);
WriteSimple(mCurrentLine);
mCurrentLine.Truncate();
mCurrentLineWidth = 0;
mColPos=0;
mEmptyLines=0;
mInWhitespace=PR_TRUE;
}
else {
// Hard break
if(0 == mColPos) {
WriteQuotesAndIndent();
}
if(mCurrentLine.Length()>0) {
mEmptyLines=-1;
}
// Output current line
// Remove SPACE from the end of the line, unless we got
// "-- " in a format=flowed output. "-- " is the sig delimiter
// by convention and shouldn't be touched even in format=flowed
// (see RFC 2646).
PRUint32 currentlinelength = mCurrentLine.Length();
while((currentlinelength > 0) &&
(' ' == mCurrentLine[currentlinelength-1]) &&
(!mCurrentLine.EqualsWithConversion("-- "))) {
mCurrentLine.SetLength(--currentlinelength);
}
mCurrentLine.Append(mLineBreak);
WriteSimple(mCurrentLine);
mCurrentLine.Truncate();
mCurrentLineWidth = 0;
mColPos=0;
mEmptyLines++;
mInWhitespace=PR_TRUE;
}
}
void
nsPlainTextSerializer::WriteQuotesAndIndent()
{
// Put the mail quote "> " chars in, if appropriate:
if (mCiteQuoteLevel > 0) {
nsAutoString quotes;
for(int i=0; i < mCiteQuoteLevel; i++) {
quotes.AppendWithConversion('>');
}
quotes.AppendWithConversion(' ');
WriteSimple(quotes);
mColPos += (mCiteQuoteLevel+1);
}
// Indent if necessary
PRInt32 indentwidth = mIndent - mInIndentString.Length();
if (indentwidth > 0) {
nsAutoString spaces;
for (int i=0; i < indentwidth; ++i)
spaces.AppendWithConversion(' ');
WriteSimple(spaces);
mColPos += indentwidth;
}
if(mInIndentString.Length() > 0) {
WriteSimple(mInIndentString);
mColPos += mInIndentString.Length();
mInIndentString.Truncate();
}
}
//
// Write a string, wrapping appropriately to mWrapColumn.
// This routine also handles indentation and mail-quoting,
// and so should be used for formatted output even if we're not wrapping.
//
void
nsPlainTextSerializer::Write(const nsAReadableString& aString)
{
#ifdef DEBUG_wrapping
char* foo = ToNewCString(aString);
printf("Write(%s): wrap col = %d, mColPos = %d\n", foo, mWrapColumn, mColPos);
nsMemory::Free(foo);
#endif
PRInt32 bol = 0;
PRInt32 newline;
PRInt32 totLen = aString.Length();
if ((mPreFormatted && !mWrapColumn) || IsInPre()) {
// No intelligent wrapping. This mustn't be mixed with
// intelligent wrapping without clearing the mCurrentLine
// buffer before!!!
NS_WARN_IF_FALSE(mCurrentLine.Length() == 0,
"Mixed wrapping data and nonwrapping data on the same line");
if (mCurrentLine.Length() > 0) {
FlushLine();
}
// Put the mail quote "> " chars in, if appropriate.
// Have to put it in before every line.
while(bol<totLen) {
if(0 == mColPos) {
WriteQuotesAndIndent();
}
newline = aString.FindChar(PRUnichar('\n'),bol);
// XXX should probably also look for \r even though they shouldn't happen
if(newline < 0) {
// No new lines.
nsAutoString stringpart;
aString.Right(stringpart, totLen-bol);
if(stringpart.Length() > 0) {
PRUnichar lastchar = stringpart[stringpart.Length()-1];
if((lastchar == '\t') || (lastchar == ' ') ||
(lastchar == '\r') ||(lastchar == '\n')) {
mInWhitespace = PR_TRUE;
}
else {
mInWhitespace = PR_FALSE;
}
}
WriteSimple(stringpart);
mEmptyLines=-1;
mColPos += totLen-bol;
bol = totLen;
}
else {
// There is a newline
nsAutoString stringpart;
aString.Mid(stringpart, bol, newline-bol);
mInWhitespace = PR_TRUE;
WriteSimple(stringpart);
// and write the newline
WriteSimple(mLineBreak);
mEmptyLines=0;
mColPos=0;
bol = newline+1;
}
}
#ifdef DEBUG_wrapping
printf("No wrapping: newline is %d, totLen is %d; leaving mColPos = %d\n",
newline, totLen, mColPos);
#endif
return;
}
// XXX Copy necessary to use nsString methods and gain
// access to underlying buffer
nsAutoString str(aString);
// Intelligent handling of text
// If needed, strip out all "end of lines"
// and multiple whitespace between words
PRInt32 nextpos;
nsAutoString tempstr;
const PRUnichar * offsetIntoBuffer = nsnull;
while (bol < totLen) { // Loop over lines
// Find a place where we may have to do whitespace compression
nextpos = str.FindCharInSet(" \t\n\r", bol);
#ifdef DEBUG_wrapping
nsAutoString remaining;
str.Right(remaining, totLen - bol);
foo = remaining.ToNewCString();
// printf("Next line: bol = %d, newlinepos = %d, totLen = %d, string = '%s'\n",
// bol, nextpos, totLen, foo);
nsMemory::Free(foo);
#endif
if(nextpos < 0) {
// The rest of the string
if(!mCacheLine) {
str.Right(tempstr, totLen-bol);
WriteSimple(tempstr);
}
else {
offsetIntoBuffer = str.GetUnicode();
offsetIntoBuffer = &offsetIntoBuffer[bol];
AddToLine(offsetIntoBuffer, totLen-bol);
}
bol=totLen;
mInWhitespace=PR_FALSE;
}
else {
// There's still whitespace left in the string
// If we're already in whitespace and not preformatted, just skip it:
if (mInWhitespace && (nextpos == bol) && !mPreFormatted &&
!(mFlags & nsIDocumentEncoder::OutputPreformatted)) {
// Skip whitespace
bol++;
continue;
}
if(nextpos == bol) {
// Note that we are in whitespace.
mInWhitespace = PR_TRUE;
if(!mCacheLine) {
nsAutoString whitestring(str[nextpos]);
WriteSimple(whitestring);
}
else {
offsetIntoBuffer = str.GetUnicode();
offsetIntoBuffer = &offsetIntoBuffer[nextpos];
AddToLine(offsetIntoBuffer, 1);
}
bol++;
continue;
}
if(!mCacheLine) {
str.Mid(tempstr,bol,nextpos-bol);
if(mFlags & nsIDocumentEncoder::OutputPreformatted) {
bol = nextpos;
}
else {
tempstr.AppendWithConversion(" ");
bol = nextpos + 1;
mInWhitespace = PR_TRUE;
}
WriteSimple(tempstr);
}
else {
mInWhitespace = PR_TRUE;
offsetIntoBuffer = str.GetUnicode();
offsetIntoBuffer = &offsetIntoBuffer[bol];
if(mPreFormatted || (mFlags & nsIDocumentEncoder::OutputPreformatted)) {
// Preserve the real whitespace character
nextpos++;
AddToLine(offsetIntoBuffer, nextpos-bol);
bol = nextpos;
}
else {
// Replace the whitespace with a space
AddToLine(offsetIntoBuffer, nextpos-bol);
AddToLine(NS_ConvertToString(" ").GetUnicode(),1);
bol = nextpos + 1; // Let's eat the whitespace
}
}
}
} // Continue looping over the string
}
PRBool
nsPlainTextSerializer::MayWrap()
{
return mWrapColumn &&
((mFlags & nsIDocumentEncoder::OutputFormatted) ||
(mFlags & nsIDocumentEncoder::OutputWrap));
}
nsresult
nsPlainTextSerializer::GetAttributeValue(nsIAtom* aName,
nsString& aValueRet)
{
if (mContent) {
if (NS_CONTENT_ATTR_NOT_THERE != mContent->GetAttribute(kNameSpaceID_None,
aName, aValueRet)) {
return NS_OK;
}
}
else if (mParserNode) {
const PRUnichar *name;
aName->GetUnicode(&name);
PRInt32 count = mParserNode->GetAttributeCount();
for (PRInt32 i=0;i<count;i++) {
const nsAReadableString& key = mParserNode->GetKeyAt(i);
if (key.Equals(name)) {
aValueRet = mParserNode->GetValueAt(i);
aValueRet.StripChars("\"");
return NS_OK;
}
}
}
return NS_ERROR_NOT_AVAILABLE;
}
/**
* Returns true, if the element was inserted by Moz' TXT->HTML converter.
* In this case, we should ignore it.
*/
PRBool
nsPlainTextSerializer::IsCurrentNodeConverted()
{
nsAutoString value;
nsresult rv = GetAttributeValue(nsHTMLAtoms::kClass, value);
return (NS_SUCCEEDED(rv) &&
(value.EqualsWithConversion("txt", PR_TRUE, 3) ||
value.EqualsWithConversion("\"txt", PR_TRUE, 4)));
}
PRBool
nsPlainTextSerializer::DoOutput()
{
return !mInHead;
}
nsresult
nsPlainTextSerializer::GetIdForContent(nsIContent* aContent,
PRInt32* aID)
{
nsCOMPtr<nsIHTMLContent> htmlcontent = do_QueryInterface(aContent);
if (!htmlcontent) {
*aID = eHTMLTag_unknown;
return NS_OK;
}
nsCOMPtr<nsIAtom> tagname;
mContent->GetTag(*getter_AddRefs(tagname));
if (!tagname) return NS_ERROR_FAILURE;
nsAutoString namestr;
tagname->ToString(namestr);
nsresult rv;
nsCOMPtr<nsIParserService> parserService;
rv = GetParserService(getter_AddRefs(parserService));
if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
rv = parserService->HTMLStringTagToId(namestr, aID);
if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
return NS_OK;
}
nsresult
nsPlainTextSerializer::GetParserService(nsIParserService** aParserService)
{
if (!mParserService) {
nsresult rv;
mParserService = do_GetService(kParserServiceCID, &rv);
if (NS_FAILED(rv)) return NS_ERROR_FAILURE;
}
CallQueryInterface(mParserService.get(), aParserService);
return NS_OK;
}
PRBool
nsPlainTextSerializer::IsBlockLevel(PRInt32 aId)
{
PRBool isBlock = PR_FALSE;
nsCOMPtr<nsIParserService> parserService;
GetParserService(getter_AddRefs(parserService));
if (parserService) {
parserService->IsBlock(aId, isBlock);
}
return isBlock;
}
PRBool
nsPlainTextSerializer::IsContainer(PRInt32 aId)
{
PRBool isContainer = PR_FALSE;
nsCOMPtr<nsIParserService> parserService;
GetParserService(getter_AddRefs(parserService));
if (parserService) {
parserService->IsContainer(aId, isContainer);
}
return isContainer;
}
/**
* Returns true if we currently are inside a <pre>. The check is done
* by traversing the tag stack looking for <pre> until we hit a block
* level tag which is assumed to override any <pre>:s below it in
* the stack. To do this correctly to a 100% would require access
* to style which we don't support in this converter.
*/
PRBool
nsPlainTextSerializer::IsInPre()
{
PRInt32 i = mTagStackIndex;
while(i > 0) {
if(mTagStack[i-1] == eHTMLTag_pre)
return PR_TRUE;
if(IsBlockLevel(mTagStack[i-1])) {
// We assume that every other block overrides a <pre>
return PR_FALSE;
}
--i;
}
// Not a <pre> in the whole stack
return PR_FALSE;
}
/*
@return 0 = no header, 1 = h1, ..., 6 = h6
*/
PRInt32 HeaderLevel(eHTMLTags aTag)
{
PRInt32 result;
switch (aTag)
{
case eHTMLTag_h1:
result = 1; break;
case eHTMLTag_h2:
result = 2; break;
case eHTMLTag_h3:
result = 3; break;
case eHTMLTag_h4:
result = 4; break;
case eHTMLTag_h5:
result = 5; break;
case eHTMLTag_h6:
result = 6; break;
default:
result = 0; break;
}
return result;
}
/*
* This is an implementation of wcwidth() and wcswidth() as defined in
* "The Single UNIX Specification, Version 2, The Open Group, 1997"
* <http://www.UNIX-systems.org/online.html>
*
* Markus Kuhn -- 2000-02-08 -- public domain
*
* Minor alterations to fit Mozilla's data types by Daniel Bratell
*/
/* These functions define the column width of an ISO 10646 character
* as follows:
*
* - The null character (U+0000) has a column width of 0.
*
* - Other C0/C1 control characters and DEL will lead to a return
* value of -1.
*
* - Non-spacing and enclosing combining characters (general
* category code Mn or Me in the Unicode database) have a
* column width of 0.
*
* - Spacing characters in the East Asian Wide (W) or East Asian
* FullWidth (F) category as defined in Unicode Technical
* Report #11 have a column width of 2.
*
* - All remaining characters (including all printable
* ISO 8859-1 and WGL4 characters, Unicode control characters,
* etc.) have a column width of 1.
*
* This implementation assumes that wchar_t characters are encoded
* in ISO 10646.
*/
PRInt32 unicharwidth(PRUnichar ucs)
{
/* sorted list of non-overlapping intervals of non-spacing characters */
static const struct interval {
PRUint16 first;
PRUint16 last;
} combining[] = {
{ 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
{ 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
{ 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
{ 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
{ 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 },
{ 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 },
{ 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 },
{ 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 },
{ 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 },
{ 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 },
{ 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 },
{ 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 },
{ 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF },
{ 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 },
{ 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 },
{ 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A },
{ 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 },
{ 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 },
{ 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 },
{ 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 },
{ 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 },
{ 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 },
{ 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD },
{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 },
{ 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
{ 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }
};
PRInt32 min = 0;
PRInt32 max = sizeof(combining) / sizeof(struct interval) - 1;
PRInt32 mid;
/* test for 8-bit control characters */
if (ucs == 0)
return 0;
if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
return -1;
/* first quick check for Latin-1 etc. characters */
if (ucs < combining[0].first)
return 1;
/* binary search in table of non-spacing characters */
while (max >= min) {
mid = (min + max) / 2;
if (combining[mid].last < ucs)
min = mid + 1;
else if (combining[mid].first > ucs)
max = mid - 1;
else if (combining[mid].first <= ucs && combining[mid].last >= ucs)
return 0;
}
/* if we arrive here, ucs is not a combining or C0/C1 control character */
/* fast test for majority of non-wide scripts */
if (ucs < 0x1100)
return 1;
return 1 +
((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */
(ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a &&
ucs != 0x303f) || /* CJK ... Yi */
(ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
(ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
(ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
(ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */
(ucs >= 0xffe0 && ucs <= 0xffe6));
}
PRInt32 unicharwidth(const PRUnichar* pwcs, PRInt32 n)
{
PRInt32 w, width = 0;
for (;*pwcs && n-- > 0; pwcs++)
if ((w = unicharwidth(*pwcs)) < 0)
return -1;
else
width += w;
return width;
}