Bug 1563452 - Don't break attributes when serializing with nsXMLContentSerializer. r=mbrodesser,m_kato

Based on work by JMM <jose@bmo.anonaddy.com>.

Differential Revision: https://phabricator.services.mozilla.com/D143447
This commit is contained in:
Ben Campbell 2022-11-30 05:16:13 +00:00
parent a837f93948
commit 9d6eb60049
10 changed files with 168 additions and 32 deletions

View File

@ -1,6 +1,6 @@
<!DOCTYPE html>
<html><head><meta http-equiv="content-type" content="text/html;
charset=UTF-8">
<html><head><meta http-equiv="content-type"
content="text/html; charset=UTF-8">
<title>Test for html serializer</title>
</head><body>

View File

@ -0,0 +1,69 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "gtest/gtest.h"
#include "nsCOMPtr.h"
#include "nsIDocumentEncoder.h"
#include "nsString.h"
#include "mozilla/ErrorResult.h"
#include "mozilla/dom/Document.h"
#include "mozilla/dom/DOMParser.h"
using namespace mozilla;
using namespace mozilla::dom;
// Test that serialising some DOM doesn't destroy links by word-wrapping long
// href values containing spaces.
//
// The test parses an HTML document whose only link has a very long href with
// many embedded spaces, then serializes it once per (content type, output
// flags) combination and checks that the href text appears unbroken in every
// result.
TEST(TestXMLSerializerNoBreakLink, TestXMLSerializerNoBreakLinkMain)
{
  // Build up a stupidly-long URL with spaces. Default is to wrap at column
  // 72, so we want to exceed that.
  nsString longURL = u"http://www.example.com/link with spaces"_ns;
  for (int i = 1; i < 125; ++i) {
    longURL.Append(u' ');
    longURL.Append(IntToTString<char16_t>(i));
  }

  nsString htmlInput =
      u"<html><head>"
      "<meta charset=\"utf-8\">"
      "</head><body>Hello Thunderbird! <a href=\""_ns +
      longURL + u"\">Link</a></body></html>"_ns;

  // Parse HTML into a Document.
  nsCOMPtr<Document> document;
  {
    IgnoredErrorResult rv;
    RefPtr<DOMParser> parser = DOMParser::CreateWithoutGlobal(rv);
    ASSERT_FALSE(rv.Failed());
    document = parser->ParseFromString(htmlInput, SupportedType::Text_html, rv);
    ASSERT_FALSE(rv.Failed());
  }
  // Guard against handing a null document to the encoder below.
  ASSERT_TRUE(document);

  // Serialize back in a variety of flavours and check the URL survives the
  // round trip intact.
  const nsCString contentTypes[] = {"text/xml"_ns, "application/xml"_ns,
                                    "application/xhtml+xml"_ns,
                                    "image/svg+xml"_ns, "text/html"_ns};
  // The flag combinations do not depend on the content type, so define them
  // once rather than on every outer iteration.
  const uint32_t flagsToTest[] = {
      nsIDocumentEncoder::OutputFormatted, nsIDocumentEncoder::OutputWrap,
      nsIDocumentEncoder::OutputFormatted | nsIDocumentEncoder::OutputWrap};

  for (auto const& contentType : contentTypes) {
    for (uint32_t flags : flagsToTest) {
      // Attach the current combination to any failure reported in this
      // iteration; otherwise a failure cannot be attributed to one of the
      // content type / flag pairs.
      SCOPED_TRACE(testing::Message() << "contentType=" << contentType.get()
                                      << ", flags=" << flags);

      // Serialize doc back to HTML source again.
      nsCOMPtr<nsIDocumentEncoder> encoder =
          do_createDocumentEncoder(contentType.get());
      ASSERT_TRUE(encoder);
      nsresult rv =
          encoder->Init(document, NS_ConvertASCIItoUTF16(contentType), flags);
      ASSERT_TRUE(NS_SUCCEEDED(rv));
      nsString parsed;
      rv = encoder->EncodeToString(parsed);
      ASSERT_TRUE(NS_SUCCEEDED(rv));

      // URL is intact?
      EXPECT_TRUE(parsed.Find(longURL) != kNotFound);
    }
  }
}

View File

@ -10,6 +10,7 @@ UNIFIED_SOURCES += [
"TestParser.cpp",
"TestPlainTextSerializer.cpp",
"TestScheduler.cpp",
"TestXMLSerializerNoBreakLink.cpp",
"TestXPathGenerator.cpp",
]

View File

@ -1,9 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE whatever PUBLIC
"-//MOZ//WHATEVER//EN"
"http://mozilla.org/ns/foo">
<!DOCTYPE whatever PUBLIC "-//MOZ//WHATEVER//EN" "http://mozilla.org/ns/foo">
<foo xmlns="htp://mozilla.org/ns">
<baz /><!-- a comment --> <bar> &lt;robots&gt; &amp; &lt;aliens&gt;
<baz/><!-- a comment --> <bar> &lt;robots&gt; &amp; &lt;aliens&gt;
<mozilla> a a a a a éèàùûî</mozilla>
<firefox>Lorem ip<!-- aaa -->sum dolor sit amet, consectetuer adipiscing elit. Nam eu sapien. Sed viverra lacus. Donec quis ipsum. Nunc cursus aliquet lectus. Nunc vitae eros. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos hymenaeos. Nam tellus massa, fringilla aliquam, fermentum sit amet, posuere ac, est. Duis tristique egestas ligula. Mauris quis felis. Fusce a ipsum non lacus posuere aliquet. Sed fermentum posuere nulla. Donec tempor. Donec sollicitudin tortor lacinia libero ullamcorper laoreet. Cras quis nisi at odio consectetuer molestie.</firefox>
<?xml-foo "hey" ?>

View File

@ -2,7 +2,7 @@
<!DOCTYPE whatever PUBLIC "-//MOZ//WHATEVER//EN" "http://mozilla.org/ns/foo">
<foo xmlns="http://mozilla.org/ns" xmlns:falsexul="http://mozilla.org/ns3">
<!-- document to test namespaces-->
<baz />
<baz/>
<bar> &lt;robots&gt; &amp; &lt;aliens&gt;
<mozilla xmlns="http://mozilla.org/ns2"> a a a <moz>a a</moz> éèàùûî</mozilla>
<firefox>Lorem ipsum dolor sit amet, consectetuer adipiscing elit.</firefox>
@ -19,7 +19,7 @@
<box>
<other:what>lorem ipsum</other:what>
<description>xul fake</description>
<what xmlns="http://mozilla.org/ns/other">lorem ipsum <falsexul:label value="hello" /> the return</what>
<what xmlns="http://mozilla.org/ns/other">lorem ipsum <falsexul:label value="hello"/> the return</what>
</box>
</falsexul:xul>

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE whatever PUBLIC "-//MOZ//WHATEVER//EN" "http://mozilla.org/ns/foo">
<foo xmlns="http://mozilla.org/ns"
xmlns:falsexul="http://mozilla.org/ns3">
xmlns:falsexul="http://mozilla.org/ns3">
<!-- document to test namespaces-->
<baz/>
<bar> &lt;robots&gt; &amp;
@ -27,7 +27,7 @@ xmlns:other="http://mozilla.org/ns/other">
<other:what>lorem ipsum</other:what>
<description>xul fake</description>
<what
xmlns="http://mozilla.org/ns/other">lorem
xmlns="http://mozilla.org/ns/other">lorem
ipsum
<falsexul:label value="hello"/>
the return</what>

View File

@ -1,19 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE whatever PUBLIC "-//MOZ//WHATEVER//EN" "http://mozilla.org/ns/foo">
<foo xmlns="http://mozilla.org/ns"
<foo xmlns="http://mozilla.org/ns"
xmlns:falsexul="http://mozilla.org/ns3">
<!-- document to test namespaces-->
<baz/>
<bar> &lt;robots&gt; &amp;
&lt;aliens&gt;
<mozilla
<mozilla
xmlns="http://mozilla.org/ns2"> a a a <moz>a
a</moz> éèàùûî</mozilla>
<firefox>Lorem ipsum dolor sit
amet, consectetuer adipiscing elit.</firefox>
</bar>
<xul xmlns="http://mozilla.org/ns3"
<xul xmlns="http://mozilla.org/ns3"
xmlns:other="http://mozilla.org/ns/other">
<box>
<other:what>lorem ipsum</other:what>
@ -22,26 +22,26 @@ fake</description>
</box>
</xul>
<falsexul:xul
<falsexul:xul
xmlns:other="http://mozilla.org/ns/other">
<box>
<other:what>lorem ipsum</other:what>
<description>xul fake</description>
<what
<what
xmlns="http://mozilla.org/ns/other">lorem
ipsum <falsexul:label value="hello"/>
the return</what>
</box>
</falsexul:xul>
<ho:xul
xmlns="http://mozilla.org/ns4"
xmlns:ho="http://mozilla.org/ns4"
<ho:xul
xmlns="http://mozilla.org/ns4"
xmlns:ho="http://mozilla.org/ns4"
xmlns:other="http://mozilla.org/ns/other">
<box>
<other:what>lorem ipsum</other:what>
<description ho:foo="bar"
bla="hello" other:yes="no"
<description ho:foo="bar"
bla="hello" other:yes="no"
ho:foo2="bar2">xul fake</description>
</box>
</ho:xul>

View File

@ -19,7 +19,7 @@ async function xmlEncode(aFile, aFlags, aCharset) {
return encoder.encodeToString();
}
add_task(async function() {
add_task(async function test_encoding() {
var result, expected;
const de = Ci.nsIDocumentEncoder;
@ -121,3 +121,59 @@ add_task(async function() {
expected = loadContentFile("4_result_6.xml");
Assert.equal(expected, result);
});
// OutputRaw takes precedence over OutputWrap and OutputFormatted: when it is
// set, the serializer is expected to hand back exactly what it was given.
// Verify that by round-tripping each original test file with OutputRaw
// combined with the wrapping/formatting flags and comparing against the
// untouched file contents.
add_task(async function test_outputRaw() {
  const de = Ci.nsIDocumentEncoder;

  // Both combinations include OutputRaw; they differ only in whether
  // OutputFormatted is also requested.
  const rawFlagSets = [
    de.OutputRaw | de.OutputLFLineBreak | de.OutputWrap,
    de.OutputRaw | de.OutputLFLineBreak | de.OutputFormatted | de.OutputWrap,
  ];

  // Files 2 and 3 go through the shared xmlEncode() helper.
  for (const fileName of ["2_original.xml", "3_original.xml"]) {
    const original = loadContentFile(fileName);
    for (const flags of rawFlagSets) {
      const serialized = await xmlEncode(fileName, flags);
      Assert.equal(original, serialized);
    }
  }

  // File 4 is encoded by hand so that a wrap column can be set; with
  // OutputRaw the wrap column must have no effect on the output.
  const wrappedOriginal = loadContentFile("4_original.xml");
  const parsedDoc = await do_parse_document("4_original.xml", "text/xml");
  const rawEncoder = Cu.createDocumentEncoder("text/xml");
  rawEncoder.setCharset("UTF-8");
  for (const flags of rawFlagSets) {
    rawEncoder.init(parsedDoc, "text/xml", flags);
    rawEncoder.setWrapColumn(40);
    const serialized = rawEncoder.encodeToString();
    Assert.equal(wrappedOriginal, serialized);
  }
});

View File

@ -47,11 +47,17 @@ interface nsIDocumentEncoder : nsISupports
*/
const unsigned long OutputSelectionOnly = (1 << 0);
/** Plaintext output: Convert html to plaintext that looks like the html.
* Can't be used in conjunction with `OutputPreformatted`.
* Implies wrap (except inside <pre>), since html wraps.
* HTML, XHTML and XML output: do prettyprinting, ignoring existing formatting.
* XML output : it doesn't implicitly wrap
/**
* Plaintext output:
* - Convert html to plaintext that looks like the html.
* - Can't be used in conjunction with `OutputPreformatted`.
* - Implies wrap (except inside <pre>), since html wraps.
* HTML and XHTML output:
* - Do prettyprinting, ignoring existing formatting.
* - Implies wrap (except in attribute values and inside <pre>).
* XML output:
* - Do prettyprinting, ignoring existing formatting.
* - Doesn't implicitly wrap
*/
const unsigned long OutputFormatted = (1 << 1);
@ -84,6 +90,7 @@ interface nsIDocumentEncoder : nsISupports
* Supported in XML, XHTML, HTML and Plaintext output.
* Set implicitly in HTML/XHTML output when no OutputRaw.
* Ignored when OutputRaw.
* For XML, XHTML and HTML: does not wrap values in attributes.
* XXXLJ: set implicitly in HTML/XHTML output, to keep compatible behaviors
* for old callers of this interface
* XXXbz How does this interact with OutputFormatFlowed?

View File

@ -665,12 +665,17 @@ bool nsXMLContentSerializer::SerializeAttr(const nsAString& aPrefix,
NS_ENSURE_TRUE(attrString.Append(sValue, mozilla::fallible), false);
NS_ENSURE_TRUE(attrString.Append(cDelimiter, mozilla::fallible), false);
}
if (mDoRaw || PreLevel() > 0) {
NS_ENSURE_TRUE(AppendToStringConvertLF(attrString, aStr), false);
} else if (mDoFormat) {
NS_ENSURE_TRUE(AppendToStringFormatedWrapped(attrString, aStr), false);
} else if (mDoWrap) {
NS_ENSURE_TRUE(AppendToStringWrapped(attrString, aStr), false);
if (mDoWrap && mColPos + attrString.Length() > mMaxColumn) {
// Attr would cause us to overrun the max width, so begin a new line.
NS_ENSURE_TRUE(AppendNewLineToString(aStr), false);
// Chomp the leading space.
nsDependentSubstring chomped(attrString, 1);
if (mDoFormat && mIndent.Length() + chomped.Length() <= mMaxColumn) {
NS_ENSURE_TRUE(AppendIndentation(aStr), false);
}
NS_ENSURE_TRUE(AppendToStringConvertLF(chomped, aStr), false);
} else {
NS_ENSURE_TRUE(AppendToStringConvertLF(attrString, aStr), false);
}