Remainder of bug 169590. Hook a catch-all W3C entity converter and user-prefs for custom levels of entity-zation in the editor's '<HTML> Source' and 'Save As...'

This commit is contained in:
rbs%maths.uq.edu.au 2002-12-17 00:41:25 +00:00
parent 3a601a1474
commit 008e1e2517
8 changed files with 80 additions and 11 deletions

View File

@ -136,6 +136,11 @@ nsHTMLContentSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn,
mCharSet = aCharSet;
// set up entity converter if we are going to need it
if (mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities) {
mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID);
}
return NS_OK;
}
@ -802,9 +807,10 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
}
if (aTranslateEntities && !mInCDATA) {
if (mFlags & nsIDocumentEncoder::OutputEncodeBasicEntities ||
mFlags & nsIDocumentEncoder::OutputEncodeLatin1Entities ||
mFlags & nsIDocumentEncoder::OutputEncodeHTMLEntities) {
if (mFlags & (nsIDocumentEncoder::OutputEncodeBasicEntities |
nsIDocumentEncoder::OutputEncodeLatin1Entities |
nsIDocumentEncoder::OutputEncodeHTMLEntities |
nsIDocumentEncoder::OutputEncodeW3CEntities)) {
nsIParserService* parserService =
nsContentUtils::GetParserServiceWeakRef();
@ -831,6 +837,7 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
const PRUnichar* fragmentEnd = c + fragmentLength;
const char* entityText = nsnull;
nsCAutoString entityReplacement;
char* fullEntityText = nsnull;
advanceLength = 0;
// for each character in this chunk, check if it
@ -855,6 +862,13 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
break;
}
}
else if (val > 127 &&
mFlags & nsIDocumentEncoder::OutputEncodeW3CEntities &&
mEntityConverter &&
NS_SUCCEEDED(mEntityConverter->ConvertToEntity(val,
nsIEntityConverter::entityW3C, &fullEntityText))) {
break;
}
}
aOutputStr.Append(fragmentStart, advanceLength);
@ -864,6 +878,12 @@ nsHTMLContentSerializer::AppendToString(const nsAString& aStr,
aOutputStr.Append(PRUnichar(';'));
advanceLength++;
}
// if it comes from nsIEntityConverter, it already has '&' and ';'
else if (fullEntityText) {
aOutputStr.Append(NS_ConvertASCIItoUCS2(fullEntityText));
nsMemory::Free(fullEntityText);
advanceLength++;
}
}
} else {
nsXMLContentSerializer::AppendToString(aStr, aOutputStr, aTranslateEntities, aIncrColumn);

View File

@ -81,3 +81,19 @@ pref("editor.use_css", true);
pref("editor.css.default_length_unit", "px");
pref("editor.save_associated_files", true);
pref("editor.always_show_publish_dialog", false);
/*
* Which entities should Mozilla save using mnemonic names rather than
* numeric codes? E.g. if set, we'll output &nbsp; -- otherwise, we may
* output 0xa0, depending on the charset.
*
* "none" : don't use any entity names; only use numeric codes.
* "basic" : use entity names just for &nbsp; &amp; &lt; &gt; &quot; for
* interoperability/exchange with products that don't support more
* than that.
* "latin1" : use entity names for 8-bit accented letters and other special
* symbols between 128 and 255.
* "html" : use entity names for 8-bit accented letters, Greek letters, and
* other special markup symbols as defined in HTML4.
*/
//pref("editor.encode_entity", "html");

View File

@ -986,8 +986,9 @@ function OutputFileWithPersistAPI(editorDoc, aDestinationLocation, aRelatedFiles
// returns output flags based on mimetype, wrapCol and prefs
function GetOutputFlags(aMimeType, aWrapColumn)
{
var outputFlags = 0;
var editor = GetCurrentEditor();
var outputFlags = (editor && editor.documentCharacterSet == "ISO-8859-1")
var outputEntity = (editor && editor.documentCharacterSet == "ISO-8859-1")
? webPersist.ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES
: webPersist.ENCODE_FLAGS_ENCODE_BASIC_ENTITIES;
if (aMimeType == "text/plain")
@ -997,14 +998,24 @@ function GetOutputFlags(aMimeType, aWrapColumn)
}
else
{
// Should we prettyprint? Check the pref
try {
// Should we prettyprint? Check the pref
var prefs = GetPrefs();
if (prefs.getBoolPref("editor.prettyprint"))
outputFlags |= webPersist.ENCODE_FLAGS_FORMATTED;
// How many entity names should we output? Check the pref
var encodeEntity = prefs.getCharPref("editor.encode_entity");
switch (encodeEntity) {
case "basic" : outputEntity = webPersist.ENCODE_FLAGS_ENCODE_BASIC_ENTITIES; break;
case "latin1" : outputEntity = webPersist.ENCODE_FLAGS_ENCODE_LATIN1_ENTITIES; break;
case "html" : outputEntity = webPersist.ENCODE_FLAGS_ENCODE_HTML_ENTITIES; break;
case "none" : outputEntity = 0; break;
}
}
catch (e) {}
}
outputFlags |= outputEntity;
if (aWrapColumn > 0)
outputFlags |= webPersist.ENCODE_FLAGS_WRAP;

View File

@ -1578,6 +1578,15 @@ function SetEditMode(mode)
var flags = (editor.documentCharacterSet == "ISO-8859-1")
? 32768 // OutputEncodeLatin1Entities
: 16384; // OutputEncodeBasicEntities
try {
var encodeEntity = gPrefs.getCharPref("editor.encode_entity");
switch (encodeEntity) {
case "basic" : flags = 16384; break; // OutputEncodeBasicEntities
case "latin1" : flags = 32768; break; // OutputEncodeLatin1Entities
case "html" : flags = 65536; break; // OutputEncodeHTMLEntities
case "none" : flags = 0; break;
}
} catch (e) { }
try {
var prettyPrint = gPrefs.getBoolPref("editor.prettyprint");

View File

@ -997,9 +997,7 @@ function unicodeToEntity(text)
} catch (ex) { }
}
const entityVersion =
Components.interfaces.nsIEntityConverter.html40 |
Components.interfaces.nsIEntityConverter.mathml20;
const entityVersion = Components.interfaces.nsIEntityConverter.entityW3C;
var str = text;

View File

@ -55,6 +55,7 @@ interface nsIEntityConverter : nsISupports
const unsigned long mathml20 = 16;
const unsigned long html32 = html40Latin1;
const unsigned long html40 = html40Latin1+html40Symbols+html40Special;
const unsigned long entityW3C = html40+mathml20;
string ConvertToEntity(in wchar character, in unsigned long entityVersion);

View File

@ -81,3 +81,19 @@ pref("editor.use_css", true);
pref("editor.css.default_length_unit", "px");
pref("editor.save_associated_files", true);
pref("editor.always_show_publish_dialog", false);
/*
* Which entities should Mozilla save using mnemonic names rather than
* numeric codes? E.g. if set, we'll output &nbsp; -- otherwise, we may
* output 0xa0, depending on the charset.
*
* "none" : don't use any entity names; only use numeric codes.
* "basic" : use entity names just for &nbsp; &amp; &lt; &gt; &quot; for
* interoperability/exchange with products that don't support more
* than that.
* "latin1" : use entity names for 8-bit accented letters and other special
* symbols between 128 and 255.
* "html" : use entity names for 8-bit accented letters, Greek letters, and
* other special markup symbols as defined in HTML4.
*/
//pref("editor.encode_entity", "html");

View File

@ -470,9 +470,7 @@ function unicodeTOentity(text)
} catch(e) { }
}
const entityVersion =
Components.interfaces.nsIEntityConverter.html40 |
Components.interfaces.nsIEntityConverter.mathml20;
const entityVersion = Components.interfaces.nsIEntityConverter.entityW3C;
var str = text;