Bug #272875 --> Convert all RSS messages to UTF-8 instead of keeping their native encoding.

Other encoding changes to fix various RSS problems with feeds in non-ASCII character
sets.

sr=bienvenu
This commit is contained in:
scott%scott-macgregor.org 2004-12-04 05:53:07 +00:00
parent dfd741c2fd
commit 07ec76832b
2 changed files with 46 additions and 7 deletions

View File

@ -246,12 +246,18 @@ Feed.prototype.parseAsRSS2 = function() {
this.itemsToStore = new Array();
this.itemsToStoreIndex = 0;
var converter = Components
.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = 'UTF-8';
for ( var i=0 ; i<itemNodes.length ; i++ ) {
var itemNode = itemNodes[i];
var item = new FeedItem();
item.feed = this;
item.characterSet = this.request.responseXML.characterSet ? this.request.responseXML.characterSet : "UTF-8";
item.characterSet = "UTF-8";
var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
@ -262,12 +268,20 @@ Feed.prototype.parseAsRSS2 = function() {
guidNode.getAttribute('isPermaLink') == 'false' ? false : true;
}
// getNodeValue returns unicode strings...
// we need to do the proper conversion on these before we call into
// item.Store();
item.url = link ? link : (guid && isPermaLink) ? guid : null;
item.id = guid;
item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
|| item.title);
// do this after we potentially assign item.description into item.title
// because that potential assignment assumes the value is in unicode still
item.description = converter.ConvertFromUnicode(item.description);
item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("creator")[0])
|| this.title
@ -282,7 +296,7 @@ Feed.prototype.parseAsRSS2 = function() {
var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
if (content)
item.content = content;
item.content = converter.ConvertFromUnicode(content);
this.itemsToStore[i] = item;
}
@ -297,10 +311,12 @@ Feed.prototype.parseAsRSS1 = function() {
var ds = Components
.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
.createInstance(Components.interfaces.nsIRDFDataSource);
rdfparser.parseString(ds, this.request.channel.URI, this.request.responseText);
// Get information about the feed as a whole.
var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
this.title = this.title || getRDFTargetValue(ds, channel, RSS_TITLE);
this.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
@ -322,12 +338,18 @@ Feed.prototype.parseAsRSS1 = function() {
this.itemsToStoreIndex = 0;
var index = 0;
var converter = Components
.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = "UTF-8";
while (items.hasMoreElements()) {
var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
var item = new FeedItem();
item.feed = this;
item.characterSet = this.request.responseXML.characterSet ? this.request.responseXML.characterSet : "UTF-8";
item.characterSet = "UTF-8";
// Prefer the value of the link tag to the item URI since the URI could be
// a relative URN.
@ -382,7 +404,7 @@ Feed.prototype.parseAsAtom = function() {
var item = new FeedItem();
item.feed = this;
item.characterSet = this.request.responseXML.characterSet ? this.request.responseXML.characterSet : "UTF-8";
item.characterSet = "UTF-8";
var url;
var links = itemNode.getElementsByTagName("link");

View File

@ -109,10 +109,26 @@ const LOCAL_CONTENT_TEMPLATE = "\n\
const LOCAL_STYLE = "\n";
FeedItem.prototype.store = function() {
FeedItem.unicodeConverter.charset = this.characterSet;
try {
if (this.title)
this.title = FeedItem.unicodeConverter.ConvertToUnicode(this.title);
} catch (ex) {}
try {
if (this.description)
this.description = FeedItem.unicodeConverter.ConvertToUnicode(this.description);
} catch (ex) {}
if (this.isStored()) {
debug(this.identity + " already stored; ignoring");
}
else if (this.content) {
try {
this.content = FeedItem.unicodeConverter.ConvertToUnicode(this.content);
} catch (ex) {}
debug(this.identity + " has content; storing");
var content = MESSAGE_TEMPLATE;
content = content.replace(/%CONTENT_TEMPLATE%/, LOCAL_CONTENT_TEMPLATE);
@ -125,7 +141,9 @@ FeedItem.prototype.store = function() {
}
else if (this.feed.quickMode) {
debug(this.identity + " in quick mode; storing");
this.content = this.description || this.title;
var content = MESSAGE_TEMPLATE;
content = content.replace(/%CONTENT_TEMPLATE%/, LOCAL_CONTENT_TEMPLATE);
content = content.replace(/%STYLE%/, LOCAL_STYLE);
@ -145,7 +163,6 @@ FeedItem.prototype.store = function() {
content = content.replace(/%DESCRIPTION%/, this.description || this.title);
this.content = content; // XXX store it elsewhere, f.e. this.page
this.writeToFolder();
//this.download();
}
}