Bug #279150 --> Break apart the RSS feed parser into a separate JS object to make it easier to hook up unit testing.

Thanks to Robert Sayer for getting this going.
This commit is contained in:
scott%scott-macgregor.org 2005-02-01 04:04:59 +00:00
parent 77d18fbb33
commit 45099bc77f
6 changed files with 550 additions and 447 deletions

View File

@ -1,78 +1,94 @@
var rdfcontainer =
Components
.classes["@mozilla.org/rdf/container-utils;1"]
.getService(Components.interfaces.nsIRDFContainerUtils);
# -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the RSS Parsing Engine
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK ***** */
var rdfparser =
Components
.classes["@mozilla.org/rdf/xml-parser;1"]
.createInstance(Components.interfaces.nsIRDFXMLParser);
// For use when serializing content in Atom feeds.
var serializer =
Components
.classes["@mozilla.org/xmlextras/xmlserializer;1"]
.createInstance(Components.interfaces.nsIDOMSerializer);
// error codes used to inform the consumer about attempts to download a feed
const kNewsBlogSuccess = 0;
const kNewsBlogInvalidFeed = 1; // usually means there was an error trying to parse the feed...
const kNewsBlogRequestFailure = 2; // generic networking failure when trying to download the feed.
const kNewsBlogFeedIsBusy = 3;
// Cache for all of the feeds currently being downloaded, indexed by URL, so the load event listener
// can access the Feed objects after it finishes downloading the feed.
var FeedCache =
{
mFeeds: new Array(),
// Hash of feeds being downloaded, indexed by URL, so the load event listener
// can access the Feed objects after it finishes downloading the feed files.
function FeedCache(){
this.nsURI = Components.classes["@mozilla.org/network/standard-url;1"].
createInstance(Components.interfaces.nsIURI);
return this;
}
putFeed: function (aFeed)
{
this.mFeeds[this.normalizeHost(aFeed.url)] = aFeed;
},
FeedCache.prototype.putFeed =function(feed) {
this[this.normalizeHost(feed.url)] = feed;
}
getFeed: function (aUrl)
{
return this.mFeeds[this.normalizeHost(aUrl)];
},
FeedCache.prototype.getFeedWithUrl =function(url) {
return this[this.normalizeHost(url)];
}
removeFeed: function (aUrl)
{
delete this.mFeeds[this.normalizeHost(aUrl)];
},
FeedCache.prototype.removeFeedWithUrl = function(url) {
delete this[this.normalizeHost(url)];
}
normalizeHost: function (aUrl)
{
normalizedUrl = Components.classes["@mozilla.org/network/standard-url;1"].
createInstance(Components.interfaces.nsIURI);
normalizedUrl.spec = aUrl;
normalizedUrl.host = normalizedUrl.host.toLowerCase();
return normalizedUrl.spec;
}
};
FeedCache.prototype.normalizeHost = function(url){
this.nsURI.spec = url;
this.nsURI.host = this.nsURI.host.toLowerCase();
return this.nsURI.spec;
}
function Feed(resource)
{
this.resource = resource.QueryInterface(Components.interfaces.nsIRDFResource);
this.description = null;
this.author = null;
this.request = null;
this.folder = null;
this.server = null;
this.downloadCallback = null;
this.items = new Array();
var gFzFeedCache = new FeedCache();
function Feed(resource) {
this.resource = resource.QueryInterface(Components.interfaces.nsIRDFResource);
this.description = null;
this.author = null;
this.request = null;
this.folder = null;
this.server = null;
this.downloadCallback = null;
this.items = new Array();
return this;
return this;
}
// The name of the message folder corresponding to the feed.
// XXX This should be called something more descriptive like "folderName".
// XXX Or maybe, when we support nested folders and downloading into any folder,
// there could just be a reference to the folder itself called "folder".
Feed.prototype.name getter = function() {
Feed.prototype.name getter = function()
{
var name = this.title || this.description || this.url;
if (!name)
throw("couldn't compute feed name, as feed has no title, description, or URL.");
@ -89,7 +105,8 @@ Feed.prototype.name getter = function() {
return name;
}
Feed.prototype.download = function(parseItems, aCallback) {
Feed.prototype.download = function(parseItems, aCallback)
{
this.downloadCallback = aCallback; // may be null
// Whether or not to parse items when downloading and parsing the feed.
@ -108,8 +125,12 @@ Feed.prototype.download = function(parseItems, aCallback) {
// Before we try to download the feed, make sure we aren't already processing the feed
// by looking up the url in our feed cache
if (gFzFeedCache.getFeedWithUrl(this.url))
return; // don't do anything, the feed is already in use
if (FeedCache.getFeed(this.url))
{
if (this.downloadCallback)
this.downloadCallback.downloaded(this, kNewsBlogFeedIsBusy);
return ; // don't do anything, the feed is already in use
}
this.request = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"]
.createInstance(Components.interfaces.nsIXMLHttpRequest);
@ -119,15 +140,16 @@ Feed.prototype.download = function(parseItems, aCallback) {
this.request.overrideMimeType("text/xml");
this.request.onload = Feed.onDownloaded;
this.request.onerror = Feed.onDownloadError;
gFzFeedCache.putFeed(this);
FeedCache.putFeed(this);
this.request.send(null);
}
Feed.onDownloaded = function(event) {
Feed.onDownloaded = function(event)
{
var request = event.target;
var url = request.channel.originalURI.spec;
debug(url + " downloaded");
var feed = gFzFeedCache.getFeedWithUrl(url);
var feed = FeedCache.getFeed(url);
if (!feed)
throw("error after downloading " + url + ": couldn't retrieve feed from request");
@ -136,413 +158,148 @@ Feed.onDownloaded = function(event) {
// parse will asynchronously call the download callback when it is done
}
Feed.onProgress = function(event) {
Feed.onProgress = function(event)
{
var request = event.target;
var url = request.channel.originalURI.spec;
var feed = gFzFeedCache.getFeedWithUrl(url);
var feed = FeedCache.getFeed(url);
if (feed.downloadCallback)
feed.downloadCallback.onProgress(feed, event.position, event.totalSize);
}
Feed.onDownloadError = function(event) {
Feed.onDownloadError = function(event)
{
var request = event.target;
var url = request.channel.originalURI.spec;
var feed = gFzFeedCache.getFeedWithUrl(url);
var feed = FeedCache.getFeed(url);
if (feed.downloadCallback)
feed.downloadCallback.downloaded(feed, kNewsBlogRequestFailure);
gFzFeedCache.removeFeedWithUrl(url);
FeedCache.removeFeed(url);
}
Feed.prototype.onParseError = function(feed) {
Feed.prototype.onParseError = function(feed)
{
if (feed && feed.downloadCallback)
{
if (feed.downloadCallback)
feed.downloadCallback.downloaded(feed, kNewsBlogInvalidFeed);
gFzFeedCache.removeFeedWithUrl(url);
FeedCache.removeFeed(url);
}
}
Feed.prototype.url getter = function() {
var ds = getSubscriptionsDS(this.server);
var url = ds.GetTarget(this.resource, DC_IDENTIFIER, true);
if (url)
url = url.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
else
url = this.resource.Value;
return url;
Feed.prototype.url getter = function()
{
var ds = getSubscriptionsDS(this.server);
var url = ds.GetTarget(this.resource, DC_IDENTIFIER, true);
if (url)
url = url.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
else
url = this.resource.Value;
return url;
}
Feed.prototype.title getter = function() {
var ds = getSubscriptionsDS(this.server);
var title = ds.GetTarget(this.resource, DC_TITLE, true);
if (title)
title = title.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
return title;
Feed.prototype.title getter = function()
{
var ds = getSubscriptionsDS(this.server);
var title = ds.GetTarget(this.resource, DC_TITLE, true);
if (title)
title = title.QueryInterface(Components.interfaces.nsIRDFLiteral).Value;
return title;
}
Feed.prototype.title setter = function(new_title) {
var ds = getSubscriptionsDS(this.server);
new_title = rdf.GetLiteral(new_title || "");
var old_title = ds.GetTarget(this.resource, DC_TITLE, true);
if (old_title)
ds.Change(this.resource, DC_TITLE, old_title, new_title);
else
ds.Assert(this.resource, DC_TITLE, new_title, true);
Feed.prototype.title setter = function(new_title)
{
var ds = getSubscriptionsDS(this.server);
new_title = rdf.GetLiteral(new_title || "");
var old_title = ds.GetTarget(this.resource, DC_TITLE, true);
if (old_title)
ds.Change(this.resource, DC_TITLE, old_title, new_title);
else
ds.Assert(this.resource, DC_TITLE, new_title, true);
}
Feed.prototype.quickMode getter = function() {
var ds = getSubscriptionsDS(this.server);
var quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
if (quickMode) {
quickMode = quickMode.QueryInterface(Components.interfaces.nsIRDFLiteral);
quickMode = quickMode.Value;
quickMode = eval(quickMode);
}
return quickMode;
// The feed's "quick mode" flag, read from the FZ_QUICKMODE target of this
// feed's resource in the subscriptions datasource. When no value is stored,
// the (falsy) result of GetTarget is returned unchanged; otherwise the stored
// literal text is evaluated and the result of that evaluation is returned.
Feed.prototype.quickMode getter = function()
{
var ds = getSubscriptionsDS(this.server);
var quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
if (quickMode)
{
quickMode = quickMode.QueryInterface(Components.interfaces.nsIRDFLiteral);
quickMode = quickMode.Value;
// NOTE(review): eval() executes whatever text the datasource holds. The
// setter stores rdf.GetLiteral(new_quickMode || ""), so the expected values
// are presumably "true" or "" -- confirm, and consider replacing this with
// an explicit string comparison so stored data is never run as code.
quickMode = eval(quickMode);
}
return quickMode;
}
Feed.prototype.quickMode setter = function(new_quickMode) {
var ds = getSubscriptionsDS(this.server);
new_quickMode = rdf.GetLiteral(new_quickMode || "");
var old_quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
if (old_quickMode)
ds.Change(this.resource, FZ_QUICKMODE, old_quickMode, new_quickMode);
else
ds.Assert(this.resource, FZ_QUICKMODE, new_quickMode, true);
Feed.prototype.quickMode setter = function(new_quickMode)
{
var ds = getSubscriptionsDS(this.server);
new_quickMode = rdf.GetLiteral(new_quickMode || "");
var old_quickMode = ds.GetTarget(this.resource, FZ_QUICKMODE, true);
if (old_quickMode)
ds.Change(this.resource, FZ_QUICKMODE, old_quickMode, new_quickMode);
else
ds.Assert(this.resource, FZ_QUICKMODE, new_quickMode, true);
}
Feed.prototype.parse = function() {
Feed.prototype.parse = function()
{
// Figures out what description language (RSS, Atom) and version this feed
// is using and calls a language/version-specific feed parser.
debug("parsing feed " + this.url);
if (!this.request.responseText) {
if (!this.request.responseText)
return this.onParseError(this);
}
else if (this.request.responseText.search(/=(['"])http:\/\/purl\.org\/rss\/1\.0\/\1/) != -1) {
debug(this.url + " is an RSS 1.x (RDF-based) feed");
this.parseAsRSS1();
}
else if (this.request.responseText.search(/=(['"])http:\/\/purl.org\/atom\/ns#\1/) != -1) {
debug(this.url + " is an Atom feed");
this.parseAsAtom();
}
else if (this.request.responseText.search(/"http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/"/) != -1)
{
// RSS 0.9x is forward compatible with RSS 2.0, so use the RSS2 parser to handle it.
debug(this.url + " is an 0.9x feed");
this.parseAsRSS2();
}
// XXX Explicitly check for RSS 2.0 instead of letting it be handled by the
// default behavior (who knows, we may change the default at some point).
else {
// We don't know what kind of feed this is; let's pretend it's RSS 0.9x
// and hope things work out for the best. In theory even RSS 1.0 feeds
// could be parsed by the 0.9x parser if the RSS namespace was the default.
debug(this.url + " is of unknown format; assuming an RSS 0.9x feed");
this.parseAsRSS2();
}
// create a feed parser which will parse the feed for us
var parser = new FeedParser();
this.itemsToStore = parser.parseFeed(this, this.request.responseText, this.request.responseXML, this.request.channel.URI);
// storeNextItem will iterate through the parsed items, storing each one.
this.itemsToStoreIndex = 0;
this.storeNextItem();
}
Feed.prototype.invalidateItems = function ()
{
var ds = getItemsDS(this.server);
ds = ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource);
ds.Flush();
}
Feed.prototype.parseAsRSS2 = function() {
if (!this.request.responseXML || !(this.request.responseXML instanceof Components.interfaces.nsIDOMXMLDocument))
return this.onParseError(this);
// Get the first channel (assuming there is only one per RSS File).
var channel = this.request.responseXML.getElementsByTagName("channel")[0];
if (!channel)
return this.onParseError(this);
this.title = this.title || getNodeValue(channel.getElementsByTagName("title")[0]);
this.description = getNodeValue(channel.getElementsByTagName("description")[0]);
if (!this.parseItems)
return;
this.invalidateItems();
var itemNodes = this.request.responseXML.getElementsByTagName("item");
this.itemsToStore = new Array();
this.itemsToStoreIndex = 0;
var converter = Components
.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = 'UTF-8';
for ( var i=0 ; i<itemNodes.length ; i++ ) {
var itemNode = itemNodes[i];
var item = new FeedItem();
item.feed = this;
item.characterSet = "UTF-8";
var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
var guidNode = itemNode.getElementsByTagName("guid")[0];
if (guidNode) {
var guid = getNodeValue(guidNode);
var isPermaLink =
guidNode.getAttribute('isPermaLink') == 'false' ? false : true;
}
// getNodeValue returns unicode strings...
// we need to do the proper conversion on these before we call into
// item.Store();
item.url = link ? link : (guid && isPermaLink) ? guid : null;
item.id = guid;
item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title);
// do this after we potentially assign item.description into item.title
// because that potential assignment assumes the value is in unicode still
item.description = converter.ConvertFromUnicode(item.description);
item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("creator")[0])
|| this.title
|| item.author;
item.date = getNodeValue(itemNode.getElementsByTagName("pubDate")[0]
|| itemNode.getElementsByTagName("date")[0])
|| item.date;
// If the date is invalid, users will see the beginning of the epoch
// unless we reset it here, so they'll see the current time instead.
// This is typical aggregator behavior.
if(item.date){
item.date = trimString(item.date);
if(!isValidRFC822Date(item.date) ){
// XXX Use this on the other formats as well
item.date = dateRescue(item.date);
}
}
var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
if (content)
item.content = converter.ConvertFromUnicode(content);
this.itemsToStore[i] = item;
}
this.storeNextItem();
}
Feed.prototype.parseAsRSS1 = function() {
// RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.
// Create a new RDF data source and parse the feed into it.
var ds = Components
.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
.createInstance(Components.interfaces.nsIRDFDataSource);
rdfparser.parseString(ds, this.request.channel.URI, this.request.responseText);
// Get information about the feed as a whole.
var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
this.title = this.title || getRDFTargetValue(ds, channel, RSS_TITLE);
this.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
if (!this.parseItems)
return;
this.invalidateItems();
var items = ds.GetTarget(channel, RSS_ITEMS, true);
if (items)
items = rdfcontainer.MakeSeq(ds, items).GetElements();
debug("invalidating items for " + this.url);
var items = ds.GetSources(FZ_FEED, this.resource, true);
var item;
// If the channel doesn't list any items, look for resources of type "item"
// (a hacky workaround for some buggy feeds).
if (!items || !items.hasMoreElements())
items = ds.GetSources(RDF_TYPE, RSS_ITEM, true);
this.itemsToStore = new Array();
this.itemsToStoreIndex = 0;
var index = 0;
var converter = Components
.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = "UTF-8";
while (items.hasMoreElements()) {
var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
var item = new FeedItem();
item.feed = this;
item.characterSet = "UTF-8";
// Prefer the value of the link tag to the item URI since the URI could be
// a relative URN.
var uri = itemResource.Value;
var link = getRDFTargetValue(ds, itemResource, RSS_LINK);
item.url = link || uri;
item.id = item.url;
item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);
item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)
|| getRDFTargetValue(ds, itemResource, DC_SUBJECT)
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)
|| getRDFTargetValue(ds, channel, DC_CREATOR)
|| this.title
|| item.author;
item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;
item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);
this.itemsToStore[index++] = item;
while (items.hasMoreElements())
{
item = items.getNext();
item = item.QueryInterface(Components.interfaces.nsIRDFResource);
debug("invalidating " + item.Value);
var valid = ds.GetTarget(item, FZ_VALID, true);
if (valid)
ds.Unassert(item, FZ_VALID, valid, true);
}
this.storeNextItem();
}
Feed.prototype.parseAsAtom = function() {
if (!this.request.responseXML || !(this.request.responseXML instanceof Components.interfaces.nsIDOMXMLDocument))
return this.onParseError(this);
// Get the first channel (assuming there is only one per Atom File).
var channel = this.request.responseXML.getElementsByTagName("feed")[0];
if (!channel)
return this.onParseError(this);
this.title = this.title || getNodeValue(channel.getElementsByTagName("title")[0]);
this.description = getNodeValue(channel.getElementsByTagName("tagline")[0]);
if (!this.parseItems)
return;
this.invalidateItems();
var items = this.request.responseXML.getElementsByTagName("entry");
this.itemsToStore = new Array();
this.itemsToStoreIndex = 0;
for ( var i=0 ; i<items.length ; i++ ) {
var itemNode = items[i];
var item = new FeedItem();
item.feed = this;
item.characterSet = "UTF-8";
var url;
var links = itemNode.getElementsByTagName("link");
for ( var j=0 ; j<links.length ; j++ ) {
var alink = links[j];
if (alink && alink.getAttribute('rel') && alink.getAttribute('rel') == 'alternate' && alink.getAttribute('href')) {
url = alink.getAttribute('href');
break;
}
}
item.url = url;
item.id = getNodeValue(itemNode.getElementsByTagName("id")[0]);
item.description = getNodeValue(itemNode.getElementsByTagName("summary")[0]);
item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
var authorEl = itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("contributor")[0]
|| channel.getElementsByTagName("author")[0];
var author = "";
if (authorEl) {
var name = getNodeValue(authorEl.getElementsByTagName("name")[0]);
var email = getNodeValue(authorEl.getElementsByTagName("email")[0]);
if (name)
author = name + (email ? " <" + email + ">" : "");
else if (email)
author = email;
}
item.author = author || item.author || this.title;
item.date = getNodeValue(itemNode.getElementsByTagName("modified")[0]
|| itemNode.getElementsByTagName("issued")[0]
|| itemNode.getElementsByTagName("created")[0])
|| item.date;
// XXX We should get the xml:base attribute from the content tag as well
// and use it as the base HREF of the message.
// XXX Atom feeds can have multiple content elements; we should differentiate
// between them and pick the best one.
// Some Atom feeds wrap the content in a CTYPE declaration; others use
// a namespace to identify the tags as HTML; and a few are buggy and put
// HTML tags in without declaring their namespace so they look like Atom.
// We deal with the first two but not the third.
var content;
var contentNode = itemNode.getElementsByTagName("content")[0];
if (contentNode) {
content = "";
for ( var j=0 ; j<contentNode.childNodes.length ; j++ ) {
var node = contentNode.childNodes.item(j);
if (node.nodeType == node.CDATA_SECTION_NODE)
content += node.data;
else
content += serializer.serializeToString(node);
//content += getNodeValue(node);
}
if (contentNode.getAttribute('mode') == "escaped") {
content = content.replace(/&lt;/g, "<");
content = content.replace(/&gt;/g, ">");
content = content.replace(/&amp;/g, "&");
}
if (content == "")
content = null;
}
item.content = content;
this.itemsToStore[i] = item;
Feed.prototype.removeInvalidItems = function()
{
var ds = getItemsDS(this.server);
debug("removing invalid items for " + this.url);
var items = ds.GetSources(FZ_FEED, this.resource, true);
var item;
while (items.hasMoreElements())
{
item = items.getNext();
item = item.QueryInterface(Components.interfaces.nsIRDFResource);
if (ds.HasAssertion(item, FZ_VALID, RDF_LITERAL_TRUE, true))
continue;
debug("removing " + item.Value);
ds.Unassert(item, FZ_FEED, this.resource, true);
if (ds.hasArcOut(item, FZ_FEED))
debug(item.Value + " is from more than one feed; only the reference to this feed removed");
else
removeAssertions(ds, item);
}
this.storeNextItem();
}
Feed.prototype.invalidateItems = function invalidateItems() {
var ds = getItemsDS(this.server);
debug("invalidating items for " + this.url);
var items = ds.GetSources(FZ_FEED, this.resource, true);
var item;
while (items.hasMoreElements()) {
item = items.getNext();
item = item.QueryInterface(Components.interfaces.nsIRDFResource);
debug("invalidating " + item.Value);
var valid = ds.GetTarget(item, FZ_VALID, true);
if (valid)
ds.Unassert(item, FZ_VALID, valid, true);
}
}
Feed.prototype.removeInvalidItems = function() {
var ds = getItemsDS(this.server);
debug("removing invalid items for " + this.url);
var items = ds.GetSources(FZ_FEED, this.resource, true);
var item;
while (items.hasMoreElements()) {
item = items.getNext();
item = item.QueryInterface(Components.interfaces.nsIRDFResource);
if (ds.HasAssertion(item, FZ_VALID, RDF_LITERAL_TRUE, true))
continue;
debug("removing " + item.Value);
ds.Unassert(item, FZ_FEED, this.resource, true);
if (ds.hasArcOut(item, FZ_FEED))
debug(item.Value + " is from more than one feed; only the reference to this feed removed");
else
removeAssertions(ds, item);
}
}
// gets the next item from gItemsToStore and forces that item to be stored
@ -590,34 +347,33 @@ Feed.prototype.storeNextItem = function()
Feed.prototype.cleanupParsingState = function(feed) {
// now that we are done parsing the feed, remove the feed from our feed cache
gFzFeedCache.removeFeedWithUrl(feed.url);
FeedCache.removeFeed(feed.url);
feed.removeInvalidItems();
// let's be sure to flush any feed item changes back to disk
// let's be sure to flush any feed item changes back to disk
var ds = getItemsDS(feed.server);
ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource).Flush(); // flush any changes
ds.QueryInterface(Components.interfaces.nsIRDFRemoteDataSource).Flush(); // flush any changes
if (feed.downloadCallback)
feed.downloadCallback.downloaded(feed, kNewsBlogSuccess);
feed.request = null; // force the xml http request to go away. This helps reduce some
// nasty assertions on shut down of all things.
this.request = null; // force the xml http request to go away. This helps reduce some nasty assertions on shut down.
this.itemsToStore = "";
this.itemsToStoreIndex = 0;
this.storeItemsTimer = null;
}
this.itemsToStore = "";
this.itemsToStoreIndex = 0;
this.storeItemsTimer = null;
}
Feed.prototype.notify = function(aTimer) {
// Timer callback entry point (QueryInterface below answers for
// nsITimerCallback). Each notification hands off to storeNextItem();
// the aTimer argument itself is not used.
Feed.prototype.notify = function(aTimer)
{
this.storeNextItem();
}
Feed.prototype.QueryInterface = function(aIID) {
// XPCOM interface negotiation: a Feed answers for nsITimerCallback and
// nsISupports. For any other IID it sets Components.returnCode to
// NS_ERROR_NO_INTERFACE and returns null.
Feed.prototype.QueryInterface = function(aIID)
{
  var interfaces = Components.interfaces;
  var supported = aIID.equals(interfaces.nsITimerCallback) || aIID.equals(interfaces.nsISupports);

  if (!supported)
  {
    Components.returnCode = Components.results.NS_ERROR_NO_INTERFACE;
    return null;
  }

  return this;
}

View File

@ -0,0 +1,344 @@
# -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the RSS Parsing Engine
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK ***** */
// The feed parser depends on FeedItems.js, Feed.js.
// Shared XPCOM helpers for the parsers below.
// RDF container utilities service; parseAsRSS1 uses it to wrap the channel's item list as a Seq.
var rdfcontainer = Components.classes["@mozilla.org/rdf/container-utils;1"].getService(Components.interfaces.nsIRDFContainerUtils);
// RDF/XML parser instance; parseAsRSS1 uses it to load the feed source into an in-memory datasource.
var rdfparser = Components.classes["@mozilla.org/rdf/xml-parser;1"].createInstance(Components.interfaces.nsIRDFXMLParser);
// DOM serializer instance (presumably for serializing Atom content nodes -- confirm against parseAsAtom).
var serializer = Components.classes["@mozilla.org/xmlextras/xmlserializer;1"].createInstance(Components.interfaces.nsIDOMSerializer);
// Constructor for the feed parser. It keeps no per-instance state; all
// behavior lives on FeedParser.prototype.
function FeedParser()
{}
FeedParser.prototype =
{
// parseFeed returns an array of parsed items ready for processing
// it is currently a synchronous operation. If there was an error parsing the feed,
// parseFeed returns an empty feed in addition to calling aFeed.onParseError
parseFeed: function (aFeed, aSource, aDOM, aBaseURI)
{
if (!aSource || !(aDOM instanceof Components.interfaces.nsIDOMXMLDocument))
{
aFeed.onParseError(aFeed);
return new Array();
}
else if (aSource.search(/=(['"])http:\/\/purl\.org\/rss\/1\.0\/\1/) != -1)
{
debug(aFeed.url + " is an RSS 1.x (RDF-based) feed");
return this.parseAsRSS1(aFeed, aSource, aBaseURI);
}
else if (aSource.search(/=(['"])http:\/\/purl.org\/atom\/ns#\1/) != -1)
{
debug(aFeed.url + " is an Atom feed");
return this.parseAsAtom(aFeed, aDOM);
}
else if (aSource.search(/"http:\/\/my\.netscape\.com\/rdf\/simple\/0\.9\/"/) != -1)
{
// RSS 0.9x is forward compatible with RSS 2.0, so use the RSS2 parser to handle it.
debug(aFeed.url + " is an 0.9x feed");
return this.parseAsRSS2(aFeed, aDOM);
}
// XXX Explicitly check for RSS 2.0 instead of letting it be handled by the
// default behavior (who knows, we may change the default at some point).
else
{
// We don't know what kind of feed this is; let's pretend it's RSS 0.9x
// and hope things work out for the best. In theory even RSS 1.0 feeds
// could be parsed by the 0.9x parser if the RSS namespace was the default.
debug(aFeed.url + " is of unknown format; assuming an RSS 0.9x feed");
return this.parseAsRSS2(aFeed, aDOM);
}
},
parseAsRSS2: function (aFeed, aDOM)
{
// Get the first channel (assuming there is only one per RSS File).
var parsedItems = new Array();
var channel = aDOM.getElementsByTagName("channel")[0];
if (!channel)
return aFeed.onParseError(aFeed);
aFeed.title = aFeed.title || getNodeValue(channel.getElementsByTagName("title")[0]);
aFeed.description = getNodeValue(channel.getElementsByTagName("description")[0]);
if (!aFeed.parseItems)
return parsedItems;
aFeed.invalidateItems();
var itemNodes = aDOM.getElementsByTagName("item");
var converter = Components.classes["@mozilla.org/intl/scriptableunicodeconverter"].
createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = 'UTF-8';
for (var i=0; i<itemNodes.length; i++)
{
var itemNode = itemNodes[i];
var item = new FeedItem();
item.feed = aFeed;
item.characterSet = "UTF-8";
var link = getNodeValue(itemNode.getElementsByTagName("link")[0]);
var guidNode = itemNode.getElementsByTagName("guid")[0];
var guid;
var isPermaLink;
if (guidNode)
{
guid = getNodeValue(guidNode);
isPermaLink = guidNode.getAttribute('isPermaLink') == 'false' ? false : true;
}
// getNodeValue returns unicode strings...
// we need to do the proper conversion on these before we call into
// item.Store();
item.url = link ? link : (guid && isPermaLink) ? guid : null;
item.id = guid;
item.description = getNodeValue(itemNode.getElementsByTagName("description")[0]);
item.title = converter.ConvertFromUnicode(getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title);
// do this after we potentially assign item.description into item.title
// because that potential assignment assumes the value is in unicode still
item.description = converter.ConvertFromUnicode(item.description);
item.author = getNodeValue(itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("creator")[0])
|| aFeed.title
|| item.author;
item.date = getNodeValue(itemNode.getElementsByTagName("pubDate")[0]
|| itemNode.getElementsByTagName("date")[0])
|| item.date;
// If the date is invalid, users will see the beginning of the epoch
// unless we reset it here, so they'll see the current time instead.
// This is typical aggregator behavior.
if(item.date)
{
item.date = trimString(item.date);
if(!isValidRFC822Date(item.date))
{
// XXX Use this on the other formats as well
item.date = dateRescue(item.date);
}
}
var content = getNodeValue(itemNode.getElementsByTagNameNS(RSS_CONTENT_NS, "encoded")[0]);
if (content)
item.content = converter.ConvertFromUnicode(content);
parsedItems[i] = item;
}
return parsedItems;
},
parseAsRSS1 : function(aFeed, aSource, aBaseURI)
{
var parsedItems = new Array();
// RSS 1.0 is valid RDF, so use the RDF parser/service to extract data.
// Create a new RDF data source and parse the feed into it.
var ds = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]
.createInstance(Components.interfaces.nsIRDFDataSource);
rdfparser.parseString(ds, aBaseURI, aSource);
// Get information about the feed as a whole.
var channel = ds.GetSource(RDF_TYPE, RSS_CHANNEL, true);
aFeed.title = aFeed.title || getRDFTargetValue(ds, channel, RSS_TITLE);
aFeed.description = getRDFTargetValue(ds, channel, RSS_DESCRIPTION);
if (!aFeed.parseItems)
return parsedItems;
aFeed.invalidateItems();
var items = ds.GetTarget(channel, RSS_ITEMS, true);
if (items)
items = rdfcontainer.MakeSeq(ds, items).GetElements();
// If the channel doesn't list any items, look for resources of type "item"
// (a hacky workaround for some buggy feeds).
if (!items || !items.hasMoreElements())
items = ds.GetSources(RDF_TYPE, RSS_ITEM, true);
var index = 0;
var converter = Components.classes["@mozilla.org/intl/scriptableunicodeconverter"]
.createInstance(Components.interfaces.nsIScriptableUnicodeConverter);
converter.charset = "UTF-8";
while (items.hasMoreElements())
{
var itemResource = items.getNext().QueryInterface(Components.interfaces.nsIRDFResource);
var item = new FeedItem();
item.feed = aFeed;
item.characterSet = "UTF-8";
// Prefer the value of the link tag to the item URI since the URI could be
// a relative URN.
var uri = itemResource.Value;
var link = getRDFTargetValue(ds, itemResource, RSS_LINK);
item.url = link || uri;
item.id = item.url;
item.description = getRDFTargetValue(ds, itemResource, RSS_DESCRIPTION);
item.title = getRDFTargetValue(ds, itemResource, RSS_TITLE)
|| getRDFTargetValue(ds, itemResource, DC_SUBJECT)
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
item.author = getRDFTargetValue(ds, itemResource, DC_CREATOR)
|| getRDFTargetValue(ds, channel, DC_CREATOR)
|| aFeed.title
|| item.author;
item.date = getRDFTargetValue(ds, itemResource, DC_DATE) || item.date;
item.content = getRDFTargetValue(ds, itemResource, RSS_CONTENT_ENCODED);
parsedItems[index++] = item;
}
return parsedItems;
},
parseAsAtom: function(aFeed, aDOM)
{
var parsedItems = new Array();
// Get the first channel (assuming there is only one per Atom File).
var channel = aDOM.getElementsByTagName("feed")[0];
if (!channel)
{
aFeed.onParseError(aFeed);
return parsedItems;
}
aFeed.title = aFeed.title || getNodeValue(channel.getElementsByTagName("title")[0]);
aFeed.description = getNodeValue(channel.getElementsByTagName("tagline")[0]);
if (!aFeed.parseItems)
return parsedItems;
aFeed.invalidateItems();
var items = this.mDOM.getElementsByTagName("entry");
debug("Items to parse: " + items.length);
for (var i=0; i<items.length; i++)
{
var itemNode = items[i];
var item = new FeedItem();
item.feed = aFeed;
item.characterSet = "UTF-8";
var url;
var links = itemNode.getElementsByTagName("link");
for (var j=0; j < links.length; j++)
{
var alink = links[j];
if (alink && alink.getAttribute('rel') && alink.getAttribute('rel') == 'alternate' && alink.getAttribute('href'))
{
url = alink.getAttribute('href');
break;
}
}
item.url = url;
item.id = getNodeValue(itemNode.getElementsByTagName("id")[0]);
item.description = getNodeValue(itemNode.getElementsByTagName("summary")[0]);
item.title = getNodeValue(itemNode.getElementsByTagName("title")[0])
|| (item.description ? item.description.substr(0, 150) : null)
|| item.title;
var authorEl = itemNode.getElementsByTagName("author")[0]
|| itemNode.getElementsByTagName("contributor")[0]
|| channel.getElementsByTagName("author")[0];
var author = "";
if (authorEl)
{
var name = getNodeValue(authorEl.getElementsByTagName("name")[0]);
var email = getNodeValue(authorEl.getElementsByTagName("email")[0]);
if (name)
author = name + (email ? " <" + email + ">" : "");
else if (email)
author = email;
}
item.author = author || item.author || aFeed.title;
item.date = getNodeValue(itemNode.getElementsByTagName("modified")[0]
|| itemNode.getElementsByTagName("issued")[0]
|| itemNode.getElementsByTagName("created")[0])
|| item.date;
// XXX We should get the xml:base attribute from the content tag as well
// and use it as the base HREF of the message.
// XXX Atom feeds can have multiple content elements; we should differentiate
// between them and pick the best one.
// Some Atom feeds wrap the content in a CTYPE declaration; others use
// a namespace to identify the tags as HTML; and a few are buggy and put
// HTML tags in without declaring their namespace so they look like Atom.
// We deal with the first two but not the third.
var content;
var contentNode = itemNode.getElementsByTagName("content")[0];
if (contentNode)
{
content = "";
for (var j=0; j < contentNode.childNodes.length; j++)
{
var node = contentNode.childNodes.item(j);
if (node.nodeType == node.CDATA_SECTION_NODE)
content += node.data;
else
content += serializer.serializeToString(node);
}
if (contentNode.getAttribute('mode') == "escaped")
{
content = content.replace(/&lt;/g, "<");
content = content.replace(/&gt;/g, ">");
content = content.replace(/&amp;/g, "&");
}
if (content == "")
content = null;
}
item.content = content;
parsedItems[i] = item;
}
return parsedItems;
}
};

View File

@ -119,7 +119,7 @@ var feedDownloadCallback = {
}
else if (aErrorCode == kNewsBlogInvalidFeed) // the feed was bad...
window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-invalidFeed', [feed.url]));
else // we never even downloaded the feed...(kNewsBlogRequestFailure)
else if (aErrorCode == kNewsBlogRequestFailure)
window.alert(document.getElementById('bundle_newsblog').getFormattedString('newsblog-networkError', [feed.url]));
// re-enable the add button now that we are done subscribing

View File

@ -50,12 +50,13 @@
windowtype="Mail:News-BlogSubscriptions"
flex="1">
<script type="application/x-javascript" src="utils.js" />
<script type="application/x-javascript" src="file-utils.js" />
<script type="application/x-javascript" src="debug-utils.js" />
<script type="application/x-javascript" src="subscriptions.js" />
<script type="application/x-javascript" src="Feed.js" />
<script type="application/x-javascript" src="FeedItem.js" />
<script type="application/x-javascript" src="utils.js"/>
<script type="application/x-javascript" src="file-utils.js"/>
<script type="application/x-javascript" src="debug-utils.js"/>
<script type="application/x-javascript" src="subscriptions.js"/>
<script type="application/x-javascript" src="Feed.js"/>
<script type="application/x-javascript" src="FeedItem.js"/>
<script type="application/x-javascript" src="feed-parser.js"/>
<stringbundle id="bundle_newsblog" src="chrome://messenger-newsblog/locale/newsblog.properties"/>

View File

@ -5,6 +5,7 @@ newsblog.jar:
* content/messenger-newsblog/debug-utils.js (content/debug-utils.js)
* content/messenger-newsblog/Feed.js (content/Feed.js)
* content/messenger-newsblog/FeedItem.js (content/FeedItem.js)
* content/messenger-newsblog/feed-parser.js (content/feed-parser.js)
* content/messenger-newsblog/file-utils.js (content/file-utils.js)
* content/messenger-newsblog/subscriptions.js (content/subscriptions.js)
* content/messenger-newsblog/utils.js (content/utils.js)

View File

@ -309,6 +309,7 @@ function loadScripts()
{
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/Feed.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/FeedItem.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/feed-parser.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/file-utils.js");
scriptLoader.loadSubScript("chrome://messenger-newsblog/content/utils.js");
}
@ -362,7 +363,7 @@ var progressNotifier = {
[feed.url], 1));
else if (aErrorCode == kNewsBlogRequestFailure)
this.mStatusFeedback.showStatusString(GetNewsBlogStringBundle().formatStringFromName("newsblog-networkError",
[feed.url], 1));
[feed.url], 1));
this.mStatusFeedback.stopMeteors();
}