No bug - Update readability from github repo, includes fix for Bug 1142312 and Bug 1285543, r=Gijs

MozReview-Commit-ID: 5hi1iuDO3XE

--HG--
extra : rebase_source : 724cd2af8b9ce4a6620e367665784636c223db6a
This commit is contained in:
Evan Tseng 2016-12-15 12:03:53 +08:00
parent 1a37ce6e11
commit dc0961cbf0

View File

@ -119,7 +119,7 @@ Readability.prototype = {
// All of the regular expressions in use within readability.
// Defined up here so we don't instantiate them repeatedly in loops.
REGEXPS: {
unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
unlikelyCandidates: /banner|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup|yom-remote/i,
okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,
negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
@ -155,8 +155,8 @@ Readability.prototype = {
*
* If function is not passed, removes all the nodes in node list.
*
* @param NodeList nodeList The no
* @param Function filterFn
* @param NodeList nodeList The nodes to operate on
* @param Function filterFn the function to use as a filter
* @return void
*/
_removeNodes: function(nodeList, filterFn) {
@ -171,6 +171,20 @@ Readability.prototype = {
}
},
/**
* Iterates over a NodeList, and calls _setNodeTag for each node.
*
* @param NodeList nodeList The nodes to operate on
* @param String newTagName the new tag name to use
* @return void
*/
_replaceNodeTags: function(nodeList, newTagName) {
for (var i = nodeList.length - 1; i >= 0; i--) {
var node = nodeList[i];
this._setNodeTag(node, newTagName);
}
},
/**
* Iterate over a NodeList, which doesn't natively fully implement the Array
* interface.
@ -180,10 +194,9 @@ Readability.prototype = {
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @param Boolean backward Whether to use backward iteration.
* @return void
*/
_forEachNode: function(nodeList, fn, backward) {
_forEachNode: function(nodeList, fn) {
// Borrow Array.prototype.forEach so it can be applied to the array-like
// nodeList; `this` is passed as the thisArg, so fn is invoked with this
// object as its `this` value.
Array.prototype.forEach.call(nodeList, fn, this);
},
@ -362,9 +375,7 @@ Readability.prototype = {
this._replaceBrs(doc.body);
}
this._forEachNode(doc.getElementsByTagName("font"), function(fontNode) {
this._setNodeTag(fontNode, "SPAN");
});
this._replaceNodeTags(doc.getElementsByTagName("font"), "SPAN");
},
/**
@ -1062,12 +1073,15 @@ Readability.prototype = {
metadata.excerpt = values["twitter:description"];
}
if ("og:title" in values) {
// Use facebook open graph title.
metadata.title = values["og:title"];
} else if ("twitter:title" in values) {
// Use twitter cards title.
metadata.title = values["twitter:title"];
metadata.title = this._getArticleTitle();
if (!metadata.title) {
if ("og:title" in values) {
// Use facebook open graph title.
metadata.title = values["og:title"];
} else if ("twitter:title" in values) {
// Use twitter cards title.
metadata.title = values["twitter:title"];
}
}
return metadata;
@ -1857,7 +1871,7 @@ Readability.prototype = {
this._prepDocument();
var metadata = this._getArticleMetadata();
var articleTitle = metadata.title || this._getArticleTitle();
var articleTitle = metadata.title;
var articleContent = this._grabArticle();
if (!articleContent)