mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-02 15:15:23 +00:00
No bug, update Readability to github tip, rs=me
This commit is contained in:
parent
d5e8a58213
commit
ca1497979e
@ -37,6 +37,39 @@
|
|||||||
dump("JSDOMParser error: " + m + "\n");
|
dump("JSDOMParser error: " + m + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// XML only defines these and the numeric ones:
|
||||||
|
|
||||||
|
var entityTable = {
|
||||||
|
"lt": "<",
|
||||||
|
"gt": ">",
|
||||||
|
"amp": "&",
|
||||||
|
"quot": '"',
|
||||||
|
"apos": "'",
|
||||||
|
};
|
||||||
|
|
||||||
|
var reverseEntityTable = {
|
||||||
|
"<": "<",
|
||||||
|
">": ">",
|
||||||
|
"&": "&",
|
||||||
|
'"': """,
|
||||||
|
"'": "'",
|
||||||
|
};
|
||||||
|
|
||||||
|
function encodeHTML(s) {
|
||||||
|
return s.replace(/[&<>'"]/g, function(x) {
|
||||||
|
return reverseEntityTable[x];
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function decodeHTML(str) {
|
||||||
|
return str.replace(/&(quot|amp|apos|lt|gt);/g, function(match, tag) {
|
||||||
|
return entityTable[tag];
|
||||||
|
}).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi, function(match, hex, numStr) {
|
||||||
|
var num = parseInt(hex || numStr, hex ? 16 : 10); // read num
|
||||||
|
return String.fromCharCode(num);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// When a style is set in JS, map it to the corresponding CSS attribute
|
// When a style is set in JS, map it to the corresponding CSS attribute
|
||||||
var styleMap = {
|
var styleMap = {
|
||||||
"alignmentBaseline": "alignment-baseline",
|
"alignmentBaseline": "alignment-baseline",
|
||||||
@ -447,7 +480,9 @@
|
|||||||
}
|
}
|
||||||
return oldNode;
|
return oldNode;
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
|
||||||
|
__JSDOMParser__: true,
|
||||||
};
|
};
|
||||||
|
|
||||||
for (var i in nodeTypes) {
|
for (var i in nodeTypes) {
|
||||||
@ -456,7 +491,27 @@
|
|||||||
|
|
||||||
var Attribute = function (name, value) {
|
var Attribute = function (name, value) {
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.value = value;
|
this._value = value;
|
||||||
|
};
|
||||||
|
|
||||||
|
Attribute.prototype = {
|
||||||
|
get value() {
|
||||||
|
return this._value;
|
||||||
|
},
|
||||||
|
setValue: function(newValue) {
|
||||||
|
this._value = newValue;
|
||||||
|
delete this._decodedValue;
|
||||||
|
},
|
||||||
|
setDecodedValue: function(newValue) {
|
||||||
|
this._value = encodeHTML(newValue);
|
||||||
|
this._decodedValue = newValue;
|
||||||
|
},
|
||||||
|
getDecodedValue: function() {
|
||||||
|
if (typeof this._decodedValue === "undefined") {
|
||||||
|
this._decodedValue = (this._value && decodeHTML(this._value)) || "";
|
||||||
|
}
|
||||||
|
return this._decodedValue;
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
var Comment = function () {
|
var Comment = function () {
|
||||||
@ -479,7 +534,27 @@
|
|||||||
|
|
||||||
nodeName: "#text",
|
nodeName: "#text",
|
||||||
nodeType: Node.TEXT_NODE,
|
nodeType: Node.TEXT_NODE,
|
||||||
textContent: ""
|
get textContent() {
|
||||||
|
if (typeof this._textContent === "undefined") {
|
||||||
|
this._textContent = decodeHTML(this._innerHTML || "");
|
||||||
|
}
|
||||||
|
return this._textContent;
|
||||||
|
},
|
||||||
|
get innerHTML() {
|
||||||
|
if (typeof this._innerHTML === "undefined") {
|
||||||
|
this._innerHTML = encodeHTML(this._textContent || "");
|
||||||
|
}
|
||||||
|
return this._innerHTML;
|
||||||
|
},
|
||||||
|
|
||||||
|
set innerHTML(newHTML) {
|
||||||
|
this._innerHTML = newHTML;
|
||||||
|
delete this._textContent;
|
||||||
|
},
|
||||||
|
set textContent(newText) {
|
||||||
|
this._textContent = newText;
|
||||||
|
delete this._innerHTML;
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var Document = function () {
|
var Document = function () {
|
||||||
@ -582,13 +657,15 @@
|
|||||||
// serialize attribute list
|
// serialize attribute list
|
||||||
for (var j = 0; j < child.attributes.length; j++) {
|
for (var j = 0; j < child.attributes.length; j++) {
|
||||||
var attr = child.attributes[j];
|
var attr = child.attributes[j];
|
||||||
var quote = (attr.value.indexOf('"') === -1 ? '"' : "'");
|
// the attribute value will be HTML escaped.
|
||||||
arr.push(" " + attr.name + '=' + quote + attr.value + quote);
|
var val = attr.value;
|
||||||
|
var quote = (val.indexOf('"') === -1 ? '"' : "'");
|
||||||
|
arr.push(" " + attr.name + '=' + quote + val + quote);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (child.localName in voidElems) {
|
if (child.localName in voidElems) {
|
||||||
// if this is a self-closing element, end it here
|
// if this is a self-closing element, end it here
|
||||||
arr.push("/>");
|
arr.push(">");
|
||||||
} else {
|
} else {
|
||||||
// otherwise, add its children
|
// otherwise, add its children
|
||||||
arr.push(">");
|
arr.push(">");
|
||||||
@ -596,7 +673,8 @@
|
|||||||
arr.push("</" + child.localName + ">");
|
arr.push("</" + child.localName + ">");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
arr.push(child.textContent);
|
// This is a text node, so asking for innerHTML won't recurse.
|
||||||
|
arr.push(child.innerHTML);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -658,7 +736,7 @@
|
|||||||
for (var i = this.attributes.length; --i >= 0;) {
|
for (var i = this.attributes.length; --i >= 0;) {
|
||||||
var attr = this.attributes[i];
|
var attr = this.attributes[i];
|
||||||
if (attr.name === name)
|
if (attr.name === name)
|
||||||
return attr.value;
|
return attr.getDecodedValue();
|
||||||
}
|
}
|
||||||
return undefined;
|
return undefined;
|
||||||
},
|
},
|
||||||
@ -667,11 +745,11 @@
|
|||||||
for (var i = this.attributes.length; --i >= 0;) {
|
for (var i = this.attributes.length; --i >= 0;) {
|
||||||
var attr = this.attributes[i];
|
var attr = this.attributes[i];
|
||||||
if (attr.name === name) {
|
if (attr.name === name) {
|
||||||
attr.value = value;
|
attr.setDecodedValue(value);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.attributes.push(new Attribute(name, value));
|
this.attributes.push(new Attribute(name, encodeHTML(value)));
|
||||||
},
|
},
|
||||||
|
|
||||||
removeAttribute: function (name) {
|
removeAttribute: function (name) {
|
||||||
@ -822,9 +900,6 @@
|
|||||||
// Read the attribute value (and consume the matching quote)
|
// Read the attribute value (and consume the matching quote)
|
||||||
var value = this.readString(c);
|
var value = this.readString(c);
|
||||||
|
|
||||||
if (!value)
|
|
||||||
return;
|
|
||||||
|
|
||||||
node.attributes.push(new Attribute(name, value));
|
node.attributes.push(new Attribute(name, value));
|
||||||
|
|
||||||
return;
|
return;
|
||||||
@ -894,7 +969,7 @@
|
|||||||
*/
|
*/
|
||||||
match: function (str) {
|
match: function (str) {
|
||||||
var strlen = str.length;
|
var strlen = str.length;
|
||||||
if (this.html.substr(this.currentChar, strlen) === str) {
|
if (this.html.substr(this.currentChar, strlen).toLowerCase() === str.toLowerCase()) {
|
||||||
this.currentChar += strlen;
|
this.currentChar += strlen;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -954,10 +1029,10 @@
|
|||||||
// looking for the same < all the time.
|
// looking for the same < all the time.
|
||||||
this.currentChar--;
|
this.currentChar--;
|
||||||
if (n === -1) {
|
if (n === -1) {
|
||||||
textNode.textContent += this.html.substring(this.currentChar, this.html.length);
|
textNode.innerHTML += this.html.substring(this.currentChar, this.html.length);
|
||||||
this.currentChar = this.html.length;
|
this.currentChar = this.html.length;
|
||||||
} else {
|
} else {
|
||||||
textNode.textContent += this.html.substring(this.currentChar, n);
|
textNode.innerHTML += this.html.substring(this.currentChar, n);
|
||||||
this.currentChar = n;
|
this.currentChar = n;
|
||||||
}
|
}
|
||||||
if (!haveTextNode)
|
if (!haveTextNode)
|
||||||
@ -1000,10 +1075,10 @@
|
|||||||
var node = new Text();
|
var node = new Text();
|
||||||
var n = this.html.indexOf("<", this.currentChar);
|
var n = this.html.indexOf("<", this.currentChar);
|
||||||
if (n === -1) {
|
if (n === -1) {
|
||||||
node.textContent = this.html.substring(this.currentChar, this.html.length);
|
node.innerHTML = this.html.substring(this.currentChar, this.html.length);
|
||||||
this.currentChar = this.html.length;
|
this.currentChar = this.html.length;
|
||||||
} else {
|
} else {
|
||||||
node.textContent = this.html.substring(this.currentChar, n);
|
node.innerHTML = this.html.substring(this.currentChar, n);
|
||||||
this.currentChar = n;
|
this.currentChar = n;
|
||||||
}
|
}
|
||||||
return node;
|
return node;
|
||||||
|
@ -26,8 +26,15 @@
|
|||||||
* available at: http://code.google.com/p/arc90labs-readability
|
* available at: http://code.google.com/p/arc90labs-readability
|
||||||
*/
|
*/
|
||||||
var root = this;
|
var root = this;
|
||||||
var Readability = function(uri, doc) {
|
|
||||||
var ENABLE_LOGGING = false;
|
/**
|
||||||
|
* Public constructor.
|
||||||
|
* @param {Object} uri The URI descriptor object.
|
||||||
|
* @param {HTMLDocument} doc The document to parse.
|
||||||
|
* @param {Object} options The options object.
|
||||||
|
*/
|
||||||
|
var Readability = function(uri, doc, options) {
|
||||||
|
options = options || {};
|
||||||
|
|
||||||
this._uri = uri;
|
this._uri = uri;
|
||||||
this._doc = doc;
|
this._doc = doc;
|
||||||
@ -35,6 +42,12 @@ var Readability = function(uri, doc) {
|
|||||||
this._articleByline = null;
|
this._articleByline = null;
|
||||||
this._articleDir = null;
|
this._articleDir = null;
|
||||||
|
|
||||||
|
// Configureable options
|
||||||
|
this._debug = !!options.debug;
|
||||||
|
this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
|
||||||
|
this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
|
||||||
|
this._maxPages = options.maxPages || this.DEFAULT_MAX_PAGES;
|
||||||
|
|
||||||
// Start with all flags set
|
// Start with all flags set
|
||||||
this._flags = this.FLAG_STRIP_UNLIKELYS |
|
this._flags = this.FLAG_STRIP_UNLIKELYS |
|
||||||
this.FLAG_WEIGHT_CLASSES |
|
this.FLAG_WEIGHT_CLASSES |
|
||||||
@ -52,7 +65,7 @@ var Readability = function(uri, doc) {
|
|||||||
this._curPageNum = 1;
|
this._curPageNum = 1;
|
||||||
|
|
||||||
// Control whether log messages are sent to the console
|
// Control whether log messages are sent to the console
|
||||||
if (ENABLE_LOGGING) {
|
if (this._debug) {
|
||||||
function logEl(e) {
|
function logEl(e) {
|
||||||
var rv = e.nodeName + " ";
|
var rv = e.nodeName + " ";
|
||||||
if (e.nodeType == e.TEXT_NODE) {
|
if (e.nodeType == e.TEXT_NODE) {
|
||||||
@ -84,21 +97,24 @@ Readability.prototype = {
|
|||||||
FLAG_WEIGHT_CLASSES: 0x2,
|
FLAG_WEIGHT_CLASSES: 0x2,
|
||||||
FLAG_CLEAN_CONDITIONALLY: 0x4,
|
FLAG_CLEAN_CONDITIONALLY: 0x4,
|
||||||
|
|
||||||
|
// Max number of nodes supported by this parser. Default: 0 (no limit)
|
||||||
|
DEFAULT_MAX_ELEMS_TO_PARSE: 0,
|
||||||
|
|
||||||
// The number of top candidates to consider when analysing how
|
// The number of top candidates to consider when analysing how
|
||||||
// tight the competition is among candidates.
|
// tight the competition is among candidates.
|
||||||
N_TOP_CANDIDATES: 5,
|
DEFAULT_N_TOP_CANDIDATES: 5,
|
||||||
|
|
||||||
// The maximum number of pages to loop through before we call
|
// The maximum number of pages to loop through before we call
|
||||||
// it quits and just show a link.
|
// it quits and just show a link.
|
||||||
MAX_PAGES: 5,
|
DEFAULT_MAX_PAGES: 5,
|
||||||
|
|
||||||
// All of the regular expressions in use within readability.
|
// All of the regular expressions in use within readability.
|
||||||
// Defined up here so we don't instantiate them repeatedly in loops.
|
// Defined up here so we don't instantiate them repeatedly in loops.
|
||||||
REGEXPS: {
|
REGEXPS: {
|
||||||
unlikelyCandidates: /combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup/i,
|
unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i,
|
||||||
okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
|
okMaybeItsACandidate: /and|article|body|column|main|shadow/i,
|
||||||
positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
|
positive: /article|body|content|entry|hentry|main|page|pagination|post|text|blog|story/i,
|
||||||
negative: /hidden|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|shoutbox|sidebar|sponsor|shopping|tags|tool|widget/i,
|
negative: /hidden|banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,
|
||||||
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
|
extraneous: /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,
|
||||||
byline: /byline|author|dateline|writtenby/i,
|
byline: /byline|author|dateline|writtenby/i,
|
||||||
replaceFonts: /<(\/?)font[^>]*>/gi,
|
replaceFonts: /<(\/?)font[^>]*>/gi,
|
||||||
@ -336,10 +352,25 @@ Readability.prototype = {
|
|||||||
},
|
},
|
||||||
|
|
||||||
_setNodeTag: function (node, tag) {
|
_setNodeTag: function (node, tag) {
|
||||||
// FIXME this doesn't work on anything but JSDOMParser (ie the node's tag
|
this.log("_setNodeTag", node, tag);
|
||||||
// won't actually be set).
|
if (node.__JSDOMParser__) {
|
||||||
node.localName = tag.toLowerCase();
|
node.localName = tag.toLowerCase();
|
||||||
node.tagName = tag.toUpperCase();
|
node.tagName = tag.toUpperCase();
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
var replacement = node.ownerDocument.createElement(tag);
|
||||||
|
while (node.firstChild) {
|
||||||
|
replacement.appendChild(node.firstChild);
|
||||||
|
}
|
||||||
|
node.parentNode.replaceChild(replacement, node);
|
||||||
|
if (node.readability)
|
||||||
|
replacement.readability = node.readability;
|
||||||
|
|
||||||
|
for (var i = 0; i < node.attributes.length; i++) {
|
||||||
|
replacement.setAttribute(node.attributes[i].name, node.attributes[i].value);
|
||||||
|
}
|
||||||
|
return replacement;
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -469,6 +500,37 @@ Readability.prototype = {
|
|||||||
return node && node.nextElementSibling;
|
return node && node.nextElementSibling;
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like _getNextNode, but for DOM implementations with no
|
||||||
|
* firstElementChild/nextElementSibling functionality...
|
||||||
|
*/
|
||||||
|
_getNextNodeNoElementProperties: function(node, ignoreSelfAndKids) {
|
||||||
|
function nextSiblingEl(n) {
|
||||||
|
do {
|
||||||
|
n = n.nextSibling;
|
||||||
|
} while (n && n.nodeType !== n.ELEMENT_NODE);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
// First check for kids if those aren't being ignored
|
||||||
|
if (!ignoreSelfAndKids && node.children[0]) {
|
||||||
|
return node.children[0];
|
||||||
|
}
|
||||||
|
// Then for siblings...
|
||||||
|
var next = nextSiblingEl(node);
|
||||||
|
if (next) {
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
// And finally, move up the parent chain *and* find a sibling
|
||||||
|
// (because this is depth-first traversal, we will have already
|
||||||
|
// seen the parent nodes themselves).
|
||||||
|
do {
|
||||||
|
node = node.parentNode;
|
||||||
|
if (node)
|
||||||
|
next = nextSiblingEl(node);
|
||||||
|
} while (node && !next);
|
||||||
|
return node && next;
|
||||||
|
},
|
||||||
|
|
||||||
_checkByline: function(node, matchString) {
|
_checkByline: function(node, matchString) {
|
||||||
if (this._articleByline) {
|
if (this._articleByline) {
|
||||||
return false;
|
return false;
|
||||||
@ -494,6 +556,7 @@ Readability.prototype = {
|
|||||||
* @return Element
|
* @return Element
|
||||||
**/
|
**/
|
||||||
_grabArticle: function (page) {
|
_grabArticle: function (page) {
|
||||||
|
this.log("**** grabArticle ****");
|
||||||
var doc = this._doc;
|
var doc = this._doc;
|
||||||
var isPaging = (page !== null ? true: false);
|
var isPaging = (page !== null ? true: false);
|
||||||
page = page ? page : this._doc.body;
|
page = page ? page : this._doc.body;
|
||||||
@ -548,11 +611,11 @@ Readability.prototype = {
|
|||||||
// safely converted into plain P elements to avoid confusing the scoring
|
// safely converted into plain P elements to avoid confusing the scoring
|
||||||
// algorithm with DIVs with are, in practice, paragraphs.
|
// algorithm with DIVs with are, in practice, paragraphs.
|
||||||
if (this._hasSinglePInsideElement(node)) {
|
if (this._hasSinglePInsideElement(node)) {
|
||||||
var newNode = node.firstElementChild;
|
var newNode = node.children[0];
|
||||||
node.parentNode.replaceChild(newNode, node);
|
node.parentNode.replaceChild(newNode, node);
|
||||||
node = newNode;
|
node = newNode;
|
||||||
} else if (!this._hasChildBlockElement(node)) {
|
} else if (!this._hasChildBlockElement(node)) {
|
||||||
this._setNodeTag(node, "P");
|
node = this._setNodeTag(node, "P");
|
||||||
elementsToScore.push(node);
|
elementsToScore.push(node);
|
||||||
} else {
|
} else {
|
||||||
// EXPERIMENTAL
|
// EXPERIMENTAL
|
||||||
@ -635,12 +698,12 @@ Readability.prototype = {
|
|||||||
|
|
||||||
this.log('Candidate:', candidate, "with score " + candidateScore);
|
this.log('Candidate:', candidate, "with score " + candidateScore);
|
||||||
|
|
||||||
for (var t = 0; t < this.N_TOP_CANDIDATES; t++) {
|
for (var t = 0; t < this._nbTopCandidates; t++) {
|
||||||
var aTopCandidate = topCandidates[t];
|
var aTopCandidate = topCandidates[t];
|
||||||
|
|
||||||
if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) {
|
if (!aTopCandidate || candidateScore > aTopCandidate.readability.contentScore) {
|
||||||
topCandidates.splice(t, 0, candidate);
|
topCandidates.splice(t, 0, candidate);
|
||||||
if (topCandidates.length > this.N_TOP_CANDIDATES)
|
if (topCandidates.length > this._nbTopCandidates)
|
||||||
topCandidates.pop();
|
topCandidates.pop();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -743,7 +806,7 @@ Readability.prototype = {
|
|||||||
// Turn it into a div so it doesn't get filtered out later by accident.
|
// Turn it into a div so it doesn't get filtered out later by accident.
|
||||||
this.log("Altering sibling:", sibling, 'to div.');
|
this.log("Altering sibling:", sibling, 'to div.');
|
||||||
|
|
||||||
this._setNodeTag(sibling, "DIV");
|
sibling = this._setNodeTag(sibling, "DIV");
|
||||||
}
|
}
|
||||||
|
|
||||||
// To ensure a node does not interfere with readability styles,
|
// To ensure a node does not interfere with readability styles,
|
||||||
@ -760,11 +823,11 @@ Readability.prototype = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.ENABLE_LOGGING)
|
if (this._debug)
|
||||||
this.log("Article content pre-prep: " + articleContent.innerHTML);
|
this.log("Article content pre-prep: " + articleContent.innerHTML);
|
||||||
// So we have all of the content that we need. Now we clean it up for presentation.
|
// So we have all of the content that we need. Now we clean it up for presentation.
|
||||||
this._prepArticle(articleContent);
|
this._prepArticle(articleContent);
|
||||||
if (this.ENABLE_LOGGING)
|
if (this._debug)
|
||||||
this.log("Article content post-prep: " + articleContent.innerHTML);
|
this.log("Article content post-prep: " + articleContent.innerHTML);
|
||||||
|
|
||||||
if (this._curPageNum === 1) {
|
if (this._curPageNum === 1) {
|
||||||
@ -787,7 +850,7 @@ Readability.prototype = {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this.ENABLE_LOGGING)
|
if (this._debug)
|
||||||
this.log("Article content after paging: " + articleContent.innerHTML);
|
this.log("Article content after paging: " + articleContent.innerHTML);
|
||||||
|
|
||||||
// Now that we've gone through the full algorithm, check to see if
|
// Now that we've gone through the full algorithm, check to see if
|
||||||
@ -900,6 +963,10 @@ Readability.prototype = {
|
|||||||
if (scriptNode.parentNode)
|
if (scriptNode.parentNode)
|
||||||
scriptNode.parentNode.removeChild(scriptNode);
|
scriptNode.parentNode.removeChild(scriptNode);
|
||||||
});
|
});
|
||||||
|
this._forEachNode(doc.getElementsByTagName('noscript'), function(noscriptNode) {
|
||||||
|
if (noscriptNode.parentNode)
|
||||||
|
noscriptNode.parentNode.removeChild(noscriptNode);
|
||||||
|
});
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -911,7 +978,7 @@ Readability.prototype = {
|
|||||||
**/
|
**/
|
||||||
_hasSinglePInsideElement: function(element) {
|
_hasSinglePInsideElement: function(element) {
|
||||||
// There should be exactly 1 element child which is a P:
|
// There should be exactly 1 element child which is a P:
|
||||||
if (element.children.length != 1 || element.firstElementChild.tagName !== "P") {
|
if (element.children.length != 1 || element.children[0].tagName !== "P") {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1290,7 +1357,7 @@ Readability.prototype = {
|
|||||||
|
|
||||||
doc.getElementById("readability-content").appendChild(articlePage);
|
doc.getElementById("readability-content").appendChild(articlePage);
|
||||||
|
|
||||||
if (this._curPageNum > this.MAX_PAGES) {
|
if (this._curPageNum > this._maxPages) {
|
||||||
var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>";
|
var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>";
|
||||||
articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;
|
articlePage.innerHTML = articlePage.innerHTML + nextPageMarkup;
|
||||||
return;
|
return;
|
||||||
@ -1613,6 +1680,17 @@ Readability.prototype = {
|
|||||||
* @return void
|
* @return void
|
||||||
**/
|
**/
|
||||||
parse: function () {
|
parse: function () {
|
||||||
|
// Avoid parsing too large documents, as per configuration option
|
||||||
|
if (this._maxElemsToParse > 0) {
|
||||||
|
var numTags = this._doc.getElementsByTagName("*").length;
|
||||||
|
if (numTags > this._maxElemsToParse) {
|
||||||
|
throw new Error("Aborting parsing document; " + numTags + " elements found");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (typeof this._doc.documentElement.firstElementChild === "undefined") {
|
||||||
|
this._getNextNode = this._getNextNodeNoElementProperties;
|
||||||
|
}
|
||||||
// Remove script tags from the document.
|
// Remove script tags from the document.
|
||||||
this._removeScripts(this._doc);
|
this._removeScripts(this._doc);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user