Correctly handle void_elements

Fixes #1475
This commit is contained in:
Liam Newman 2018-08-21 13:22:30 -07:00
parent 3253d25412
commit 26ec3a76f2
8 changed files with 146 additions and 40 deletions

View File

@ -36,8 +36,9 @@ var TOKEN = {
EOF: 'TK_EOF'
};
var Tokenizer = function(input_string) { // jshint unused:false
var Tokenizer = function(input_string, options) {
this._input = new InputScanner(input_string);
this._options = options || {};
this.__tokens = null;
this.__newline_count = 0;
this.__whitespace_before_token = '';

View File

@ -38,7 +38,7 @@ var allLineBreaks = acorn.allLineBreaks;
var Printer = function(indent_character, indent_size, wrap_line_length, max_preserve_newlines, preserve_newlines) { //handles input/output and some other printing functions
this.indent_character = indent_character;
this.indent_string = '';
this.indent_string = indent_character;
this.indent_size = indent_size;
this.indent_level = 0;
this.alignment_size = 0;
@ -46,8 +46,8 @@ var Printer = function(indent_character, indent_size, wrap_line_length, max_pres
this.max_preserve_newlines = max_preserve_newlines;
this.preserve_newlines = preserve_newlines;
for (var i = 0; i < this.indent_size; i++) {
this.indent_string += this.indent_character;
if (this.indent_size > 1) {
this.indent_string = new Array(this.indent_size + 1).join(this.indent_character);
}
this._output = new Output(this.indent_string, '');
@ -606,20 +606,19 @@ Beautifier.prototype._get_tag_open = function(parent, raw_token) { //function to
parser_token.is_start_tag = parser_token.tag_check.charAt(0) !== '/';
parser_token.tag_name = !parser_token.is_start_tag ? parser_token.tag_check.substr(1) : parser_token.tag_check;
parser_token.is_end_tag = !parser_token.is_start_tag ||
(raw_token.closed && raw_token.closed.text === '/>');
(raw_token.closed && raw_token.closed.text === '/>') ||
in_array(parser_token.tag_check, this._options.void_elements);
// handlebars tags that don't start with # or ^ are single_tags, and so also start and end.
parser_token.is_end_tag = parser_token.is_end_tag ||
(parser_token.tag_start_char === '{' && (parser_token.text.length < 3 || (/[^#\^]/.test(parser_token.text.charAt(2)))));
parser_token.is_unformatted = !parser_token.tag_complete && in_array(parser_token.tag_check, this._options.unformatted);
parser_token.is_content_unformatted = !parser_token.tag_complete && in_array(parser_token.tag_check, this._options.content_unformatted);
parser_token.is_inline_tag = in_array(parser_token.tag_name, this._options.inline_tags) || parser_token.tag_start_char === '{';
parser_token.is_single_tag = parser_token.tag_complete ||
(parser_token.is_start_tag && parser_token.is_end_tag);
parser_token.is_single_tag = raw_token.type === TOKEN.COMMENT ||
in_array(parser_token.tag_check, this._options.void_elements) ||
(parser_token.is_start_tag && parser_token.is_end_tag) ||
(parser_token.is_unformatted || parser_token.is_content_unformatted);
parser_token.is_unformatted = !parser_token.tag_complete && in_array(parser_token.tag_check, this._options.unformatted);
parser_token.is_content_unformatted = !parser_token.is_single_tag && in_array(parser_token.tag_check, this._options.content_unformatted);
parser_token.is_inline_tag = in_array(parser_token.tag_name, this._options.inline_tags) || parser_token.tag_start_char === '{';
if (parser_token.is_single_tag) {
parser_token.type = 'TK_TAG_SINGLE';
@ -631,6 +630,7 @@ Beautifier.prototype._get_tag_open = function(parent, raw_token) { //function to
parser_token.type = 'TK_TAG_START';
if ((parser_token.tag_name === 'script' || parser_token.tag_name === 'style') &&
!(parser_token.is_unformatted || parser_token.is_content_unformatted) &&
uses_beautifier(parser_token.tag_check, raw_token)) {
parser_token.custom_beautifier = true;
if (parser_token.tag_name === 'script') {
@ -673,7 +673,10 @@ Beautifier.prototype._set_tag_position = function(printer, parser_token, last_ta
} else if (!parser_token.is_inline_tag && !parser_token.is_unformatted) {
printer.print_newline(false);
}
} else if (parser_token.is_unformatted || parser_token.is_content_unformatted) {
if (!parser_token.is_inline_tag && !parser_token.is_unformatted) {
printer.print_newline(false);
}
} else if (parser_token.is_end_tag) { //this tag is a double tag so check for tag-ending
if ((parser_token.start_tag_token && parser_token.start_tag_token.multiline_content) ||
!(parser_token.is_inline_tag ||
@ -684,12 +687,9 @@ Beautifier.prototype._set_tag_position = function(printer, parser_token, last_ta
)) {
printer.print_newline(false);
}
} else if (parser_token.custom_beautifier) {
printer.print_newline(false);
} else { // it's a start-tag
if (parser_token.tag_check !== 'html') {
parser_token.indent_content = true;
}
parser_token.indent_content = parser_token.tag_check !== 'html' &&
!parser_token.custom_beautifier;
if (!parser_token.is_inline_tag && last_token.type !== 'TK_CONTENT') {
if (parser_token.parent) {

View File

@ -47,14 +47,13 @@ var TOKEN = {
var directives_core = new Directives(/<\!--/, /-->/);
var Tokenizer = function(input_string, opts) {
BaseTokenizer.call(this, input_string);
this._opts = opts || {};
var Tokenizer = function(input_string, options) {
BaseTokenizer.call(this, input_string, options);
this._current_tag_name = '';
// Words end at whitespace or when a tag starts
// if we are indenting handlebars, they are considered tags
this._word_pattern = this._opts.indent_handlebars ? /[\s<]|{{/g : /[\s<]/g;
this._word_pattern = this._options.indent_handlebars ? /[\s<]|{{/g : /[\s<]/g;
};
Tokenizer.prototype = new BaseTokenizer();
@ -103,7 +102,7 @@ Tokenizer.prototype._read_comment = function(c) { // jshint unused:false
var peek1 = this._input.peek(1);
var peek2 = this._input.peek(2);
if ((c === '<' && (peek1 === '!' || peek1 === '?' || peek1 === '%')) ||
this._opts.indent_handlebars && c === '{' && peek1 === '{' && peek2 === '!') {
this._options.indent_handlebars && c === '{' && peek1 === '{' && peek2 === '!') {
//if we're in a comment, do something special
// We treat all comments as literals, even more than preformatted tags
// we just look for the appropriate close tag
@ -189,7 +188,7 @@ Tokenizer.prototype._read_open_close = function(c, open_token) { // jshint unuse
resulting_string = this._input.next();
resulting_string += this._input.read(/[^\s>{][^\s>{/]*/g);
return this._create_token(TOKEN.TAG_OPEN, resulting_string);
} else if (this._opts.indent_handlebars && c === '{' && this._input.peek(1) === '{') {
} else if (this._options.indent_handlebars && c === '{' && this._input.peek(1) === '{') {
this._input.next();
this._input.next();
resulting_string = '{{';
@ -240,15 +239,24 @@ Tokenizer.prototype._read_attribute = function(c, previous_token, open_token) {
return null;
};
/**
 * Decides whether the content that follows an opening tag should be read as
 * unformatted content.
 *
 * @param {string} tag_name - tag name to look up in the option lists.
 * @returns {boolean} false for any name listed in `void_elements`; otherwise
 *   true for 'script', 'style', or any name listed in `content_unformatted`
 *   or `unformatted`.
 */
Tokenizer.prototype._is_content_unformatted = function(tag_name) {
  var options = this._options;

  // A void element has no content, so there is nothing that could be unformatted.
  if (options.void_elements.indexOf(tag_name) !== -1) {
    return false;
  }

  // script and style contents are always read as unformatted content.
  if (tag_name === 'script' || tag_name === 'style') {
    return true;
  }

  // Otherwise the tag has to be listed as content_unformatted or unformatted.
  return options.content_unformatted.indexOf(tag_name) !== -1 ||
    options.unformatted.indexOf(tag_name) !== -1;
};
Tokenizer.prototype._read_raw_content = function(previous_token, open_token) { // jshint unused:false
var resulting_string = '';
if (open_token && open_token.text[0] === '{') {
resulting_string = this._input.readUntil(/}}/g);
} else if (previous_token.type === TOKEN.TAG_CLOSE && (previous_token.opened.text[0] === '<')) {
var tag_name = previous_token.opened.text.substr(1).toLowerCase();
if (tag_name === 'script' || tag_name === 'style' ||
this._opts.content_unformatted.indexOf(tag_name) !== -1 ||
this._opts.unformatted.indexOf(tag_name) !== -1) {
if (this._is_content_unformatted(tag_name)) {
resulting_string = this._input.readUntil(new RegExp('</' + tag_name + '\\s*?>', 'ig'));
}
}

View File

@ -101,9 +101,8 @@ var template_pattern = /(?:(?:<\?php|<\?=)[\s\S]*?\?>)|(?:<%[\s\S]*?%>)/g;
var in_html_comment;
var Tokenizer = function(input_string, opts) {
BaseTokenizer.call(this, input_string);
this._opts = opts;
var Tokenizer = function(input_string, options) {
BaseTokenizer.call(this, input_string, options);
this.positionable_operators = positionable_operators;
this.line_starters = line_starters;
};
@ -303,7 +302,7 @@ Tokenizer.prototype._read_string = function(c) {
resulting_string += this._read_string_recursive(c);
}
if (this.has_char_escapes && this._opts.unescape_strings) {
if (this.has_char_escapes && this._options.unescape_strings) {
resulting_string = unescape_string(resulting_string);
}
if (this._input.peek() === c) {
@ -370,7 +369,7 @@ var xmlRegExp = /[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]
Tokenizer.prototype._read_xml = function(c, previous_token) {
if (this._opts.e4x && c === "<" && this._input.test(startXmlRegExp) && this._allow_regexp_or_xml(previous_token)) {
if (this._options.e4x && c === "<" && this._input.test(startXmlRegExp) && this._allow_regexp_or_xml(previous_token)) {
// handle e4x xml literals
//
var xmlStr = '';

View File

@ -4270,6 +4270,46 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be
' <col>\n' +
' <col>\n' +
'</colgroup>');
bth(
'<source>\n' +
' <source>',
// -- output --
'<source>\n' +
'<source>');
bth(
'<br>\n' +
' <br>',
// -- output --
'<br>\n' +
'<br>');
bth(
'<input>\n' +
' <input>',
// -- output --
'<input>\n' +
'<input>');
bth(
'<meta>\n' +
' <meta>',
// -- output --
'<meta>\n' +
'<meta>');
bth(
'<link>\n' +
' <link>',
// -- output --
'<link>\n' +
'<link>');
bth(
'<colgroup>\n' +
' <col>\n' +
' <col>\n' +
'</colgroup>',
// -- output --
'<colgroup>\n' +
' <col>\n' +
' <col>\n' +
'</colgroup>');
//============================================================
@ -5240,7 +5280,7 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be
// content_unformatted to prevent formatting content
reset_options();
set_name('content_unformatted to prevent formatting content');
opts.content_unformatted = ['?php', 'script', 'style', 'p', 'span', 'br'];
opts.content_unformatted = ['?php', 'script', 'style', 'p', 'span', 'br', 'meta'];
test_fragment(
'<html><body><h1>A</h1><script>if(1){f();}</script><style>.a{display:none;}</style></body></html>',
// -- output --
@ -5274,6 +5314,28 @@ function run_html_tests(test_obj, Urlencoded, js_beautify, html_beautify, css_be
'>But not me</div></p>');
bth('<div><span>blabla<div>something here</div></span></div>');
bth('<div><br /></div>');
bth('<div><br></div>');
bth(
'<div>\n' +
'<br>\n' +
'<br />\n' +
'<br></div>',
// -- output --
'<div>\n' +
' <br>\n' +
' <br />\n' +
' <br></div>');
bth(
'<div>\n' +
'<meta>\n' +
'<meta />\n' +
'<meta></div>',
// -- output --
'<div>\n' +
' <meta>\n' +
' <meta />\n' +
' <meta>\n' +
'</div>');
bth(
'<div><pre>var a=1;\n' +
'var b=a;</pre></div>',

View File

@ -42,11 +42,12 @@ TOKEN = TokenTypes()
class Tokenizer:
def __init__(self, input_string):
def __init__(self, input_string, options):
import jsbeautifier.core.acorn as acorn
self.acorn = acorn
self._input = InputScanner(input_string)
self._options = options
self.__tokens = None
self.__newline_count = 0
self.__whitespace_before_token = ''

View File

@ -115,9 +115,8 @@ class Tokenizer(BaseTokenizer):
line_starters = line_starters
def __init__(self, input_string, opts, indent_string):
BaseTokenizer.__init__(self, input_string)
BaseTokenizer.__init__(self, input_string, opts)
self.opts = opts
self.indent_string = indent_string
self.in_html_comment = False
self.has_char_escapes = False
@ -241,7 +240,7 @@ class Tokenizer(BaseTokenizer):
else:
resulting_string = self.parse_string(resulting_string, c)
if self.has_char_escapes and self.opts.unescape_strings:
if self.has_char_escapes and self._options.unescape_strings:
resulting_string = self.unescape_string(resulting_string)
if self._input.peek() == c :
@ -292,7 +291,7 @@ class Tokenizer(BaseTokenizer):
def _read_xml(self, c, previous_token):
if self.opts.e4x and c == "<" and self._input.test(
if self._options.e4x and c == "<" and self._input.test(
startXmlRegExp) and self.allowRegExOrXML(previous_token):
# handle e4x xml literals
xmlStr = ""

View File

@ -972,7 +972,13 @@ exports.test_data = {
{ unchanged: '<input>\n<input>' },
{ unchanged: '<meta>\n<meta>' },
{ unchanged: '<link>\n<link>' },
{ unchanged: '<colgroup>\n <col>\n <col>\n</colgroup>' }
{ unchanged: '<colgroup>\n <col>\n <col>\n</colgroup>' },
{ input: '<source>\n <source>', output: '<source>\n<source>' },
{ input: '<br>\n <br>' , output: '<br>\n<br>' },
{ input: '<input>\n <input>', output: '<input>\n<input>' },
{ input: '<meta>\n <meta>', output: '<meta>\n<meta>' },
{ input: '<link>\n <link>', output: '<link>\n<link>' },
{ input: '<colgroup>\n <col>\n <col>\n</colgroup>', output: '<colgroup>\n <col>\n <col>\n</colgroup>' }
]
}, {
name: "Unformatted tags",
@ -1675,7 +1681,7 @@ exports.test_data = {
name: "content_unformatted to prevent formatting content",
description: "",
options: [
{ name: 'content_unformatted', value: "['?php', 'script', 'style', 'p', 'span', 'br']" }
{ name: 'content_unformatted', value: "['?php', 'script', 'style', 'p', 'span', 'br', 'meta']" }
],
tests: [{
fragment: true,
@ -1712,6 +1718,36 @@ exports.test_data = {
unchanged: '<div><span>blabla<div>something here</div></span></div>'
}, {
unchanged: '<div><br /></div>'
}, {
unchanged: '<div><br></div>'
}, {
input: [
'<div>',
'<br>',
'<br />',
'<br></div>'
],
output: [
'<div>',
' <br>',
' <br />',
' <br></div>'
]
}, {
input: [
'<div>',
'<meta>',
'<meta />',
'<meta></div>'
],
output: [
'<div>',
' <meta>',
' <meta />',
' <meta>',
'</div>'
]
}, {
input: '<div><pre>var a=1;\nvar b=a;</pre></div>',
output: [