diff --git a/js/src/html/beautifier.js b/js/src/html/beautifier.js index 6636579d..7e218c70 100644 --- a/js/src/html/beautifier.js +++ b/js/src/html/beautifier.js @@ -283,7 +283,7 @@ function Beautifier(source_text, options, js_beautify, css_beautify) { this._options.eol = this._options.eol.replace(/\\r/, '\r').replace(/\\n/, '\n'); - this._options.inline_tags = get_array(options.inline, [ + this._options.inline = get_array(options.inline, [ // https://www.w3.org/TR/html5/dom.html#phrasing-content 'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite', 'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', @@ -344,22 +344,15 @@ Beautifier.prototype.beautify = function() { source_text = source_text.replace(allLineBreaks, '\n'); this._tag_stack = new TagFrame(); - last_token = { + + var last_token = { text: '', type: '' }; - var last_tag_token = { - text: '', - type: '', - tag_name: '', - indent_content: false, - tag_complete: true, - is_start_tag: false, - is_end_tag: false, - is_inline_tag: false - }; - printer = new Printer(this._options.indent_character, this._options.indent_size, + var last_tag_token = new TagOpenParserToken(); + + var printer = new Printer(this._options.indent_character, this._options.indent_size, this._options.wrap_line_length, this._options.max_preserve_newlines, this._options.preserve_newlines); var tokens = new Tokenizer(source_text, this._options).tokenize(); @@ -481,52 +474,8 @@ Beautifier.prototype._handle_inside_tag = function(printer, raw_token, last_tag_ Beautifier.prototype._handle_text = function(printer, raw_token, last_tag_token) { parser_token = { text: raw_token.text, type: 'TK_CONTENT' }; - if (last_tag_token.type === 'TK_TAG_SCRIPT' || last_tag_token.type === 'TK_TAG_STYLE') { //check if we need to format javascript - if (raw_token.text !== '') { - printer.print_newline(false); - var text = raw_token.text, - _beautifier, - script_indent_level = 1; - if (last_tag_token.type === 'TK_TAG_SCRIPT') { - _beautifier = typeof this._js_beautify === 'function' && this._js_beautify; - } else if (last_tag_token.type === 'TK_TAG_STYLE') { - _beautifier = typeof this._css_beautify === 'function' && this._css_beautify; - } - - if (this._options.indent_scripts === "keep") { - script_indent_level = 0; - } else if (this._options.indent_scripts === "separate") { - script_indent_level = -printer.indent_level; - } - - var indentation = printer.get_full_indent(script_indent_level); - - // if there is at least one empty line at the end of this text, strip it - // we'll be adding one back after the text but before the containing tag. - text = text.replace(/\n[ \t]*$/, ''); - - if (_beautifier) { - - // call the Beautifier if avaliable - var Child_options = function() { - this.eol = '\n'; - }; - Child_options.prototype = this._options; - var child_options = new Child_options(); - text = _beautifier(indentation + text, child_options); - } else { - // simply indent the string otherwise - var white = text.match(/^\s*/)[0]; - var _level = white.match(/[^\n\r]*$/)[0].split(this._indent_string).length - 1; - var reindent = this._get_full_indent(script_indent_level - _level); - text = (indentation + text.trim()) - .replace(/\r\n|\r|\n/g, '\n' + reindent); - } - if (text) { - printer.print_raw_text(text); - printer.print_newline(true); - } - } + if (last_tag_token.custom_beautifier) { //check if we need to format javascript + this._print_custom_beatifier_text(printer, raw_token, last_tag_token); } else if (last_tag_token.is_unformatted || last_tag_token.is_content_unformatted) { printer.add_raw_token(raw_token); } else { @@ -536,12 +485,61 @@ Beautifier.prototype._handle_text = function(printer, raw_token, last_tag_token) return parser_token; }; +Beautifier.prototype._print_custom_beatifier_text = function(printer, raw_token, last_tag_token) { + if (raw_token.text !== '') { + printer.print_newline(false); + var text = raw_token.text, + _beautifier, + script_indent_level = 1; + if (last_tag_token.tag_name === 'script') { + _beautifier = typeof this._js_beautify === 'function' && this._js_beautify; + } else if (last_tag_token.tag_name === 'style') { + _beautifier = typeof this._css_beautify === 'function' && this._css_beautify; + } + + if (this._options.indent_scripts === "keep") { + script_indent_level = 0; + } else if (this._options.indent_scripts === "separate") { + script_indent_level = -printer.indent_level; + } + + var indentation = printer.get_full_indent(script_indent_level); + + // if there is at least one empty line at the end of this text, strip it + // we'll be adding one back after the text but before the containing tag. + text = text.replace(/\n[ \t]*$/, ''); + + if (_beautifier) { + + // call the Beautifier if avaliable + var Child_options = function() { + this.eol = '\n'; + }; + Child_options.prototype = this._options; + var child_options = new Child_options(); + text = _beautifier(indentation + text, child_options); + } else { + // simply indent the string otherwise + var white = text.match(/^\s*/)[0]; + var _level = white.match(/[^\n\r]*$/)[0].split(this._indent_string).length - 1; + var reindent = this._get_full_indent(script_indent_level - _level); + text = (indentation + text.trim()) + .replace(/\r\n|\r|\n/g, '\n' + reindent); + } + if (text) { + printer.print_raw_text(text); + printer.print_newline(true); + } + } +}; + Beautifier.prototype._handle_tag_open = function(printer, raw_token, last_tag_token, last_token) { - var parser_token = this._get_tag_open(this._tag_stack.parser_token, raw_token); + var parser_token = this._get_tag_open_token(raw_token); printer.traverse_whitespace(raw_token); this._set_tag_position(printer, parser_token, last_tag_token, last_token); + if ((last_tag_token.is_unformatted || last_tag_token.is_content_unformatted) && raw_token.type === TOKEN.TAG_OPEN && raw_token.text.indexOf(']*)/); - parser_token.tag_check = tag_check_match ? tag_check_match[1] : ''; + if (!raw_token) { + this.tag_complete = true; } else { - tag_check_match = raw_token.text.match(/^{{\#?([^\s}]+)/); - parser_token.tag_check = tag_check_match ? tag_check_match[1] : ''; - } - parser_token.tag_check = parser_token.tag_check.toLowerCase(); + var tag_check_match; - if (raw_token.type === TOKEN.COMMENT) { - parser_token.tag_complete = true; - } - parser_token.text = raw_token.text; + this.tag_start_char = raw_token.text[0]; + this.text = raw_token.text; - parser_token.is_start_tag = parser_token.tag_check.charAt(0) !== '/'; - parser_token.tag_name = !parser_token.is_start_tag ? parser_token.tag_check.substr(1) : parser_token.tag_check; - parser_token.is_end_tag = !parser_token.is_start_tag || - (raw_token.closed && raw_token.closed.text === '/>') || + if (this.tag_start_char === '<') { + tag_check_match = raw_token.text.match(/^<([^\s>]*)/); + this.tag_check = tag_check_match ? tag_check_match[1] : ''; + } else { + tag_check_match = raw_token.text.match(/^{{\#?([^\s}]+)/); + this.tag_check = tag_check_match ? tag_check_match[1] : ''; + } + this.tag_check = this.tag_check.toLowerCase(); + + if (raw_token.type === TOKEN.COMMENT) { + this.tag_complete = true; + } + + this.is_start_tag = this.tag_check.charAt(0) !== '/'; + this.tag_name = !this.is_start_tag ? this.tag_check.substr(1) : this.tag_check; + this.is_end_tag = !this.is_start_tag || + (raw_token.closed && raw_token.closed.text === '/>'); + + // handlebars tags that don't start with # or ^ are single_tags, and so also start and end. + this.is_end_tag = this.is_end_tag || + (this.tag_start_char === '{' && (this.text.length < 3 || (/[^#\^]/.test(this.text.charAt(2))))); + } +}; + +Beautifier.prototype._get_tag_open_token = function(raw_token) { //function to get a full tag and parse its type + var parser_token = new TagOpenParserToken(this._tag_stack.parser_token, raw_token); + + parser_token.alignment_size = this._options.wrap_attributes_indent_size; + + parser_token.is_end_tag = parser_token.is_end_tag || in_array(parser_token.tag_check, this._options.void_elements); - // handlebars tags that don't start with # or ^ are single_tags, and so also start and end. - parser_token.is_end_tag = parser_token.is_end_tag || - (parser_token.tag_start_char === '{' && (parser_token.text.length < 3 || (/[^#\^]/.test(parser_token.text.charAt(2))))); - - parser_token.is_single_tag = parser_token.tag_complete || + parser_token.is_empty_element = parser_token.tag_complete || (parser_token.is_start_tag && parser_token.is_end_tag); parser_token.is_unformatted = !parser_token.tag_complete && in_array(parser_token.tag_check, this._options.unformatted); - parser_token.is_content_unformatted = !parser_token.is_single_tag && in_array(parser_token.tag_check, this._options.content_unformatted); - parser_token.is_inline_tag = in_array(parser_token.tag_name, this._options.inline_tags) || parser_token.tag_start_char === '{'; - - if (parser_token.is_single_tag) { - parser_token.type = 'TK_TAG_SINGLE'; - } else if (parser_token.is_end_tag) { //this tag is a double tag so check for tag-ending - parser_token.start_tag_token = this._tag_stack.retrieve_tag(parser_token.tag_name, printer); //remove it and all ancestors - parser_token.type = 'TK_TAG_END'; - } else { // it's a start-tag - this._tag_stack.record_tag(parser_token.tag_name, parser_token, printer.indent_level); //push it on the tag stack - parser_token.type = 'TK_TAG_START'; - - if ((parser_token.tag_name === 'script' || parser_token.tag_name === 'style') && - !(parser_token.is_unformatted || parser_token.is_content_unformatted) && - uses_beautifier(parser_token.tag_check, raw_token)) { - parser_token.custom_beautifier = true; - if (parser_token.tag_name === 'script') { - parser_token.type = 'TK_TAG_SCRIPT'; - } else { - parser_token.type = 'TK_TAG_STYLE'; - } - } - } - + parser_token.is_content_unformatted = !parser_token.is_empty_element && in_array(parser_token.tag_check, this._options.content_unformatted); + parser_token.is_inline_element = in_array(parser_token.tag_name, this._options.inline) || parser_token.tag_start_char === '{'; return parser_token; }; Beautifier.prototype._set_tag_position = function(printer, parser_token, last_tag_token, last_token) { + if (!parser_token.is_empty_element) { + if (parser_token.is_end_tag) { //this tag is a double tag so check for tag-ending + parser_token.start_tag_token = this._tag_stack.retrieve_tag(parser_token.tag_name, printer); //remove it and all ancestors + } else { // it's a start-tag + this._tag_stack.record_tag(parser_token.tag_name, parser_token, printer.indent_level); //push it on the tag stack + + if ((parser_token.tag_name === 'script' || parser_token.tag_name === 'style') && + !(parser_token.is_unformatted || parser_token.is_content_unformatted)) { + parser_token.custom_beautifier = uses_beautifier(parser_token.tag_check, raw_token); + } + } + } + if (in_array(parser_token.tag_check, this._options.extra_liners)) { //check if this double needs an extra line printer.print_newline(false); if (!printer._output.just_added_blankline()) { @@ -654,7 +654,7 @@ Beautifier.prototype._set_tag_position = function(printer, parser_token, last_ta } } - if (parser_token.is_single_tag) { //if this tag name is a single tag type (either in the list or has a closing /) + if (parser_token.is_empty_element) { //if this tag name is a single tag type (either in the list or has a closing /) if (parser_token.tag_start_char === '{' && parser_token.tag_check === 'else') { this._tag_stack.indent_to_tag(['if', 'unless'], printer); @@ -670,17 +670,17 @@ Beautifier.prototype._set_tag_position = function(printer, parser_token, last_ta if (parser_token.tag_name === '!--' && last_token.type === TOKEN.TAG_CLOSE && last_tag_token.is_end_tag && parser_token.text.indexOf('\n') === -1) { //Do nothing. Leave comments on same line. - } else if (!parser_token.is_inline_tag && !parser_token.is_unformatted) { + } else if (!parser_token.is_inline_element && !parser_token.is_unformatted) { printer.print_newline(false); } } else if (parser_token.is_unformatted || parser_token.is_content_unformatted) { - if (!parser_token.is_inline_tag && !parser_token.is_unformatted) { + if (!parser_token.is_inline_element && !parser_token.is_unformatted) { printer.print_newline(false); } } else if (parser_token.is_end_tag) { //this tag is a double tag so check for tag-ending if ((parser_token.start_tag_token && parser_token.start_tag_token.multiline_content) || - !(parser_token.is_inline_tag || - (last_tag_token.is_inline_tag) || + !(parser_token.is_inline_element || + (last_tag_token.is_inline_element) || (last_token.type === TOKEN.TAG_CLOSE && parser_token.start_tag_token === last_tag_token) || (last_token.type === 'TK_CONTENT') @@ -691,7 +691,7 @@ Beautifier.prototype._set_tag_position = function(printer, parser_token, last_ta parser_token.indent_content = parser_token.tag_check !== 'html' && !parser_token.custom_beautifier; - if (!parser_token.is_inline_tag && last_token.type !== 'TK_CONTENT') { + if (!parser_token.is_inline_element && last_token.type !== 'TK_CONTENT') { if (parser_token.parent) { parser_token.parent.multiline_content = true; } diff --git a/js/src/html/tokenizer.js b/js/src/html/tokenizer.js index 7d9063bd..c657b7ad 100644 --- a/js/src/html/tokenizer.js +++ b/js/src/html/tokenizer.js @@ -88,14 +88,14 @@ Tokenizer.prototype._get_next_token = function(previous_token, open_token) { // token = token || this._read_attribute(c, previous_token, open_token); token = token || this._read_raw_content(previous_token, open_token); token = token || this._read_comment(c); - token = token || this._read_open_close(c, open_token); + token = token || this._read_open(c, open_token); + token = token || this._read_close(c, open_token); token = token || this._read_content_word(); token = token || this._create_token(TOKEN.UNKNOWN, this._input.next()); return token; }; - Tokenizer.prototype._read_comment = function(c) { // jshint unused:false var token = null; if (c === '<' || c === '{') { @@ -171,38 +171,48 @@ Tokenizer.prototype._read_comment = function(c) { // jshint unused:false return token; }; -Tokenizer.prototype._read_open_close = function(c, open_token) { // jshint unused:false +Tokenizer.prototype._read_open = function(c, open_token) { var resulting_string = null; - if (open_token && open_token.text[0] === '<' && (c === '>' || (c === '/' && this._input.peek(1) === '>'))) { - resulting_string = this._input.next(); - if (c === '/') { // for close tag "/>" - resulting_string += this._input.next(); - } - return this._create_token(TOKEN.TAG_CLOSE, resulting_string); - } else if (open_token && open_token.text[0] === '{' && c === '}' && this._input.peek(1) === '}') { - this._input.next(); - this._input.next(); - return this._create_token(TOKEN.TAG_CLOSE, '}}'); - } else if (!open_token) { + var token = null; + if (!open_token) { if (c === '<') { - resulting_string = this._input.next(); - resulting_string += this._input.read(/[^\s>{][^\s>{/]*/g); - return this._create_token(TOKEN.TAG_OPEN, resulting_string); + resulting_string = this._input.read(/<(?:[^\s>{][^\s>{/]*)?/g); + token = this._create_token(TOKEN.TAG_OPEN, resulting_string); } else if (this._options.indent_handlebars && c === '{' && this._input.peek(1) === '{') { - this._input.next(); - this._input.next(); - resulting_string = '{{'; - resulting_string += this._input.readUntil(/[\s}]/g); - return this._create_token(TOKEN.TAG_OPEN, resulting_string); + resulting_string = this._input.readUntil(/[\s}]/g); + token = this._create_token(TOKEN.TAG_OPEN, resulting_string); } } - return null; + return token; }; -Tokenizer.prototype._read_attribute = function(c, previous_token, open_token) { // jshint unused:false +Tokenizer.prototype._read_close = function(c, open_token) { + var resulting_string = null; + var token = null; + if (open_token) { + if (open_token.text[0] === '<' && (c === '>' || (c === '/' && this._input.peek(1) === '>'))) { + resulting_string = this._input.next(); + if (c === '/') { // for close tag "/>" + resulting_string += this._input.next(); + } + token = this._create_token(TOKEN.TAG_CLOSE, resulting_string); + } else if (open_token.text[0] === '{' && c === '}' && this._input.peek(1) === '}') { + this._input.next(); + this._input.next(); + token = this._create_token(TOKEN.TAG_CLOSE, '}}'); + } + } + + return token; +}; + +Tokenizer.prototype._read_attribute = function(c, previous_token, open_token) { + var token = null; + var resulting_string = ''; if (open_token && open_token.text[0] === '<') { + if (c === '=') { - return this._create_token(TOKEN.EQUALS, this._input.next()); + token = this._create_token(TOKEN.EQUALS, this._input.next()); } else if (c === '"' || c === "'") { var content = this._input.next(); var input_string = ''; @@ -217,26 +227,24 @@ Tokenizer.prototype._read_attribute = function(c, previous_token, open_token) { } } - return this._create_token(TOKEN.VALUE, content); - } - - var resulting_string = ''; - - if (c === '{' && this._input.peek(1) === '{') { - resulting_string = this._input.readUntilAfter(/}}/g); + token = this._create_token(TOKEN.VALUE, content); } else { - resulting_string = this._input.readUntil(/[\s=\/>]/g); - } - - if (resulting_string) { - if (previous_token.type === TOKEN.EQUALS) { - return this._create_token(TOKEN.VALUE, resulting_string); + if (c === '{' && this._input.peek(1) === '{') { + resulting_string = this._input.readUntilAfter(/}}/g); } else { - return this._create_token(TOKEN.ATTRIBUTE, resulting_string); + resulting_string = this._input.readUntil(/[\s=\/>]/g); + } + + if (resulting_string) { + if (previous_token.type === TOKEN.EQUALS) { + token = this._create_token(TOKEN.VALUE, resulting_string); + } else { + token = this._create_token(TOKEN.ATTRIBUTE, resulting_string); + } } } } - return null; + return token; }; Tokenizer.prototype._is_content_unformatted = function(tag_name) { @@ -244,9 +252,9 @@ Tokenizer.prototype._is_content_unformatted = function(tag_name) { // script and style tags should always be read as unformatted content // finally content_unformatted and unformatted element contents are unformatted return this._options.void_elements.indexOf(tag_name) === -1 && - (tag_name === 'script' || tag_name === 'style' || - this._options.content_unformatted.indexOf(tag_name) !== -1 || - this._options.unformatted.indexOf(tag_name) !== -1); + (tag_name === 'script' || tag_name === 'style' || + this._options.content_unformatted.indexOf(tag_name) !== -1 || + this._options.unformatted.indexOf(tag_name) !== -1); }; @@ -269,7 +277,7 @@ Tokenizer.prototype._read_raw_content = function(previous_token, open_token) { / }; Tokenizer.prototype._read_content_word = function() { - // if we get here and we see handlebars treat them as a + // if we get here and we see handlebars treat them as plain text var resulting_string = this._input.readUntil(this._word_pattern); if (resulting_string) { return this._create_token(TOKEN.TEXT, resulting_string); diff --git a/test/data/html/tests.js b/test/data/html/tests.js index 072525ea..8769cfee 100644 --- a/test/data/html/tests.js +++ b/test/data/html/tests.js @@ -974,7 +974,7 @@ exports.test_data = { { unchanged: '\n' }, { unchanged: '\n \n \n' }, { input: '\n ', output: '\n' }, - { input: '
\n
' , output: '
\n
' }, + { input: '
\n
', output: '
\n
' }, { input: '\n ', output: '\n' }, { input: '\n ', output: '\n' }, { input: '\n ', output: '\n' },