This commit is contained in:
Liam Newman 2018-07-26 14:11:57 -07:00
parent 9fb8d25496
commit ebc310f470
32 changed files with 730 additions and 432 deletions

View File

@ -5,4 +5,3 @@
import sys
from cssbeautifier import main
sys.exit(main())

View File

@ -106,6 +106,7 @@ Rarely needed options:
else:
return 0
def main():
argv = sys.argv[1:]
@ -160,7 +161,6 @@ def main():
elif opt in ('--space-around-combinator'):
css_options.space_around_combinator = True
if not file:
file = '-'
@ -190,11 +190,11 @@ def main():
f.write(pretty)
except TypeError:
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing six dependency.
# it is the only way to do this without having setup.py
# fail on a missing six dependency.
six = __import__("six")
f.write(six.u(pretty))
except Exception as ex:
print(ex, file=sys.stderr)
return 1

View File

@ -39,6 +39,7 @@ whitespacePattern = re.compile(r"(?:\s|\n)+")
# WORD_RE = re.compile("[\w$\-_]")
def default_options():
    """Create and return a fresh BeautifierOptions populated with defaults."""
    opts = BeautifierOptions()
    return opts
@ -105,8 +106,10 @@ class Beautifier:
import jsbeautifier.core.acorn as acorn
self.lineBreak = acorn.lineBreak
self.allLineBreaks = acorn.allLineBreaks
self.comment_pattern = re.compile(acorn.six.u(r"\/\/(?:[^\n\r\u2028\u2029]*)"))
self.block_comment_pattern = re.compile(r"\/\*(?:[\s\S]*?)((?:\*\/)|$)")
self.comment_pattern = re.compile(
acorn.six.u(r"\/\/(?:[^\n\r\u2028\u2029]*)"))
self.block_comment_pattern = re.compile(
r"\/\*(?:[\s\S]*?)((?:\*\/)|$)")
if not source_text:
source_text = ''
@ -133,21 +136,22 @@ class Beautifier:
self.opts.eol = self.opts.eol.replace('\\r', '\r').replace('\\n', '\n')
# HACK: newline parsing inconsistent. This brute force normalizes the input newlines.
# HACK: newline parsing inconsistent. This brute force normalizes the
# input newlines.
self.source_text = re.sub(self.allLineBreaks, '\n', source_text)
# https://developer.mozilla.org/en-US/docs/Web/CSS/At-rule
# also in CONDITIONAL_GROUP_RULE below
self.NESTED_AT_RULE = [ \
"@page", \
"@font-face", \
"@keyframes", \
"@media", \
"@supports", \
self.NESTED_AT_RULE = [
"@page",
"@font-face",
"@keyframes",
"@media",
"@supports",
"@document"]
self.CONDITIONAL_GROUP_RULE = [ \
"@media", \
"@supports", \
self.CONDITIONAL_GROUP_RULE = [
"@media",
"@supports",
"@document"]
m = re.search("^[\t ]*", self.source_text)
@ -206,7 +210,10 @@ class Beautifier:
return False
def beautify(self):
printer = Printer(self.indentChar, self.indentSize, self.baseIndentString)
printer = Printer(
self.indentChar,
self.indentSize,
self.baseIndentString)
self.output = printer.output
self.input = InputScanner(self.source_text)
@ -236,7 +243,9 @@ class Beautifier:
# minified code is being beautified.
output.add_new_line()
input.back()
printer.print_string(input.readWhile(self.block_comment_pattern))
printer.print_string(
input.readWhile(
self.block_comment_pattern))
# Ensures any new lines following the comment are preserved
self.eatWhitespace(True)
@ -263,10 +272,12 @@ class Beautifier:
else:
printer.print_string(self.ch)
# strip trailing space, if present, for hash property check
variableOrRule = input.peekUntilAfter(re.compile(r"[: ,;{}()[\]\/='\"]"))
variableOrRule = input.peekUntilAfter(
re.compile(r"[: ,;{}()[\]\/='\"]"))
if variableOrRule[-1] in ": ":
# we have a variable or pseudo-class, add it and insert one space before continuing
# we have a variable or pseudo-class, add it and
# insert one space before continuing
variableOrRule = self.eatString(": ")
if variableOrRule[-1].isspace():
variableOrRule = variableOrRule[:-1]
@ -305,7 +316,8 @@ class Beautifier:
self.eatWhitespace(True)
output.add_new_line()
# when entering conditional groups, only rulesets are allowed
# when entering conditional groups, only rulesets are
# allowed
if enteringConditionalGroup:
enteringConditionalGroup = False
insideRule = printer.indentLevel > printer.nestedLevel
@ -341,7 +353,8 @@ class Beautifier:
# sass/less parent reference don't use a space
# sass nested pseudo-class don't use a space
# preserve space before pseudoclasses/pseudoelements, as it means "in any child"
# preserve space before pseudoclasses/pseudoelements, as it
# means "in any child"
if input.lookBack(' '):
output.space_before_token = True
if input.peek() == ":":

View File

@ -23,6 +23,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class BeautifierOptions:
def __init__(self):
self.indent_size = 4
@ -43,8 +44,7 @@ class BeautifierOptions:
self.space_around_selector_separator = False
def __repr__(self):
return \
"""indent_size = %d
return """indent_size = %d
indent_char = [%s]
indent_with_tabs = [%s]
preserve_newlines = [%s]
@ -52,6 +52,11 @@ separate_selectors_newline = [%s]
end_with_newline = [%s]
newline_between_rules = [%s]
space_around_combinator = [%s]
""" % (self.indent_size, self.indent_char, self.indent_with_tabs, self.preserve_newlines,
self.selector_separator_newline, self.end_with_newline, self.newline_between_rules,
""" % (self.indent_size,
self.indent_char,
self.indent_with_tabs,
self.preserve_newlines,
self.selector_separator_newline,
self.end_with_newline,
self.newline_between_rules,
self.space_around_combinator)

View File

@ -9,8 +9,10 @@ try:
except ImportError:
import profile
def run():
    """Run unittest auto-discovery as if 'discover' was given on the CLI."""
    sys.argv += ['discover']
    unittest.main()
profile.run('run()')

View File

@ -3,9 +3,12 @@
import sys
import unittest
def run_tests():
    """Discover all jsbeautifier test modules and run them.

    Returns the unittest TestResult so callers can check wasSuccessful().
    """
    # Reconstructed post-commit form: the diff view showed both the old
    # single-line discover() call and the new wrapped one; keep only one.
    suite = unittest.TestLoader().discover(
        'jsbeautifier.tests', pattern="test*.py")
    return unittest.TextTestRunner(verbosity=2).run(suite)
if __name__ == "__main__":
sys.exit(not run_tests().wasSuccessful())

View File

@ -70,6 +70,7 @@ def beautify(string, opts = default_options() ):
b = Beautifier()
return b.beautify(string, opts)
def set_file_editorconfig_opts(filename, js_options):
from editorconfig import get_properties, EditorConfigError
try:
@ -87,7 +88,8 @@ def set_file_editorconfig_opts(filename, js_options):
if _ecoptions.get("max_line_length") == "off":
js_options.wrap_line_length = 0
else:
js_options.wrap_line_length = int(_ecoptions["max_line_length"])
js_options.wrap_line_length = int(
_ecoptions["max_line_length"])
if _ecoptions.get("insert_final_newline") == 'true':
js_options.end_with_newline = True
@ -117,7 +119,9 @@ def beautify_file(file_name, opts = default_options() ):
stream = sys.stdin
input_string = ''.join(stream.readlines())
except Exception:
print("Must pipe input or define at least one file.\n", file=sys.stderr)
print(
"Must pipe input or define at least one file.\n",
file=sys.stderr)
usage(sys.stderr)
raise
else:
@ -196,12 +200,15 @@ def mkdir_p(path):
raise Exception()
def isFileDifferent(filepath, expected):
    """Return True if the file at filepath does not contain exactly `expected`.

    Any failure to open or read the file (missing file, permission error,
    decode error) is treated as "different" and yields True.
    """
    try:
        # Use a context manager so the handle is closed deterministically;
        # the original left the open file object to the garbage collector.
        with io.open(filepath, 'rt', newline='') as f:
            # read() is equivalent to ''.join(f.readlines()) without the
            # intermediate list.
            return f.read() != expected
    except Exception:
        # Catch Exception, not BaseException / bare except: KeyboardInterrupt
        # and SystemExit should still propagate to the caller.
        return True
@ -280,7 +287,6 @@ def main():
elif opt in ('--help', '--usage', '-h'):
return usage()
if not file:
file = '-'
@ -325,11 +331,11 @@ def main():
f.write(pretty)
except TypeError:
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing six dependency.
# it is the only way to do this without having setup.py
# fail on a missing six dependency.
six = __import__("six")
f.write(six.u(pretty))
except Exception as ex:
print(ex, file=sys.stderr)
return 1
@ -337,5 +343,6 @@ def main():
# Success
return 0
if __name__ == "__main__":
main()

View File

@ -12,7 +12,8 @@ import re
# https://github.com/marijnh/acorn.git
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing six dependency.
# it is the only way to do this without having setup.py fail on a missing
# six dependency.
six = __import__("six")
# ## Character categories
@ -22,7 +23,8 @@ six = __import__("six")
# are only applied when a character is found to actually have a
# code point above 128.
_nonASCIIwhitespace = re.compile(six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]"))
_nonASCIIwhitespace = re.compile(
six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]"))
_baseASCIIidentifierStartChars = six.u(r"\x24\x40\x41-\x5a\x5f\x61-\x7a")
_nonASCIIidentifierStartChars = six.u(r"\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12
c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\u
fb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc")
_baseASCIIidentifierChars = six.u(r"\x24\x30-\x39\x41-\x5a\x5f\x61-\x7a")
@ -30,10 +32,27 @@ _nonASCIIidentifierChars = six.u(r"\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf
#_nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]")
#_nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")
_identifierStart = re.compile("[" + _baseASCIIidentifierStartChars + _nonASCIIidentifierStartChars + "]")
_identifierChars = re.compile("[" + _baseASCIIidentifierChars + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")
_identifierStart = re.compile(
"[" +
_baseASCIIidentifierStartChars +
_nonASCIIidentifierStartChars +
"]")
_identifierChars = re.compile(
"[" +
_baseASCIIidentifierChars +
_nonASCIIidentifierStartChars +
_nonASCIIidentifierChars +
"]")
identifier = re.compile("[" + _baseASCIIidentifierStartChars + _nonASCIIidentifierStartChars + "][" + _baseASCIIidentifierChars + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]*")
identifier = re.compile(
"[" +
_baseASCIIidentifierStartChars +
_nonASCIIidentifierStartChars +
"][" +
_baseASCIIidentifierChars +
_nonASCIIidentifierStartChars +
_nonASCIIidentifierChars +
"]*")
# Whether a single character denotes a newline.
@ -58,10 +77,13 @@ def isIdentifierStart(code):
# return code == 95 # permit _ (95)
# if code < 123:
# return True # 97 through 123 are lowercase letters
# return code >= 0xaa and _nonASCIIidentifierStart.match(six.unichr(code)) != None
# return code >= 0xaa and _nonASCIIidentifierStart.match(six.unichr(code))
# != None
return bool(_identifierStart.match(six.unichr(code)))
# Test whether a given character is part of an identifier.
def isIdentifierChar(code):
# if code < 48:
# return code == 36
@ -75,5 +97,6 @@ def isIdentifierChar(code):
# return code == 95
# if code < 123:
# return True
# return code >= 0xaa and _nonASCIIidentifier.match(six.unichr(code)) != None
# return code >= 0xaa and _nonASCIIidentifier.match(six.unichr(code)) !=
# None
return bool(_identifierChars.match(six.unichr(code)))

View File

@ -56,12 +56,13 @@ class InputScanner:
def test(self, pattern, index=0):
index += self.__position
return index >= 0 and index < self.__input_length and bool(pattern.match(self.__input, index))
return index >= 0 and index < self.__input_length and bool(
pattern.match(self.__input, index))
def testChar(self, pattern, index=0):
    """Test a one-character regex match at an offset from the current position.

    Returns a truthy match object when the character at `index` exists and
    matches `pattern`; otherwise a falsy value (False or None).
    """
    # Reconstructed post-commit form: the diff view showed both the old
    # `val != None` line and the corrected `val is not None` line; keep
    # only the identity comparison (PEP 8: comparisons to None use `is`).
    val = self.peek(index)
    return val is not None and pattern.match(val)
def match(self, pattern):
pattern_match = None

View File

@ -29,6 +29,8 @@ import copy
# mergeOpts(obj, 'b')
#
# Returns: {a: 2, b: {a: 2}}
def mergeOpts(options, childFieldName):
finalOpts = copy.copy(options)

View File

@ -24,7 +24,10 @@
import re
# Using object instead of string to allow for later expansion of info about each line
# Using object instead of string to allow for later expansion of info
# about each line
class OutputLine:
def __init__(self, parent):
self.__parent = parent
@ -41,7 +44,8 @@ class OutputLine:
return self.__empty
def set_indent(self, level):
self.__character_count = self.__parent.baseIndentLength + level * self.__parent.indent_length
self.__character_count = self.__parent.baseIndentLength + \
level * self.__parent.indent_length
self.__indent_count = level
def last(self):
@ -55,7 +59,6 @@ class OutputLine:
self.__character_count += len(input)
self.__empty = False
def pop(self):
item = None
if not self.is_empty():
@ -134,8 +137,8 @@ class Output:
# Never indent your first output indent at the start of the file
if len(self.lines) > 1:
while level >= len(self.indent_cache):
self.indent_cache.append(self.indent_cache[-1] + self.indent_string)
self.indent_cache.append(
self.indent_cache[-1] + self.indent_string)
self.current_line.set_indent(level)
return True
@ -162,7 +165,8 @@ class Output:
def trim(self, eat_newlines=False):
self.current_line.trim()
while eat_newlines and len(self.lines) > 1 and self.current_line.is_empty():
while eat_newlines and len(
self.lines) > 1 and self.current_line.is_empty():
self.lines.pop()
self.current_line = self.lines[-1]
self.current_line.trim()

View File

@ -22,8 +22,16 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class Token:
def __init__(self, type, text, newlines = 0, whitespace_before = '', mode = None, parent = None):
def __init__(
self,
type,
text,
newlines=0,
whitespace_before='',
mode=None,
parent=None):
self.type = type
self.text = text
self.comments_before = []

View File

@ -31,9 +31,11 @@ from .options import BeautifierOptions
from ..core.options import mergeOpts
from ..core.output import Output
def default_options():
return BeautifierOptions()
class BeautifierFlags:
def __init__(self, mode):
self.mode = mode
@ -74,18 +76,26 @@ OPERATOR_POSITION = {
'after_newline': 'after-newline',
'preserve_newline': 'preserve-newline'
}
OPERATOR_POSITION_BEFORE_OR_PRESERVE = [OPERATOR_POSITION['before_newline'], OPERATOR_POSITION['preserve_newline']]
OPERATOR_POSITION_BEFORE_OR_PRESERVE = [
OPERATOR_POSITION['before_newline'],
OPERATOR_POSITION['preserve_newline']]
def sanitizeOperatorPosition(opPosition):
if not opPosition:
return OPERATOR_POSITION['before_newline']
elif opPosition not in OPERATOR_POSITION.values():
raise ValueError("Invalid Option Value: The option 'operator_position' must be one of the following values\n" +
str(OPERATOR_POSITION.values()) +
"\nYou passed in: '" + opPosition + "'")
raise ValueError(
"Invalid Option Value: The option 'operator_position' must be one of the following values\n" +
str(
OPERATOR_POSITION.values()) +
"\nYou passed in: '" +
opPosition +
"'")
return opPosition
class MODE:
BlockStatement, Statement, ObjectLiteral, ArrayLiteral, \
ForInitializer, Conditional, Expression = range(7)
@ -126,7 +136,6 @@ class Beautifier:
self.tokens = []
self.token_pos = 0
# force opts.space_after_anon_function to true if opts.jslint_happy
if self.opts.jslint_happy:
self.opts.space_after_anon_function = True
@ -138,7 +147,8 @@ class Beautifier:
if self.opts.eol == 'auto':
self.opts.eol = '\n'
if self.acorn.lineBreak.search(js_source_text or ''):
self.opts.eol = self.acorn.lineBreak.search(js_source_text).group()
self.opts.eol = self.acorn.lineBreak.search(
js_source_text).group()
self.opts.eol = self.opts.eol.replace('\\r', '\r').replace('\\n', '\n')
@ -149,7 +159,7 @@ class Beautifier:
self.last_last_text = '' # pre-last token text
preindent_index = 0
if not js_source_text == None and len(js_source_text) > 0:
if js_source_text is not None and len(js_source_text) > 0:
while preindent_index < len(js_source_text) and \
js_source_text[preindent_index] in [' ', '\t']:
self.baseIndentString += js_source_text[preindent_index]
@ -157,20 +167,18 @@ class Beautifier:
js_source_text = js_source_text[preindent_index:]
self.output = Output(self.indent_string, self.baseIndentString)
# If testing the ignore directive, start with output disable set to true
# If testing the ignore directive, start with output disable set to
# true
self.output.raw = self.opts.test_output_raw
self.set_mode(MODE.BlockStatement)
return js_source_text
def beautify(self, s, opts=None):
if opts != None:
if opts is not None:
opts = mergeOpts(opts, 'js')
self.opts = copy.copy(opts)
# Compat with old form
if self.opts.brace_style == 'collapse-preserve-inline':
self.opts.brace_style = 'collapse,preserve-inline'
@ -190,7 +198,8 @@ class Beautifier:
# validate each brace_style that's not a preserve-inline
# (results in very similar validation as js version)
if bs not in ['expand', 'collapse', 'end-expand', 'none']:
raise(Exception('opts.brace_style must be "expand", "collapse", "end-expand", or "none".'))
raise(Exception(
'opts.brace_style must be "expand", "collapse", "end-expand", or "none".'))
self.opts.brace_style = bs
s = self.blank_state(s)
@ -216,11 +225,12 @@ class Beautifier:
TOKEN.EOF: self.handle_eof
}
self.tokens = Tokenizer(input, self.opts, self.indent_string).tokenize()
self.tokens = Tokenizer(
input, self.opts, self.indent_string).tokenize()
self.token_pos = 0
current_token = self.get_token()
while current_token != None:
while current_token is not None:
self.handlers[current_token.type](current_token)
self.last_last_text = self.flags.last_text
@ -229,23 +239,26 @@ class Beautifier:
self.token_pos += 1
current_token = self.get_token()
sweet_code = self.output.get_code(self.opts.end_with_newline, self.opts.eol)
sweet_code = self.output.get_code(
self.opts.end_with_newline, self.opts.eol)
return sweet_code
def handle_whitespace_and_comments(self, local_token, preserve_statement_flags = False):
def handle_whitespace_and_comments(
self, local_token, preserve_statement_flags=False):
newlines = local_token.newlines
keep_whitespace = self.opts.keep_array_indentation and self.is_array(self.flags.mode)
keep_whitespace = self.opts.keep_array_indentation and self.is_array(
self.flags.mode)
for comment_token in local_token.comments_before:
# The cleanest handling of inline comments is to treat them as though they aren't there.
# Just continue formatting and the behavior should be logical.
# Also ignore unknown tokens. Again, this should result in better behavior.
self.handle_whitespace_and_comments(comment_token, preserve_statement_flags)
self.handlers[comment_token.type](comment_token, preserve_statement_flags)
# Also ignore unknown tokens. Again, this should result in better
# behavior.
self.handle_whitespace_and_comments(
comment_token, preserve_statement_flags)
self.handlers[comment_token.type](
comment_token, preserve_statement_flags)
if keep_whitespace:
for i in range(newlines):
@ -259,7 +272,6 @@ class Beautifier:
for i in range(1, newlines):
self.print_newline(True, preserve_statement_flags)
def unpack(self, source, evalcode=False):
import jsbeautifier.unpackers as unpackers
try:
@ -268,23 +280,41 @@ class Beautifier:
return source
def is_special_word(self, s):
return s in ['case', 'return', 'do', 'if', 'throw', 'else', 'await', 'break', 'continue', 'async']
return s in [
'case',
'return',
'do',
'if',
'throw',
'else',
'await',
'break',
'continue',
'async']
def is_array(self, mode):
return mode == MODE.ArrayLiteral
def is_expression(self, mode):
return mode in [MODE.Expression, MODE.ForInitializer, MODE.Conditional]
_newline_restricted_tokens = [
'async',
'await',
'break',
'continue',
'return',
'throw',
'yield']
_newline_restricted_tokens = ['async', 'await', 'break', 'continue', 'return', 'throw', 'yield']
def allow_wrap_or_preserved_newline(self, current_token, force_linewrap = False):
def allow_wrap_or_preserved_newline(
self, current_token, force_linewrap=False):
# never wrap the first token of a line.
if self.output.just_added_newline():
return
shouldPreserveOrForce = (self.opts.preserve_newlines and current_token.wanted_newline) or force_linewrap
shouldPreserveOrForce = (
self.opts.preserve_newlines and current_token.wanted_newline) or force_linewrap
operatorLogicApplies = self.flags.last_text in Tokenizer.positionable_operators or current_token.text in Tokenizer.positionable_operators
if operatorLogicApplies:
@ -299,21 +329,24 @@ class Beautifier:
# These tokens should never have a newline inserted between
# them and the following expression.
return
proposed_line_length = self.output.current_line.get_character_count() + len(current_token.text)
proposed_line_length = self.output.current_line.get_character_count() + \
len(current_token.text)
if self.output.space_before_token:
proposed_line_length += 1
if proposed_line_length >= self.opts.wrap_line_length:
self.print_newline(preserve_statement_flags=True)
def print_newline(self, force_newline = False, preserve_statement_flags = False):
def print_newline(
self,
force_newline=False,
preserve_statement_flags=False):
if not preserve_statement_flags:
if self.flags.last_text != ';' and self.flags.last_text != ',' and self.flags.last_text != '=' and (self.last_type != TOKEN.OPERATOR or self.flags.last_text == '--' or self.flags.last_text == '++'):
if self.flags.last_text != ';' and self.flags.last_text != ',' and self.flags.last_text != '=' and (
self.last_type != TOKEN.OPERATOR or self.flags.last_text == '--' or self.flags.last_text == '++'):
next_token = self.get_token(1)
while (self.flags.mode == MODE.Statement and
not (self.flags.if_block and next_token and next_token.type == TOKEN.RESERVED and next_token.text == 'else') and
not self.flags.do_block):
while (self.flags.mode == MODE.Statement and not (self.flags.if_block and next_token and next_token.type ==
TOKEN.RESERVED and next_token.text == 'else') and not self.flags.do_block):
self.restore_mode()
if self.output.add_new_line(force_newline):
@ -322,13 +355,13 @@ class Beautifier:
def print_token_line_indentation(self, current_token):
if self.output.just_added_newline():
line = self.output.current_line
if self.opts.keep_array_indentation and self.is_array(self.flags.mode) and current_token.wanted_newline:
if self.opts.keep_array_indentation and self.is_array(
self.flags.mode) and current_token.wanted_newline:
line.push(current_token.whitespace_before)
self.output.space_before_token = False
elif self.output.set_indent(self.flags.indentation_level):
self.flags.line_indent_level = self.flags.indentation_level
def print_token(self, current_token, s=None):
if self.output.raw:
self.output.add_raw_token(current_token)
@ -350,18 +383,18 @@ class Beautifier:
self.output.add_token(',')
self.output.space_before_token = True
if s == None:
if s is None:
s = current_token.text
self.print_token_line_indentation(current_token)
self.output.add_token(s)
def indent(self):
self.flags.indentation_level += 1
def deindent(self):
allow_deindent = self.flags.indentation_level > 0 and ((self.flags.parent == None) or self.flags.indentation_level > self.flags.parent.indentation_level)
allow_deindent = self.flags.indentation_level > 0 and (
(self.flags.parent is None) or self.flags.indentation_level > self.flags.parent.indentation_level)
if allow_deindent:
self.flags.indentation_level -= 1
@ -374,7 +407,9 @@ class Beautifier:
self.previous_flags = BeautifierFlags(mode)
self.flags = BeautifierFlags(mode)
self.flags.apply_base(self.previous_flags, self.output.just_added_newline())
self.flags.apply_base(
self.previous_flags,
self.output.just_added_newline())
self.flags.start_line_index = self.output.get_line_number()
def restore_mode(self):
@ -384,26 +419,36 @@ class Beautifier:
if self.previous_flags.mode == MODE.Statement:
remove_redundant_indentation(self.output, self.previous_flags)
def start_of_object_property(self):
return self.flags.parent.mode == MODE.ObjectLiteral and self.flags.mode == MODE.Statement and \
((self.flags.last_text == ':' and self.flags.ternary_depth == 0) or (self.last_type == TOKEN.RESERVED and self.flags.last_text in ['get', 'set']))
return self.flags.parent.mode == MODE.ObjectLiteral and self.flags.mode == MODE.Statement and (
(self.flags.last_text == ':' and self.flags.ternary_depth == 0) or (
self.last_type == TOKEN.RESERVED and self.flags.last_text in [
'get', 'set']))
def start_of_statement(self, current_token):
start = False
start = start or (self.last_type == TOKEN.RESERVED and self.flags.last_text in ['var', 'let', 'const'] and current_token.type == TOKEN.WORD)
start = start or (self.last_type == TOKEN.RESERVED and self.flags.last_text== 'do')
start = start or (self.last_type == TOKEN.RESERVED and self.flags.last_text in self._newline_restricted_tokens and not current_token.wanted_newline)
start = start or (self.last_type == TOKEN.RESERVED and self.flags.last_text == 'else' \
and not (current_token.type == TOKEN.RESERVED and current_token.text == 'if' and not len(current_token.comments_before)))
start = start or (self.last_type == TOKEN.END_EXPR and (self.previous_flags.mode == MODE.ForInitializer or self.previous_flags.mode == MODE.Conditional))
start = start or (self.last_type == TOKEN.WORD and self.flags.mode == MODE.BlockStatement \
start = start or (self.last_type == TOKEN.RESERVED and self.flags.last_text in [
'var', 'let', 'const'] and current_token.type == TOKEN.WORD)
start = start or (
self.last_type == TOKEN.RESERVED and self.flags.last_text == 'do')
start = start or (
self.last_type == TOKEN.RESERVED and self.flags.last_text in self._newline_restricted_tokens and not current_token.wanted_newline)
start = start or (
self.last_type == TOKEN.RESERVED and self.flags.last_text == 'else' and not (
current_token.type == TOKEN.RESERVED and current_token.text == 'if' and not len(
current_token.comments_before)))
start = start or (self.last_type == TOKEN.END_EXPR and (
self.previous_flags.mode == MODE.ForInitializer or self.previous_flags.mode == MODE.Conditional))
start = start or (self.last_type == TOKEN.WORD and self.flags.mode == MODE.BlockStatement
and not self.flags.in_case
and not (current_token.text == '--' or current_token.text == '++')
and self.last_last_text != 'function'
and current_token.type != TOKEN.WORD and current_token.type != TOKEN.RESERVED)
start = start or (self.flags.mode == MODE.ObjectLiteral and \
((self.flags.last_text == ':' and self.flags.ternary_depth == 0) or (self.last_type == TOKEN.RESERVED and self.flags.last_text in ['get', 'set'])))
start = start or (
self.flags.mode == MODE.ObjectLiteral and (
(self.flags.last_text == ':' and self.flags.ternary_depth == 0) or (
self.last_type == TOKEN.RESERVED and self.flags.last_text in [
'get', 'set'])))
if (start):
self.set_mode(MODE.Statement)
@ -415,7 +460,9 @@ class Beautifier:
# If starting a new statement with [if, for, while, do], push to a new line.
# if (a) if (b) if(c) d(); else e(); else f();
if not self.start_of_object_property():
self.allow_wrap_or_preserved_newline(current_token, current_token.type == TOKEN.RESERVED and current_token.text in ['do', 'for', 'if', 'while'])
self.allow_wrap_or_preserved_newline(
current_token, current_token.type == TOKEN.RESERVED and current_token.text in [
'do', 'for', 'if', 'while'])
return True
else:
@ -428,7 +475,6 @@ class Beautifier:
else:
return self.tokens[index]
def handle_start_expr(self, current_token):
if self.start_of_statement(current_token):
# The conditional starts the statement if appropriate.
@ -453,13 +499,18 @@ class Beautifier:
if self.is_array(self.flags.mode):
if self.flags.last_text == '[' or (
self.flags.last_text == ',' and (self.last_last_text == ']' or self.last_last_text == '}')):
self.flags.last_text == ',' and (
self.last_last_text == ']' or self.last_last_text == '}')):
# ], [ goes to a new line
# }, [ goes to a new line
if not self.opts.keep_array_indentation:
self.print_newline()
if self.last_type not in [TOKEN.START_EXPR, TOKEN.END_EXPR, TOKEN.WORD, TOKEN.OPERATOR]:
if self.last_type not in [
TOKEN.START_EXPR,
TOKEN.END_EXPR,
TOKEN.WORD,
TOKEN.OPERATOR]:
self.output.space_before_token = True
else:
@ -471,7 +522,8 @@ class Beautifier:
self.output.space_before_token = self.opts.space_before_conditional
next_mode = MODE.Conditional
elif self.flags.last_word in ['await', 'async']:
# Should be a space between await and an IIFE, or async and an arrow function
# Should be a space between await and an IIFE, or async and
# an arrow function
self.output.space_before_token = True
elif self.flags.last_text == 'import' and current_token.whitespace_before == '':
self.output.space_before_token = False
@ -493,21 +545,26 @@ class Beautifier:
# )
self.allow_wrap_or_preserved_newline(current_token)
# function() vs function (), typeof() vs typeof ()
# function*() vs function* (), yield*() vs yield* ()
if (self.last_type == TOKEN.RESERVED and (self.flags.last_word == 'function' or self.flags.last_word == 'typeof')) or \
(self.flags.last_text == '*' and (
self.last_last_text in ['function', 'yield'] or
(self.flags.mode == MODE.ObjectLiteral and self.last_last_text in ['{', ',']))):
if (
self.last_type == TOKEN.RESERVED and (
self.flags.last_word == 'function' or self.flags.last_word == 'typeof')) or (
self.flags.last_text == '*' and (
self.last_last_text in [
'function', 'yield'] or (
self.flags.mode == MODE.ObjectLiteral and self.last_last_text in [
'{', ',']))):
self.output.space_before_token = self.opts.space_after_anon_function
if self.flags.last_text == ';' or self.last_type == TOKEN.START_BLOCK:
self.print_newline()
elif self.last_type in [TOKEN.END_EXPR, TOKEN.START_EXPR, TOKEN.END_BLOCK, TOKEN.COMMA] or self.flags.last_text == '.':
# do nothing on (( and )( and ][ and ]( and .(
# TODO: Consider whether forcing this is required. Review failing tests when removed.
self.allow_wrap_or_preserved_newline(current_token, current_token.wanted_newline)
# TODO: Consider whether forcing this is required. Review failing
# tests when removed.
self.allow_wrap_or_preserved_newline(
current_token, current_token.wanted_newline)
self.set_mode(next_mode)
self.print_token(current_token)
@ -515,11 +572,10 @@ class Beautifier:
if self.opts.space_in_paren:
self.output.space_before_token = True
# In all cases, if we newline while inside an expression it should be indented.
# In all cases, if we newline while inside an expression it should be
# indented.
self.indent()
def handle_end_expr(self, current_token):
# statements inside expressions are not valid syntax, but...
# statements must all be closed when their container closes
@ -529,7 +585,9 @@ class Beautifier:
self.handle_whitespace_and_comments(current_token)
if self.flags.multiline_frame:
self.allow_wrap_or_preserved_newline(current_token, current_token.text == ']' and self.is_array(self.flags.mode) and not self.opts.keep_array_indentation)
self.allow_wrap_or_preserved_newline(
current_token, current_token.text == ']' and self.is_array(
self.flags.mode) and not self.opts.keep_array_indentation)
if self.opts.space_in_paren:
if self.last_type == TOKEN.START_EXPR and not self.opts.space_in_empty_paren:
@ -557,15 +615,26 @@ class Beautifier:
def handle_start_block(self, current_token):
self.handle_whitespace_and_comments(current_token)
# Check if this is a BlockStatement that should be treated as a ObjectLiteral
# Check if this is a BlockStatement that should be treated as a
# ObjectLiteral
next_token = self.get_token(1)
second_token = self.get_token(2)
if second_token != None and \
((second_token.text in [':', ','] and next_token.type in [TOKEN.STRING, TOKEN.WORD, TOKEN.RESERVED]) \
or (next_token.text in ['get', 'set', '...'] and second_token.type in [TOKEN.WORD, TOKEN.RESERVED])):
if second_token is not None and (
(second_token.text in [
':',
','] and next_token.type in [
TOKEN.STRING,
TOKEN.WORD,
TOKEN.RESERVED]) or (
next_token.text in [
'get',
'set',
'...'] and second_token.type in [
TOKEN.WORD,
TOKEN.RESERVED])):
# We don't support TypeScript,but we didn't break it for a very long time.
# We'll try to keep not breaking it.
if not self.last_last_text in ['class','interface']:
if self.last_last_text not in ['class', 'interface']:
self.set_mode(MODE.ObjectLiteral)
else:
self.set_mode(MODE.BlockStatement)
@ -577,12 +646,14 @@ class Beautifier:
# Detecting shorthand function syntax is difficult by scanning forward,
# so check the surrounding context.
# If the block is being returned, imported, export default, passed as arg,
# assigned with = or assigned in a nested object, treat as an ObjectLiteral.
# assigned with = or assigned in a nested object, treat as an
# ObjectLiteral.
self.set_mode(MODE.ObjectLiteral)
else:
self.set_mode(MODE.BlockStatement)
empty_braces = (not next_token == None) and len(next_token.comments_before) == 0 and next_token.text == '}'
empty_braces = (next_token is not None) and len(
next_token.comments_before) == 0 and next_token.text == '}'
empty_anonymous_function = empty_braces and self.flags.last_word == 'function' and \
self.last_type == TOKEN.END_EXPR
@ -598,21 +669,23 @@ class Beautifier:
if check_token.wanted_newline:
self.flags.inline_frame = False
do_loop = (check_token.type != TOKEN.EOF and
not (check_token.type == TOKEN.END_BLOCK and check_token.opened == current_token))
do_loop = (
check_token.type != TOKEN.EOF and not (
check_token.type == TOKEN.END_BLOCK and check_token.opened == current_token))
if (self.opts.brace_style == 'expand' or \
(self.opts.brace_style == 'none' and current_token.wanted_newline)) and \
not self.flags.inline_frame:
if self.last_type != TOKEN.OPERATOR and \
(empty_anonymous_function or
self.last_type == TOKEN.EQUALS or
(self.last_type == TOKEN.RESERVED and self.is_special_word(self.flags.last_text) and self.flags.last_text != 'else')):
if (self.opts.brace_style == 'expand' or (self.opts.brace_style ==
'none' and current_token.wanted_newline)) and not self.flags.inline_frame:
if self.last_type != TOKEN.OPERATOR and (
empty_anonymous_function or self.last_type == TOKEN.EQUALS or (
self.last_type == TOKEN.RESERVED and self.is_special_word(
self.flags.last_text) and self.flags.last_text != 'else')):
self.output.space_before_token = True
else:
self.print_newline(preserve_statement_flags=True)
else: # collapse || inline_frame
if self.is_array(self.previous_flags.mode) and (self.last_type == TOKEN.START_EXPR or self.last_type == TOKEN.COMMA):
if self.is_array(
self.previous_flags.mode) and (
self.last_type == TOKEN.START_EXPR or self.last_type == TOKEN.COMMA):
# if we're preserving inline,
# allow newline between comma and next brace.
if self.flags.inline_frame:
@ -632,7 +705,6 @@ class Beautifier:
self.print_token(current_token)
self.indent()
def handle_end_block(self, current_token):
# statements must all be closed when their container closes
self.handle_whitespace_and_comments(current_token)
@ -642,7 +714,8 @@ class Beautifier:
empty_braces = self.last_type == TOKEN.START_BLOCK
if self.flags.inline_frame and not empty_braces: # try inline_frame (only set if opt.braces-preserve-inline) first
# try inline_frame (only set if opt.braces-preserve-inline) first
if self.flags.inline_frame and not empty_braces:
self.output.space_before_token = True
elif self.opts.brace_style == 'expand':
if not empty_braces:
@ -650,7 +723,8 @@ class Beautifier:
else:
# skip {}
if not empty_braces:
if self.is_array(self.flags.mode) and self.opts.keep_array_indentation:
if self.is_array(
self.flags.mode) and self.opts.keep_array_indentation:
self.opts.keep_array_indentation = False
self.print_newline()
self.opts.keep_array_indentation = True
@ -660,10 +734,10 @@ class Beautifier:
self.restore_mode()
self.print_token(current_token)
def handle_word(self, current_token):
if current_token.type == TOKEN.RESERVED:
if current_token.text in ['set', 'get'] and self.flags.mode != MODE.ObjectLiteral:
if current_token.text in [
'set', 'get'] and self.flags.mode != MODE.ObjectLiteral:
current_token.type = TOKEN.WORD
elif current_token.text in ['as', 'from'] and not self.flags.import_block:
current_token.type = TOKEN.WORD
@ -674,7 +748,8 @@ class Beautifier:
if self.start_of_statement(current_token):
# The conditional starts the statement if appropriate.
if self.last_type == TOKEN.RESERVED and self.flags.last_text in ['var', 'let', 'const'] and current_token.type == TOKEN.WORD:
if self.last_type == TOKEN.RESERVED and self.flags.last_text in [
'var', 'let', 'const'] and current_token.type == TOKEN.WORD:
self.flags.declaration_statement = True
elif current_token.wanted_newline and \
@ -687,7 +762,6 @@ class Beautifier:
else:
self.handle_whitespace_and_comments(current_token)
if self.flags.do_block and not self.flags.do_while:
if current_token.type == TOKEN.RESERVED and current_token.text == 'while':
# do {} ## while ()
@ -704,9 +778,11 @@ class Beautifier:
# if may be followed by else, or not
# Bare/inline ifs are tricky
# Need to unwind the modes correctly: if (a) if (b) c(); else d(); else e();
# Need to unwind the modes correctly: if (a) if (b) c(); else d(); else
# e();
if self.flags.if_block:
if (not self.flags.else_block) and (current_token.type == TOKEN.RESERVED and current_token.text == 'else'):
if (not self.flags.else_block) and (current_token.type ==
TOKEN.RESERVED and current_token.text == 'else'):
self.flags.else_block = True
else:
while self.flags.mode == MODE.Statement:
@ -714,7 +790,8 @@ class Beautifier:
self.flags.if_block = False
if current_token.type == TOKEN.RESERVED and (current_token.text == 'case' or (current_token.text == 'default' and self.flags.in_case_statement)):
if current_token.type == TOKEN.RESERVED and (current_token.text == 'case' or (
current_token.text == 'default' and self.flags.in_case_statement)):
self.print_newline()
if self.flags.case_body or self.opts.jslint_happy:
self.flags.case_body = False
@ -724,16 +801,21 @@ class Beautifier:
self.flags.in_case_statement = True
return
if self.last_type in [TOKEN.COMMA, TOKEN.START_EXPR, TOKEN.EQUALS, TOKEN.OPERATOR]:
if self.last_type in [
TOKEN.COMMA,
TOKEN.START_EXPR,
TOKEN.EQUALS,
TOKEN.OPERATOR]:
if not self.start_of_object_property():
self.allow_wrap_or_preserved_newline(current_token)
if current_token.type == TOKEN.RESERVED and current_token.text == 'function':
if (self.flags.last_text in ['}', ';'] or
(self.output.just_added_newline() and not (self.flags.last_text in ['(', '[', '{', ':', '=', ','] or self.last_type == TOKEN.OPERATOR))):
if (self.flags.last_text in ['}', ';'] or (self.output.just_added_newline() and not (
self.flags.last_text in ['(', '[', '{', ':', '=', ','] or self.last_type == TOKEN.OPERATOR))):
# make sure there is a nice clean space of at least one blank line
# before a new function definition, except in arrays
if not self.output.just_added_blankline() and len(current_token.comments_before) == 0:
if not self.output.just_added_blankline() and len(
current_token.comments_before) == 0:
self.print_newline()
self.print_newline(True)
@ -768,8 +850,8 @@ class Beautifier:
elif not (current_token.type == TOKEN.RESERVED and current_token.text in ['else', 'catch', 'finally', 'from']):
prefix = 'NEWLINE'
else:
if self.opts.brace_style in ['expand', 'end-expand'] or \
(self.opts.brace_style == 'none' and current_token.wanted_newline):
if self.opts.brace_style in ['expand', 'end-expand'] or (
self.opts.brace_style == 'none' and current_token.wanted_newline):
prefix = 'NEWLINE'
else:
prefix = 'SPACE'
@ -801,10 +883,11 @@ class Beautifier:
else:
prefix = 'NEWLINE'
if current_token.type == TOKEN.RESERVED and current_token.text in ['else', 'catch', 'finally']:
if ((not (self.last_type == TOKEN.END_BLOCK and self.previous_flags.mode == MODE.BlockStatement)) \
or self.opts.brace_style == 'expand' \
or self.opts.brace_style == 'end-expand' \
if current_token.type == TOKEN.RESERVED and current_token.text in [
'else', 'catch', 'finally']:
if ((not (self.last_type == TOKEN.END_BLOCK and self.previous_flags.mode == MODE.BlockStatement))
or self.opts.brace_style == 'expand'
or self.opts.brace_style == 'end-expand'
or (self.opts.brace_style == 'none' and current_token.wanted_newline)) \
and not self.flags.inline_frame:
self.print_newline()
@ -818,11 +901,17 @@ class Beautifier:
self.output.space_before_token = True
elif prefix == 'NEWLINE':
if self.last_type == TOKEN.RESERVED and self.is_special_word(self.flags.last_text):
if self.last_type == TOKEN.RESERVED and self.is_special_word(
self.flags.last_text):
# no newline between return nnn
self.output.space_before_token = True
elif self.last_type != TOKEN.END_EXPR:
if (self.last_type != TOKEN.START_EXPR or not (current_token.type == TOKEN.RESERVED and current_token.text in ['var', 'let', 'const'])) and self.flags.last_text != ':':
if (
self.last_type != TOKEN.START_EXPR or not (
current_token.type == TOKEN.RESERVED and current_token.text in [
'var',
'let',
'const'])) and self.flags.last_text != ':':
# no need to force newline on VAR -
# for (var x = 0...
if current_token.type == TOKEN.RESERVED and current_token.text == 'if' and self.flags.last_text == 'else':
@ -836,7 +925,6 @@ class Beautifier:
elif prefix == 'SPACE':
self.output.space_before_token = True
self.print_token(current_token)
self.flags.last_word = current_token.text
@ -850,7 +938,6 @@ class Beautifier:
elif current_token.text == 'from' and self.flags.import_block:
self.flags.import_block = False
def handle_semicolon(self, current_token):
if self.start_of_statement(current_token):
# The conditional starts the statement if appropriate.
@ -860,9 +947,8 @@ class Beautifier:
self.handle_whitespace_and_comments(current_token)
next_token = self.get_token(1)
while (self.flags.mode == MODE.Statement and
not (self.flags.if_block and next_token and next_token.type == TOKEN.RESERVED and next_token.text == 'else') and
not self.flags.do_block):
while (self.flags.mode == MODE.Statement and not (self.flags.if_block and next_token and next_token.type ==
TOKEN.RESERVED and next_token.text == 'else') and not self.flags.do_block):
self.restore_mode()
if self.flags.import_block:
@ -870,7 +956,6 @@ class Beautifier:
self.print_token(current_token)
def handle_string(self, current_token):
if self.start_of_statement(current_token):
# The conditional starts the statement if appropriate.
@ -889,7 +974,6 @@ class Beautifier:
self.print_token(current_token)
def handle_equals(self, current_token):
if self.start_of_statement(current_token):
# The conditional starts the statement if appropriate.
@ -897,16 +981,15 @@ class Beautifier:
else:
self.handle_whitespace_and_comments(current_token)
if self.flags.declaration_statement:
# just got an '=' in a var-line, different line breaking rules will apply
# just got an '=' in a var-line, different line breaking rules will
# apply
self.flags.declaration_assignment = True
self.output.space_before_token = True
self.print_token(current_token)
self.output.space_before_token = True
def handle_comma(self, current_token):
self.handle_whitespace_and_comments(current_token, True)
@ -923,7 +1006,8 @@ class Beautifier:
self.print_newline(preserve_statement_flags=True)
elif self.opts.comma_first:
# for comma-first, we want to allow a newline before the comma
# to turn into a newline after the comma, which we will fixup later
# to turn into a newline after the comma, which we will fixup
# later
self.allow_wrap_or_preserved_newline(current_token)
elif self.flags.mode == MODE.ObjectLiteral \
@ -939,13 +1023,12 @@ class Beautifier:
# to turn into a newline after the comma, which we will fixup later
self.allow_wrap_or_preserved_newline(current_token)
def handle_operator(self, current_token):
isGeneratorAsterisk = current_token.text == '*' and \
((self.last_type == TOKEN.RESERVED and self.flags.last_text in ['function', 'yield']) or
(self.last_type in [TOKEN.START_BLOCK, TOKEN.COMMA, TOKEN.END_BLOCK, TOKEN.SEMICOLON]))
isUnary = current_token.text in ['+', '-'] \
and (self.last_type in [TOKEN.START_BLOCK, TOKEN.START_EXPR, TOKEN.EQUALS, TOKEN.OPERATOR] \
and (self.last_type in [TOKEN.START_BLOCK, TOKEN.START_EXPR, TOKEN.EQUALS, TOKEN.OPERATOR]
or self.flags.last_text in Tokenizer.line_starters or self.flags.last_text == ',')
if self.start_of_statement(current_token):
@ -953,9 +1036,11 @@ class Beautifier:
pass
else:
preserve_statement_flags = not isGeneratorAsterisk
self.handle_whitespace_and_comments(current_token, preserve_statement_flags)
self.handle_whitespace_and_comments(
current_token, preserve_statement_flags)
if self.last_type == TOKEN.RESERVED and self.is_special_word(self.flags.last_text):
if self.last_type == TOKEN.RESERVED and self.is_special_word(
self.flags.last_text):
# return had a special handling in TK_WORD
self.output.space_before_token = True
self.print_token(current_token)
@ -990,7 +1075,8 @@ class Beautifier:
if current_token.text == ':':
if self.flags.ternary_depth == 0:
# Colon is invalid javascript outside of ternary and object, but do our best to guess what was meant.
# Colon is invalid javascript outside of ternary and object,
# but do our best to guess what was meant.
space_before = False
else:
self.flags.ternary_depth -= 1
@ -998,7 +1084,8 @@ class Beautifier:
elif current_token.text == '?':
self.flags.ternary_depth += 1
# let's handle the operator_position option prior to any conflicting logic
# let's handle the operator_position option prior to any conflicting
# logic
if (not isUnary) and (not isGeneratorAsterisk) and \
self.opts.preserve_newlines and current_token.text in Tokenizer.positionable_operators:
@ -1007,7 +1094,8 @@ class Beautifier:
isOtherColon = isColon and not in_ternary
if self.opts.operator_position == OPERATOR_POSITION['before_newline']:
# if the current token is : and it's not a ternary statement then we set space_before to false
# if the current token is : and it's not a ternary statement
# then we set space_before to false
self.output.space_before_token = not isOtherColon
self.print_token(current_token)
@ -1043,7 +1131,8 @@ class Beautifier:
# if we just added a newline, or the current token is : and it's not a ternary statement,
# then we set space_before to false
self.output.space_before_token = not (self.output.just_added_newline() or isOtherColon)
self.output.space_before_token = not (
self.output.just_added_newline() or isOtherColon)
self.print_token(current_token)
@ -1054,7 +1143,8 @@ class Beautifier:
self.allow_wrap_or_preserved_newline(current_token)
space_before = False
next_token = self.get_token(1)
space_after = next_token and next_token.type in [TOKEN.WORD,TOKEN.RESERVED]
space_after = next_token and next_token.type in [
TOKEN.WORD, TOKEN.RESERVED]
elif current_token.text == '...':
self.allow_wrap_or_preserved_newline(current_token)
space_before = self.last_type == TOKEN.START_BLOCK
@ -1067,11 +1157,14 @@ class Beautifier:
space_after = False
# http://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1
# if there is a newline between -- or ++ and anything else we should preserve it.
if current_token.wanted_newline and (current_token.text == '--' or current_token.text == '++'):
# if there is a newline between -- or ++ and anything else we
# should preserve it.
if current_token.wanted_newline and (
current_token.text == '--' or current_token.text == '++'):
self.print_newline(preserve_statement_flags=True)
if self.flags.last_text == ';' and self.is_expression(self.flags.mode):
if self.flags.last_text == ';' and self.is_expression(
self.flags.mode):
# for (;; ++i)
# ^^
space_before = True
@ -1079,20 +1172,24 @@ class Beautifier:
if self.last_type == TOKEN.RESERVED:
space_before = True
elif self.last_type == TOKEN.END_EXPR:
space_before = not (self.flags.last_text == ']' and current_token.text in ['--', '++'])
space_before = not (
self.flags.last_text == ']' and current_token.text in [
'--', '++'])
elif self.last_type == TOKEN.OPERATOR:
# a++ + ++b
# a - -b
space_before = current_token.text in ['--', '-','++', '+'] and self.flags.last_text in ['--', '-','++', '+']
space_before = current_token.text in [
'--', '-', '++', '+'] and self.flags.last_text in ['--', '-', '++', '+']
# + and - are not unary when preceeded by -- or ++ operator
# a-- + b
# a * +b
# a - -b
if current_token.text in ['-', '+'] and self.flags.last_text in ['--', '++']:
if current_token.text in [
'-', '+'] and self.flags.last_text in ['--', '++']:
space_after = True
if (((self.flags.mode == MODE.BlockStatement and not self.flags.inline_frame) or self.flags.mode == MODE.Statement)
and self.flags.last_text in ['{', ';']):
if (((self.flags.mode == MODE.BlockStatement and not self.flags.inline_frame)
or self.flags.mode == MODE.Statement) and self.flags.last_text in ['{', ';']):
# { foo: --i }
# foo(): --bar
self.print_newline()
@ -1105,18 +1202,19 @@ class Beautifier:
if space_after:
self.output.space_before_token = True
def handle_block_comment(self, current_token, preserve_statement_flags):
if self.output.raw:
self.output.add_raw_token(current_token)
if current_token.directives and current_token.directives.get('preserve') == 'end':
# If we're testing the raw output behavior, do not allow a directive to turn it off.
if current_token.directives and current_token.directives.get(
'preserve') == 'end':
# If we're testing the raw output behavior, do not allow a
# directive to turn it off.
self.output.raw = self.opts.test_output_raw
return
if current_token.directives:
self.print_newline(preserve_statement_flags = preserve_statement_flags)
self.print_newline(
preserve_statement_flags=preserve_statement_flags)
self.print_token(current_token)
if current_token.directives.get('preserve') == 'start':
self.output.raw = True
@ -1125,7 +1223,8 @@ class Beautifier:
return
# inline block
if not self.acorn.newline.search(current_token.text) and not current_token.wanted_newline:
if not self.acorn.newline.search(
current_token.text) and not current_token.wanted_newline:
self.output.space_before_token = True
self.print_token(current_token)
self.output.space_before_token = True
@ -1140,8 +1239,10 @@ class Beautifier:
# block comment starts with a new line
self.print_newline(preserve_statement_flags=preserve_statement_flags)
if len(lines) > 1:
javadoc = not any(l for l in lines[1:] if ( l.strip() == '' or (l.lstrip())[0] != '*'))
starless = all(l.startswith(last_indent) or l.strip() == '' for l in lines[1:])
javadoc = not any(l for l in lines[1:] if (
l.strip() == '' or (l.lstrip())[0] != '*'))
starless = all(l.startswith(last_indent)
or l.strip() == '' for l in lines[1:])
# first line always indented
self.print_token(current_token, lines[0])
@ -1161,7 +1262,8 @@ class Beautifier:
def handle_comment(self, current_token, preserve_statement_flags):
if current_token.wanted_newline:
self.print_newline(preserve_statement_flags = preserve_statement_flags)
self.print_newline(
preserve_statement_flags=preserve_statement_flags)
if not current_token.wanted_newline:
self.output.trim(True)
@ -1170,7 +1272,6 @@ class Beautifier:
self.print_token(current_token)
self.print_newline(preserve_statement_flags=preserve_statement_flags)
def handle_dot(self, current_token):
if self.start_of_statement(current_token):
# The conditional starts the statement if appropriate.
@ -1181,20 +1282,23 @@ class Beautifier:
if self.opts.unindent_chained_methods:
self.deindent()
if self.last_type == TOKEN.RESERVED and self.is_special_word(self.flags.last_text):
if self.last_type == TOKEN.RESERVED and self.is_special_word(
self.flags.last_text):
self.output.space_before_token = False
else:
# allow preserved newlines before dots in general
# force newlines on dots after close paren when break_chained - for bar().baz()
self.allow_wrap_or_preserved_newline(current_token,
self.flags.last_text == ')' and self.opts.break_chained_methods)
# force newlines on dots after close paren when break_chained - for
# bar().baz()
self.allow_wrap_or_preserved_newline(
current_token, self.flags.last_text == ')' and self.opts.break_chained_methods)
self.print_token(current_token)
def handle_unknown(self, current_token, preserve_statement_flags):
self.print_token(current_token)
if current_token.text[-1] == '\n':
self.print_newline(preserve_statement_flags = preserve_statement_flags)
self.print_newline(
preserve_statement_flags=preserve_statement_flags)
def handle_eof(self, current_token):
# Unwind any open statements

View File

@ -22,6 +22,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class BeautifierOptions:
def __init__(self):
self.indent_size = 4
@ -56,7 +57,6 @@ class BeautifierOptions:
self.test_output_raw = False
self.editorconfig = False
def __repr__(self):
return \
"""indent_size = %d

View File

@ -26,6 +26,7 @@ import re
from ..core.inputscanner import InputScanner
from ..core.token import Token
class TokenTypes:
START_EXPR = 'TK_START_EXPR'
END_EXPR = 'TK_END_EXPR',
@ -47,25 +48,46 @@ class TokenTypes:
def __init__(self):
pass
TOKEN = TokenTypes()
class Tokenizer:
number_pattern = re.compile(r'0[xX][0123456789abcdefABCDEF]*|0[oO][01234567]*|0[bB][01]*|\d+n|(?:\.\d+|\d+\.?\d*)(?:[eE][+-]?\d+)?')
number_pattern = re.compile(
r'0[xX][0123456789abcdefABCDEF]*|0[oO][01234567]*|0[bB][01]*|\d+n|(?:\.\d+|\d+\.?\d*)(?:[eE][+-]?\d+)?')
digit = re.compile(r'[0-9]')
startXmlRegExp = re.compile(r'<()([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
xmlRegExp = re.compile(r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
startXmlRegExp = re.compile(
r'<()([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
xmlRegExp = re.compile(
r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
positionable_operators = '!= !== % & && * ** + - / : < << <= == === > >= >> >>> ? ^ | ||'.split(' ')
positionable_operators = '!= !== % & && * ** + - / : < << <= == === > >= >> >>> ? ^ | ||'.split(
' ')
punct = (positionable_operators +
# non-positionable operators - these do not follow operator position settings
# non-positionable operators - these do not follow operator
# position settings
'! %= &= *= **= ++ += , -- -= /= :: <<= = => >>= >>>= ^= |= ~ ...'.split(' '))
# Words which always should start on a new line
line_starters = 'continue,try,throw,return,var,let,const,if,switch,case,default,for,while,break,function,import,export'.split(',')
reserved_words = line_starters + ['do', 'in', 'of', 'else', 'get', 'set', 'new', 'catch', 'finally', 'typeof', 'yield', 'async', 'await', 'from', 'as']
line_starters = 'continue,try,throw,return,var,let,const,if,switch,case,default,for,while,break,function,import,export'.split(
',')
reserved_words = line_starters + ['do',
'in',
'of',
'else',
'get',
'set',
'new',
'catch',
'finally',
'typeof',
'yield',
'async',
'await',
'from',
'as']
def __init__(self, input_string, opts, indent_string):
import jsbeautifier.core.acorn as acorn
@ -78,16 +100,22 @@ class Tokenizer:
self.block_comment_pattern = re.compile(r'([\s\S]*?)((?:\*\/)|$)')
# comment ends just before nearest linefeed or end of file
self.comment_pattern = re.compile(self.acorn.six.u(r'([^\n\r\u2028\u2029]*)'))
self.comment_pattern = re.compile(
self.acorn.six.u(r'([^\n\r\u2028\u2029]*)'))
self.directives_block_pattern = re.compile(r'\/\* beautify( \w+[:]\w+)+ \*\/')
self.directives_block_pattern = re.compile(
r'\/\* beautify( \w+[:]\w+)+ \*\/')
self.directive_pattern = re.compile(r' (\w+)[:](\w+)')
self.directives_end_ignore_pattern = re.compile(r'([\s\S]*?)((?:\/\*\sbeautify\signore:end\s\*\/)|$)')
self.directives_end_ignore_pattern = re.compile(
r'([\s\S]*?)((?:\/\*\sbeautify\signore:end\s\*\/)|$)')
self.template_pattern = re.compile(r'((<\?php|<\?=)[\s\S]*?\?>)|(<%[\s\S]*?%>)')
self.template_pattern = re.compile(
r'((<\?php|<\?=)[\s\S]*?\?>)|(<%[\s\S]*?%>)')
self.whitespacePattern = re.compile(self.acorn.six.u(r'[\n\r\u2028\u2029\t ]+'))
self.newlinePattern = re.compile(self.acorn.six.u(r'([\t ]*)(\r\n|[\n\r\u2028\u2029])?'))
self.whitespacePattern = re.compile(
self.acorn.six.u(r'[\n\r\u2028\u2029\t ]+'))
self.newlinePattern = re.compile(
self.acorn.six.u(r'([\t ]*)(\r\n|[\n\r\u2028\u2029])?'))
def tokenize(self):
self.in_html_comment = False
@ -99,9 +127,13 @@ class Tokenizer:
open_stack = []
comments = []
while not (not last == None and last.type == TOKEN.EOF):
while not (last is not None and last.type == TOKEN.EOF):
token_values = self.__tokenize_next()
next = Token(token_values[1], token_values[0], self.n_newlines, self.whitespace_before_token)
next = Token(
token_values[1],
token_values[0],
self.n_newlines,
self.whitespace_before_token)
while next.type == TOKEN.COMMENT or next.type == TOKEN.BLOCK_COMMENT or next.type == TOKEN.UNKNOWN:
if next.type == TOKEN.BLOCK_COMMENT:
@ -109,7 +141,11 @@ class Tokenizer:
comments.append(next)
token_values = self.__tokenize_next()
next = Token(token_values[1], token_values[0], self.n_newlines, self.whitespace_before_token)
next = Token(
token_values[1],
token_values[0],
self.n_newlines,
self.whitespace_before_token)
if len(comments) > 0:
next.comments_before = comments
@ -120,9 +156,9 @@ class Tokenizer:
open_stack.append(open_token)
open_token = next
elif (next.type == TOKEN.END_BLOCK or next.type == TOKEN.END_EXPR) and \
(not open_token == None and ( \
(next.text == ']' and open_token.text == '[') or \
(next.text == ')' and open_token.text == '(') or \
(open_token is not None and (
(next.text == ']' and open_token.text == '[') or
(next.text == ')' and open_token.text == '(') or
(next.text == '}' and open_token.text == '{'))):
next.parent = open_token.parent
next.opened = open_token
@ -140,11 +176,11 @@ class Tokenizer:
directive_match = self.directive_pattern.search(text)
while directive_match:
directives[directive_match.group(1)] = directive_match.group(2)
directive_match = self.directive_pattern.search(text, directive_match.end())
directive_match = self.directive_pattern.search(
text, directive_match.end())
return directives
def __tokenize_next(self):
self.n_newlines = 0
@ -153,10 +189,10 @@ class Tokenizer:
if len(self.tokens) > 0:
last_token = self.tokens[-1]
else:
# For the sake of tokenizing we can pretend that there was on open brace to start
# For the sake of tokenizing we can pretend that there was on open
# brace to start
last_token = Token(TOKEN.START_BLOCK, '{')
resulting_string = self.input.readWhile(self.whitespacePattern)
if not resulting_string == '':
if resulting_string == ' ':
@ -171,9 +207,11 @@ class Tokenizer:
resulting_string = self.input.readWhile(self.acorn.identifier)
if not resulting_string == '':
if not (last_token.type == TOKEN.DOT \
or (last_token.type == TOKEN.RESERVED and last_token.text in ['set', 'get'])) \
and resulting_string in self.reserved_words:
if not (
last_token.type == TOKEN.DOT or (
last_token.type == TOKEN.RESERVED and last_token.text in [
'set',
'get'])) and resulting_string in self.reserved_words:
if resulting_string == 'in' or resulting_string == 'of': # in and of are operators, need to hack
return resulting_string, TOKEN.OPERATOR
@ -187,7 +225,7 @@ class Tokenizer:
c = self.input.next()
if c == None:
if c is None:
return '', TOKEN.EOF
if c in '([':
@ -214,7 +252,8 @@ class Tokenizer:
directives = self.get_directives(comment)
if directives and directives.get('ignore') == 'start':
comment_match = self.input.match(self.directives_end_ignore_pattern)
comment_match = self.input.match(
self.directives_end_ignore_pattern)
comment += comment_match.group(0)
comment = re.sub(self.acorn.allLineBreaks, '\n', comment)
return comment, TOKEN.BLOCK_COMMENT, directives
@ -227,35 +266,45 @@ class Tokenizer:
def allowRegExOrXML(self):
return (last_token.type == TOKEN.RESERVED and last_token.text in ['return', 'case', 'throw', 'else', 'do', 'typeof', 'yield']) or \
(last_token.type == TOKEN.END_EXPR and last_token.text == ')' and \
(last_token.type == TOKEN.END_EXPR and last_token.text == ')' and
last_token.parent and last_token.parent.type == TOKEN.RESERVED and last_token.parent.text in ['if', 'while', 'for']) or \
(last_token.type in [TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK, TOKEN.END_BLOCK, TOKEN.OPERATOR, \
(last_token.type in [TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK, TOKEN.END_BLOCK, TOKEN.OPERATOR,
TOKEN.EQUALS, TOKEN.EOF, TOKEN.SEMICOLON, TOKEN.COMMA])
self.has_char_escapes = False
isString = (c == '`' or c == "'" or c == '"')
isRegExp = (c == '/' and allowRegExOrXML(self))
isXML = (self.opts.e4x and c == "<" and self.input.test(self.startXmlRegExp, -1) and allowRegExOrXML(self))
isXML = (self.opts.e4x and c == "<" and self.input.test(
self.startXmlRegExp, -1) and allowRegExOrXML(self))
sep = c
esc = False
resulting_string = c
in_char_class = False
if isString:
# handle string
def parse_string(self, resulting_string, delimiter, allow_unescaped_newlines = False, start_sub = None):
def parse_string(
self,
resulting_string,
delimiter,
allow_unescaped_newlines=False,
start_sub=None):
esc = False
while self.input.hasNext():
current_char = self.input.peek()
if not (esc or (current_char != delimiter and
(allow_unescaped_newlines or not bool(self.acorn.newline.match(current_char))))):
if not (
esc or (
current_char != delimiter and (
allow_unescaped_newlines or not bool(
self.acorn.newline.match(current_char))))):
break
# Handle \r\n linebreaks after escapes or in template strings
if (esc or allow_unescaped_newlines) and bool(self.acorn.newline.match(current_char)):
# Handle \r\n linebreaks after escapes or in template
# strings
if (esc or allow_unescaped_newlines) and bool(
self.acorn.newline.match(current_char)):
if current_char == '\r' and self.input.peek(1) == '\n':
self.input.next()
current_char = self.input.peek()
@ -276,9 +325,11 @@ class Tokenizer:
if start_sub and resulting_string.endswith(start_sub):
if delimiter == '`':
resulting_string = parse_string(self, resulting_string, '}', allow_unescaped_newlines, '`')
resulting_string = parse_string(
self, resulting_string, '}', allow_unescaped_newlines, '`')
else:
resulting_string = parse_string(self, resulting_string, '`', allow_unescaped_newlines, '${')
resulting_string = parse_string(
self, resulting_string, '`', allow_unescaped_newlines, '${')
if self.input.hasNext():
resulting_string += self.input.next()
@ -286,7 +337,8 @@ class Tokenizer:
return resulting_string
if sep == '`':
resulting_string = parse_string(self, resulting_string, '`', True, '${')
resulting_string = parse_string(
self, resulting_string, '`', True, '${')
else:
resulting_string = parse_string(self, resulting_string, sep)
elif isRegExp:
@ -319,9 +371,10 @@ class Tokenizer:
while bool(match):
isEndTag = match.group(1)
tagName = match.group(2)
isSingletonTag = (match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[")
if not isSingletonTag and (
tagName == rootTag or (isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))):
isSingletonTag = (
match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[")
if not isSingletonTag and (tagName == rootTag or (
isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))):
if isEndTag:
depth -= 1
else:
@ -349,10 +402,13 @@ class Tokenizer:
if sep == '/':
# regexps may have modifiers /regexp/MOD, so fetch those too
# Only [gim] are valid, but if the user puts in garbage, do what we can to take it.
resulting_string += self.input.readWhile(self.acorn.identifier)
# Only [gim] are valid, but if the user puts in garbage, do
# what we can to take it.
resulting_string += self.input.readWhile(
self.acorn.identifier)
resulting_string = re.sub(self.acorn.allLineBreaks, '\n', resulting_string)
resulting_string = re.sub(
self.acorn.allLineBreaks, '\n', resulting_string)
return resulting_string, TOKEN.STRING
@ -366,10 +422,10 @@ class Tokenizer:
resulting_string += c
return resulting_string.strip() + '\n', TOKEN.UNKNOWN
# Spidermonkey-specific sharp variables for circular references
# https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
# http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935
# http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp
# around line 1935
sharp = '#'
if self.input.hasNext() and self.input.testChar(self.digit):
while True:
@ -397,7 +453,6 @@ class Tokenizer:
c = re.sub(self.acorn.allLineBreaks, '\n', c)
return c, TOKEN.STRING
if c == '<' and self.input.match(re.compile(r'\!--')):
c = '<!--'
while self.input.hasNext() and not self.input.testChar(self.acorn.newline):
@ -406,7 +461,8 @@ class Tokenizer:
self.in_html_comment = True
return c, TOKEN.COMMENT
if c == '-' and self.in_html_comment and self.input.match(re.compile('->')):
if c == '-' and self.in_html_comment and self.input.match(
re.compile('->')):
self.in_html_comment = False
return '-->', TOKEN.COMMENT
@ -472,7 +528,8 @@ class Tokenizer:
escaped = int(matched.group(1), 16)
if escaped > 0x7e and escaped <= 0xff and matched.group(0).startswith('x'):
if escaped > 0x7e and escaped <= 0xff and matched.group(
0).startswith('x'):
# we bail out on \x7f..\xff,
# leaving whole string escaped,
# as it's probably completely binary

View File

@ -1,6 +1,7 @@
import unittest
from ...core.inputscanner import InputScanner
class TestInputScanner(unittest.TestCase):
@classmethod

View File

@ -6250,7 +6250,7 @@ class TestJSBeautifier(unittest.TestCase):
def decodesto(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.assertMultiLineEqual(
@ -6258,7 +6258,7 @@ class TestJSBeautifier(unittest.TestCase):
# if the expected is different from input, run it again
# expected output should be unchanged when run twice.
if not expectation == None:
if not expectation is None:
self.assertMultiLineEqual(
jsbeautifier.beautify(expectation, self.options), expectation)
@ -6281,7 +6281,7 @@ class TestJSBeautifier(unittest.TestCase):
return self.wrapregex.sub(' \\1', text)
def bt(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.decodesto(input, expectation)

View File

@ -19,6 +19,7 @@ def test_str(str, expected):
fails = fails + 1
return False
str = "eval(function(p,a,c,k,e,d){e=function(c){return c.toString(36)};if(!''.replace(/^/,String)){while(c--){d[c.toString(a)]=k[c]||c.toString(a)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('2 0=\"4 3!\";2 1=0.5(/b/6);a.9(\"8\").7=1;',12,12,'str|n|var|W3Schools|Visit|search|i|innerHTML|demo|getElementById|document|w3Schools'.split('|'),0,{}))"
expected = "var str = \"Visit W3Schools!\";\nvar n = str.search(/w3Schools/i);\ndocument.getElementById(\"demo\").innerHTML = n;"

View File

@ -2,23 +2,28 @@ import re
import unittest
import jsbeautifier
class TestJSBeautifierIndentation(unittest.TestCase):
def test_tabs(self):
test_fragment = self.decodesto
self.options.indent_with_tabs = 1;
test_fragment('{tabs()}', "{\n\ttabs()\n}");
self.options.indent_with_tabs = 1
test_fragment('{tabs()}', "{\n\ttabs()\n}")
def test_function_indent(self):
test_fragment = self.decodesto
self.options.indent_with_tabs = 1;
self.options.keep_function_indentation = 1;
test_fragment('var foo = function(){ bar() }();', "var foo = function() {\n\tbar()\n}();");
self.options.indent_with_tabs = 1
self.options.keep_function_indentation = 1
test_fragment(
'var foo = function(){ bar() }();',
"var foo = function() {\n\tbar()\n}();")
self.options.tabs = 1;
self.options.keep_function_indentation = 0;
test_fragment('var foo = function(){ baz() }();', "var foo = function() {\n\tbaz()\n}();");
self.options.tabs = 1
self.options.keep_function_indentation = 0
test_fragment(
'var foo = function(){ baz() }();',
"var foo = function() {\n\tbaz()\n}();")
def decodesto(self, input, expectation=None):
self.assertEqual(

View File

@ -12,11 +12,13 @@ from jsbeautifier.unpackers import evalbased
# NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js!
BLACKLIST = ['jsbeautifier.unpackers.evalbased']
class UnpackingError(Exception):
"""Badly packed source or general error. Argument is a
meaningful description."""
pass
def getunpackers():
"""Scans the unpackers dir, finds unpackers and add them to UNPACKERS list.
An unpacker will be loaded only if it is a valid python module (name must
@ -37,8 +39,10 @@ def getunpackers():
return sorted(unpackers, key=lambda mod: mod.PRIORITY)
UNPACKERS = getunpackers()
def run(source, evalcode=False):
"""Runs the applicable unpackers and return unpacked source as a string."""
for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]:
@ -47,6 +51,7 @@ def run(source, evalcode=False):
source = evalbased.unpack(source)
return source
def filtercomments(source):
"""NOT USED: strips trailing comments and put them at the top."""
trailing_comments = []

View File

@ -18,15 +18,19 @@ from subprocess import PIPE, Popen
PRIORITY = 3
def detect(source):
"""Detects if source is likely to be eval() packed."""
return source.strip().lower().startswith('eval(function(')
def unpack(source):
"""Runs source and return resulting code."""
return jseval('print %s;' % source[4:]) if detect(source) else source
# In case of failure, we'll just return the original, without crashing on user.
def jseval(script):
"""Run code in the JS interpreter and return output."""
try:

View File

@ -20,6 +20,7 @@ import re
PRIORITY = 1
def smartsplit(code):
"""Split `code` at " symbol, only if it is not escaped."""
strings = []
@ -40,11 +41,13 @@ def smartsplit(code):
pos += 1
return strings
def detect(code):
"""Detects if `code` is JavascriptObfuscator.com packed."""
# prefer `is not` idiom, so that a true boolean is returned
return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None)
def unpack(code):
"""Unpacks JavascriptObfuscator.com packed code."""
if detect(code):

View File

@ -51,7 +51,8 @@ CAVEAT = """//
"""
SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
SIGNATURE = (
r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65'
r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75'
r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B'
@ -59,10 +60,12 @@ SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","'
r'\x6C\x65\x6E\x67\x74\x68"]')
def detect(source):
"""Detects MyObfuscate.com packer."""
return SIGNATURE in source
def unpack(source):
"""Unpacks js code packed with MyObfuscate.com"""
if not detect(source):
@ -73,6 +76,7 @@ def unpack(source):
polished = match.group(1) if match else source
return CAVEAT + polished
def _filter(source):
"""Extracts and decode payload (original file) from `source`"""
try:

View File

@ -63,7 +63,8 @@ def unpack(source):
def _filterargs(source):
"""Juice from a source file the four args needed by decoder."""
juicers = [ (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"),
juicers = [
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"),
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"),
]
for juicer in juicers:
@ -80,7 +81,8 @@ def _filterargs(source):
raise UnpackingError('Corrupted p.a.c.k.e.r. data.')
# could not find a satisfying regex
raise UnpackingError('Could not make sense of p.a.c.k.e.r data (unexpected code structure)')
raise UnpackingError(
'Could not make sense of p.a.c.k.e.r data (unexpected code structure)')
def _replacestrings(source):
@ -124,8 +126,9 @@ class Unbaser(object):
else:
# Build conversion dictionary cache
try:
self.dictionary = dict((cipher, index) for
index, cipher in enumerate(self.ALPHABET[base]))
self.dictionary = dict(
(cipher, index) for index, cipher in enumerate(
self.ALPHABET[base]))
except KeyError:
raise TypeError('Unsupported base encoding.')

View File

@ -9,12 +9,16 @@ from jsbeautifier.unpackers.javascriptobfuscator import (
unpack, detect, smartsplit)
# pylint: disable=R0904
class TestJavascriptObfuscator(unittest.TestCase):
"""JavascriptObfuscator.com test case."""
def test_smartsplit(self):
"""Test smartsplit() function."""
split = smartsplit
equals = lambda data, result: self.assertEqual(split(data), result)
def equals(data, result): return self.assertEqual(split(data), result)
equals('', [])
equals('"a", "b"', ['"a"', '"b"'])
@ -23,8 +27,9 @@ class TestJavascriptObfuscator(unittest.TestCase):
def test_detect(self):
"""Test detect() function."""
positive = lambda source: self.assertTrue(detect(source))
negative = lambda source: self.assertFalse(detect(source))
def positive(source): return self.assertTrue(detect(source))
def negative(source): return self.assertFalse(detect(source))
negative('')
negative('abcd')
@ -35,12 +40,15 @@ class TestJavascriptObfuscator(unittest.TestCase):
def test_unpack(self):
"""Test unpack() function."""
decodeto = lambda ob, original: self.assertEqual(unpack(ob), original)
def decodeto(
ob, original): return self.assertEqual(
unpack(ob), original)
decodeto('var _0x8df3=[];var a=10;', 'var a=10;')
decodeto('var _0xb2a7=["\x74\x27\x65\x73\x74"];var i;for(i=0;i<10;++i)'
'{alert(_0xb2a7[0]);} ;', 'var i;for(i=0;i<10;++i){alert'
'("t\'est");} ;')
if __name__ == '__main__':
unittest.main()

View File

@ -13,6 +13,8 @@ INPUT = os.path.join(path[0], 'test-myobfuscate-input.js')
OUTPUT = os.path.join(path[0], 'test-myobfuscate-output.js')
# pylint: disable=R0904
class TestMyObfuscate(unittest.TestCase):
# pylint: disable=C0103
"""MyObfuscate obfuscator testcase."""
@ -26,15 +28,16 @@ class TestMyObfuscate(unittest.TestCase):
def test_detect(self):
"""Test detect() function."""
detected = lambda source: self.assertTrue(detect(source))
def detected(source): return self.assertTrue(detect(source))
detected(self.input)
def test_unpack(self):
"""Test unpack() function."""
check = lambda inp, out: self.assertEqual(unpack(inp), out)
def check(inp, out): return self.assertEqual(unpack(inp), out)
check(self.input, self.output)
if __name__ == '__main__':
unittest.main()

View File

@ -8,12 +8,16 @@ import unittest
from jsbeautifier.unpackers.packer import detect, unpack
# pylint: disable=R0904
class TestPacker(unittest.TestCase):
"""P.A.C.K.E.R. testcase."""
def test_detect(self):
"""Test detect() function."""
positive = lambda source: self.assertTrue(detect(source))
negative = lambda source: self.assertFalse(detect(source))
def positive(source): return self.assertTrue(detect(source))
def negative(source): return self.assertFalse(detect(source))
negative('')
negative('var a = b')
@ -22,7 +26,7 @@ class TestPacker(unittest.TestCase):
def test_unpack(self):
"""Test unpack() function."""
check = lambda inp, out: self.assertEqual(unpack(inp), out)
def check(inp, out): return self.assertEqual(unpack(inp), out)
check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
"){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
@ -30,5 +34,6 @@ class TestPacker(unittest.TestCase):
"new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
"62,3,'var||a'.split('|'),0,{}))", 'var a=1')
if __name__ == '__main__':
unittest.main()

View File

@ -9,12 +9,16 @@ import unittest
from jsbeautifier.unpackers.urlencode import detect, unpack
# pylint: disable=R0904
class TestUrlencode(unittest.TestCase):
"""urlencode test case."""
def test_detect(self):
"""Test detect() function."""
encoded = lambda source: self.assertTrue(detect(source))
unencoded = lambda source: self.assertFalse(detect(source))
def encoded(source): return self.assertTrue(detect(source))
def unencoded(source): return self.assertFalse(detect(source))
unencoded('')
unencoded('var a = b')
@ -24,7 +28,11 @@ class TestUrlencode(unittest.TestCase):
def test_unpack(self):
"""Test unpack function."""
equals = lambda source, result: self.assertEqual(unpack(source), result)
def equals(
source,
result): return self.assertEqual(
unpack(source),
result)
equals('', '')
equals('abcd', 'abcd')
@ -32,5 +40,6 @@ class TestUrlencode(unittest.TestCase):
equals('var%20a=b', 'var a=b')
equals('var%20a+=+b', 'var a = b')
if __name__ == '__main__':
unittest.main()

View File

@ -23,12 +23,14 @@ except ImportError:
PRIORITY = 0
def detect(code):
"""Detects if a scriptlet is urlencoded."""
# the fact that script doesn't contain any space, but has %20 instead
# should be sufficient check for now.
return ' ' not in code and ('%20' in code or code.count('%') > 3)
def unpack(code):
"""URL decode `code` source string."""
return unquote_plus(code) if detect(code) else code

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
import os,sys
import os
import sys
from setuptools import setup
from jsbeautifier.__version__ import __version__
@ -8,12 +9,15 @@ from jsbeautifier.__version__ import __version__
from setuptools.command.test import test as TestCommand
DIR = 'jsbeautifier/tests/'
class PyTest(TestCommand):
user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")]
def initialize_options(self):
TestCommand.initialize_options(self)
self.pytest_args = ['--assert=plain'] +[DIR+x for x in os.listdir(DIR) if x.endswith('.py') and x[0] not in '._']
self.pytest_args = ['--assert=plain'] + [DIR + \
x for x in os.listdir(DIR) if x.endswith('.py') and x[0] not in '._']
def run_tests(self):
# import here, cause outside the eggs aren't loaded
@ -21,6 +25,7 @@ class PyTest(TestCommand):
errno = pytest.main(self.pytest_args)
sys.exit(errno)
setup(name='jsbeautifier',
version=__version__,
description='JavaScript unobfuscator and beautifier.',

View File

@ -9,22 +9,34 @@ options.wrap_line_length = 80
data = ''
data_min = ''
def beautifier_test_underscore():
jsbeautifier.beautify(data, options)
def beautifier_test_underscore_min():
jsbeautifier.beautify(data_min, options)
def report_perf(fn):
import timeit
iter = 50
time = timeit.timeit(fn + "()", setup="from __main__ import " + fn + "; gc.enable()", number=iter)
time = timeit.timeit(
fn +
"()",
setup="from __main__ import " +
fn +
"; gc.enable()",
number=iter)
print(fn + ": " + str(iter / time) + " cycles/sec")
if __name__ == '__main__':
dirname = os.path.dirname(os.path.abspath(__file__))
underscore_file = os.path.join(dirname, "../", "test/resources/underscore.js")
underscore_min_file = os.path.join(dirname, "../", "test/resources/underscore-min.js")
underscore_file = os.path.join(
dirname, "../", "test/resources/underscore.js")
underscore_min_file = os.path.join(
dirname, "../", "test/resources/underscore-min.js")
data = copy.copy(''.join(open(underscore_file).readlines()))
data_min = copy.copy(''.join(open(underscore_min_file).readlines()))

View File

@ -1262,7 +1262,7 @@ class TestJSBeautifier(unittest.TestCase):
def decodesto(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.assertMultiLineEqual(
@ -1270,7 +1270,7 @@ class TestJSBeautifier(unittest.TestCase):
# if the expected is different from input, run it again
# expected output should be unchanged when run twice.
if not expectation == None:
if not expectation is None:
self.assertMultiLineEqual(
jsbeautifier.beautify(expectation, self.options), expectation)
@ -1293,7 +1293,7 @@ class TestJSBeautifier(unittest.TestCase):
return self.wrapregex.sub(' \\1', text)
def bt(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.decodesto(input, expectation)