Liam Newman 2018-07-26 14:11:57 -07:00
parent 9fb8d25496
commit ebc310f470
32 changed files with 730 additions and 432 deletions
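The hunks below are a mechanical PEP 8 cleanup of the Python port: long calls and regex literals are wrapped to the line-length limit, comparisons such as "== None" become "is None", bare "except:" becomes "except BaseException:", lambdas bound to names become small defs, and backslash line continuations give way to implied continuation inside brackets. The result closely resembles an automated autopep8 pass in aggressive mode; that attribution is an assumption, not something recorded in the diff, but a minimal sketch of such a pass over one of the snippets changed below looks like this:

# Sketch only: assumes the autopep8 package; the diff does not record
# which tool or options actually produced this commit.
import autopep8

messy = (
    "def beautify(string, opts = default_options() ):\n"
    "    b = Beautifier()\n"
    "    return b.beautify(string, opts)\n"
)
fixed = autopep8.fix_code(messy, options={'aggressive': 1})
print(fixed)
# Expected (roughly):
# def beautify(string, opts=default_options()):
#     b = Beautifier()
#     return b.beautify(string, opts)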

View File

@ -5,4 +5,3 @@
import sys
from cssbeautifier import main
sys.exit(main())

View File

@ -106,17 +106,18 @@ Rarely needed options:
else:
return 0
def main():
argv = sys.argv[1:]
try:
opts, args = getopt.getopt(argv, "hvio:rs:c:e:tn",
['help', 'usage', 'version', 'stdin', 'outfile=', 'replace',
'indent-size=', 'indent-char=', 'eol=', 'indent-with-tabs',
'preserve-newlines', 'disable-selector-separator-newline',
'end-with-newline', 'disable-newline-between-rules',
'space-around-combinator'])
['help', 'usage', 'version', 'stdin', 'outfile=', 'replace',
'indent-size=', 'indent-char=', 'eol=', 'indent-with-tabs',
'preserve-newlines', 'disable-selector-separator-newline',
'end-with-newline', 'disable-newline-between-rules',
'space-around-combinator'])
except getopt.GetoptError as ex:
print(ex, file=sys.stderr)
return usage(sys.stderr)
@ -160,7 +161,6 @@ def main():
elif opt in ('--space-around-combinator'):
css_options.space_around_combinator = True
if not file:
file = '-'
@ -190,11 +190,11 @@ def main():
f.write(pretty)
except TypeError:
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing six dependency.
# it is the only way to do this without having setup.py
# fail on a missing six dependency.
six = __import__("six")
f.write(six.u(pretty))
except Exception as ex:
print(ex, file=sys.stderr)
return 1

View File

@ -39,6 +39,7 @@ whitespacePattern = re.compile(r"(?:\s|\n)+")
# WORD_RE = re.compile("[\w$\-_]")
def default_options():
return BeautifierOptions()
@ -88,7 +89,7 @@ class Printer:
if self.indentLevel > 0:
self.indentLevel -= 1
def preserveSingleSpace(self,isAfterSpace):
def preserveSingleSpace(self, isAfterSpace):
if isAfterSpace:
self.output.space_before_token = True
@ -105,8 +106,10 @@ class Beautifier:
import jsbeautifier.core.acorn as acorn
self.lineBreak = acorn.lineBreak
self.allLineBreaks = acorn.allLineBreaks
self.comment_pattern = re.compile(acorn.six.u(r"\/\/(?:[^\n\r\u2028\u2029]*)"))
self.block_comment_pattern = re.compile(r"\/\*(?:[\s\S]*?)((?:\*\/)|$)")
self.comment_pattern = re.compile(
acorn.six.u(r"\/\/(?:[^\n\r\u2028\u2029]*)"))
self.block_comment_pattern = re.compile(
r"\/\*(?:[\s\S]*?)((?:\*\/)|$)")
if not source_text:
source_text = ''
@ -133,21 +136,22 @@ class Beautifier:
self.opts.eol = self.opts.eol.replace('\\r', '\r').replace('\\n', '\n')
# HACK: newline parsing inconsistent. This brute force normalizes the input newlines.
# HACK: newline parsing inconsistent. This brute force normalizes the
# input newlines.
self.source_text = re.sub(self.allLineBreaks, '\n', source_text)
# https://developer.mozilla.org/en-US/docs/Web/CSS/At-rule
# also in CONDITIONAL_GROUP_RULE below
self.NESTED_AT_RULE = [ \
"@page", \
"@font-face", \
"@keyframes", \
"@media", \
"@supports", \
self.NESTED_AT_RULE = [
"@page",
"@font-face",
"@keyframes",
"@media",
"@supports",
"@document"]
self.CONDITIONAL_GROUP_RULE = [ \
"@media", \
"@supports", \
self.CONDITIONAL_GROUP_RULE = [
"@media",
"@supports",
"@document"]
m = re.search("^[\t ]*", self.source_text)
@ -173,7 +177,7 @@ class Beautifier:
result = whitespaceChar.search(self.input.peek() or '') is not None
isFirstNewLine = True
while whitespaceChar.search(self.input.peek() or '') is not None:
while whitespaceChar.search(self.input.peek() or '') is not None:
self.ch = self.input.next()
if allowAtLeastOneNewLine and self.ch == "\n":
if self.opts.preserve_newlines or isFirstNewLine:
@ -206,7 +210,10 @@ class Beautifier:
return False
def beautify(self):
printer = Printer(self.indentChar, self.indentSize, self.baseIndentString)
printer = Printer(
self.indentChar,
self.indentSize,
self.baseIndentString)
self.output = printer.output
self.input = InputScanner(self.source_text)
@ -236,7 +243,9 @@ class Beautifier:
# minified code is being beautified.
output.add_new_line()
input.back()
printer.print_string(input.readWhile(self.block_comment_pattern))
printer.print_string(
input.readWhile(
self.block_comment_pattern))
# Ensures any new lines following the comment are preserved
self.eatWhitespace(True)
@ -263,10 +272,12 @@ class Beautifier:
else:
printer.print_string(self.ch)
# strip trailing space, if present, for hash property check
variableOrRule = input.peekUntilAfter(re.compile(r"[: ,;{}()[\]\/='\"]"))
variableOrRule = input.peekUntilAfter(
re.compile(r"[: ,;{}()[\]\/='\"]"))
if variableOrRule[-1] in ": ":
# wwe have a variable or pseudo-class, add it and insert one space before continuing
# wwe have a variable or pseudo-class, add it and
# insert one space before continuing
variableOrRule = self.eatString(": ")
if variableOrRule[-1].isspace():
variableOrRule = variableOrRule[:-1]
@ -305,7 +316,8 @@ class Beautifier:
self.eatWhitespace(True)
output.add_new_line()
# when entering conditional groups, only rulesets are allowed
# when entering conditional groups, only rulesets are
# allowed
if enteringConditionalGroup:
enteringConditionalGroup = False
insideRule = printer.indentLevel > printer.nestedLevel
@ -341,7 +353,8 @@ class Beautifier:
# sass/less parent reference don't use a space
# sass nested pseudo-class don't use a space
# preserve space before pseudoclasses/pseudoelements, as it means "in any child"
# preserve space before pseudoclasses/pseudoelements, as it
# means "in any child"
if input.lookBack(' '):
output.space_before_token = True
if input.peek() == ":":
@ -374,7 +387,7 @@ class Beautifier:
self.ch = input.next()
if self.ch:
if self.ch is not ')' and self.ch is not '"' \
and self.ch is not '\'':
and self.ch is not '\'':
printer.print_string(self.ch + self.eatString(')'))
else:
input.back()
@ -395,7 +408,7 @@ class Beautifier:
else:
output.space_before_token = True
elif (self.ch == '>' or self.ch == '+' or self.ch == '~') and \
not insidePropertyValue and parenLevel < 1:
not insidePropertyValue and parenLevel < 1:
# handle combinator spacing
if self.opts.space_around_combinator:
output.space_before_token = True

View File

@ -23,6 +23,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class BeautifierOptions:
def __init__(self):
self.indent_size = 4
@ -43,8 +44,7 @@ class BeautifierOptions:
self.space_around_selector_separator = False
def __repr__(self):
return \
"""indent_size = %d
return """indent_size = %d
indent_char = [%s]
indent_with_tabs = [%s]
preserve_newlines = [%s]
@ -52,6 +52,11 @@ separate_selectors_newline = [%s]
end_with_newline = [%s]
newline_between_rules = [%s]
space_around_combinator = [%s]
""" % (self.indent_size, self.indent_char, self.indent_with_tabs, self.preserve_newlines,
self.selector_separator_newline, self.end_with_newline, self.newline_between_rules,
self.space_around_combinator)
""" % (self.indent_size,
self.indent_char,
self.indent_with_tabs,
self.preserve_newlines,
self.selector_separator_newline,
self.end_with_newline,
self.newline_between_rules,
self.space_around_combinator)

View File

@ -3,14 +3,16 @@
import sys
import unittest
#Speedup things...
# Speedup things...
try:
import cProfile as profile
except ImportError:
import profile
def run():
sys.argv.append('discover')
unittest.main()
profile.run('run()')

View File

@ -3,9 +3,12 @@
import sys
import unittest
def run_tests():
suite = unittest.TestLoader().discover('jsbeautifier.tests', pattern = "test*.py")
suite = unittest.TestLoader().discover(
'jsbeautifier.tests', pattern="test*.py")
return unittest.TextTestRunner(verbosity=2).run(suite)
if __name__ == "__main__":
sys.exit(not run_tests().wasSuccessful())

View File

@ -66,10 +66,11 @@ def default_options():
return BeautifierOptions()
def beautify(string, opts = default_options() ):
def beautify(string, opts=default_options()):
b = Beautifier()
return b.beautify(string, opts)
def set_file_editorconfig_opts(filename, js_options):
from editorconfig import get_properties, EditorConfigError
try:
@ -87,7 +88,8 @@ def set_file_editorconfig_opts(filename, js_options):
if _ecoptions.get("max_line_length") == "off":
js_options.wrap_line_length = 0
else:
js_options.wrap_line_length = int(_ecoptions["max_line_length"])
js_options.wrap_line_length = int(
_ecoptions["max_line_length"])
if _ecoptions.get("insert_final_newline") == 'true':
js_options.end_with_newline = True
@ -107,9 +109,9 @@ def set_file_editorconfig_opts(filename, js_options):
print("Error loading EditorConfig. Ignoring.", file=sys.stderr)
def beautify_file(file_name, opts = default_options() ):
def beautify_file(file_name, opts=default_options()):
input_string = ''
if file_name == '-': # stdin
if file_name == '-': # stdin
try:
if sys.stdin.isatty():
raise Exception()
@ -117,7 +119,9 @@ def beautify_file(file_name, opts = default_options() ):
stream = sys.stdin
input_string = ''.join(stream.readlines())
except Exception:
print("Must pipe input or define at least one file.\n", file=sys.stderr)
print(
"Must pipe input or define at least one file.\n",
file=sys.stderr)
usage(sys.stderr)
raise
else:
@ -189,19 +193,22 @@ def mkdir_p(path):
try:
if path:
os.makedirs(path)
except OSError as exc: # Python >2.5
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(path):
pass
else:
raise Exception()
def isFileDifferent(filepath, expected):
try:
return (''.join(io.open(filepath, 'rt', newline='').readlines()) != expected)
except:
return (
''.join(
io.open(
filepath,
'rt',
newline='').readlines()) != expected)
except BaseException:
return True
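A recurring substitution in this file is replacing a bare except clause with "except BaseException:", as in isFileDifferent() above. The two forms catch exactly the same set of exceptions, so the rewrite preserves behaviour; a small standalone illustration (hypothetical snippet, not from the repository):

import sys

# A bare "except:" and "except BaseException:" have identical coverage,
# including SystemExit and KeyboardInterrupt. Catching Exception would be
# narrower and would let those control-flow exceptions propagate.
try:
    sys.exit(1)                    # raises SystemExit
except BaseException:              # same coverage as a bare "except:"
    print("caught by BaseException")

try:
    sys.exit(1)
except Exception:                  # SystemExit does not derive from Exception
    print("never reached")
except BaseException:
    print("falls through to BaseException")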
@ -211,11 +218,11 @@ def main():
try:
opts, args = getopt.getopt(argv, "s:c:e:o:rdEPjabkil:xhtfvXnCO:w:",
['indent-size=','indent-char=','eol=', 'outfile=', 'replace', 'disable-preserve-newlines',
'space-in-paren', 'space-in-empty-paren', 'jslint-happy', 'space-after-anon-function',
'brace-style=', 'keep-array-indentation', 'indent-level=', 'unescape-strings',
'help', 'usage', 'stdin', 'eval-code', 'indent-with-tabs', 'keep-function-indentation', 'version',
'e4x', 'end-with-newline','comma-first','operator-position=','wrap-line-length','editorconfig'])
['indent-size=', 'indent-char=', 'eol=', 'outfile=', 'replace', 'disable-preserve-newlines',
'space-in-paren', 'space-in-empty-paren', 'jslint-happy', 'space-after-anon-function',
'brace-style=', 'keep-array-indentation', 'indent-level=', 'unescape-strings',
'help', 'usage', 'stdin', 'eval-code', 'indent-with-tabs', 'keep-function-indentation', 'version',
'e4x', 'end-with-newline', 'comma-first', 'operator-position=', 'wrap-line-length', 'editorconfig'])
except getopt.GetoptError as ex:
print(ex, file=sys.stderr)
return usage(sys.stderr)
@ -231,7 +238,7 @@ def main():
for opt, arg in opts:
if opt in ('--keep-array-indentation', '-k'):
js_options.keep_array_indentation = True
if opt in ('--keep-function-indentation','-f'):
if opt in ('--keep-function-indentation', '-f'):
js_options.keep_function_indentation = True
elif opt in ('--outfile', '-o'):
outfile = arg
@ -280,7 +287,6 @@ def main():
elif opt in ('--help', '--usage', '-h'):
return usage()
if not file:
file = '-'
@ -325,11 +331,11 @@ def main():
f.write(pretty)
except TypeError:
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing six dependency.
# it is the only way to do this without having setup.py
# fail on a missing six dependency.
six = __import__("six")
f.write(six.u(pretty))
except Exception as ex:
print(ex, file=sys.stderr)
return 1
@ -337,5 +343,6 @@ def main():
# Success
return 0
if __name__ == "__main__":
main()

View File

@ -12,7 +12,8 @@ import re
# https://github.com/marijnh/acorn.git
# This is not pretty, but given how we did the version import
# it is the only way to do this without having setup.py fail on a missing six dependency.
# it is the only way to do this without having setup.py fail on a missing
# six dependency.
six = __import__("six")
# ## Character categories
@ -22,7 +23,8 @@ six = __import__("six")
# are only applied when a character is found to actually have a
# code point above 128.
_nonASCIIwhitespace = re.compile(six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]"))
_nonASCIIwhitespace = re.compile(
six.u(r"[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]"))
_baseASCIIidentifierStartChars = six.u(r"\x24\x40\x41-\x5a\x5f\x61-\x7a")
_nonASCIIidentifierStartChars = six.u(r"\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u12c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc")
_baseASCIIidentifierChars = six.u(r"\x24\x30-\x39\x41-\x5a\x5f\x61-\x7a")
@ -30,10 +32,27 @@ _nonASCIIidentifierChars = six.u(r"\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf
#_nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]")
#_nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")
_identifierStart = re.compile("[" + _baseASCIIidentifierStartChars + _nonASCIIidentifierStartChars + "]")
_identifierChars = re.compile("[" + _baseASCIIidentifierChars + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")
_identifierStart = re.compile(
"[" +
_baseASCIIidentifierStartChars +
_nonASCIIidentifierStartChars +
"]")
_identifierChars = re.compile(
"[" +
_baseASCIIidentifierChars +
_nonASCIIidentifierStartChars +
_nonASCIIidentifierChars +
"]")
identifier = re.compile("[" + _baseASCIIidentifierStartChars + _nonASCIIidentifierStartChars + "][" + _baseASCIIidentifierChars + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]*")
identifier = re.compile(
"[" +
_baseASCIIidentifierStartChars +
_nonASCIIidentifierStartChars +
"][" +
_baseASCIIidentifierChars +
_nonASCIIidentifierStartChars +
_nonASCIIidentifierChars +
"]*")
# Whether a single character denotes a newline.
@ -58,10 +77,13 @@ def isIdentifierStart(code):
# return code == 95 # permit _ (95)
# if code < 123:
# return True # 97 through 123 are lowercase letters
# return code >= 0xaa and _nonASCIIidentifierStart.match(six.unichr(code)) != None
# return code >= 0xaa and _nonASCIIidentifierStart.match(six.unichr(code))
# != None
return bool(_identifierStart.match(six.unichr(code)))
# Test whether a given character is part of an identifier.
def isIdentifierChar(code):
# if code < 48:
# return code == 36
@ -75,5 +97,6 @@ def isIdentifierChar(code):
# return code == 95
# if code < 123:
# return True
# return code >= 0xaa and _nonASCIIidentifier.match(six.unichr(code)) != None
# return code >= 0xaa and _nonASCIIidentifier.match(six.unichr(code)) !=
# None
return bool(_identifierChars.match(six.unichr(code)))
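A quick usage sketch for the two predicates above (illustrative calls, not taken from the repository): each takes a code point and tests it against the compiled identifier regexes, and the character classes in this hunk mean that digits may continue an identifier but cannot start one.

from jsbeautifier.core.acorn import isIdentifierStart, isIdentifierChar

print(isIdentifierStart(ord('a')))   # True: a-z are identifier start characters
print(isIdentifierStart(ord('7')))   # False: digits cannot start an identifier
print(isIdentifierChar(ord('7')))    # True: digits may follow the first character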

View File

@ -46,7 +46,7 @@ class InputScanner:
return val
def peek(self, index = 0):
def peek(self, index=0):
val = None
index += self.__position
if index >= 0 and index < self.__input_length:
@ -54,14 +54,15 @@ class InputScanner:
return val
def test(self, pattern, index = 0):
def test(self, pattern, index=0):
index += self.__position
return index >= 0 and index < self.__input_length and bool(pattern.match(self.__input, index))
return index >= 0 and index < self.__input_length and bool(
pattern.match(self.__input, index))
def testChar(self, pattern, index = 0):
def testChar(self, pattern, index=0):
# test one character regex match
val = self.peek(index)
return val != None and pattern.match(val)
return val is not None and pattern.match(val)
def match(self, pattern):
pattern_match = None
@ -118,6 +119,6 @@ class InputScanner:
return val
def lookBack(self, testVal):
start = self.__position - 1
return start >= len(testVal) and \
start = self.__position - 1
return start >= len(testVal) and \
self.__input[start - len(testVal):start].lower() == testVal

View File

@ -29,6 +29,8 @@ import copy
# mergeOpts(obj, 'b')
#
# Returns: {a: 2, b: {a: 2}}
def mergeOpts(options, childFieldName):
finalOpts = copy.copy(options)

View File

@ -24,7 +24,10 @@
import re
# Using object instead of string to allow for later expansion of info about each line
# Using object instead of string to allow for later expansion of info
# about each line
class OutputLine:
def __init__(self, parent):
self.__parent = parent
@ -41,7 +44,8 @@ class OutputLine:
return self.__empty
def set_indent(self, level):
self.__character_count = self.__parent.baseIndentLength + level * self.__parent.indent_length
self.__character_count = self.__parent.baseIndentLength + \
level * self.__parent.indent_length
self.__indent_count = level
def last(self):
@ -55,7 +59,6 @@ class OutputLine:
self.__character_count += len(input)
self.__empty = False
def pop(self):
item = None
if not self.is_empty():
@ -85,11 +88,11 @@ class OutputLine:
class Output:
def __init__(self, indent_string, baseIndentString = ''):
def __init__(self, indent_string, baseIndentString=''):
self.indent_string = indent_string
self.baseIndentString = baseIndentString
self.indent_cache = [ baseIndentString ]
self.indent_cache = [baseIndentString]
self.baseIndentLength = len(baseIndentString)
self.indent_length = len(indent_string)
self.raw = False
@ -134,8 +137,8 @@ class Output:
# Never indent your first output indent at the start of the file
if len(self.lines) > 1:
while level >= len(self.indent_cache):
self.indent_cache.append(self.indent_cache[-1] + self.indent_string)
self.indent_cache.append(
self.indent_cache[-1] + self.indent_string)
self.current_line.set_indent(level)
return True
@ -159,10 +162,11 @@ class Output:
self.current_line.push(' ')
self.space_before_token = False
def trim(self, eat_newlines = False):
def trim(self, eat_newlines=False):
self.current_line.trim()
while eat_newlines and len(self.lines) > 1 and self.current_line.is_empty():
while eat_newlines and len(
self.lines) > 1 and self.current_line.is_empty():
self.lines.pop()
self.current_line = self.lines[-1]
self.current_line.trim()

View File

@ -22,8 +22,16 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class Token:
def __init__(self, type, text, newlines = 0, whitespace_before = '', mode = None, parent = None):
def __init__(
self,
type,
text,
newlines=0,
whitespace_before='',
mode=None,
parent=None):
self.type = type
self.text = text
self.comments_before = []

File diff suppressed because it is too large.

View File

@ -22,6 +22,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class BeautifierOptions:
def __init__(self):
self.indent_size = 4
@ -56,10 +57,9 @@ class BeautifierOptions:
self.test_output_raw = False
self.editorconfig = False
def __repr__(self):
return \
"""indent_size = %d
"""indent_size = %d
indent_char = [%s]
preserve_newlines = %s
max_preserve_newlines = %d
@ -72,17 +72,17 @@ keep_array_indentation = %s
eval_code = %s
wrap_line_length = %s
unescape_strings = %s
""" % ( self.indent_size,
self.indent_char,
self.preserve_newlines,
self.max_preserve_newlines,
self.space_in_paren,
self.jslint_happy,
self.space_after_anon_function,
self.indent_with_tabs,
self.brace_style,
self.keep_array_indentation,
self.eval_code,
self.wrap_line_length,
self.unescape_strings,
)
""" % (self.indent_size,
self.indent_char,
self.preserve_newlines,
self.max_preserve_newlines,
self.space_in_paren,
self.jslint_happy,
self.space_after_anon_function,
self.indent_with_tabs,
self.brace_style,
self.keep_array_indentation,
self.eval_code,
self.wrap_line_length,
self.unescape_strings,
)

View File

@ -26,6 +26,7 @@ import re
from ..core.inputscanner import InputScanner
from ..core.token import Token
class TokenTypes:
START_EXPR = 'TK_START_EXPR'
END_EXPR = 'TK_END_EXPR',
@ -47,27 +48,48 @@ class TokenTypes:
def __init__(self):
pass
TOKEN = TokenTypes()
class Tokenizer:
number_pattern = re.compile(r'0[xX][0123456789abcdefABCDEF]*|0[oO][01234567]*|0[bB][01]*|\d+n|(?:\.\d+|\d+\.?\d*)(?:[eE][+-]?\d+)?')
number_pattern = re.compile(
r'0[xX][0123456789abcdefABCDEF]*|0[oO][01234567]*|0[bB][01]*|\d+n|(?:\.\d+|\d+\.?\d*)(?:[eE][+-]?\d+)?')
digit = re.compile(r'[0-9]')
startXmlRegExp = re.compile(r'<()([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
xmlRegExp = re.compile(r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
startXmlRegExp = re.compile(
r'<()([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
xmlRegExp = re.compile(
r'[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[\s\S]+?}|!\[CDATA\[[\s\S]*?\]\])(\s+{[\s\S]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*(\'[^\']*\'|"[^"]*"|{[\s\S]+?}))*\s*(/?)\s*>')
positionable_operators = '!= !== % & && * ** + - / : < << <= == === > >= >> >>> ? ^ | ||'.split(' ')
positionable_operators = '!= !== % & && * ** + - / : < << <= == === > >= >> >>> ? ^ | ||'.split(
' ')
punct = (positionable_operators +
# non-positionable operators - these do not follow operator position settings
'! %= &= *= **= ++ += , -- -= /= :: <<= = => >>= >>>= ^= |= ~ ...'.split(' '))
# non-positionable operators - these do not follow operator
# position settings
'! %= &= *= **= ++ += , -- -= /= :: <<= = => >>= >>>= ^= |= ~ ...'.split(' '))
# Words which always should start on a new line
line_starters = 'continue,try,throw,return,var,let,const,if,switch,case,default,for,while,break,function,import,export'.split(',')
reserved_words = line_starters + ['do', 'in', 'of', 'else', 'get', 'set', 'new', 'catch', 'finally', 'typeof', 'yield', 'async', 'await', 'from', 'as']
line_starters = 'continue,try,throw,return,var,let,const,if,switch,case,default,for,while,break,function,import,export'.split(
',')
reserved_words = line_starters + ['do',
'in',
'of',
'else',
'get',
'set',
'new',
'catch',
'finally',
'typeof',
'yield',
'async',
'await',
'from',
'as']
def __init__ (self, input_string, opts, indent_string):
def __init__(self, input_string, opts, indent_string):
import jsbeautifier.core.acorn as acorn
self.acorn = acorn
@ -78,16 +100,22 @@ class Tokenizer:
self.block_comment_pattern = re.compile(r'([\s\S]*?)((?:\*\/)|$)')
# comment ends just before nearest linefeed or end of file
self.comment_pattern = re.compile(self.acorn.six.u(r'([^\n\r\u2028\u2029]*)'))
self.comment_pattern = re.compile(
self.acorn.six.u(r'([^\n\r\u2028\u2029]*)'))
self.directives_block_pattern = re.compile(r'\/\* beautify( \w+[:]\w+)+ \*\/')
self.directives_block_pattern = re.compile(
r'\/\* beautify( \w+[:]\w+)+ \*\/')
self.directive_pattern = re.compile(r' (\w+)[:](\w+)')
self.directives_end_ignore_pattern = re.compile(r'([\s\S]*?)((?:\/\*\sbeautify\signore:end\s\*\/)|$)')
self.directives_end_ignore_pattern = re.compile(
r'([\s\S]*?)((?:\/\*\sbeautify\signore:end\s\*\/)|$)')
self.template_pattern = re.compile(r'((<\?php|<\?=)[\s\S]*?\?>)|(<%[\s\S]*?%>)')
self.template_pattern = re.compile(
r'((<\?php|<\?=)[\s\S]*?\?>)|(<%[\s\S]*?%>)')
self.whitespacePattern = re.compile(self.acorn.six.u(r'[\n\r\u2028\u2029\t ]+'))
self.newlinePattern = re.compile(self.acorn.six.u(r'([\t ]*)(\r\n|[\n\r\u2028\u2029])?'))
self.whitespacePattern = re.compile(
self.acorn.six.u(r'[\n\r\u2028\u2029\t ]+'))
self.newlinePattern = re.compile(
self.acorn.six.u(r'([\t ]*)(\r\n|[\n\r\u2028\u2029])?'))
def tokenize(self):
self.in_html_comment = False
@ -99,9 +127,13 @@ class Tokenizer:
open_stack = []
comments = []
while not (not last == None and last.type == TOKEN.EOF):
while not (last is not None and last.type == TOKEN.EOF):
token_values = self.__tokenize_next()
next = Token(token_values[1], token_values[0], self.n_newlines, self.whitespace_before_token)
next = Token(
token_values[1],
token_values[0],
self.n_newlines,
self.whitespace_before_token)
while next.type == TOKEN.COMMENT or next.type == TOKEN.BLOCK_COMMENT or next.type == TOKEN.UNKNOWN:
if next.type == TOKEN.BLOCK_COMMENT:
@ -109,7 +141,11 @@ class Tokenizer:
comments.append(next)
token_values = self.__tokenize_next()
next = Token(token_values[1], token_values[0], self.n_newlines, self.whitespace_before_token)
next = Token(
token_values[1],
token_values[0],
self.n_newlines,
self.whitespace_before_token)
if len(comments) > 0:
next.comments_before = comments
@ -120,9 +156,9 @@ class Tokenizer:
open_stack.append(open_token)
open_token = next
elif (next.type == TOKEN.END_BLOCK or next.type == TOKEN.END_EXPR) and \
(not open_token == None and ( \
(next.text == ']' and open_token.text == '[') or \
(next.text == ')' and open_token.text == '(') or \
(open_token is not None and (
(next.text == ']' and open_token.text == '[') or
(next.text == ')' and open_token.text == '(') or
(next.text == '}' and open_token.text == '{'))):
next.parent = open_token.parent
next.opened = open_token
@ -132,7 +168,7 @@ class Tokenizer:
last = next
return self.tokens
def get_directives (self, text):
def get_directives(self, text):
if not self.directives_block_pattern.match(text):
return None
@ -140,11 +176,11 @@ class Tokenizer:
directive_match = self.directive_pattern.search(text)
while directive_match:
directives[directive_match.group(1)] = directive_match.group(2)
directive_match = self.directive_pattern.search(text, directive_match.end())
directive_match = self.directive_pattern.search(
text, directive_match.end())
return directives
def __tokenize_next(self):
self.n_newlines = 0
@ -153,10 +189,10 @@ class Tokenizer:
if len(self.tokens) > 0:
last_token = self.tokens[-1]
else:
# For the sake of tokenizing we can pretend that there was on open brace to start
# For the sake of tokenizing we can pretend that there was on open
# brace to start
last_token = Token(TOKEN.START_BLOCK, '{')
resulting_string = self.input.readWhile(self.whitespacePattern)
if not resulting_string == '':
if resulting_string == ' ':
@ -171,10 +207,12 @@ class Tokenizer:
resulting_string = self.input.readWhile(self.acorn.identifier)
if not resulting_string == '':
if not (last_token.type == TOKEN.DOT \
or (last_token.type == TOKEN.RESERVED and last_token.text in ['set', 'get'])) \
and resulting_string in self.reserved_words:
if resulting_string == 'in' or resulting_string == 'of': # in and of are operators, need to hack
if not (
last_token.type == TOKEN.DOT or (
last_token.type == TOKEN.RESERVED and last_token.text in [
'set',
'get'])) and resulting_string in self.reserved_words:
if resulting_string == 'in' or resulting_string == 'of': # in and of are operators, need to hack
return resulting_string, TOKEN.OPERATOR
return resulting_string, TOKEN.RESERVED
@ -187,7 +225,7 @@ class Tokenizer:
c = self.input.next()
if c == None:
if c is None:
return '', TOKEN.EOF
if c in '([':
@ -207,19 +245,20 @@ class Tokenizer:
if c == '/':
comment = ''
if self.input.peek() == '*': # peek /* .. */ comment
if self.input.peek() == '*': # peek /* .. */ comment
self.input.next()
comment_match = self.input.match(self.block_comment_pattern)
comment = '/*' + comment_match.group(0)
directives = self.get_directives(comment)
if directives and directives.get('ignore') == 'start':
comment_match = self.input.match(self.directives_end_ignore_pattern)
comment_match = self.input.match(
self.directives_end_ignore_pattern)
comment += comment_match.group(0)
comment = re.sub(self.acorn.allLineBreaks, '\n', comment)
return comment, TOKEN.BLOCK_COMMENT, directives
if self.input.peek() == '/': # peek // comment
if self.input.peek() == '/': # peek // comment
self.input.next()
comment_match = self.input.match(self.comment_pattern)
comment = '//' + comment_match.group(0)
@ -227,35 +266,45 @@ class Tokenizer:
def allowRegExOrXML(self):
return (last_token.type == TOKEN.RESERVED and last_token.text in ['return', 'case', 'throw', 'else', 'do', 'typeof', 'yield']) or \
(last_token.type == TOKEN.END_EXPR and last_token.text == ')' and \
last_token.parent and last_token.parent.type == TOKEN.RESERVED and last_token.parent.text in ['if', 'while', 'for']) or \
(last_token.type in [TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK, TOKEN.END_BLOCK, TOKEN.OPERATOR, \
TOKEN.EQUALS, TOKEN.EOF, TOKEN.SEMICOLON, TOKEN.COMMA])
(last_token.type == TOKEN.END_EXPR and last_token.text == ')' and
last_token.parent and last_token.parent.type == TOKEN.RESERVED and last_token.parent.text in ['if', 'while', 'for']) or \
(last_token.type in [TOKEN.COMMENT, TOKEN.START_EXPR, TOKEN.START_BLOCK, TOKEN.END_BLOCK, TOKEN.OPERATOR,
TOKEN.EQUALS, TOKEN.EOF, TOKEN.SEMICOLON, TOKEN.COMMA])
self.has_char_escapes = False
isString = (c == '`' or c == "'" or c == '"')
isRegExp = (c == '/' and allowRegExOrXML(self))
isXML = (self.opts.e4x and c == "<" and self.input.test(self.startXmlRegExp, -1) and allowRegExOrXML(self))
isXML = (self.opts.e4x and c == "<" and self.input.test(
self.startXmlRegExp, -1) and allowRegExOrXML(self))
sep = c
esc = False
resulting_string = c
in_char_class = False
if isString:
# handle string
def parse_string(self, resulting_string, delimiter, allow_unescaped_newlines = False, start_sub = None):
def parse_string(
self,
resulting_string,
delimiter,
allow_unescaped_newlines=False,
start_sub=None):
esc = False
while self.input.hasNext():
current_char = self.input.peek()
if not (esc or (current_char != delimiter and
(allow_unescaped_newlines or not bool(self.acorn.newline.match(current_char))))):
if not (
esc or (
current_char != delimiter and (
allow_unescaped_newlines or not bool(
self.acorn.newline.match(current_char))))):
break
# Handle \r\n linebreaks after escapes or in template strings
if (esc or allow_unescaped_newlines) and bool(self.acorn.newline.match(current_char)):
# Handle \r\n linebreaks after escapes or in template
# strings
if (esc or allow_unescaped_newlines) and bool(
self.acorn.newline.match(current_char)):
if current_char == '\r' and self.input.peek(1) == '\n':
self.input.next()
current_char = self.input.peek()
@ -276,9 +325,11 @@ class Tokenizer:
if start_sub and resulting_string.endswith(start_sub):
if delimiter == '`':
resulting_string = parse_string(self, resulting_string, '}', allow_unescaped_newlines, '`')
resulting_string = parse_string(
self, resulting_string, '}', allow_unescaped_newlines, '`')
else:
resulting_string = parse_string(self, resulting_string, '`', allow_unescaped_newlines, '${')
resulting_string = parse_string(
self, resulting_string, '`', allow_unescaped_newlines, '${')
if self.input.hasNext():
resulting_string += self.input.next()
@ -286,14 +337,15 @@ class Tokenizer:
return resulting_string
if sep == '`':
resulting_string = parse_string(self, resulting_string, '`', True, '${')
resulting_string = parse_string(
self, resulting_string, '`', True, '${')
else:
resulting_string = parse_string(self, resulting_string, sep)
elif isRegExp:
# handle regexp
in_char_class = False
while self.input.hasNext() and \
(esc or in_char_class or self.input.peek()!= sep) and \
(esc or in_char_class or self.input.peek() != sep) and \
not self.input.testChar(self.acorn.newline):
resulting_string += self.input.peek()
if not esc:
@ -319,9 +371,10 @@ class Tokenizer:
while bool(match):
isEndTag = match.group(1)
tagName = match.group(2)
isSingletonTag = (match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[")
if not isSingletonTag and (
tagName == rootTag or (isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))):
isSingletonTag = (
match.groups()[-1] != "") or (match.group(2)[0:8] == "![CDATA[")
if not isSingletonTag and (tagName == rootTag or (
isCurlyRoot and re.sub(r'^{\s+', '{', re.sub(r'\s+}$', '}', tagName)))):
if isEndTag:
depth -= 1
else:
@ -349,10 +402,13 @@ class Tokenizer:
if sep == '/':
# regexps may have modifiers /regexp/MOD, so fetch those too
# Only [gim] are valid, but if the user puts in garbage, do what we can to take it.
resulting_string += self.input.readWhile(self.acorn.identifier)
# Only [gim] are valid, but if the user puts in garbage, do
# what we can to take it.
resulting_string += self.input.readWhile(
self.acorn.identifier)
resulting_string = re.sub(self.acorn.allLineBreaks, '\n', resulting_string)
resulting_string = re.sub(
self.acorn.allLineBreaks, '\n', resulting_string)
return resulting_string, TOKEN.STRING
@ -366,10 +422,10 @@ class Tokenizer:
resulting_string += c
return resulting_string.strip() + '\n', TOKEN.UNKNOWN
# Spidermonkey-specific sharp variables for circular references
# https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
# http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935
# http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp
# around line 1935
sharp = '#'
if self.input.hasNext() and self.input.testChar(self.digit):
while True:
@ -397,7 +453,6 @@ class Tokenizer:
c = re.sub(self.acorn.allLineBreaks, '\n', c)
return c, TOKEN.STRING
if c == '<' and self.input.match(re.compile(r'\!--')):
c = '<!--'
while self.input.hasNext() and not self.input.testChar(self.acorn.newline):
@ -406,7 +461,8 @@ class Tokenizer:
self.in_html_comment = True
return c, TOKEN.COMMENT
if c == '-' and self.in_html_comment and self.input.match(re.compile('->')):
if c == '-' and self.in_html_comment and self.input.match(
re.compile('->')):
self.in_html_comment = False
return '-->', TOKEN.COMMENT
@ -472,7 +528,8 @@ class Tokenizer:
escaped = int(matched.group(1), 16)
if escaped > 0x7e and escaped <= 0xff and matched.group(0).startswith('x'):
if escaped > 0x7e and escaped <= 0xff and matched.group(
0).startswith('x'):
# we bail out on \x7f..\xff,
# leaving whole string escaped,
# as it's probably completely binary

View File

@ -1,6 +1,7 @@
import unittest
from ...core.inputscanner import InputScanner
class TestInputScanner(unittest.TestCase):
@classmethod
@ -13,4 +14,4 @@ class TestInputScanner(unittest.TestCase):
if __name__ == '__main__':
unittest.main()
unittest.main()

View File

@ -6250,7 +6250,7 @@ class TestJSBeautifier(unittest.TestCase):
def decodesto(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.assertMultiLineEqual(
@ -6258,7 +6258,7 @@ class TestJSBeautifier(unittest.TestCase):
# if the expected is different from input, run it again
# expected output should be unchanged when run twice.
if not expectation == None:
if not expectation is None:
self.assertMultiLineEqual(
jsbeautifier.beautify(expectation, self.options), expectation)
@ -6281,7 +6281,7 @@ class TestJSBeautifier(unittest.TestCase):
return self.wrapregex.sub(' \\1', text)
def bt(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.decodesto(input, expectation)
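The test-suite changes above swap equality comparisons against None for identity checks, the PEP 8 recommendation since None is a singleton (pycodestyle E711 flags the old spelling). Note that the second rewrite, "not expectation is None", is itself what E714 warns about and is usually written "expectation is not None"; a tiny illustration with a hypothetical variable:

expectation = None

if expectation is None:            # preferred over "expectation == None"
    expectation = "default input"

if expectation is not None:        # clearer than "not expectation is None"
    print(len(expectation))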

View File

@ -9,15 +9,16 @@ fails = 0
def test_str(str, expected):
global fails
res = jsbeautifier.beautify(str, opts)
if(res == expected):
print(".")
return True
else:
print("___got:" + res + "\n___expected:" + expected + "\n")
fails = fails + 1
return False
global fails
res = jsbeautifier.beautify(str, opts)
if(res == expected):
print(".")
return True
else:
print("___got:" + res + "\n___expected:" + expected + "\n")
fails = fails + 1
return False
str = "eval(function(p,a,c,k,e,d){e=function(c){return c.toString(36)};if(!''.replace(/^/,String)){while(c--){d[c.toString(a)]=k[c]||c.toString(a)}k=[function(e){return d[e]}];e=function(){return'\\w+'};c=1};while(c--){if(k[c]){p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c])}}return p}('2 0=\"4 3!\";2 1=0.5(/b/6);a.9(\"8\").7=1;',12,12,'str|n|var|W3Schools|Visit|search|i|innerHTML|demo|getElementById|document|w3Schools'.split('|'),0,{}))"
expected = "var str = \"Visit W3Schools!\";\nvar n = str.search(/w3Schools/i);\ndocument.getElementById(\"demo\").innerHTML = n;"
@ -35,4 +36,4 @@ expected = "$(document).ready(function() {\n $(\'.r8ce6\').html(52136);\n
res = test_str(str, expected)
if (fails == 0):
print("OK")
print("OK")

View File

@ -2,23 +2,28 @@ import re
import unittest
import jsbeautifier
class TestJSBeautifierIndentation(unittest.TestCase):
def test_tabs(self):
test_fragment = self.decodesto
self.options.indent_with_tabs = 1;
test_fragment('{tabs()}', "{\n\ttabs()\n}");
self.options.indent_with_tabs = 1
test_fragment('{tabs()}', "{\n\ttabs()\n}")
def test_function_indent(self):
test_fragment = self.decodesto
self.options.indent_with_tabs = 1;
self.options.keep_function_indentation = 1;
test_fragment('var foo = function(){ bar() }();', "var foo = function() {\n\tbar()\n}();");
self.options.indent_with_tabs = 1
self.options.keep_function_indentation = 1
test_fragment(
'var foo = function(){ bar() }();',
"var foo = function() {\n\tbar()\n}();")
self.options.tabs = 1;
self.options.keep_function_indentation = 0;
test_fragment('var foo = function(){ baz() }();', "var foo = function() {\n\tbaz()\n}();");
self.options.tabs = 1
self.options.keep_function_indentation = 0
test_fragment(
'var foo = function(){ baz() }();',
"var foo = function() {\n\tbaz()\n}();")
def decodesto(self, input, expectation=None):
self.assertEqual(

View File

@ -12,11 +12,13 @@ from jsbeautifier.unpackers import evalbased
# NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js!
BLACKLIST = ['jsbeautifier.unpackers.evalbased']
class UnpackingError(Exception):
"""Badly packed source or general error. Argument is a
meaningful description."""
pass
def getunpackers():
"""Scans the unpackers dir, finds unpackers and add them to UNPACKERS list.
An unpacker will be loaded only if it is a valid python module (name must
@ -35,10 +37,12 @@ def getunpackers():
else:
unpackers.append(module)
return sorted(unpackers, key = lambda mod: mod.PRIORITY)
return sorted(unpackers, key=lambda mod: mod.PRIORITY)
UNPACKERS = getunpackers()
def run(source, evalcode=False):
"""Runs the applicable unpackers and return unpacked source as a string."""
for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]:
@ -47,6 +51,7 @@ def run(source, evalcode=False):
source = evalbased.unpack(source)
return source
def filtercomments(source):
"""NOT USED: strips trailing comments and put them at the top."""
trailing_comments = []

View File

@ -18,15 +18,19 @@ from subprocess import PIPE, Popen
PRIORITY = 3
def detect(source):
"""Detects if source is likely to be eval() packed."""
return source.strip().lower().startswith('eval(function(')
def unpack(source):
"""Runs source and return resulting code."""
return jseval('print %s;' % source[4:]) if detect(source) else source
# In case of failure, we'll just return the original, without crashing on user.
def jseval(script):
"""Run code in the JS interpreter and return output."""
try:

View File

@ -20,13 +20,14 @@ import re
PRIORITY = 1
def smartsplit(code):
"""Split `code` at " symbol, only if it is not escaped."""
strings = []
pos = 0
while pos < len(code):
if code[pos] == '"':
word = '' # new word
word = '' # new word
pos += 1
while pos < len(code):
if code[pos] == '"':
@ -40,11 +41,13 @@ def smartsplit(code):
pos += 1
return strings
def detect(code):
"""Detects if `code` is JavascriptObfuscator.com packed."""
# prefer `is not` idiom, so that a true boolean is returned
return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None)
def unpack(code):
"""Unpacks JavascriptObfuscator.com packed code."""
if detect(code):

View File

@ -51,18 +51,21 @@ CAVEAT = """//
"""
SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65'
r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75'
r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B'
r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78'
r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","'
r'\x6C\x65\x6E\x67\x74\x68"]')
SIGNATURE = (
r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F'
r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65'
r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75'
r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B'
r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78'
r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","'
r'\x6C\x65\x6E\x67\x74\x68"]')
def detect(source):
"""Detects MyObfuscate.com packer."""
return SIGNATURE in source
def unpack(source):
"""Unpacks js code packed with MyObfuscate.com"""
if not detect(source):
@ -73,6 +76,7 @@ def unpack(source):
polished = match.group(1) if match else source
return CAVEAT + polished
def _filter(source):
"""Extracts and decode payload (original file) from `source`"""
try:

View File

@ -27,16 +27,16 @@ def detect(source):
"""Detects whether `source` is P.A.C.K.E.R. coded."""
mystr = source.replace(' ', '').find('eval(function(p,a,c,k,e,')
if(mystr > 0):
beginstr = source[:mystr]
beginstr = source[:mystr]
if(mystr != -1):
""" Find endstr"""
if(source.split("')))", 1)[0] == source):
try:
endstr = source.split("}))", 1)[1]
except IndexError:
endstr = ''
else:
endstr = source.split("')))", 1)[1]
""" Find endstr"""
if(source.split("')))", 1)[0] == source):
try:
endstr = source.split("}))", 1)[1]
except IndexError:
endstr = ''
else:
endstr = source.split("')))", 1)[1]
return (mystr != -1)
@ -63,9 +63,10 @@ def unpack(source):
def _filterargs(source):
"""Juice from a source file the four args needed by decoder."""
juicers = [ (r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"),
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"),
]
juicers = [
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\), *(\d+), *(.*)\)\)"),
(r"}\('(.*)', *(\d+|\[\]), *(\d+), *'(.*)'\.split\('\|'\)"),
]
for juicer in juicers:
args = re.search(juicer, source, re.DOTALL)
if args:
@ -80,7 +81,8 @@ def _filterargs(source):
raise UnpackingError('Corrupted p.a.c.k.e.r. data.')
# could not find a satisfying regex
raise UnpackingError('Could not make sense of p.a.c.k.e.r data (unexpected code structure)')
raise UnpackingError(
'Could not make sense of p.a.c.k.e.r data (unexpected code structure)')
def _replacestrings(source):
@ -106,7 +108,7 @@ class Unbaser(object):
ALPHABET = {
62: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
95: (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
'[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
}
def __init__(self, base):
@ -115,7 +117,7 @@ class Unbaser(object):
# fill elements 37...61, if necessary
if 36 < base < 62:
if not hasattr(self.ALPHABET, self.ALPHABET[62][:base]):
self.ALPHABET[base] = self.ALPHABET[62][:base]
self.ALPHABET[base] = self.ALPHABET[62][:base]
# attrs = self.ALPHABET
# print ', '.join("%s: %s" % item for item in attrs.items())
# If base can be handled by int() builtin, let it do it for us
@ -124,8 +126,9 @@ class Unbaser(object):
else:
# Build conversion dictionary cache
try:
self.dictionary = dict((cipher, index) for
index, cipher in enumerate(self.ALPHABET[base]))
self.dictionary = dict(
(cipher, index) for index, cipher in enumerate(
self.ALPHABET[base]))
except KeyError:
raise TypeError('Unsupported base encoding.')
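The Unbaser hunk above caches a cipher-to-index dictionary for number bases the int() builtin cannot handle. A minimal standalone illustration of the same idea (hypothetical helper, not the project's Unbaser class):

ALPHABET_62 = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

def unbase(value, alphabet=ALPHABET_62):
    # Map each cipher character to its positional value once, as the
    # cached dictionary above does, then fold the digits left to right.
    index = {cipher: i for i, cipher in enumerate(alphabet)}
    base = len(alphabet)
    result = 0
    for digit in value:
        result = result * base + index[digit]
    return result

print(unbase('1z'))   # 97, i.e. 1 * 62 + 35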

View File

@ -9,12 +9,16 @@ from jsbeautifier.unpackers.javascriptobfuscator import (
unpack, detect, smartsplit)
# pylint: disable=R0904
class TestJavascriptObfuscator(unittest.TestCase):
"""JavascriptObfuscator.com test case."""
def test_smartsplit(self):
"""Test smartsplit() function."""
split = smartsplit
equals = lambda data, result: self.assertEqual(split(data), result)
def equals(data, result): return self.assertEqual(split(data), result)
equals('', [])
equals('"a", "b"', ['"a"', '"b"'])
@ -23,8 +27,9 @@ class TestJavascriptObfuscator(unittest.TestCase):
def test_detect(self):
"""Test detect() function."""
positive = lambda source: self.assertTrue(detect(source))
negative = lambda source: self.assertFalse(detect(source))
def positive(source): return self.assertTrue(detect(source))
def negative(source): return self.assertFalse(detect(source))
negative('')
negative('abcd')
@ -35,12 +40,15 @@ class TestJavascriptObfuscator(unittest.TestCase):
def test_unpack(self):
"""Test unpack() function."""
decodeto = lambda ob, original: self.assertEqual(unpack(ob), original)
def decodeto(
ob, original): return self.assertEqual(
unpack(ob), original)
decodeto('var _0x8df3=[];var a=10;', 'var a=10;')
decodeto('var _0xb2a7=["\x74\x27\x65\x73\x74"];var i;for(i=0;i<10;++i)'
'{alert(_0xb2a7[0]);} ;', 'var i;for(i=0;i<10;++i){alert'
'("t\'est");} ;')
if __name__ == '__main__':
unittest.main()
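The unpacker tests above replace lambdas bound to a name with small one-line defs. PEP 8 discourages assigning a lambda to an identifier (pycodestyle E731); the def form behaves identically while carrying a real __name__ for tracebacks and test output. A minimal illustration with hypothetical names:

square_lambda = lambda x: x * x                  # the discouraged form (E731)

def square(x): return x * x                      # the form the reformatter emits

print(square_lambda(4), square(4))               # 16 16
print(square_lambda.__name__, square.__name__)   # <lambda> square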

View File

@ -13,6 +13,8 @@ INPUT = os.path.join(path[0], 'test-myobfuscate-input.js')
OUTPUT = os.path.join(path[0], 'test-myobfuscate-output.js')
# pylint: disable=R0904
class TestMyObfuscate(unittest.TestCase):
# pylint: disable=C0103
"""MyObfuscate obfuscator testcase."""
@ -26,15 +28,16 @@ class TestMyObfuscate(unittest.TestCase):
def test_detect(self):
"""Test detect() function."""
detected = lambda source: self.assertTrue(detect(source))
def detected(source): return self.assertTrue(detect(source))
detected(self.input)
def test_unpack(self):
"""Test unpack() function."""
check = lambda inp, out: self.assertEqual(unpack(inp), out)
def check(inp, out): return self.assertEqual(unpack(inp), out)
check(self.input, self.output)
if __name__ == '__main__':
unittest.main()

View File

@ -8,12 +8,16 @@ import unittest
from jsbeautifier.unpackers.packer import detect, unpack
# pylint: disable=R0904
class TestPacker(unittest.TestCase):
"""P.A.C.K.E.R. testcase."""
def test_detect(self):
"""Test detect() function."""
positive = lambda source: self.assertTrue(detect(source))
negative = lambda source: self.assertFalse(detect(source))
def positive(source): return self.assertTrue(detect(source))
def negative(source): return self.assertFalse(detect(source))
negative('')
negative('var a = b')
@ -22,7 +26,7 @@ class TestPacker(unittest.TestCase):
def test_unpack(self):
"""Test unpack() function."""
check = lambda inp, out: self.assertEqual(unpack(inp), out)
def check(inp, out): return self.assertEqual(unpack(inp), out)
check("eval(function(p,a,c,k,e,r){e=String;if(!''.replace(/^/,String)"
"){while(c--)r[c]=k[c]||c;k=[function(e){return r[e]}];e="
@ -30,5 +34,6 @@ class TestPacker(unittest.TestCase):
"new RegExp('\\\\b'+e(c)+'\\\\b','g'),k[c]);return p}('0 2=1',"
"62,3,'var||a'.split('|'),0,{}))", 'var a=1')
if __name__ == '__main__':
unittest.main()

View File

@ -9,12 +9,16 @@ import unittest
from jsbeautifier.unpackers.urlencode import detect, unpack
# pylint: disable=R0904
class TestUrlencode(unittest.TestCase):
"""urlencode test case."""
def test_detect(self):
"""Test detect() function."""
encoded = lambda source: self.assertTrue(detect(source))
unencoded = lambda source: self.assertFalse(detect(source))
def encoded(source): return self.assertTrue(detect(source))
def unencoded(source): return self.assertFalse(detect(source))
unencoded('')
unencoded('var a = b')
@ -24,7 +28,11 @@ class TestUrlencode(unittest.TestCase):
def test_unpack(self):
"""Test unpack function."""
equals = lambda source, result: self.assertEqual(unpack(source), result)
def equals(
source,
result): return self.assertEqual(
unpack(source),
result)
equals('', '')
equals('abcd', 'abcd')
@ -32,5 +40,6 @@ class TestUrlencode(unittest.TestCase):
equals('var%20a=b', 'var a=b')
equals('var%20a+=+b', 'var a = b')
if __name__ == '__main__':
unittest.main()

View File

@ -23,12 +23,14 @@ except ImportError:
PRIORITY = 0
def detect(code):
"""Detects if a scriptlet is urlencoded."""
# the fact that script doesn't contain any space, but has %20 instead
# should be sufficient check for now.
return ' ' not in code and ('%20' in code or code.count('%') > 3)
def unpack(code):
"""URL decode `code` source string."""
return unquote_plus(code) if detect(code) else code

View File

@ -1,26 +1,31 @@
#!/usr/bin/env python
import os,sys
import os
import sys
from setuptools import setup
from jsbeautifier.__version__ import __version__
from setuptools.command.test import test as TestCommand
DIR='jsbeautifier/tests/'
DIR = 'jsbeautifier/tests/'
class PyTest(TestCommand):
user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")]
def initialize_options(self):
TestCommand.initialize_options(self)
self.pytest_args = ['--assert=plain'] +[DIR+x for x in os.listdir(DIR) if x.endswith('.py') and x[0] not in '._']
self.pytest_args = ['--assert=plain'] + [DIR + \
x for x in os.listdir(DIR) if x.endswith('.py') and x[0] not in '._']
def run_tests(self):
#import here, cause outside the eggs aren't loaded
# import here, cause outside the eggs aren't loaded
import pytest
errno = pytest.main(self.pytest_args)
sys.exit(errno)
setup(name='jsbeautifier',
version=__version__,
description='JavaScript unobfuscator and beautifier.',
@ -42,6 +47,6 @@ setup(name='jsbeautifier',
install_requires=["six>=1.6.1", "editorconfig>=0.12.0"],
license='MIT',
test_suite='pytest.collector',
cmdclass = {'test': PyTest},
cmdclass={'test': PyTest},
)
)

View File

@ -9,22 +9,34 @@ options.wrap_line_length = 80
data = ''
data_min = ''
def beautifier_test_underscore():
jsbeautifier.beautify(data, options)
def beautifier_test_underscore_min():
jsbeautifier.beautify(data_min, options)
def report_perf(fn):
import timeit
iter = 50
time = timeit.timeit(fn + "()", setup="from __main__ import " + fn + "; gc.enable()", number=iter)
print(fn + ": " + str(iter/time) + " cycles/sec")
time = timeit.timeit(
fn +
"()",
setup="from __main__ import " +
fn +
"; gc.enable()",
number=iter)
print(fn + ": " + str(iter / time) + " cycles/sec")
if __name__ == '__main__':
dirname = os.path.dirname(os.path.abspath(__file__))
underscore_file = os.path.join(dirname, "../", "test/resources/underscore.js")
underscore_min_file = os.path.join(dirname, "../", "test/resources/underscore-min.js")
underscore_file = os.path.join(
dirname, "../", "test/resources/underscore.js")
underscore_min_file = os.path.join(
dirname, "../", "test/resources/underscore-min.js")
data = copy.copy(''.join(open(underscore_file).readlines()))
data_min = copy.copy(''.join(open(underscore_min_file).readlines()))

View File

@ -1262,7 +1262,7 @@ class TestJSBeautifier(unittest.TestCase):
def decodesto(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.assertMultiLineEqual(
@ -1270,7 +1270,7 @@ class TestJSBeautifier(unittest.TestCase):
# if the expected is different from input, run it again
# expected output should be unchanged when run twice.
if not expectation == None:
if not expectation is None:
self.assertMultiLineEqual(
jsbeautifier.beautify(expectation, self.options), expectation)
@ -1293,7 +1293,7 @@ class TestJSBeautifier(unittest.TestCase):
return self.wrapregex.sub(' \\1', text)
def bt(self, input, expectation=None):
if expectation == None:
if expectation is None:
expectation = input
self.decodesto(input, expectation)