From f66a3e2474a0e4fab50745f352fe060c8b026891 Mon Sep 17 00:00:00 2001
From: Roja A M <roam@mchic02fd1vm.tuc.com>
Date: Sat, 26 Aug 2023 13:45:34 +0530
Subject: [PATCH] Fix - Invalid prettification of object with unicode as key

---
 js/src/javascript/tokenizer.js              | 15 +++++++++++++++
 python/jsbeautifier/javascript/tokenizer.py | 13 +++++++++++++
 test/data/javascript/tests.js               |  9 +++++++++
 3 files changed, 37 insertions(+)

diff --git a/js/src/javascript/tokenizer.js b/js/src/javascript/tokenizer.js
index ee35c571..1abbc598 100644
--- a/js/src/javascript/tokenizer.js
+++ b/js/src/javascript/tokenizer.js
@@ -57,6 +57,7 @@ var TOKEN = {
   BLOCK_COMMENT: 'TK_BLOCK_COMMENT',
   COMMENT: 'TK_COMMENT',
   DOT: 'TK_DOT',
+  UNICODE: 'TK_UNICODE',
   UNKNOWN: 'TK_UNKNOWN',
   START: BASETOKEN.START,
   RAW: BASETOKEN.RAW,
@@ -129,6 +130,7 @@ var Tokenizer = function(input_string, options) {
     xml: pattern_reader.matching(/[\s\S]*?<(\/?)([-a-zA-Z:0-9_.]+|{[^}]+?}|!\[CDATA\[[^\]]*?\]\]|)(\s*{[^}]+?}|\s+[-a-zA-Z:0-9_.]+|\s+[-a-zA-Z:0-9_.]+\s*=\s*('[^']*'|"[^"]*"|{([^{}]|{[^}]+?})+?}))*\s*(\/?)\s*>/),
     single_quote: templatable.until(/['\\\n\r\u2028\u2029]/),
     double_quote: templatable.until(/["\\\n\r\u2028\u2029]/),
+    unicode: pattern_reader.matching(/\\u\{[0-9a-fA-F]+\}/), // \u{H...}: one or more hex digits
     template_text: templatable.until(/[`\\$]/),
     template_expression: templatable.until(/[`}\\]/)
   };
@@ -174,6 +176,7 @@ Tokenizer.prototype._get_next_token = function(previous_token, open_token) { //
   token = token || this._read_regexp(c, previous_token);
   token = token || this._read_xml(c, previous_token);
   token = token || this._read_punctuation();
+  token = token || this._read_unicode_with_braces(c);
   token = token || this._create_token(TOKEN.UNKNOWN, this._input.next());
 
   return token;
@@ -457,6 +460,18 @@ Tokenizer.prototype._read_xml = function(c, previous_token) {
   return null;
 };
 
+// Reads a braced unicode escape (e.g. \u{1d4b6}) via the `unicode` pattern
+// when `c` is a backslash and the next input character is 'u'. Returns a
+// TOKEN.UNICODE token, or null when no such escape is present.
+Tokenizer.prototype._read_unicode_with_braces = function(c) {
+  if (c !== '\\' || this._input.peek(1) !== 'u') {
+    return null;
+  }
+  var unicode = this.__patterns.unicode.read();
+  // Nothing read means no match: return null instead of an empty token.
+  return unicode ? this._create_token(TOKEN.UNICODE, unicode) : null;
+};
+
 function unescape_string(s) {
   // You think that a regex would work for this
   // return s.replace(/\\x([0-9a-f]{2})/gi, function(match, val) {
diff --git a/python/jsbeautifier/javascript/tokenizer.py b/python/jsbeautifier/javascript/tokenizer.py
index 0eeb8a07..1727cea9 100644
--- a/python/jsbeautifier/javascript/tokenizer.py
+++ b/python/jsbeautifier/javascript/tokenizer.py
@@ -51,6 +51,7 @@ class TokenTypes(BaseTokenTypes):
     BLOCK_COMMENT = "TK_BLOCK_COMMENT"
     COMMENT = "TK_COMMENT"
     DOT = "TK_DOT"
+    UNICODE = "TK_UNICODE"
     UNKNOWN = "TK_UNKNOWN"
 
     def __init__(self):
@@ -164,6 +165,8 @@ class TokenizerPatterns(BaseTokenizerPatterns):
         self.template_text = templatable.until(r"[`\\$]")
         self.template_expression = templatable.until(r"[`}\\]")
 
+        self.unicode = pattern.matching(r"\\u\{[0-9a-fA-F]+\}")  # \u{H...}: 1+ hex digits
+
 
 class Tokenizer(BaseTokenizer):
     positionable_operators = positionable_operators
@@ -229,6 +232,7 @@ class Tokenizer(BaseTokenizer):
         token = token or self._read_regexp(c, previous_token)
         token = token or self._read_xml(c, previous_token)
         token = token or self._read_punctuation()
+        token = token or self._read_unicode_with_braces(c)
         token = token or self._create_token(TOKEN.UNKNOWN, self._input.next())
 
         return token
@@ -500,6 +504,15 @@ class Tokenizer(BaseTokenizer):
 
         return token
 
+    def _read_unicode_with_braces(self, c):
+        r"""Read a braced unicode escape such as \u{1d4b6}; None if absent."""
+        if c != "\\" or self._input.peek(1) != "u":
+            return None
+        escape = self._patterns.unicode.read()
+        # Nothing read means the pattern did not match: return None rather
+        # than an empty token so the caller's later fallbacks can run.
+        return self._create_token(TOKEN.UNICODE, escape) if escape else None
+
     __regexTokens = {
         TOKEN.COMMENT,
         TOKEN.START_EXPR,
diff --git a/test/data/javascript/tests.js b/test/data/javascript/tests.js
index 25967eea..a1fa610a 100644
--- a/test/data/javascript/tests.js
+++ b/test/data/javascript/tests.js
@@ -1750,6 +1750,15 @@ exports.test_data = {
         {
           input: 'fn[0]`tagged`',
           output: 'fn[0] `tagged`'
+        },
+        {
+          comment: 'Issue #2159: Invalid prettification of object with unicode escape character as object key - test scenario: object with unicode as key',
+          input: '{\\\\u{1d4b6}:"ascr"}',
+          output: [
+            '{',
+            '    \\\\u{1d4b6}: "ascr"',
+            '}'
+          ]
         }
       ]
     }, {