Bugs in long-literal handling

Move n_dict to n_actions and special case n_const_list.
Generalize build_collection out of 3.7+ and into all Pythons
This commit is contained in:
rocky 2022-04-24 17:02:05 -04:00
parent 371138cfbc
commit 8cdf741b62
6 changed files with 304 additions and 256 deletions

Binary file not shown.

View File

@ -1306,7 +1306,7 @@ assert tuple(x.keys()) == (1, 3)
# Try a long dictionary.
# This should not be slow as it has been in the past
values = {
"valuea": a + 1,
"value1": x,
"value2": 2 + 1,
"value3": 3 + 1,
"value4": 4 + 1,
@ -1811,3 +1811,46 @@ values = {
}
assert list(values.values()) == list(range(2, 502 + 2))
# Try a long dictionary that fails because we have a binary op.
# We can get an expr32 grouping speedup
# which is slower than if this were all constant.
# The above was not implemented at the time this test was written.
values = {
    "value1": x + 1,  # This is a binary op, not a constant
    "value2": 2,
    "value3": 3,
    "value4": 4,
    "value5": 5,
    "value6": 6,
    "value7": 7,
    "value8": 8,
    "value9": 9,
    "value10": 10,
    "value11": 11,
    "value12": 12,
    "value13": 13,
    "value14": 14,
    "value15": 15,
    "value16": 16,
    "value17": 17,
    "value18": 18,
    "value19": 19,
    "value20": 20,
    "value21": 21,
    "value22": 22,
    "value23": 23,
    "value24": 24,
    "value25": 25,
    "value26": 26,
    "value27": 27,
    "value28": 28,
    "value29": 29,
    "value30": 30,
    "value31": 31,
    "value32": 32,
    "value33": 33,
}
# This dictionary has 33 entries, not 502, and its first value depends on x.
# Compare only the constant entries "value2".."value33", in insertion order.
assert list(values.values())[1:] == list(range(2, 33 + 1))

View File

@ -84,6 +84,9 @@ def long(num):
return num
# Collection kinds that bound_collection() accepts as its collection_type.
# A kind's position in this tuple is what gets stored as the ``attr`` of the
# COLLECTION_START token emitted for it.
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
class Code(object):
"""
Class for representing code-objects.
@ -122,6 +125,80 @@ class Scanner(object):
# FIXME: This weird Python2 behavior is not Python3
self.resetTokenClass()
def bound_collection(
    self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
) -> list:
    """
    Bracket a long run of simple loads that feed a collection-building
    token with marker tokens.

    If the ``count`` tokens in front of index ``i`` in ``tokens`` are all
    LOAD_CONST, LOAD_FAST, LOAD_GLOBAL or LOAD_NAME, the last ``count``
    entries of ``next_tokens`` are replaced by one COLLECTION_START token,
    one ADD_VALUE token per load, and a BUILD_<collection_type> token
    copied from ``t``.  Otherwise ``t`` is simply appended.

    :param tokens: token list being scanned; ``tokens[i - count:i]`` are the
                   candidate operands.
    :param next_tokens: tokens emitted so far.  NOTE(review): assumed to end
                   with the counterparts of ``tokens[i - count:i]`` - confirm
                   against the caller.
    :param t: the collection-building token; ``t.attr`` is its operand count.
    :param i: index in ``tokens`` just past the last operand.
    :param collection_type: one of the names in CONST_COLLECTIONS.
    :return: a new token list; ``next_tokens`` itself is not modified.
    """
    count = t.attr
    assert isinstance(count, int)
    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this.
        count += 1

    # For small lists don't bother
    if count < 5:
        return next_tokens + [t]

    collection_start = i - count
    if collection_start < 0:
        # The CONST_DICT adjustment above can push the count past the number
        # of tokens available.  A negative start would silently index from
        # the end of ``tokens``, so leave the token stream untouched instead.
        return next_tokens + [t]

    simple_loads = ("LOAD_CONST", "LOAD_FAST", "LOAD_GLOBAL", "LOAD_NAME")
    operands = [tokens[j] for j in range(collection_start, i)]
    if any(operand.kind not in simple_loads for operand in operands):
        return next_tokens + [t]

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, every operand in front of tokens[i] is a simple load.
    # Drop what was emitted for them, add a boundary marker, and re-emit
    # each load as an ADD_VALUE token.
    new_tokens = next_tokens[:-count]
    start_offset = operands[0].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset=f"{start_offset}_0",
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    new_tokens.extend(
        Token(
            opname="ADD_VALUE",
            attr=operand.attr,
            pattr=operand.pattr,
            offset=operand.offset,
            has_arg=True,
            linestart=operand.linestart,
            opc=self.opc,
            has_extended_arg=False,
        )
        for operand in operands
    )
    new_tokens.append(
        Token(
            opname=f"BUILD_{collection_type}",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
def build_instructions(self, co):
"""
Create a list of instructions (a structured object rather than

View File

@ -47,9 +47,6 @@ import sys
# Copy every entry of op3.opmap into this module's global namespace
# (presumably opcode name -> opcode number; confirm against op3).
globals().update(op3.opmap)
# Collection kinds that bound_collection() accepts as its collection_type.
# A kind's position in this tuple is what gets stored as the ``attr`` of the
# COLLECTION_START token emitted for it.
CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")
class Scanner37Base(Scanner):
def __init__(self, version, show_asm=None, is_pypy=False):
super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
@ -184,80 +181,6 @@ class Scanner37Base(Scanner):
# self.varargs_ops = frozenset(self.opc.hasvargs)
return
def bound_collection(
    self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
) -> list:
    """
    Bracket a long run of simple loads that feed a collection-building
    token with marker tokens.

    If the ``count`` tokens in front of index ``i`` in ``tokens`` are all
    LOAD_CONST, LOAD_FAST, LOAD_GLOBAL or LOAD_NAME, the last ``count``
    entries of ``next_tokens`` are replaced by one COLLECTION_START token,
    one ADD_VALUE token per load, and a BUILD_<collection_type> token
    copied from ``t``.  Otherwise ``t`` is simply appended.

    :param tokens: token list being scanned; ``tokens[i - count:i]`` are the
                   candidate operands.
    :param next_tokens: tokens emitted so far.  NOTE(review): assumed to end
                   with the counterparts of ``tokens[i - count:i]`` - confirm
                   against the caller.
    :param t: the collection-building token; ``t.attr`` is its operand count.
    :param i: index in ``tokens`` just past the last operand.
    :param collection_type: one of the names in CONST_COLLECTIONS.
    :return: a new token list; ``next_tokens`` itself is not modified.
    """
    count = t.attr
    assert isinstance(count, int)
    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this.
        count += 1

    # For small lists don't bother
    if count < 5:
        return next_tokens + [t]

    collection_start = i - count
    if collection_start < 0:
        # The CONST_DICT adjustment above can push the count past the number
        # of tokens available.  A negative start would silently index from
        # the end of ``tokens``, so leave the token stream untouched instead.
        return next_tokens + [t]

    simple_loads = ("LOAD_CONST", "LOAD_FAST", "LOAD_GLOBAL", "LOAD_NAME")
    operands = [tokens[j] for j in range(collection_start, i)]
    if any(operand.kind not in simple_loads for operand in operands):
        return next_tokens + [t]

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, every operand in front of tokens[i] is a simple load.
    # Drop what was emitted for them, add a boundary marker, and re-emit
    # each load as an ADD_VALUE token.
    new_tokens = next_tokens[:-count]
    start_offset = operands[0].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset=f"{start_offset}_0",
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    new_tokens.extend(
        Token(
            opname="ADD_VALUE",
            attr=operand.attr,
            pattr=operand.pattr,
            offset=operand.offset,
            has_arg=True,
            linestart=operand.linestart,
            opc=self.opc,
            has_extended_arg=False,
        )
        for operand in operands
    )
    new_tokens.append(
        Token(
            opname=f"BUILD_{collection_type}",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens
def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from an uncompyle6 code object, and transform them,

View File

@ -274,6 +274,189 @@ class NonterminalActions:
# Aliases: store_subscript and subscript nodes share the n_delete_subscript handler.
n_store_subscript = n_subscript = n_delete_subscript
def n_dict(self, node):
"""
Prettyprint a dict.
'dict' is something like k = {'a': 1, 'b': 42}
We will use source-code line breaks to guide us when to break.

The layout of ``node`` differs between Python versions and PyPy, so
each branch below handles one of those layouts.
"""
# A dict whose only child is a const_list is rendered by that child's own handler.
if len(node) == 1 and node[0] == "const_list":
self.preorder(node[0])
self.prune()
return
# Save the current precedence; it is restored just before pruning.
p = self.prec
self.prec = 100
self.indent_more(INDENT_PER_LEVEL)
sep = INDENT_PER_LEVEL[:-1]
# The braces are omitted when the first child is a dict_entry.
if node[0] != "dict_entry":
self.write("{")
line_number = self.line_number
if self.version >= (3, 0) and not self.is_pypy:
if node[0].kind.startswith("kvlist"):
# Python 3.5+ style key/value list in dict
kv_node = node[0]
l = list(kv_node)
length = len(l)
# A trailing BUILD_MAP token is not a key or a value; leave it out.
if kv_node[-1].kind.startswith("BUILD_MAP"):
length -= 1
i = 0
# Respect line breaks from source
# Entries come in pairs: l[i] is the key and l[i + 1] the value.
while i < length:
self.write(sep)
name = self.traverse(l[i], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i + 1], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
i += 2
pass
pass
elif len(node) > 1 and node[1].kind.startswith("kvlist"):
# Python 3.0..3.4 style key/value list in dict
kv_node = node[1]
l = list(kv_node)
if len(l) > 0 and l[0].kind == "kv3":
# Python 3.2 does this
kv_node = node[1][0]
l = list(kv_node)
i = 0
# Entries are consumed three at a time: l[i] is the value and l[i + 1] the key.
while i < len(l):
self.write(sep)
name = self.traverse(l[i + 1], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
pass
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
i += 3
pass
pass
elif node[-1].kind.startswith("BUILD_CONST_KEY_MAP"):
# Python 3.6+ style const map
# node[-2].pattr supplies the keys; all children before it are the values.
keys = node[-2].pattr
values = node[:-2]
# FIXME: Line numbers?
for key, value in zip(keys, values):
self.write(sep)
self.write(repr(key))
line_number = self.line_number
self.write(":")
self.write(self.traverse(value[0]))
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
pass
pass
# NOTE(review): inside the loops sep is built as ", " followed by "\n...",
# so this ",\n" prefix test looks like it cannot match those values - confirm.
if sep.startswith(",\n"):
self.write(sep[1:])
pass
elif node[0].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
template = ("%C", (0, len(node[0]), ", **"))
self.template_engine(template, node[0])
sep = ""
elif node[-1].kind.startswith("BUILD_MAP_UNPACK") or node[
-1
].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
# FIXME: I think we can intermingle dict_comp's with other
# dictionary kinds of things. The most common though is
# a sequence of dict_comp's
kwargs = node[-1].attr
template = ("**%C", (0, kwargs, ", **"))
self.template_engine(template, node)
sep = ""
pass
else:
# Python 2 style kvlist. Find beginning of kvlist.
indent = self.indent + " "
line_number = self.line_number
if node[0].kind.startswith("BUILD_MAP"):
if len(node) > 1 and node[1].kind in ("kvlist", "kvlist_n"):
kv_node = node[1]
else:
kv_node = node[1:]
self.kv_map(kv_node, sep, line_number, indent)
else:
sep = ""
opname = node[-1].kind
if self.is_pypy and self.version >= (3, 5):
if opname.startswith("BUILD_CONST_KEY_MAP"):
# Keys come from node[-2].attr; node[i] is the value for keys[i].
keys = node[-2].attr
# FIXME: DRY this and the above
for i in range(len(keys)):
key = keys[i]
value = self.traverse(node[i], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
else:
if opname.startswith("kvlist"):
list_node = node[0]
else:
list_node = node
assert list_node[-1].kind.startswith("BUILD_MAP")
# Key/value pairs alternate; the final BUILD_MAP child is skipped.
for i in range(0, len(list_node) - 1, 2):
key = self.traverse(list_node[i], indent="")
value = self.traverse(list_node[i + 1], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
elif opname.startswith("kvlist"):
# NOTE(review): kv_node is assigned but not used; node[-1] is passed directly.
kv_node = node[-1]
self.kv_map(node[-1], sep, line_number, indent)
pass
pass
if sep.startswith(",\n"):
self.write(sep[1:])
# Close the brace opened above (same dict_entry condition).
if node[0] != "dict_entry":
self.write("}")
# Undo the indentation and precedence changes made on entry.
self.indent_less(INDENT_PER_LEVEL)
self.prec = p
self.prune()
def n_docstring(self, node):
indent = self.indent

View File

@ -696,184 +696,6 @@ class SourceWalker(GenericASTTraversal, NonterminalActions, ComprehensionMixin):
pass
pass
def n_dict(self, node):
"""
Prettyprint a dict.
'dict' is something like k = {'a': 1, 'b': 42}
We will use source-code line breaks to guide us when to break.

The layout of ``node`` differs between Python versions and PyPy, so
each branch below handles one of those layouts.
"""
# Save the current precedence; it is restored just before pruning.
p = self.prec
self.prec = 100
self.indent_more(INDENT_PER_LEVEL)
sep = INDENT_PER_LEVEL[:-1]
# The braces are omitted when the first child is a dict_entry.
if node[0] != "dict_entry":
self.write("{")
line_number = self.line_number
if self.version >= (3, 0) and not self.is_pypy:
if node[0].kind.startswith("kvlist"):
# Python 3.5+ style key/value list in dict
kv_node = node[0]
l = list(kv_node)
length = len(l)
# A trailing BUILD_MAP token is not a key or a value; leave it out.
if kv_node[-1].kind.startswith("BUILD_MAP"):
length -= 1
i = 0
# Respect line breaks from source
# Entries come in pairs: l[i] is the key and l[i + 1] the value.
while i < length:
self.write(sep)
name = self.traverse(l[i], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i + 1], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
i += 2
pass
pass
elif len(node) > 1 and node[1].kind.startswith("kvlist"):
# Python 3.0..3.4 style key/value list in dict
kv_node = node[1]
l = list(kv_node)
if len(l) > 0 and l[0].kind == "kv3":
# Python 3.2 does this
kv_node = node[1][0]
l = list(kv_node)
i = 0
# Entries are consumed three at a time: l[i] is the value and l[i + 1] the key.
while i < len(l):
self.write(sep)
name = self.traverse(l[i + 1], indent="")
if i > 0:
line_number = self.indent_if_source_nl(
line_number, self.indent + INDENT_PER_LEVEL[:-1]
)
pass
line_number = self.line_number
self.write(name, ": ")
value = self.traverse(
l[i], indent=self.indent + (len(name) + 2) * " "
)
self.write(value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
i += 3
pass
pass
elif node[-1].kind.startswith("BUILD_CONST_KEY_MAP"):
# Python 3.6+ style const map
# node[-2].pattr supplies the keys; all children before it are the values.
keys = node[-2].pattr
values = node[:-2]
# FIXME: Line numbers?
for key, value in zip(keys, values):
self.write(sep)
self.write(repr(key))
line_number = self.line_number
self.write(":")
self.write(self.traverse(value[0]))
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + INDENT_PER_LEVEL[:-1]
line_number = self.line_number
else:
sep += " "
pass
pass
# NOTE(review): inside the loops sep is built as ", " followed by "\n...",
# so this ",\n" prefix test looks like it cannot match those values - confirm.
if sep.startswith(",\n"):
self.write(sep[1:])
pass
elif node[0].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
template = ("%C", (0, len(node[0]), ", **"))
self.template_engine(template, node[0])
sep = ""
elif node[-1].kind.startswith("BUILD_MAP_UNPACK") or node[
-1
].kind.startswith("dict_entry"):
assert self.version >= (3, 5)
# FIXME: I think we can intermingle dict_comp's with other
# dictionary kinds of things. The most common though is
# a sequence of dict_comp's
kwargs = node[-1].attr
template = ("**%C", (0, kwargs, ", **"))
self.template_engine(template, node)
sep = ""
pass
else:
# Python 2 style kvlist. Find beginning of kvlist.
indent = self.indent + " "
line_number = self.line_number
if node[0].kind.startswith("BUILD_MAP"):
if len(node) > 1 and node[1].kind in ("kvlist", "kvlist_n"):
kv_node = node[1]
else:
kv_node = node[1:]
self.kv_map(kv_node, sep, line_number, indent)
else:
sep = ""
opname = node[-1].kind
if self.is_pypy and self.version >= (3, 5):
if opname.startswith("BUILD_CONST_KEY_MAP"):
# Keys come from node[-2].attr; node[i] is the value for keys[i].
keys = node[-2].attr
# FIXME: DRY this and the above
for i in range(len(keys)):
key = keys[i]
value = self.traverse(node[i], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
else:
if opname.startswith("kvlist"):
list_node = node[0]
else:
list_node = node
assert list_node[-1].kind.startswith("BUILD_MAP")
# Key/value pairs alternate; the final BUILD_MAP child is skipped.
for i in range(0, len(list_node) - 1, 2):
key = self.traverse(list_node[i], indent="")
value = self.traverse(list_node[i + 1], indent="")
self.write(sep, key, ": ", value)
sep = ", "
if line_number != self.line_number:
sep += "\n" + self.indent + " "
line_number = self.line_number
pass
pass
pass
elif opname.startswith("kvlist"):
# NOTE(review): kv_node is assigned but not used; node[-1] is passed directly.
kv_node = node[-1]
self.kv_map(node[-1], sep, line_number, indent)
pass
pass
if sep.startswith(",\n"):
self.write(sep[1:])
# Close the brace opened above (same dict_entry condition).
if node[0] != "dict_entry":
self.write("}")
# Undo the indentation and precedence changes made on entry.
self.indent_less(INDENT_PER_LEVEL)
self.prec = p
self.prune()
def template_engine(self, entry, startnode):
"""The format template interpetation engine. See the comment at the
beginning of this module for the how we interpret format