creating clean .py

Mysterie 2012-06-05 15:32:21 +02:00
parent 322c76f66c
commit 0b1561d760
17 changed files with 0 additions and 8233 deletions

@@ -1,776 +0,0 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['parse', 'AST', 'ParserError', 'Parser']
from spark import GenericASTBuilder
import string, exceptions, sys
from UserList import UserList
from Scanner import Token
class AST(UserList):
def __init__(self, type, kids=[]):
self.type = intern(type)
UserList.__init__(self, kids)
def __getslice__(self, low, high): return self.data[low:high]
def __eq__(self, o):
if isinstance(o, AST):
return self.type == o.type \
and UserList.__eq__(self, o)
else:
return self.type == o
def __hash__(self): return hash(self.type)
def __repr__(self, indent=''):
rv = str(self.type)
for k in self:
rv = rv + '\n' + string.replace(str(k), '\n', '\n ')
return rv
class ParserError(Exception):
def __init__(self, token, offset):
self.token = token
self.offset = offset
def __str__(self):
return "Syntax error at or near `%r' token at offset %s" % \
(self.token, self.offset)
class Parser(GenericASTBuilder):
def __init__(self):
GenericASTBuilder.__init__(self, AST, 'stmts')
self.customized = {}
def cleanup(self):
"""
Remove recursive references to allow garbage
collector to collect this object.
"""
for dict in (self.rule2func, self.rules, self.rule2name):
for i in dict.keys():
dict[i] = None
for i in dir(self):
setattr(self, i, None)
def error(self, token):
raise ParserError(token, token.offset)
def typestring(self, token):
return token.type
def p_funcdef(self, args):
'''
stmt ::= funcdef
funcdef ::= mkfunc designator
stmt ::= funcdefdeco
funcdefdeco ::= mkfuncdeco designator
mkfuncdeco ::= expr mkfuncdeco CALL_FUNCTION_1
mkfuncdeco ::= expr mkfuncdeco0 CALL_FUNCTION_1
mkfuncdeco0 ::= mkfunc
load_closure ::= load_closure LOAD_CLOSURE
load_closure ::= LOAD_CLOSURE
'''
def p_list_comprehension(self, args):
'''
expr ::= list_compr
list_compr ::= BUILD_LIST_0 list_iter
list_iter ::= list_for
list_iter ::= list_if
list_iter ::= list_if_not
list_iter ::= lc_body
_come_from ::= COME_FROM
_come_from ::=
list_for ::= expr _for designator list_iter JUMP_BACK
list_if ::= expr jmp_false list_iter
list_if_not ::= expr jmp_true list_iter
lc_body ::= expr LIST_APPEND
'''
def p_setcomp(self, args):
'''
expr ::= setcomp
setcomp ::= LOAD_SETCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
stmt ::= setcomp_func
setcomp_func ::= BUILD_SET_0 LOAD_FAST FOR_ITER designator comp_iter
JUMP_BACK RETURN_VALUE RETURN_LAST
comp_iter ::= comp_if
comp_iter ::= comp_ifnot
comp_iter ::= comp_for
comp_iter ::= comp_body
comp_body ::= set_comp_body
comp_body ::= gen_comp_body
comp_body ::= dict_comp_body
set_comp_body ::= expr SET_ADD
gen_comp_body ::= expr YIELD_VALUE POP_TOP
dict_comp_body ::= expr expr MAP_ADD
comp_if ::= expr jmp_false comp_iter
comp_ifnot ::= expr jmp_true comp_iter
comp_for ::= expr _for designator comp_iter JUMP_BACK
'''
def p_genexpr(self, args):
'''
expr ::= genexpr
genexpr ::= LOAD_GENEXPR MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
stmt ::= genexpr_func
genexpr_func ::= LOAD_FAST FOR_ITER designator comp_iter JUMP_BACK
'''
def p_dictcomp(self, args):
'''
expr ::= dictcomp
dictcomp ::= LOAD_DICTCOMP MAKE_FUNCTION_0 expr GET_ITER CALL_FUNCTION_1
stmt ::= dictcomp_func
dictcomp_func ::= BUILD_MAP LOAD_FAST FOR_ITER designator
comp_iter JUMP_BACK RETURN_VALUE RETURN_LAST
'''
def p_augmented_assign(self, args):
'''
stmt ::= augassign1
stmt ::= augassign2
augassign1 ::= expr expr inplace_op designator
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
augassign2 ::= expr DUP_TOP LOAD_ATTR expr
inplace_op ROT_TWO STORE_ATTR
inplace_op ::= INPLACE_ADD
inplace_op ::= INPLACE_SUBTRACT
inplace_op ::= INPLACE_MULTIPLY
inplace_op ::= INPLACE_DIVIDE
inplace_op ::= INPLACE_TRUE_DIVIDE
inplace_op ::= INPLACE_FLOOR_DIVIDE
inplace_op ::= INPLACE_MODULO
inplace_op ::= INPLACE_POWER
inplace_op ::= INPLACE_LSHIFT
inplace_op ::= INPLACE_RSHIFT
inplace_op ::= INPLACE_AND
inplace_op ::= INPLACE_XOR
inplace_op ::= INPLACE_OR
'''
def p_assign(self, args):
'''
stmt ::= assign
assign ::= expr DUP_TOP designList
assign ::= expr designator
stmt ::= assign2
stmt ::= assign3
assign2 ::= expr expr ROT_TWO designator designator
assign3 ::= expr expr expr ROT_THREE ROT_TWO designator designator designator
'''
def p_print(self, args):
'''
stmt ::= print_items_stmt
stmt ::= print_nl
stmt ::= print_items_nl_stmt
print_items_stmt ::= expr PRINT_ITEM print_items_opt
print_items_nl_stmt ::= expr PRINT_ITEM print_items_opt PRINT_NEWLINE_CONT
print_items_opt ::= print_items
print_items_opt ::=
print_items ::= print_items print_item
print_items ::= print_item
print_item ::= expr PRINT_ITEM_CONT
print_nl ::= PRINT_NEWLINE
'''
def p_print_to(self, args):
'''
stmt ::= print_to
stmt ::= print_to_nl
stmt ::= print_nl_to
print_to ::= expr print_to_items POP_TOP
print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
print_nl_to ::= expr PRINT_NEWLINE_TO
print_to_items ::= print_to_items print_to_item
print_to_items ::= print_to_item
print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
'''
def p_import20(self, args):
'''
stmt ::= importstmt
stmt ::= importfrom
stmt ::= importstar
stmt ::= importmultiple
importlist2 ::= importlist2 import_as
importlist2 ::= import_as
import_as ::= IMPORT_NAME designator
import_as ::= IMPORT_NAME LOAD_ATTR designator
import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR designator
import_as ::= IMPORT_NAME LOAD_ATTR LOAD_ATTR LOAD_ATTR designator
import_as ::= IMPORT_FROM designator
importstmt ::= LOAD_CONST LOAD_CONST import_as
importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME IMPORT_STAR
importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME importlist2 POP_TOP
importstar ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT IMPORT_STAR
importfrom ::= LOAD_CONST LOAD_CONST IMPORT_NAME_CONT importlist2 POP_TOP
importmultiple ::= LOAD_CONST LOAD_CONST import_as imports_cont
imports_cont ::= imports_cont import_cont
imports_cont ::= import_cont
import_cont ::= LOAD_CONST LOAD_CONST import_as_cont
import_as_cont ::= IMPORT_NAME_CONT designator
import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR designator
import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR designator
import_as_cont ::= IMPORT_NAME_CONT LOAD_ATTR LOAD_ATTR LOAD_ATTR designator
import_as_cont ::= IMPORT_FROM designator
'''
def p_grammar(self, args):
'''
stmts ::= stmts sstmt
stmts ::= sstmt
sstmt ::= stmt
sstmt ::= ifelsestmtr
sstmt ::= return_stmt RETURN_LAST
stmts_opt ::= stmts
stmts_opt ::= passstmt
passstmt ::=
_stmts ::= _stmts stmt
_stmts ::= stmt
c_stmts ::= _stmts
c_stmts ::= _stmts lastc_stmt
c_stmts ::= lastc_stmt
c_stmts ::= continue_stmts
lastc_stmt ::= iflaststmt
lastc_stmt ::= whileelselaststmt
lastc_stmt ::= forelselaststmt
lastc_stmt ::= ifelsestmtr
lastc_stmt ::= ifelsestmtc
lastc_stmt ::= tryelsestmtc
c_stmts_opt ::= c_stmts
c_stmts_opt ::= passstmt
l_stmts ::= _stmts
l_stmts ::= return_stmts
l_stmts ::= continue_stmts
l_stmts ::= _stmts lastl_stmt
l_stmts ::= lastl_stmt
lastl_stmt ::= iflaststmtl
lastl_stmt ::= ifelsestmtl
lastl_stmt ::= forelselaststmtl
lastl_stmt ::= tryelsestmtl
l_stmts_opt ::= l_stmts
l_stmts_opt ::= passstmt
suite_stmts ::= _stmts
suite_stmts ::= return_stmts
suite_stmts ::= continue_stmts
suite_stmts_opt ::= suite_stmts
suite_stmts_opt ::= passstmt
else_suite ::= suite_stmts
else_suitel ::= l_stmts
else_suitec ::= c_stmts
else_suitec ::= return_stmts
designList ::= designator designator
designList ::= designator DUP_TOP designList
designator ::= STORE_FAST
designator ::= STORE_NAME
designator ::= STORE_GLOBAL
designator ::= STORE_DEREF
designator ::= expr STORE_ATTR
designator ::= expr STORE_SLICE+0
designator ::= expr expr STORE_SLICE+1
designator ::= expr expr STORE_SLICE+2
designator ::= expr expr expr STORE_SLICE+3
designator ::= store_subscr
store_subscr ::= expr expr STORE_SUBSCR
designator ::= unpack
designator ::= unpack_list
stmt ::= classdef
stmt ::= call_stmt
call_stmt ::= expr POP_TOP
stmt ::= return_stmt
return_stmt ::= expr RETURN_VALUE
return_stmts ::= return_stmt
return_stmts ::= _stmts return_stmt
return_if_stmts ::= return_if_stmt
return_if_stmts ::= _stmts return_if_stmt
return_if_stmt ::= expr RETURN_END_IF
stmt ::= break_stmt
break_stmt ::= BREAK_LOOP
stmt ::= continue_stmt
continue_stmt ::= CONTINUE
continue_stmt ::= CONTINUE_LOOP
continue_stmts ::= _stmts lastl_stmt continue_stmt
continue_stmts ::= lastl_stmt continue_stmt
continue_stmts ::= continue_stmt
stmt ::= raise_stmt
raise_stmt ::= exprlist RAISE_VARARGS
raise_stmt ::= nullexprlist RAISE_VARARGS
stmt ::= exec_stmt
exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
exec_stmt ::= expr exprlist EXEC_STMT
stmt ::= assert
stmt ::= assert2
stmt ::= ifstmt
stmt ::= ifelsestmt
stmt ::= whilestmt
stmt ::= whilenotstmt
stmt ::= while1stmt
stmt ::= whileelsestmt
stmt ::= while1elsestmt
stmt ::= forstmt
stmt ::= forelsestmt
stmt ::= trystmt
stmt ::= tryelsestmt
stmt ::= tryfinallystmt
stmt ::= withstmt
stmt ::= withasstmt
stmt ::= del_stmt
del_stmt ::= DELETE_FAST
del_stmt ::= DELETE_NAME
del_stmt ::= DELETE_GLOBAL
del_stmt ::= expr DELETE_SLICE+0
del_stmt ::= expr expr DELETE_SLICE+1
del_stmt ::= expr expr DELETE_SLICE+2
del_stmt ::= expr expr expr DELETE_SLICE+3
del_stmt ::= delete_subscr
delete_subscr ::= expr expr DELETE_SUBSCR
del_stmt ::= expr DELETE_ATTR
kwarg ::= LOAD_CONST expr
classdef ::= LOAD_CONST expr mkfunc
CALL_FUNCTION_0 BUILD_CLASS designator
stmt ::= classdefdeco
classdefdeco ::= classdefdeco1 designator
classdefdeco1 ::= expr classdefdeco1 CALL_FUNCTION_1
classdefdeco1 ::= expr classdefdeco2 CALL_FUNCTION_1
classdefdeco2 ::= LOAD_CONST expr mkfunc CALL_FUNCTION_0 BUILD_CLASS
_jump ::= JUMP_ABSOLUTE
_jump ::= JUMP_FORWARD
_jump ::= JUMP_BACK
jmp_false ::= POP_JUMP_IF_FALSE
jmp_false ::= JUMP_IF_FALSE
jmp_true ::= POP_JUMP_IF_TRUE
jmp_true ::= JUMP_IF_TRUE
multi_come_from ::= multi_come_from COME_FROM
multi_come_from ::=
assert_end ::= multi_come_from POP_TOP
assert_end ::=
assert ::= assert_expr jmp_true
LOAD_ASSERT RAISE_VARARGS assert_end
assert2 ::= assert_expr jmp_true
LOAD_ASSERT expr RAISE_VARARGS assert_end
assert ::= assert_expr jmp_true
LOAD_GLOBAL RAISE_VARARGS assert_end
assert2 ::= assert_expr jmp_true
LOAD_GLOBAL expr RAISE_VARARGS assert_end
assert_expr ::= assert_expr_or
assert_expr ::= assert_expr_and
assert_expr ::= expr
assert_expr_or ::= assert_expr jmp_true expr
assert_expr_and ::= assert_expr jmp_false expr
ifstmt ::= testexpr _ifstmts_jump
testexpr ::= testfalse
testexpr ::= testtrue
testfalse ::= expr jmp_false
testtrue ::= expr jmp_true
_ifstmts_jump ::= return_if_stmts
_ifstmts_jump ::= c_stmts_opt JUMP_FORWARD COME_FROM
iflaststmt ::= testexpr c_stmts_opt JUMP_ABSOLUTE
iflaststmtl ::= testexpr c_stmts_opt JUMP_BACK
ifelsestmt ::= testexpr c_stmts_opt JUMP_FORWARD else_suite COME_FROM
ifelsestmtc ::= testexpr c_stmts_opt JUMP_ABSOLUTE else_suitec
ifelsestmtr ::= testexpr return_if_stmts return_stmts
ifelsestmtl ::= testexpr c_stmts_opt JUMP_BACK else_suitel
trystmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle COME_FROM
tryelsestmt ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suite COME_FROM
tryelsestmtc ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suitec COME_FROM
tryelsestmtl ::= SETUP_EXCEPT suite_stmts_opt POP_BLOCK
try_middle else_suitel COME_FROM
try_middle ::= jmp_abs COME_FROM except_stmts
END_FINALLY
try_middle ::= JUMP_FORWARD COME_FROM except_stmts
END_FINALLY COME_FROM
except_stmts ::= except_stmts except_stmt
except_stmts ::= except_stmt
except_stmt ::= except_cond1 except_suite
except_stmt ::= except_cond2 except_suite
except_stmt ::= except
except_suite ::= c_stmts_opt JUMP_FORWARD
except_suite ::= c_stmts_opt jmp_abs
except_suite ::= return_stmts
except_cond1 ::= DUP_TOP expr COMPARE_OP
jmp_false POP_TOP POP_TOP POP_TOP
except_cond2 ::= DUP_TOP expr COMPARE_OP
jmp_false POP_TOP designator POP_TOP
except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt JUMP_FORWARD
except ::= POP_TOP POP_TOP POP_TOP c_stmts_opt jmp_abs
except ::= POP_TOP POP_TOP POP_TOP return_stmts
jmp_abs ::= JUMP_ABSOLUTE
jmp_abs ::= JUMP_BACK
tryfinallystmt ::= SETUP_FINALLY suite_stmts
POP_BLOCK LOAD_CONST
COME_FROM suite_stmts_opt END_FINALLY
withstmt ::= expr SETUP_WITH POP_TOP suite_stmts_opt
POP_BLOCK LOAD_CONST COME_FROM
WITH_CLEANUP END_FINALLY
withasstmt ::= expr SETUP_WITH designator suite_stmts_opt
POP_BLOCK LOAD_CONST COME_FROM
WITH_CLEANUP END_FINALLY
whilestmt ::= SETUP_LOOP
testexpr
l_stmts_opt JUMP_BACK
POP_BLOCK COME_FROM
whilestmt ::= SETUP_LOOP
testexpr
return_stmts
POP_BLOCK COME_FROM
while1stmt ::= SETUP_LOOP l_stmts JUMP_BACK COME_FROM
while1stmt ::= SETUP_LOOP return_stmts COME_FROM
whileelsestmt ::= SETUP_LOOP testexpr
l_stmts_opt JUMP_BACK
POP_BLOCK
else_suite COME_FROM
whileelselaststmt ::= SETUP_LOOP testexpr
l_stmts_opt JUMP_BACK
POP_BLOCK
else_suitec COME_FROM
_for ::= GET_ITER FOR_ITER
_for ::= LOAD_CONST FOR_LOOP
for_block ::= l_stmts_opt JUMP_BACK
for_block ::= return_stmts _come_from
forstmt ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK COME_FROM
forelsestmt ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK else_suite COME_FROM
forelselaststmt ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK else_suitec COME_FROM
forelselaststmtl ::= SETUP_LOOP expr _for designator
for_block POP_BLOCK else_suitel COME_FROM
'''
def p_expr(self, args):
'''
expr ::= _mklambda
expr ::= SET_LINENO
expr ::= LOAD_FAST
expr ::= LOAD_NAME
expr ::= LOAD_CONST
expr ::= LOAD_ASSERT
expr ::= LOAD_GLOBAL
expr ::= LOAD_DEREF
expr ::= LOAD_LOCALS
expr ::= load_attr
expr ::= binary_expr
expr ::= binary_expr_na
expr ::= build_list
expr ::= cmp
expr ::= mapexpr
expr ::= and
expr ::= and2
expr ::= or
expr ::= unary_expr
expr ::= call_function
expr ::= unary_not
expr ::= unary_convert
expr ::= binary_subscr
expr ::= binary_subscr2
expr ::= load_attr
expr ::= get_iter
expr ::= slice0
expr ::= slice1
expr ::= slice2
expr ::= slice3
expr ::= buildslice2
expr ::= buildslice3
expr ::= yield
binary_expr ::= expr expr binary_op
binary_op ::= BINARY_ADD
binary_op ::= BINARY_MULTIPLY
binary_op ::= BINARY_AND
binary_op ::= BINARY_OR
binary_op ::= BINARY_XOR
binary_op ::= BINARY_SUBTRACT
binary_op ::= BINARY_DIVIDE
binary_op ::= BINARY_TRUE_DIVIDE
binary_op ::= BINARY_FLOOR_DIVIDE
binary_op ::= BINARY_MODULO
binary_op ::= BINARY_LSHIFT
binary_op ::= BINARY_RSHIFT
binary_op ::= BINARY_POWER
unary_expr ::= expr unary_op
unary_op ::= UNARY_POSITIVE
unary_op ::= UNARY_NEGATIVE
unary_op ::= UNARY_INVERT
unary_not ::= expr UNARY_NOT
unary_convert ::= expr UNARY_CONVERT
binary_subscr ::= expr expr BINARY_SUBSCR
binary_subscr2 ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
load_attr ::= expr LOAD_ATTR
get_iter ::= expr GET_ITER
slice0 ::= expr SLICE+0
slice0 ::= expr DUP_TOP SLICE+0
slice1 ::= expr expr SLICE+1
slice1 ::= expr expr DUP_TOPX_2 SLICE+1
slice2 ::= expr expr SLICE+2
slice2 ::= expr expr DUP_TOPX_2 SLICE+2
slice3 ::= expr expr expr SLICE+3
slice3 ::= expr expr expr DUP_TOPX_3 SLICE+3
buildslice3 ::= expr expr expr BUILD_SLICE_3
buildslice2 ::= expr expr BUILD_SLICE_2
yield ::= expr YIELD_VALUE
_mklambda ::= load_closure mklambda
_mklambda ::= mklambda
or ::= expr jmp_true expr _come_from
or ::= expr JUMP_IF_TRUE_OR_POP expr COME_FROM
and ::= expr jmp_false expr _come_from
and ::= expr JUMP_IF_FALSE_OR_POP expr COME_FROM
and2 ::= _jump jmp_false COME_FROM expr COME_FROM
expr ::= conditional
conditional ::= expr jmp_false expr JUMP_FORWARD expr COME_FROM
conditional ::= expr jmp_false expr JUMP_ABSOLUTE expr
expr ::= conditionalnot
conditionalnot ::= expr jmp_true expr _jump expr COME_FROM
stmt ::= return_lambda
stmt ::= conditional_lambda
stmt ::= conditional_lambda2
return_lambda ::= expr RETURN_VALUE LAMBDA_MARKER
conditional_lambda ::= expr jmp_false return_if_stmt return_stmt LAMBDA_MARKER
cmp ::= cmp_list
cmp ::= compare
compare ::= expr expr COMPARE_OP
cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP
_come_from
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE_OR_POP
cmp_list1 COME_FROM
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP jmp_false
cmp_list1 _come_from
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP JUMP_IF_FALSE_OR_POP
cmp_list2 COME_FROM
cmp_list1 ::= expr DUP_TOP ROT_THREE
COMPARE_OP jmp_false
cmp_list2 _come_from
cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD
cmp_list2 ::= expr COMPARE_OP RETURN_VALUE
mapexpr ::= BUILD_MAP kvlist
kvlist ::= kvlist kv
kvlist ::= kvlist kv2
kvlist ::= kvlist kv3
kvlist ::=
kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
kv2 ::= DUP_TOP expr expr ROT_THREE STORE_SUBSCR
kv3 ::= expr expr STORE_MAP
exprlist ::= exprlist expr
exprlist ::= expr
nullexprlist ::=
'''
def nonterminal(self, nt, args):
collect = ('stmts', 'exprlist', 'kvlist', '_stmts', 'print_items')
if nt in collect and len(args) > 1:
#
# Collect iterated thingies together.
#
rv = args[0]
rv.append(args[1])
else:
rv = GenericASTBuilder.nonterminal(self, nt, args)
return rv
def __ambiguity(self, children):
# only for debugging! to be removed hG/2000-10-15
print children
return GenericASTBuilder.ambiguity(self, children)
def resolve(self, list):
if len(list) == 2 and 'funcdef' in list and 'assign' in list:
return 'funcdef'
if 'grammar' in list and 'expr' in list:
return 'expr'
#print >> sys.stderr, 'resolve', str(list)
return GenericASTBuilder.resolve(self, list)
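# 'nop' is the semantic action attached to the rules added at run time in
# parse() below; a single module-level Parser instance is reused, and
# p.customized records which dynamically built rules were already added.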
nop = lambda self, args: None
p = Parser()
def parse(tokens, customize):
#
# Special handling for opcodes that take a variable number
# of arguments -- we add a new rule for each:
#
# expr ::= {expr}^n BUILD_LIST_n
# expr ::= {expr}^n BUILD_TUPLE_n
# unpack_list ::= UNPACK_LIST_n {designator}^n
# unpack ::= UNPACK_TUPLE_n {designator}^n
# unpack ::= UNPACK_SEQUENCE_n {designator}^n
# mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
# mkfunc ::= {expr}^n load_closure LOAD_CONST MAKE_FUNCTION_n
# expr ::= expr {expr}^n CALL_FUNCTION_n
# expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
# expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
# expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
#
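# For example (hypothetical oparg, not taken from this commit): a
# CALL_FUNCTION whose oparg is 258 passes 2 positional and 1 keyword
# argument (258 & 0xff == 2, (258 >> 8) & 0xff == 1), so the customized
# token CALL_FUNCTION_258 gets the rule
#   call_function ::= expr expr expr kwarg CALL_FUNCTION_258
# where the leading 'expr' is the callable itself.
#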
global p
for k, v in customize.items():
# avoid adding the same rule twice to this parser
if p.customized.has_key(k):
continue
p.customized[k] = None
#nop = lambda self, args: None
op = k[:string.rfind(k, '_')]
if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SET'):
rule = 'build_list ::= ' + 'expr '*v + k
elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
rule = 'unpack ::= ' + k + ' designator'*v
elif op == 'UNPACK_LIST':
rule = 'unpack_list ::= ' + k + ' designator'*v
elif op == 'DUP_TOPX':
# no need to add a rule
continue
#rule = 'dup_topx ::= ' + 'expr '*v + k
elif op == 'MAKE_FUNCTION':
p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
('expr '*v, k), nop)
rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
elif op == 'MAKE_CLOSURE':
p.addRule('mklambda ::= %s load_closure LOAD_LAMBDA %s' %
('expr '*v, k), nop)
p.addRule('genexpr ::= %s load_closure LOAD_GENEXPR %s expr GET_ITER CALL_FUNCTION_1' %
('expr '*v, k), nop)
p.addRule('setcomp ::= %s load_closure LOAD_SETCOMP %s expr GET_ITER CALL_FUNCTION_1' %
('expr '*v, k), nop)
p.addRule('dictcomp ::= %s load_closure LOAD_DICTCOMP %s expr GET_ITER CALL_FUNCTION_1' %
('expr '*v, k), nop)
rule = 'mkfunc ::= %s load_closure LOAD_CONST %s' % ('expr '*v, k)
# rule = 'mkfunc ::= %s closure_list LOAD_CONST %s' % ('expr '*v, k)
elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
na = (v & 0xff) # positional parameters
nk = (v >> 8) & 0xff # keyword parameters
# number of apply equiv arguments:
nak = ( len(op)-len('CALL_FUNCTION') ) / 3
rule = 'call_function ::= expr ' + 'expr '*na + 'kwarg '*nk \
+ 'expr ' * nak + k
else:
raise Exception('unknown customize token %s' % k)
p.addRule(rule, nop)
ast = p.parse(tokens)
# p.cleanup()
return ast
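
For context, a minimal sketch of how these deleted modules were wired together, assuming the first file above is the Parser module and the next one the Scanner module (the helper function below is hypothetical, not part of this commit):

    import Parser                   # grammar + parse() shown above
    from Scanner import Scanner     # scanner shown below

    def tokens_to_ast(co, version='2.7', showasm=False):
        # disassemble a code object into Tokens, then parse them into an AST
        scanner = Scanner(version)
        scanner.setShowAsm(showasm)
        tokens, customize = scanner.disassemble(co)
        return Parser.parse(tokens, customize)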

@@ -1,849 +0,0 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
class Token:
"""
Class representing a byte-code token.
A byte-code token is equivalent to the contents of one line
as output by dis.dis().
"""
def __init__(self, type_, attr=None, pattr=None, offset=-1, linestart=False):
self.type = intern(type_)
self.attr = attr
self.pattr = pattr
self.offset = offset
self.linestart = linestart
def __cmp__(self, o):
if isinstance(o, Token):
# both are tokens: compare type and pattr
return cmp(self.type, o.type) or cmp(self.pattr, o.pattr)
else:
return cmp(self.type, o)
def __repr__(self): return str(self.type)
def __str__(self):
pattr = self.pattr
if self.linestart:
return '\n%s\t%-17s %r' % (self.offset, self.type, pattr)
else:
return '%s\t%-17s %r' % (self.offset, self.type, pattr)
def __hash__(self): return hash(self.type)
def __getitem__(self, i): raise IndexError
class Code:
"""
Class for representing code-objects.
This is similar to the original code object, but additionally
the disassembled code is stored in the attribute '_tokens'.
"""
def __init__(self, co, scanner, classname=None):
for i in dir(co):
if i.startswith('co_'):
setattr(self, i, getattr(co, i))
self._tokens, self._customize = scanner.disassemble(co, classname)
class Scanner:
def __init__(self, version):
self.version = version
self.resetTokenClass()
dis.setVersion(version)
globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']})
globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']})
globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
globals().update({'JF': dis.opmap['JUMP_FORWARD']})
self.JUMP_OPs = map(lambda op: dis.opname[op],
dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
def setTokenClass(self, tokenClass):
assert type(tokenClass) == types.ClassType
self.Token = tokenClass
def resetTokenClass(self):
self.setTokenClass(Token)
def disassemble(self, co, classname=None):
"""
Disassemble a code object, returning a list of 'Token'.
The main part of this procedure is modelled after
dis.disassemble().
"""
rv = []
customize = {}
Token = self.Token # shortcut
self.code = code = array('B', co.co_code)
n = len(code)
self.prev = [0]
# build self.prev, mapping each byte offset to the offset of the instruction before it
for i in self.op_range(0, n):
c = code[i]
op = code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
j = 0
# linestarts lists (offset, line_no) pairs from dis.findlinestarts()
linestarts = list(dis.findlinestarts(co))
linestartoffsets = {a for (a, _) in linestarts}
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
last_op = code[self.prev[start_byte]]
(prev_start_byte, prev_line_no) = (start_byte, line_no)
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
# self.lines[offset] = (line number, offset where the next source line starts)
cf = self.find_jump_targets(code)
# cf maps each jump-target offset to the list of offsets that jump to it
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
while i < n-1:
if self.lines[last_stmt].next > i:
if code[last_stmt] == PRINT_ITEM:
if code[i] == PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
elif code[i] == PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
last_stmt = i
i = self.next_stmt[i]
imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if code[last_import] == IMPORT_NAME == code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
extended_arg = 0
for offset in self.op_range(0, n):
if offset in cf:
k = 0
for j in cf[offset]:
rv.append(Token('COME_FROM', None, repr(j),
offset="%s_%d" % (offset, k) ))
k += 1
op = code[offset]
opname = dis.opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = code[offset+1] + code[offset+2] * 256 + extended_arg
extended_arg = 0
if op == dis.EXTENDED_ARG:
extended_arg = oparg * 65536L
continue
if op in dis.hasconst:
const = co.co_consts[oparg]
if type(const) == types.CodeType:
oparg = const
if const.co_name == '<lambda>':
assert opname == 'LOAD_CONST'
opname = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
#pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in dis.hasname:
pattr = names[oparg]
elif op in dis.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in dis.hasjabs:
pattr = repr(oparg)
elif op in dis.haslocal:
pattr = varnames[oparg]
elif op in dis.hascompare:
pattr = dis.cmp_op[oparg]
elif op in dis.hasfree:
pattr = free[oparg]
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
UNPACK_SEQUENCE,
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
CALL_FUNCTION_VAR_KW, DUP_TOPX,
):
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE.
if op == BUILD_TUPLE and \
code[offset-3] == LOAD_CLOSURE:
continue
else:
opname = '%s_%d' % (opname, oparg)
if op != BUILD_SLICE:
customize[opname] = oparg
elif op == JA:
target = self.get_target(offset)
if target < offset:
if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \
and offset not in self.not_continue:
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'
elif op == LOAD_GLOBAL:
try:
if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE':
opname = 'LOAD_ASSERT'
except AttributeError:
pass
elif op == RETURN_VALUE:
if offset in self.return_end_ifs:
opname = 'RETURN_END_IF'
if offset not in replace:
rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
else:
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
if self.showasm:
out = self.out # shortcut
for t in rv:
print >>out, t
print >>out
return rv, customize
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.code[pos+1] + self.code[pos+2] * 256
if op in dis.hasjrel:
target += pos + 3
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
def build_stmt_indices(self):
code = self.code
start = 0;
end = len(code)
stmt_opcodes = {
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
JUMP_ABSOLUTE, EXEC_STMT,
}
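# a conditional jump followed immediately by an unconditional jump also
# marks a statement boundary; the offset recorded below (via self.prev)
# is that of the trailing unconditional jump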
stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
designator_ops = {
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
STORE_SUBSCR, UNPACK_SEQUENCE, JA
}
prelim = self.all_instr(start, end, stmt_opcodes)
stmts = self.stmts = set(prelim)
pass_stmts = set()
for seq in stmt_opcode_seqs:
for i in self.op_range(start, end-(len(seq)+1)):
match = True
for elem in seq:
if elem != code[i]:
match = False
break
i += self.op_size(code[i])
if match:
i = self.prev[i]
stmts.add(i)
pass_stmts.add(i)
if pass_stmts:
stmt_list = list(stmts)
stmt_list.sort()
else:
stmt_list = prelim
last_stmt = -1
self.next_stmt = []
slist = self.next_stmt = []
i = 0
for s in stmt_list:
if code[s] == JA and s not in pass_stmts:
target = self.get_target(s)
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == JA:
j = self.prev[j]
if code[j] == LIST_APPEND: #list comprehension
stmts.remove(s)
continue
elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
stmts.remove(s)
continue
elif code[s] in designator_ops:
j = self.prev[s]
while code[j] in designator_ops:
j = self.prev[j]
if code[j] == FOR_ITER:
stmts.remove(s)
continue
last_stmt = s
slist += [s] * (s-i)
i = s
slist += [len(code)] * (len(code)-len(slist))
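# at this point self.next_stmt[off] is the offset of the first statement
# starting after 'off' (len(code) when there is none), and self.stmts holds
# the offsets of statement-starting instructions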
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
pjits = self.all_instr(start, end, PJIT)
filtered = []
for pjit in pjits:
tgt = self.get_target(pjit)-3
for i in result:
if i <= pjit or i >= tgt:
filtered.append(i)
result = filtered
filtered = []
return result
def next_except_jump(self, start):
"""
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
"""
except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE)
if except_match:
jmp = self.prev[self.get_target(except_match)]
self.ignore_if.add(except_match)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
return self.prev[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
count_SETUP_ += 1
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
def detect_structure(self, pos, op=None):
"""
Detect the type of block structures and their boundaries, to fix optimized jumps
in python2.3+
"""
# TODO: check the struct boundaries more precisely -Dan
code = self.code
# Eventually remove this test and make op a mandatory argument -Dan
if op is None:
op = code[pos]
## Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
for s in self.structs:
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = s
## We need to know how many new structures were added in this run
origStructCount = len(self.structs)
if op == SETUP_LOOP:
#import pdb; pdb.set_trace()
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
(line_no, next_line_byte) = self.lines[pos]
jump_back = self.last_instr(start, end, JA,
next_line_byte, False)
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JA,
start, False)
if end > jump_back+4 and code[end] in (JF, JA):
if code[jump_back+4] in (JA, JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, JA)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev[next_line_byte]
if test == pos:
loop_type = 'while 1'
else:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op == SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
#print target, end, parent
## Add the try block
self.structs.append({'type': 'try',
'start': start,
'end': end-4})
## Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
## Add the except blocks
i = end
while self.code[i] != END_FINALLY:
jmp = self.next_except_jump(i)
if self.code[jmp] == RETURN_VALUE:
self.structs.append({'type': 'except',
'start': i,
'end': jmp+1})
i = jmp + 1
else:
if self.get_target(jmp) != start_else:
end_else = self.get_target(jmp)
if self.code[jmp] == JF:
self.fixed_jumps[jmp] = -1
self.structs.append({'type': 'except',
'start': i,
'end': jmp})
i = jmp + 3
## Add the try-else block
if end_else != start_else:
r_end_else = self.restrict_to_parent(end_else, parent)
self.structs.append({'type': 'try-else',
'start': i+1,
'end': r_end_else})
self.fixed_jumps[i] = r_end_else
else:
self.fixed_jumps[i] = i+1
elif op in (PJIF, PJIT):
start = pos+3
target = self.get_target(pos, op)
rtarget = self.restrict_to_parent(target, parent)
pre = self.prev
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
return
#does this jump to right after another cond jump?
# if so, it's part of a larger conditional
if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
PJIF, PJIT)) and (target > pos):
self.fixed_jumps[pos] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
return
# is this an if and
if op == PJIF:
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
match = self.remove_mid_line_ifs(match)
if match:
if code[pre[rtarget]] in (JF, JA) \
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == JA \
and self.remove_mid_line_ifs([pos]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
pass
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT), target))) \
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT, JA), pre[rtarget], True))))):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
if self.lines[j].next == j+3 and last_jump_good:
fix = j
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
return
else:
self.fixed_jumps[pos] = match[-1]
return
else: # op == PJIT
next = self.next_stmt[pos]
if pre[next] == pos:
pass
elif code[next] in (JF, JA) and target == self.get_target(next):
if code[pre[next]] == PJIF:
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
return
elif code[next] == JA and code[target] in (JA, JF) \
and self.get_target(target) == self.get_target(next):
self.fixed_jumps[pos] = pre[next]
return
#don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
return
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
rtarget = pre[rtarget]
#does the if jump just beyond a jump op, then this is probably an if statement
if code[pre[rtarget]] in (JA, JF):
if_end = self.get_target(pre[rtarget])
#is this a loop not an if?
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
if(if_end > start):
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'end': pre[rtarget]})
self.not_continue.add(pre[rtarget])
if rtarget < end:
self.structs.append({'type': 'if-else',
'start': rtarget,
'end': end})
elif code[pre[rtarget]] == RETURN_VALUE:
self.structs.append({'type': 'if-then',
'start': start,
'end': rtarget})
self.return_end_ifs.add(pre[rtarget])
elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
target = self.get_target(pos, op)
if target > pos:
unop_target = self.last_instr(pos, target, JF, target)
if unop_target and code[unop_target+3] != ROT_TWO:
self.fixed_jumps[pos] = unop_target
else:
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
"""
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlabels(), but here
for each target the number of jumps are counted.
"""
hasjrel = dis.hasjrel
hasjabs = dis.hasjabs
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] ## All loop entry points
self.fixed_jumps = {} ## Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
## Determine structures and fix jumps for 2.3+
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = code[i+1] + code[i+2] * 256
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
elif op in hasjabs:
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
if (oparg > i):
label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets
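
The scanners walk co_code with op_range()/op_size() rather than a fixed stride because, in pre-3.6 CPython bytecode, an opcode occupies 1 byte without an argument and 3 bytes with one. A standalone sketch of that walk using the standard dis module (illustrative only, EXTENDED_ARG omitted; the function name is not from this commit):

    from array import array
    import dis

    def walk_bytecode(co):
        # yield (offset, opname, oparg) for a CPython 2.x code object
        code = array('B', co.co_code)
        i, n = 0, len(code)
        while i < n:
            op = code[i]
            if op >= dis.HAVE_ARGUMENT:
                yield i, dis.opname[op], code[i+1] + code[i+2] * 256
                i += 3
            else:
                yield i, dis.opname[op], None
                i += 1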

@@ -1,945 +0,0 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from struct import *
from Scanner import Token, Code
class Scanner:
def __init__(self, version):
self.version = version
self.resetTokenClass()
dis.setVersion(version)
globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']})
globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']})
globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
globals().update({'JF': dis.opmap['JUMP_FORWARD']})
self.JUMP_OPs = map(lambda op: dis.opname[op],
dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
def setTokenClass(self, tokenClass):
assert type(tokenClass) == types.ClassType
self.Token = tokenClass
def resetTokenClass(self):
self.setTokenClass(Token)
def disassemble(self, co, classname=None):
"""
Disassemble a code object, returning a list of 'Token'.
The main part of this procedure is modelled after
dis.disassemble().
"""
rv = []
customize = {}
Token = self.Token # shortcut
self.code = array('B', co.co_code)
n = len(self.code)
# linestarts lists (offset, line_no) pairs from dis.findlinestarts()
self.linestarts = list(dis.findlinestarts(co))
self.prev = [0]
pop_delet = 0
i=0
self.restructRelativeJump()
# class and names
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
self.names = names
# collect the instructions to remove in the "toDel" list
toDel = []
while i < n-pop_delet:
op = self.code[i]
ret = self.getOpcodeToDel(i)
if ret != None:
toDel += ret
if op >= dis.HAVE_ARGUMENT:
i += 2
i += 1
if toDel: # ugly, needs rework / rethinking (do it all in one pass? tricky)
toDel = sorted(list(set(toDel)))
delta = 0
for x in toDel:
if self.code[x-delta] >= dis.HAVE_ARGUMENT:
self.code.pop(x-delta)
self.restructCode(x-delta)
self.code.pop(x-delta)
self.restructCode(x-delta)
self.code.pop(x-delta)
self.restructCode(x-delta)
delta += 3
else:
self.code.pop(x-delta)
self.restructCode(x-delta)
delta += 1
# build self.prev, mapping each byte offset to the offset of the instruction before it
n = len(self.code)
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
j = 0
linestarts = self.linestarts
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
linestartoffsets = {a for (a, _) in linestarts}
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
last_op = self.code[self.prev[start_byte]]
(prev_start_byte, prev_line_no) = (start_byte, line_no)
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
# self.lines[offset] = (line number, offset where the next source line starts)
cf = self.find_jump_targets(self.code)
# cf maps each jump-target offset to the list of offsets that jump to it
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
while i < n-1:
if self.lines[last_stmt].next > i:
if self.code[last_stmt] == PRINT_ITEM:
if self.code[i] == PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
elif self.code[i] == PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
last_stmt = i
i = self.next_stmt[i]
imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if self.code[last_import] == IMPORT_NAME == self.code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
extended_arg = 0
for offset in self.op_range(0, n):
if offset in cf:
k = 0
for j in cf[offset]:
rv.append(Token('COME_FROM', None, repr(j),
offset="%s_%d" % (offset, k) ))
k += 1
op = self.code[offset]
opname = dis.opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == dis.EXTENDED_ARG:
extended_arg = oparg * 65536L
continue
if op in dis.hasconst:
const = co.co_consts[oparg]
if type(const) == types.CodeType:
oparg = const
if const.co_name == '<lambda>':
assert opname == 'LOAD_CONST'
opname = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
#pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in dis.hasname:
pattr = names[oparg]
elif op in dis.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in dis.hasjabs:
pattr = repr(oparg)
elif op in dis.haslocal:
pattr = varnames[oparg]
elif op in dis.hascompare:
pattr = dis.cmp_op[oparg]
elif op in dis.hasfree:
pattr = free[oparg]
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
UNPACK_SEQUENCE,
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
CALL_FUNCTION_VAR_KW, DUP_TOPX,
):
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE.
if op == BUILD_TUPLE and \
self.code[offset-3] == LOAD_CLOSURE:
continue
else:
opname = '%s_%d' % (opname, oparg)
if op != BUILD_SLICE:
customize[opname] = oparg
elif op == JA:
target = self.get_target(offset)
if target < offset:
if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
and offset not in self.not_continue:
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'
elif op == LOAD_GLOBAL:
try:
if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE':
opname = 'LOAD_ASSERT'
except AttributeError:
pass
elif op == RETURN_VALUE:
if offset in self.return_end_ifs:
opname = 'RETURN_END_IF'
if offset not in replace:
rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
else:
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
if self.showasm:
out = self.out # shortcut
for t in rv:
print >>out, t
print >>out
return rv, customize
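# The next method flags opcodes to delete so that this older bytecode is
# normalized before tokenizing: the POP_TOP that follows a jump or
# RAISE_VARARGS, and, for old-style list comprehensions, the
# DUP_TOP/STORE/LOAD/DELETE scaffolding around the temporary list name
# together with the matching JA/POP_TOP pairs.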
def getOpcodeToDel(self, i):
"""
Check the opcode at position i and return the list of opcode offsets to delete (or None).
"""
opcode = self.code[i]
opsize = self.op_size(opcode)
if opcode == EXTENDED_ARG:
raise Exception('TODO: EXTENDED_ARG not handled')
if opcode in (PJIF,PJIT,JA,JF):
if self.code[i+opsize] == POP_TOP:
if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \
and opcode in (JF,JA) and self.code[i+opsize] != self.code[i+opsize+3]:
pass
else:
return [i+opsize]
if opcode == RAISE_VARARGS:
if self.code[i+opsize] == POP_TOP:
return [i+opsize]
if opcode == BUILD_LIST:
if self.code[i+opsize] == DUP_TOP and self.code[i+opsize+1] in (STORE_NAME,STORE_FAST):
# del DUP/STORE_NAME x
toDel = [i+opsize,i+opsize+1]
nameDel = self.get_argument(i+opsize+1)
start = i+opsize+1
end = start
# del LOAD_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (LOAD_NAME,LOAD_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == LOAD_NAME:
end += self.op_size(LOAD_NAME)
else:
end += self.op_size(LOAD_FAST)
# mark the JA/POP_TOP at the jump target for deletion and retarget the PJIF to where the JA jumps
while start < end:
start = self.first_instr(start, len(self.code), (PJIF))
if start == None: break
target = self.get_target(start)
if self.code[target] == POP_TOP and self.code[target-3] == JA:
toDel += [target, target-3]
# update PJIF
target = self.get_target(target-3)
if target > 0xFFFF:
raise Exception('TODO: handle jump targets > 0xFFFF')
self.code[start+1] = target & 0xFF
self.code[start+2] = (target >> 8) & 0xFF
start += self.op_size(PJIF)
# del DELETE_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (DELETE_NAME,DELETE_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == DELETE_NAME:
end += self.op_size(DELETE_NAME)
else:
end += self.op_size(DELETE_FAST)
return toDel
return None
def restructRelativeJump(self):
"""
Change relative JUMP_IF_FALSE/TRUE jumps into absolute jumps
and remap the targets of PJIF/PJIT.
"""
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_argument(i)
target += i + 3
if target > 0xFFFF:
raise Exception('TODO: handle jump targets > 0xFFFF')
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_target(i)
if self.code[target] == JA:
target = self.get_target(target)
if target > 0xFFFF:
raise Exception('TODO: handle jump targets > 0xFFFF')
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
def restructCode(self, i):
"""
Adjust linestarts and jump destinations after removing a POP_TOP.
"""
result = list()
for item in self.linestarts:
if i < item[0]:
result.append((item[0]-1, item[1]))
else:
result.append((item[0], item[1]))
self.linestarts = result
for x in self.op_range(0, len(self.code)):
op = self.code[x]
if op >= HAVE_ARGUMENT:
if op in dis.hasjrel:
if x < i and self.get_target(x) > i:
if self.code[x+1]-1 < 0:
self.code[x+2] -= 1
self.code[x+1] = self.code[x+1]+255
else:
self.code[x+1] -= 1
elif op in dis.hasjabs:
if i < self.get_target(x):
if self.code[x+1]-1 < 0:
self.code[x+2] -= 1
self.code[x+1] = self.code[x+1]+255
else:
self.code[x+1] -= 1
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.get_argument(pos)
if op in dis.hasjrel:
target += pos + 3
return target
def get_argument(self, pos):
target = self.code[pos+1] + self.code[pos+2] * 256
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
def build_stmt_indices(self):
code = self.code
start = 0;
end = len(code)
stmt_opcodes = {
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT,
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
JUMP_ABSOLUTE, EXEC_STMT,
}
stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
designator_ops = {
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
STORE_SUBSCR, UNPACK_SEQUENCE, JA
}
prelim = self.all_instr(start, end, stmt_opcodes)
stmts = self.stmts = set(prelim)
pass_stmts = set()
for seq in stmt_opcode_seqs:
for i in self.op_range(start, end-(len(seq)+1)):
match = True
for elem in seq:
if elem != code[i]:
match = False
break
i += self.op_size(code[i])
if match:
i = self.prev[i]
stmts.add(i)
pass_stmts.add(i)
if pass_stmts:
stmt_list = list(stmts)
stmt_list.sort()
else:
stmt_list = prelim
last_stmt = -1
self.next_stmt = []
slist = self.next_stmt = []
i = 0
for s in stmt_list:
if code[s] == JA and s not in pass_stmts:
target = self.get_target(s)
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == JA:
j = self.prev[j]
if code[j] == LIST_APPEND: #list comprehension
stmts.remove(s)
continue
elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
stmts.remove(s)
continue
elif code[s] in designator_ops:
j = self.prev[s]
while code[j] in designator_ops:
j = self.prev[j]
if code[j] == FOR_ITER:
stmts.remove(s)
continue
last_stmt = s
slist += [s] * (s-i)
i = s
slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
pjits = self.all_instr(start, end, PJIT)
filtered = []
for pjit in pjits:
tgt = self.get_target(pjit)-3
for i in result:
if i <= pjit or i >= tgt:
filtered.append(i)
result = filtered
filtered = []
return result
def next_except_jump(self, start):
"""
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
"""
except_match = self.first_instr(start, self.lines[start].next, (PJIF))
if except_match:
jmp = self.prev[self.get_target(except_match)]
self.ignore_if.add(except_match)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
if self.code[self.prev[i]] == NOP:
i = self.prev[i]
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
return self.prev[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_FINALLY):
count_SETUP_ += 1
#return self.lines[start].next
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
def detect_structure(self, pos, op=None):
"""
Detect the type of block structures and their boundaries to fix optimized jumps
in Python 2.3+
"""
# TODO: check the struct boundaries more precisely -Dan
code = self.code
# Eventually remove this test and make op a mandatory argument -Dan
if op is None:
op = code[pos]
## Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
for s in self.structs:
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = s
## We need to know how many new structures were added in this run
origStructCount = len(self.structs)
if op == SETUP_LOOP:
#import pdb; pdb.set_trace()
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
(line_no, next_line_byte) = self.lines[pos]
jump_back = self.last_instr(start, end, JA,
next_line_byte, False)
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JA,
start, False)
if end > jump_back+4 and code[end] in (JF, JA):
if code[jump_back+4] in (JA, JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, JA)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev[next_line_byte]
if test == pos:
loop_type = 'while 1'
else:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op == SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
#print target, end, parent
## Add the try block
self.structs.append({'type': 'try',
'start': start,
'end': end-4})
## Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
## Add the except blocks
i = end
while self.code[i] != END_FINALLY:
jmp = self.next_except_jump(i)
if jmp == None: # check
i = self.next_stmt[i]
continue
if self.code[jmp] == RETURN_VALUE:
self.structs.append({'type': 'except',
'start': i,
'end': jmp+1})
i = jmp + 1
else:
if self.get_target(jmp) != start_else:
end_else = self.get_target(jmp)
if self.code[jmp] == JF:
#self.fixed_jumps[i] = jmp
self.fixed_jumps[jmp] = -1
self.structs.append({'type': 'except',
'start': i,
'end': jmp})
i = jmp + 3
## Add the try-else block
if end_else != start_else:
r_end_else = self.restrict_to_parent(end_else, parent)
self.structs.append({'type': 'try-else',
'start': i+2, # check
'end': r_end_else})
self.fixed_jumps[i] = r_end_else
else:
self.fixed_jumps[i] = i+1
elif op in (PJIF, PJIT):
start = pos+3
target = self.get_target(pos, op)
rtarget = self.restrict_to_parent(target, parent)
pre = self.prev
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
return
#does this jump to right after another cond jump?
# if so, it's part of a larger conditional
if (code[pre[target]] in (PJIF, PJIT)) and (target > pos):
self.fixed_jumps[pos] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
return
# is this an if and
if op == PJIF:
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
match = self.remove_mid_line_ifs(match)
if match:
if code[pre[rtarget]] in (JF, JA) \
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == JA \
and self.remove_mid_line_ifs([pos]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
pass
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT), target))) \
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT, JA), pre[rtarget], True))))):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
if self.lines[j].next == j+3 and last_jump_good:
fix = j
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
return
else:
self.fixed_jumps[pos] = match[-1]
return
else: # op == PJIT
next = self.next_stmt[pos]
if pre[next] == pos:
pass
elif code[next] in (JF, JA) and target == self.get_target(next):
if code[pre[next]] == PJIF:
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
return
elif code[next] == JA and code[target] in (JA, JF) \
and self.get_target(target) == self.get_target(next):
self.fixed_jumps[pos] = pre[next]
return
#don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
return
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
rtarget = pre[rtarget]
#does the if jump just beyond a jump op, then this is probably an if statement
if code[pre[rtarget]] in (JA, JF):
if_end = self.get_target(pre[rtarget])
#is this a loop not an if?
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
if(if_end > start):
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'end': pre[rtarget]})
self.not_continue.add(pre[rtarget])
if rtarget < end:
self.structs.append({'type': 'if-else',
'start': rtarget,
'end': end})
elif code[pre[rtarget]] == RETURN_VALUE:
self.structs.append({'type': 'if-then',
'start': start,
'end': rtarget})
self.return_end_ifs.add(pre[rtarget])
# if it's an old JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP
#if target > pos:
# unop_target = self.last_instr(pos, target, JF, target)
# if unop_target and code[unop_target+3] != ROT_TWO:
# self.fixed_jumps[pos] = unop_target
# else:
# self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
"""
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlabels(), but here
the offsets that jump to each target are collected.
"""
hasjrel = dis.hasjrel
hasjabs = dis.hasjabs
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] ## All loop entry points
self.fixed_jumps = {} ## Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
## Determine structures and fix jumps for 2.3+
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = self.get_argument(i)
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
#elif op in hasjabs: absolute jumps are not handled here
#if op in (PJIF, PJIT): OR/POP handling still to be done
#if (oparg > i):
#label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets
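# --- Illustrative sketch (not part of the original file) --------------------
# find_jump_targets returns a mapping {target_offset: [offsets that jump
# there]}; disassemble later emits one COME_FROM pseudo-token per entry in
# each list.  The shape of that mapping can be reproduced with a tiny
# standalone helper (hypothetical, for illustration only):
def _sketch_collect_targets(jumps):
    # jumps: iterable of (source_offset, target_offset) pairs
    targets = {}
    for src, tgt in jumps:
        targets[tgt] = targets.get(tgt, []) + [src]
    return targets
# _sketch_collect_targets([(0, 13), (7, 13), (10, 22)]) == {13: [0, 7], 22: [10]}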

View File

@@ -1,937 +0,0 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from struct import *
from Scanner import Token, Code
class Scanner:
def __init__(self, version):
self.version = version
self.resetTokenClass()
dis.setVersion(version)
globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
globals().update({'PJIF': dis.opmap['JUMP_IF_FALSE']})
globals().update({'PJIT': dis.opmap['JUMP_IF_TRUE']})
globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
globals().update({'JF': dis.opmap['JUMP_FORWARD']})
self.JUMP_OPs = map(lambda op: dis.opname[op],
dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
def setTokenClass(self, tokenClass):
assert type(tokenClass) == types.ClassType
self.Token = tokenClass
def resetTokenClass(self):
self.setTokenClass(Token)
def disassemble(self, co, classname=None):
"""
Disassemble a code object, returning a list of 'Token'.
The main part of this procedure is modelled after
dis.disassemble().
"""
rv = []
customize = {}
Token = self.Token # shortcut
self.code = array('B', co.co_code)
n = len(self.code)
# linestarts contains (offset, line_no) pairs marking where source lines start
self.linestarts = list(dis.findlinestarts(co))
self.prev = [0]
pop_delet = 0
i=0
self.restructRelativeJump()
# class and names
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
self.names = names
# collect the instructions to remove in the "toDel" list
toDel = []
while i < n-pop_delet:
op = self.code[i]
ret = self.getOpcodeToDel(i)
if ret != None:
toDel += ret
if op >= dis.HAVE_ARGUMENT:
i += 2
i += 1
if toDel: # ugly, needs a rework/rethink (doing it all in one pass would be tricky)
toDel = sorted(list(set(toDel)))
delta = 0
for x in toDel:
if self.code[x-delta] >= dis.HAVE_ARGUMENT:
self.code.pop(x-delta)
self.restructCode(x-delta)
self.code.pop(x-delta)
self.restructCode(x-delta)
self.code.pop(x-delta)
self.restructCode(x-delta)
delta += 3
else:
self.code.pop(x-delta)
self.restructCode(x-delta)
delta += 1
# build self.prev: map each byte offset to the previous instruction's offset
n = len(self.code)
for i in self.op_range(0, n):
op = self.code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
j = 0
linestarts = self.linestarts
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
linestartoffsets = {a for (a, _) in linestarts}
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
last_op = self.code[self.prev[start_byte]]
(prev_start_byte, prev_line_no) = (start_byte, line_no)
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
# self.lines[offset] gives (line_no, offset where the next source line starts)
cf = self.find_jump_targets(self.code)
# cf maps each jump-target offset to the offsets that jump to it
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
while i < n-1:
if self.lines[last_stmt].next > i:
if self.code[last_stmt] == PRINT_ITEM:
if self.code[i] == PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
elif self.code[i] == PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
last_stmt = i
i = self.next_stmt[i]
imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if self.code[last_import] == IMPORT_NAME == self.code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
extended_arg = 0
for offset in self.op_range(0, n):
if offset in cf:
k = 0
for j in cf[offset]:
rv.append(Token('COME_FROM', None, repr(j),
offset="%s_%d" % (offset, k) ))
k += 1
op = self.code[offset]
opname = dis.opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = self.get_argument(offset) + extended_arg
extended_arg = 0
if op == dis.EXTENDED_ARG:
extended_arg = oparg * 65536L
continue
if op in dis.hasconst:
const = co.co_consts[oparg]
if type(const) == types.CodeType:
oparg = const
if const.co_name == '<lambda>':
assert opname == 'LOAD_CONST'
opname = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
#pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in dis.hasname:
pattr = names[oparg]
elif op in dis.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in dis.hasjabs:
pattr = repr(oparg)
elif op in dis.haslocal:
pattr = varnames[oparg]
elif op in dis.hascompare:
pattr = dis.cmp_op[oparg]
elif op in dis.hasfree:
pattr = free[oparg]
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SLICE,
UNPACK_SEQUENCE,
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
CALL_FUNCTION_VAR_KW, DUP_TOPX,
):
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE.
if op == BUILD_TUPLE and \
self.code[offset-3] == LOAD_CLOSURE:
continue
else:
opname = '%s_%d' % (opname, oparg)
if op != BUILD_SLICE:
customize[opname] = oparg
elif op == JA:
target = self.get_target(offset)
if target < offset:
if offset in self.stmts and self.code[offset+3] not in (END_FINALLY, POP_BLOCK) \
and offset not in self.not_continue:
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'
elif op == LOAD_GLOBAL:
try:
if pattr == 'AssertionError' and rv and rv[-1] == 'JUMP_IF_TRUE':
opname = 'LOAD_ASSERT'
except AttributeError:
pass
elif op == RETURN_VALUE:
if offset in self.return_end_ifs:
opname = 'RETURN_END_IF'
if offset not in replace:
rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
else:
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
if self.showasm:
out = self.out # shortcut
for t in rv:
print >>out, t
print >>out
return rv, customize
def getOpcodeToDel(self, i):
"""
Check the opcode at position i and return a list of opcode offsets to delete (or None).
"""
opcode = self.code[i]
opsize = self.op_size(opcode)
if opcode == EXTENDED_ARG:
raise NotImplementedError('EXTENDED_ARG handling still to be done')
if opcode in (PJIF,PJIT,JA,JF):
if self.code[i+opsize] == POP_TOP:
if self.code[i+opsize] == self.code[i+opsize+1] and self.code[i+opsize] == self.code[i+opsize+2] \
and opcode in (JF,JA) and self.code[i+opsize] != self.code[i+opsize+3]:
pass
else:
return [i+opsize]
if opcode == BUILD_LIST:
if self.code[i+opsize] == DUP_TOP and self.code[i+opsize+1] in (STORE_NAME,STORE_FAST):
# del DUP/STORE_NAME x
toDel = [i+opsize,i+opsize+1]
nameDel = self.get_argument(i+opsize+1)
start = i+opsize+1
end = start
# del LOAD_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (LOAD_NAME,LOAD_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == LOAD_NAME:
end += self.op_size(LOAD_NAME)
else:
end += self.op_size(LOAD_FAST)
# record JA/POP_TOP pairs to delete and update the PJIF target
while start < end:
start = self.first_instr(start, len(self.code), (PJIF))
if start == None: break
target = self.get_target(start)
if self.code[target] == POP_TOP and self.code[target-3] == JA:
toDel += [target, target-3]
# update PJIF
target = self.get_target(target-3)
if target > 0xFFFF:
raise NotImplementedError('targets beyond 0xFFFF are not handled')
self.code[start+1] = target & 0xFF
self.code[start+2] = (target >> 8) & 0xFF
start += self.op_size(PJIF)
# del DELETE_NAME x
while end < len(self.code):
end = self.first_instr(end, len(self.code), (DELETE_NAME,DELETE_FAST))
if nameDel == self.get_argument(end):
toDel += [end]
break
if self.code[end] == DELETE_NAME:
end += self.op_size(DELETE_NAME)
else:
end += self.op_size(DELETE_FAST)
return toDel
return None
def restructRelativeJump(self):
"""
change relative JUMP_IF_FALSE/TRUE jumps into absolute jumps
"""
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_argument(i)
target += i + 3
if target > 0xFFFF:
raise NotImplementedError('targets beyond 0xFFFF are not handled')
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
for i in self.op_range(0, len(self.code)):
if(self.code[i] in (PJIF,PJIT)):
target = self.get_target(i)
if self.code[target] == JA:
target = self.get_target(target)
if target > 0xFFFF:
raise NotImplementedError('targets beyond 0xFFFF are not handled')
self.code[i+1] = target & 0xFF
self.code[i+2] = (target >> 8) & 0xFF
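# --- Illustrative sketch (not part of the original file) --------------------
# restructRelativeJump rewrites the two operand bytes in place: on 2.x
# bytecode the argument is a 16-bit little-endian value stored right after
# the opcode byte.  A minimal standalone version of that patching step
# (hypothetical helper name, same overflow limitation as above):
def _sketch_patch_arg(code, pos, value):
    # code: a mutable byte container such as array('B', ...); pos: offset of
    # the opcode whose argument is being replaced.
    if value > 0xFFFF:
        raise NotImplementedError('EXTENDED_ARG would be required')
    code[pos + 1] = value & 0xFF
    code[pos + 2] = (value >> 8) & 0xFF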
def restructCode(self, i):
"""
adjust linestarts and jump destinations after removing a POP_TOP
"""
result = list()
for item in self.linestarts:
if i < item[0]:
result.append((item[0]-1, item[1]))
else:
result.append((item[0], item[1]))
self.linestarts = result
for x in self.op_range(0, len(self.code)):
op = self.code[x]
if op >= HAVE_ARGUMENT:
if op in dis.hasjrel:
if x < i and self.get_target(x) > i:
if self.code[x+1]-1 < 0:
self.code[x+2] -= 1
self.code[x+1] = self.code[x+1]+255
else:
self.code[x+1] -= 1
elif op in dis.hasjabs:
if i < self.get_target(x):
if self.code[x+1]-1 < 0:
self.code[x+2] -= 1
self.code[x+1] = self.code[x+1]+255
else:
self.code[x+1] -= 1
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.get_argument(pos)
if op in dis.hasjrel:
target += pos + 3
return target
def get_argument(self, pos):
target = self.code[pos+1] + self.code[pos+2] * 256
return target
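# --- Illustrative sketch (not part of the original file) --------------------
# get_target/get_argument above decode the 16-bit little-endian argument
# and, for relative jumps, add the offset of the following instruction
# (pos + 3).  The same arithmetic as a standalone function, using the
# stdlib dis jump tables (hypothetical helper name):
import dis as _std_dis
def _sketch_jump_target(code, pos):
    op = code[pos]
    arg = code[pos + 1] + code[pos + 2] * 256
    if op in _std_dis.hasjrel:
        return pos + 3 + arg   # relative: measured from the next opcode
    if op in _std_dis.hasjabs:
        return arg             # absolute: the argument is the target offset
    return None                # not a jump instruction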
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
def build_stmt_indices(self):
code = self.code
start = 0;
end = len(code)
stmt_opcodes = {
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT,
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
JUMP_ABSOLUTE, EXEC_STMT,
}
stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
designator_ops = {
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
STORE_SUBSCR, UNPACK_SEQUENCE, JA
}
prelim = self.all_instr(start, end, stmt_opcodes)
stmts = self.stmts = set(prelim)
pass_stmts = set()
for seq in stmt_opcode_seqs:
for i in self.op_range(start, end-(len(seq)+1)):
match = True
for elem in seq:
if elem != code[i]:
match = False
break
i += self.op_size(code[i])
if match:
i = self.prev[i]
stmts.add(i)
pass_stmts.add(i)
if pass_stmts:
stmt_list = list(stmts)
stmt_list.sort()
else:
stmt_list = prelim
last_stmt = -1
self.next_stmt = []
slist = self.next_stmt = []
i = 0
for s in stmt_list:
if code[s] == JA and s not in pass_stmts:
target = self.get_target(s)
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == JA:
j = self.prev[j]
if code[j] == LIST_APPEND: #list comprehension
stmts.remove(s)
continue
elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
stmts.remove(s)
continue
elif code[s] in designator_ops:
j = self.prev[s]
while code[j] in designator_ops:
j = self.prev[j]
if code[j] == FOR_ITER:
stmts.remove(s)
continue
last_stmt = s
slist += [s] * (s-i)
i = s
slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
pjits = self.all_instr(start, end, PJIT)
filtered = []
for pjit in pjits:
tgt = self.get_target(pjit)-3
for i in result:
if i <= pjit or i >= tgt:
filtered.append(i)
result = filtered
filtered = []
return result
def next_except_jump(self, start):
"""
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
"""
except_match = self.first_instr(start, self.lines[start].next, (PJIF))
if except_match:
jmp = self.prev[self.get_target(except_match)]
self.ignore_if.add(except_match)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
if self.code[self.prev[i]] == NOP:
i = self.prev[i]
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
return self.prev[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_FINALLY):
count_SETUP_ += 1
#return self.lines[start].next
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
def detect_structure(self, pos, op=None):
"""
Detect the type of block structures and their boundaries to fix optimized jumps
in Python 2.3+
"""
# TODO: check the struct boundaries more precisely -Dan
code = self.code
# Eventually remove this test and make op a mandatory argument -Dan
if op is None:
op = code[pos]
## Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
for s in self.structs:
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = s
## We need to know how many new structures were added in this run
origStructCount = len(self.structs)
if op == SETUP_LOOP:
#import pdb; pdb.set_trace()
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
(line_no, next_line_byte) = self.lines[pos]
jump_back = self.last_instr(start, end, JA,
next_line_byte, False)
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JA,
start, False)
if end > jump_back+4 and code[end] in (JF, JA):
if code[jump_back+4] in (JA, JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, JA)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev[next_line_byte]
if test == pos:
loop_type = 'while 1'
else:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op == SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
#print target, end, parent
## Add the try block
self.structs.append({'type': 'try',
'start': start,
'end': end-4})
## Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
## Add the except blocks
i = end
while self.code[i] != END_FINALLY:
jmp = self.next_except_jump(i)
if jmp == None: # check
i = self.next_stmt[i]
continue
if self.code[jmp] == RETURN_VALUE:
self.structs.append({'type': 'except',
'start': i,
'end': jmp+1})
i = jmp + 1
else:
if self.get_target(jmp) != start_else:
end_else = self.get_target(jmp)
if self.code[jmp] == JF:
#self.fixed_jumps[i] = jmp
self.fixed_jumps[jmp] = -1
self.structs.append({'type': 'except',
'start': i,
'end': jmp})
i = jmp + 3
## Add the try-else block
if end_else != start_else:
r_end_else = self.restrict_to_parent(end_else, parent)
self.structs.append({'type': 'try-else',
'start': i+2, # check
'end': r_end_else})
self.fixed_jumps[i] = r_end_else
else:
self.fixed_jumps[i] = i+1
elif op in (PJIF, PJIT):
start = pos+3
target = self.get_target(pos, op)
rtarget = self.restrict_to_parent(target, parent)
pre = self.prev
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
return
#does this jump to right after another cond jump?
# if so, it's part of a larger conditional
if (code[pre[target]] in (PJIF, PJIT)) and (target > pos):
self.fixed_jumps[pos] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
return
# is this an if and
if op == PJIF:
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
match = self.remove_mid_line_ifs(match)
if match:
if code[pre[rtarget]] in (JF, JA) \
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == JA \
and self.remove_mid_line_ifs([pos]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
pass
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT), target))) \
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT, JA), pre[rtarget], True))))):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
if self.lines[j].next == j+3 and last_jump_good:
fix = j
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
return
else:
self.fixed_jumps[pos] = match[-1]
return
else: # op == PJIT
next = self.next_stmt[pos]
if pre[next] == pos:
pass
elif code[next] in (JF, JA) and target == self.get_target(next):
if code[pre[next]] == PJIF:
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
return
elif code[next] == JA and code[target] in (JA, JF) \
and self.get_target(target) == self.get_target(next):
self.fixed_jumps[pos] = pre[next]
return
#don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
return
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
rtarget = pre[rtarget]
#does the if jump just beyond a jump op, then this is probably an if statement
if code[pre[rtarget]] in (JA, JF):
if_end = self.get_target(pre[rtarget])
#is this a loop not an if?
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
if(if_end > start):
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'end': pre[rtarget]})
self.not_continue.add(pre[rtarget])
if rtarget < end:
self.structs.append({'type': 'if-else',
'start': rtarget,
'end': end})
elif code[pre[rtarget]] == RETURN_VALUE:
self.structs.append({'type': 'if-then',
'start': start,
'end': rtarget})
self.return_end_ifs.add(pre[rtarget])
def find_jump_targets(self, code):
"""
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlabels(), but here
the offsets that jump to each target are collected.
"""
hasjrel = dis.hasjrel
hasjabs = dis.hasjabs
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] ## All loop entry points
self.fixed_jumps = {} ## Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
## Determine structures and fix jumps for 2.3+
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = self.get_argument(i)
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
#elif op in hasjabs: absolute jumps are not handled here
#if op in (PJIF, PJIT): OR/POP handling still to be done
#if (oparg > i):
#label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets

View File

@@ -1,796 +0,0 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
# Copyright (c) 2005 by Dan Pascu <dan@windowmaker.org>
#
# See main module for license.
#
__all__ = ['Token', 'Scanner', 'getscanner']
import types
import disas as dis
from collections import namedtuple
from array import array
from operator import itemgetter
from Scanner import Token, Code
class Scanner:
def __init__(self, version):
self.version = version
self.resetTokenClass()
dis.setVersion(version)
globals().update({'HAVE_ARGUMENT': dis.HAVE_ARGUMENT})
globals().update({k.replace('+','_'):v for (k,v) in dis.opmap.items()})
globals().update({'PJIF': dis.opmap['POP_JUMP_IF_FALSE']})
globals().update({'PJIT': dis.opmap['POP_JUMP_IF_TRUE']})
globals().update({'JA': dis.opmap['JUMP_ABSOLUTE']})
globals().update({'JF': dis.opmap['JUMP_FORWARD']})
self.JUMP_OPs = map(lambda op: dis.opname[op],
dis.hasjrel + dis.hasjabs)
def setShowAsm(self, showasm, out=None):
self.showasm = showasm
self.out = out
def setTokenClass(self, tokenClass):
assert type(tokenClass) == types.ClassType
self.Token = tokenClass
def resetTokenClass(self):
self.setTokenClass(Token)
def disassemble(self, co, classname=None):
"""
Disassemble a code object, returning a list of 'Token'.
The main part of this procedure is modelled after
dis.disassemble().
"""
rv = []
customize = {}
Token = self.Token # shortcut
self.code = code = array('B', co.co_code)
n = len(code)
self.prev = [0]
# build self.prev: map instruction and argument bytes to the previous instruction's offset
for i in self.op_range(0, n):
op = code[i]
self.prev.append(i)
if op >= HAVE_ARGUMENT:
self.prev.append(i)
self.prev.append(i)
self.lines = []
linetuple = namedtuple('linetuple', ['l_no', 'next'])
j = 0
# linestarts contains (offset, line_no) pairs marking where source lines start
linestarts = list(dis.findlinestarts(co))
linestartoffsets = {a for (a, _) in linestarts}
(prev_start_byte, prev_line_no) = linestarts[0]
for (start_byte, line_no) in linestarts[1:]:
while j < start_byte:
self.lines.append(linetuple(prev_line_no, start_byte))
j += 1
last_op = code[self.prev[start_byte]]
(prev_start_byte, prev_line_no) = (start_byte, line_no)
while j < n:
self.lines.append(linetuple(prev_line_no, n))
j+=1
# self.lines[offset] gives (line_no, offset where the next source line starts)
cf = self.find_jump_targets(code)
# cf maps each jump-target offset to the offsets that jump to it
if classname:
classname = '_' + classname.lstrip('_') + '__'
def unmangle(name):
if name.startswith(classname) and name[-2:] != '__':
return name[len(classname) - 2:]
return name
free = [ unmangle(name) for name in (co.co_cellvars + co.co_freevars) ]
names = [ unmangle(name) for name in co.co_names ]
varnames = [ unmangle(name) for name in co.co_varnames ]
else:
free = co.co_cellvars + co.co_freevars
names = co.co_names
varnames = co.co_varnames
last_stmt = self.next_stmt[0]
i = self.next_stmt[last_stmt]
replace = {}
while i < n-1:
if self.lines[last_stmt].next > i:
if code[last_stmt] == PRINT_ITEM:
if code[i] == PRINT_ITEM:
replace[i] = 'PRINT_ITEM_CONT'
elif code[i] == PRINT_NEWLINE:
replace[i] = 'PRINT_NEWLINE_CONT'
last_stmt = i
i = self.next_stmt[i]
imports = self.all_instr(0, n, (IMPORT_NAME, IMPORT_FROM, IMPORT_STAR))
if len(imports) > 1:
last_import = imports[0]
for i in imports[1:]:
if self.lines[last_import].next > i:
if code[last_import] == IMPORT_NAME == code[i]:
replace[i] = 'IMPORT_NAME_CONT'
last_import = i
extended_arg = 0
for offset in self.op_range(0, n):
if offset in cf:
k = 0
for j in cf[offset]:
rv.append(Token('COME_FROM', None, repr(j),
offset="%s_%d" % (offset, k) ))
k += 1
op = code[offset]
opname = dis.opname[op]
oparg = None; pattr = None
if op >= HAVE_ARGUMENT:
oparg = code[offset+1] + code[offset+2] * 256 + extended_arg
extended_arg = 0
if op == dis.EXTENDED_ARG:
extended_arg = oparg * 65536L
continue
if op in dis.hasconst:
const = co.co_consts[oparg]
if type(const) == types.CodeType:
oparg = const
if const.co_name == '<lambda>':
assert opname == 'LOAD_CONST'
opname = 'LOAD_LAMBDA'
elif const.co_name == '<genexpr>':
opname = 'LOAD_GENEXPR'
elif const.co_name == '<dictcomp>':
opname = 'LOAD_DICTCOMP'
elif const.co_name == '<setcomp>':
opname = 'LOAD_SETCOMP'
# verify uses 'pattr' for comparison, since 'attr'
# now holds Code(const) and thus can not be used
# for comparison (todo: think about changing this)
#pattr = 'code_object @ 0x%x %s->%s' %\
# (id(const), const.co_filename, const.co_name)
pattr = '<code_object ' + const.co_name + '>'
else:
pattr = const
elif op in dis.hasname:
pattr = names[oparg]
elif op in dis.hasjrel:
pattr = repr(offset + 3 + oparg)
elif op in dis.hasjabs:
pattr = repr(oparg)
elif op in dis.haslocal:
pattr = varnames[oparg]
elif op in dis.hascompare:
pattr = dis.cmp_op[oparg]
elif op in dis.hasfree:
pattr = free[oparg]
if op in (BUILD_LIST, BUILD_TUPLE, BUILD_SET, BUILD_SLICE,
UNPACK_SEQUENCE,
MAKE_FUNCTION, CALL_FUNCTION, MAKE_CLOSURE,
CALL_FUNCTION_VAR, CALL_FUNCTION_KW,
CALL_FUNCTION_VAR_KW, DUP_TOPX,
):
# CE - Hack for >= 2.5
# Now all values loaded via LOAD_CLOSURE are packed into
# a tuple before calling MAKE_CLOSURE.
if op == BUILD_TUPLE and \
code[offset-3] == LOAD_CLOSURE:
continue
else:
opname = '%s_%d' % (opname, oparg)
if op != BUILD_SLICE:
customize[opname] = oparg
elif op == JA:
target = self.get_target(offset)
if target < offset:
if offset in self.stmts and code[offset+3] not in (END_FINALLY, POP_BLOCK) \
and offset not in self.not_continue:
opname = 'CONTINUE'
else:
opname = 'JUMP_BACK'
elif op == LOAD_GLOBAL:
try:
if pattr == 'AssertionError' and rv and rv[-1] == 'POP_JUMP_IF_TRUE':
opname = 'LOAD_ASSERT'
except AttributeError:
pass
elif op == RETURN_VALUE:
if offset in self.return_end_ifs:
opname = 'RETURN_END_IF'
if offset not in replace:
rv.append(Token(opname, oparg, pattr, offset, linestart = offset in linestartoffsets))
else:
rv.append(Token(replace[offset], oparg, pattr, offset, linestart = offset in linestartoffsets))
if self.showasm:
out = self.out # shortcut
for t in rv:
print >>out, t
print >>out
return rv, customize
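# --- Illustrative sketch (not part of the original file) --------------------
# disassemble returns (tokens, customize): tokens is a flat list of Token
# objects plus synthetic entries such as COME_FROM, JUMP_BACK or
# LOAD_LAMBDA, and customize records variable-arity opcodes (for example
# CALL_FUNCTION_1) that the parser turns into custom grammar rules.  A
# hypothetical driver, assuming a Scanner already built for the bytecode
# version of the running interpreter:
import types as _std_types
def _sketch_tokenize(scanner, source):
    # Compile source text with the running interpreter and tokenize it.
    co = compile(source, '<sketch>', 'exec')
    assert type(co) == _std_types.CodeType
    return scanner.disassemble(co)
# tokens, customize = _sketch_tokenize(Scanner(2.7), "x = 1\nif x:\n    x += 1\n")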
def get_target(self, pos, op=None):
if op is None:
op = self.code[pos]
target = self.code[pos+1] + self.code[pos+2] * 256
if op in dis.hasjrel:
target += pos + 3
return target
def first_instr(self, start, end, instr, target=None, exact=True):
"""
Find the first <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
return i
dest = self.get_target(i, op)
if dest == target:
return i
elif not exact:
_distance = abs(target - dest)
if _distance < distance:
distance = _distance
pos = i
return pos
def last_instr(self, start, end, instr, target=None, exact=True):
"""
Find the last <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely if exact
is True, or if exact is False, the instruction which has a target
closest to <target> will be returned.
Return index to it or None if not found.
"""
code = self.code
if not (start>=0 and end<=len(code)):
return None
try: None in instr
except: instr = [instr]
pos = None
distance = len(code)
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
pos = i
else:
dest = self.get_target(i, op)
if dest == target:
distance = 0
pos = i
elif not exact:
_distance = abs(target - dest)
if _distance <= distance:
distance = _distance
pos = i
return pos
def all_instr(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
return result
def op_size(self, op):
if op < HAVE_ARGUMENT:
return 1
else:
return 3
def op_range(self, start, end):
while start < end:
yield start
start += self.op_size(self.code[start])
def build_stmt_indices(self):
code = self.code
start = 0;
end = len(code)
stmt_opcodes = {
SETUP_LOOP, BREAK_LOOP, CONTINUE_LOOP,
SETUP_FINALLY, END_FINALLY, SETUP_EXCEPT, SETUP_WITH,
POP_BLOCK, STORE_FAST, DELETE_FAST, STORE_DEREF,
STORE_GLOBAL, DELETE_GLOBAL, STORE_NAME, DELETE_NAME,
STORE_ATTR, DELETE_ATTR, STORE_SUBSCR, DELETE_SUBSCR,
RETURN_VALUE, RAISE_VARARGS, POP_TOP,
PRINT_EXPR, PRINT_ITEM, PRINT_NEWLINE, PRINT_ITEM_TO, PRINT_NEWLINE_TO,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
DELETE_SLICE_0, DELETE_SLICE_1, DELETE_SLICE_2, DELETE_SLICE_3,
JUMP_ABSOLUTE, EXEC_STMT,
}
stmt_opcode_seqs = [(PJIF, JF), (PJIF, JA), (PJIT, JF), (PJIT, JA)]
designator_ops = {
STORE_FAST, STORE_NAME, STORE_GLOBAL, STORE_DEREF, STORE_ATTR,
STORE_SLICE_0, STORE_SLICE_1, STORE_SLICE_2, STORE_SLICE_3,
STORE_SUBSCR, UNPACK_SEQUENCE, JA
}
prelim = self.all_instr(start, end, stmt_opcodes)
stmts = self.stmts = set(prelim)
pass_stmts = set()
for seq in stmt_opcode_seqs:
for i in self.op_range(start, end-(len(seq)+1)):
match = True
for elem in seq:
if elem != code[i]:
match = False
break
i += self.op_size(code[i])
if match:
i = self.prev[i]
stmts.add(i)
pass_stmts.add(i)
if pass_stmts:
stmt_list = list(stmts)
stmt_list.sort()
else:
stmt_list = prelim
last_stmt = -1
self.next_stmt = []
slist = self.next_stmt = []
i = 0
for s in stmt_list:
if code[s] == JA and s not in pass_stmts:
target = self.get_target(s)
if target > s or self.lines[last_stmt].l_no == self.lines[s].l_no:
stmts.remove(s)
continue
j = self.prev[s]
while code[j] == JA:
j = self.prev[j]
if code[j] == LIST_APPEND: #list comprehension
stmts.remove(s)
continue
elif code[s] == POP_TOP and code[self.prev[s]] == ROT_TWO:
stmts.remove(s)
continue
elif code[s] in designator_ops:
j = self.prev[s]
while code[j] in designator_ops:
j = self.prev[j]
if code[j] == FOR_ITER:
stmts.remove(s)
continue
last_stmt = s
slist += [s] * (s-i)
i = s
slist += [len(code)] * (len(code)-len(slist))
def remove_mid_line_ifs(self, ifs):
filtered = []
for i in ifs:
if self.lines[i].l_no == self.lines[i+3].l_no:
if self.code[self.prev[self.lines[i].next]] in (PJIT, PJIF):
continue
filtered.append(i)
return filtered
def rem_or(self, start, end, instr, target=None, include_beyond_target=False):
"""
Find all <instr> in the block from start to end.
<instr> is any python bytecode instruction or a list of opcodes
If <instr> is an opcode with a target (like a jump), a target
destination can be specified which must match precisely.
Return a list with indexes to them or [] if none found.
"""
code = self.code
assert(start>=0 and end<=len(code))
try: None in instr
except: instr = [instr]
result = []
for i in self.op_range(start, end):
op = code[i]
if op in instr:
if target is None:
result.append(i)
else:
t = self.get_target(i, op)
if include_beyond_target and t >= target:
result.append(i)
elif t == target:
result.append(i)
pjits = self.all_instr(start, end, PJIT)
filtered = []
for pjit in pjits:
tgt = self.get_target(pjit)-3
for i in result:
if i <= pjit or i >= tgt:
filtered.append(i)
result = filtered
filtered = []
return result
def next_except_jump(self, start):
"""
Return the next jump that was generated by an except SomeException:
construct in a try...except...else clause or None if not found.
"""
except_match = self.first_instr(start, self.lines[start].next, POP_JUMP_IF_FALSE)
if except_match:
jmp = self.prev[self.get_target(except_match)]
self.ignore_if.add(except_match)
return jmp
count_END_FINALLY = 0
count_SETUP_ = 0
for i in self.op_range(start, len(self.code)):
op = self.code[i]
if op == END_FINALLY:
if count_END_FINALLY == count_SETUP_:
assert self.code[self.prev[i]] in (JA, JF, RETURN_VALUE)
return self.prev[i]
count_END_FINALLY += 1
elif op in (SETUP_EXCEPT, SETUP_WITH, SETUP_FINALLY):
count_SETUP_ += 1
def restrict_to_parent(self, target, parent):
"""Restrict pos to parent boundaries."""
if not (parent['start'] < target < parent['end']):
target = parent['end']
return target
def detect_structure(self, pos, op=None):
"""
Detect the type of block structures and their boundaries to fix optimized jumps
in Python 2.3+
"""
# TODO: check the struct boundaries more precisely -Dan
code = self.code
# Eventually remove this test and make op a mandatory argument -Dan
if op is None:
op = code[pos]
## Detect parent structure
parent = self.structs[0]
start = parent['start']
end = parent['end']
for s in self.structs:
_start = s['start']
_end = s['end']
if (_start <= pos < _end) and (_start >= start and _end <= end):
start = _start
end = _end
parent = s
## We need to know how many new structures were added in this run
origStructCount = len(self.structs)
if op == SETUP_LOOP:
#import pdb; pdb.set_trace()
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
(line_no, next_line_byte) = self.lines[pos]
jump_back = self.last_instr(start, end, JA,
next_line_byte, False)
if not jump_back: # loop suite ends in return. wtf right?
jump_back = self.last_instr(start, end, RETURN_VALUE) + 1
if not jump_back:
return
if code[self.prev[next_line_byte]] not in (PJIF, PJIT):
loop_type = 'for'
else:
loop_type = 'while'
self.ignore_if.add(self.prev[next_line_byte])
target = next_line_byte
end = jump_back + 3
else:
if self.get_target(jump_back) >= next_line_byte:
jump_back = self.last_instr(start, end, JA,
start, False)
if end > jump_back+4 and code[end] in (JF, JA):
if code[jump_back+4] in (JA, JF):
if self.get_target(jump_back+4) == self.get_target(end):
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
elif target < pos:
self.fixed_jumps[pos] = jump_back+4
end = jump_back+4
target = self.get_target(jump_back, JA)
if code[target] in (FOR_ITER, GET_ITER):
loop_type = 'for'
else:
loop_type = 'while'
test = self.prev[next_line_byte]
if test == pos:
loop_type = 'while 1'
else:
self.ignore_if.add(test)
test_target = self.get_target(test)
if test_target > (jump_back+3):
jump_back = test_target
self.loops.append(target)
self.structs.append({'type': loop_type + '-loop',
'start': target,
'end': jump_back})
if jump_back+3 != end:
self.structs.append({'type': loop_type + '-else',
'start': jump_back+3,
'end': end})
elif op == SETUP_EXCEPT:
start = pos+3
target = self.get_target(pos, op)
end = self.restrict_to_parent(target, parent)
if target != end:
self.fixed_jumps[pos] = end
#print target, end, parent
## Add the try block
self.structs.append({'type': 'try',
'start': start,
'end': end-4})
## Now isolate the except and else blocks
end_else = start_else = self.get_target(self.prev[end])
## Add the except blocks
i = end
while self.code[i] != END_FINALLY:
jmp = self.next_except_jump(i)
if self.code[jmp] == RETURN_VALUE:
self.structs.append({'type': 'except',
'start': i,
'end': jmp+1})
i = jmp + 1
else:
if self.get_target(jmp) != start_else:
end_else = self.get_target(jmp)
if self.code[jmp] == JF:
self.fixed_jumps[jmp] = -1
self.structs.append({'type': 'except',
'start': i,
'end': jmp})
i = jmp + 3
## Add the try-else block
if end_else != start_else:
r_end_else = self.restrict_to_parent(end_else, parent)
self.structs.append({'type': 'try-else',
'start': i+1,
'end': r_end_else})
self.fixed_jumps[i] = r_end_else
else:
self.fixed_jumps[i] = i+1
elif op in (PJIF, PJIT):
start = pos+3
target = self.get_target(pos, op)
rtarget = self.restrict_to_parent(target, parent)
pre = self.prev
if target != rtarget and parent['type'] == 'and/or':
self.fixed_jumps[pos] = rtarget
return
#does this jump to right after another cond jump?
# if so, it's part of a larger conditional
if (code[pre[target]] in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP,
PJIF, PJIT)) and (target > pos):
self.fixed_jumps[pos] = pre[target]
self.structs.append({'type': 'and/or',
'start': start,
'end': pre[target]})
return
# is this an if and
if op == PJIF:
match = self.rem_or(start, self.next_stmt[pos], PJIF, target)
match = self.remove_mid_line_ifs(match)
if match:
if code[pre[rtarget]] in (JF, JA) \
and pre[rtarget] not in self.stmts \
and self.restrict_to_parent(self.get_target(pre[rtarget]), parent) == rtarget:
if code[pre[pre[rtarget]]] == JA \
and self.remove_mid_line_ifs([pos]) \
and target == self.get_target(pre[pre[rtarget]]) \
and (pre[pre[rtarget]] not in self.stmts or self.get_target(pre[pre[rtarget]]) > pre[pre[rtarget]])\
and 1 == len(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], (PJIF, PJIT), target))):
pass
elif code[pre[pre[rtarget]]] == RETURN_VALUE \
and self.remove_mid_line_ifs([pos]) \
and 1 == (len(set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT), target))) \
| set(self.remove_mid_line_ifs(self.rem_or(start, pre[pre[rtarget]], \
(PJIF, PJIT, JA), pre[rtarget], True))))):
pass
else:
fix = None
jump_ifs = self.all_instr(start, self.next_stmt[pos], PJIF)
last_jump_good = True
for j in jump_ifs:
if target == self.get_target(j):
if self.lines[j].next == j+3 and last_jump_good:
fix = j
break
else:
last_jump_good = False
self.fixed_jumps[pos] = fix or match[-1]
return
else:
self.fixed_jumps[pos] = match[-1]
return
else: # op == PJIT
next = self.next_stmt[pos]
if pre[next] == pos:
pass
elif code[next] in (JF, JA) and target == self.get_target(next):
if code[pre[next]] == PJIF:
if code[next] == JF or target != rtarget or code[pre[pre[rtarget]]] not in (JA, RETURN_VALUE):
self.fixed_jumps[pos] = pre[next]
return
elif code[next] == JA and code[target] in (JA, JF) \
and self.get_target(target) == self.get_target(next):
self.fixed_jumps[pos] = pre[next]
return
#don't add a struct for a while test, it's already taken care of
if pos in self.ignore_if:
return
if code[pre[rtarget]] == JA and pre[rtarget] in self.stmts \
and pre[rtarget] != pos and pre[pre[rtarget]] != pos \
and not (code[rtarget] == JA and code[rtarget+3] == POP_BLOCK and code[pre[pre[rtarget]]] != JA):
rtarget = pre[rtarget]
#does the if jump just beyond a jump op, then this is probably an if statement
if code[pre[rtarget]] in (JA, JF):
if_end = self.get_target(pre[rtarget])
#is this a loop not an if?
if (if_end < pre[rtarget]) and (code[pre[if_end]] == SETUP_LOOP):
if(if_end > start):
return
end = self.restrict_to_parent(if_end, parent)
self.structs.append({'type': 'if-then',
'start': start,
'end': pre[rtarget]})
self.not_continue.add(pre[rtarget])
if rtarget < end:
self.structs.append({'type': 'if-else',
'start': rtarget,
'end': end})
elif code[pre[rtarget]] == RETURN_VALUE:
self.structs.append({'type': 'if-then',
'start': start,
'end': rtarget})
self.return_end_ifs.add(pre[rtarget])
elif op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
target = self.get_target(pos, op)
if target > pos:
unop_target = self.last_instr(pos, target, JF, target)
if unop_target and code[unop_target+3] != ROT_TWO:
self.fixed_jumps[pos] = unop_target
else:
self.fixed_jumps[pos] = self.restrict_to_parent(target, parent)
def find_jump_targets(self, code):
"""
Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
This procedure is modelled after dis.findlabels(), but here
the offsets that jump to each target are collected.
"""
hasjrel = dis.hasjrel
hasjabs = dis.hasjabs
n = len(code)
self.structs = [{'type': 'root',
'start': 0,
'end': n-1}]
self.loops = [] ## All loop entry points
self.fixed_jumps = {} ## Map fixed jumps to their real destination
self.ignore_if = set()
self.build_stmt_indices()
self.not_continue = set()
self.return_end_ifs = set()
targets = {}
for i in self.op_range(0, n):
op = code[i]
## Determine structures and fix jumps for 2.3+
self.detect_structure(i, op)
if op >= HAVE_ARGUMENT:
label = self.fixed_jumps.get(i)
oparg = code[i+1] + code[i+2] * 256
if label is None:
if op in hasjrel and op != FOR_ITER:
label = i + 3 + oparg
elif op in hasjabs:
if op in (JUMP_IF_FALSE_OR_POP, JUMP_IF_TRUE_OR_POP):
if (oparg > i):
label = oparg
if label is not None and label != -1:
targets[label] = targets.get(label, []) + [i]
elif op == END_FINALLY and i in self.fixed_jumps:
label = self.fixed_jumps[i]
targets[label] = targets.get(label, []) + [i]
return targets

File diff suppressed because it is too large

View File

@@ -1,232 +0,0 @@
# Copyright (c) 1999 John Aycock
# Copyright (c) 2000 by hartmut Goebel <hartmut@goebel.noris.de>
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# See the file 'CHANGES' for a list of changes
#
# NB. This is not a masterpiece of software, but became more like a hack.
# Probably a complete rewrite would be sensible. hG/2000-12-27
#
import sys, types, os
import Walker, verify, magics
sys.setrecursionlimit(5000)
__all__ = ['uncompyle', 'uncompyle_file', 'main']
def _load_file(filename):
"""
load a Python source file and compile it to byte-code
_load_file(filename: string): code_object
filename: name of file containing Python source code
(normally a .py)
code_object: code_object compiled from this source code
This function does NOT write any file!
"""
fp = open(filename, 'rb')
source = fp.read()+'\n'
try:
co = compile(source, filename, 'exec')
except SyntaxError:
print >> sys.stderr, '>>Syntax error in', filename
raise
fp.close()
return co
def _load_module(filename):
"""
load a module without importing it
_load_module(filename: string): code_object
filename: name of file containing Python byte-code object
(normally a .pyc)
code_object: code_object from this file
"""
import magics, marshal
fp = open(filename, 'rb')
magic = fp.read(4)
try:
version = float(magics.versions[magic])
except KeyError:
raise ImportError, "Unknown magic number %s in %s" % (ord(magic[0])+256*ord(magic[1]), filename)
if (version > 2.7) or (version < 2.5):
raise ImportError, "This is a Python %s file! Only Python 2.5 to 2.7 files are supported." % version
#print version
fp.read(4) # timestamp
co = marshal.load(fp)
fp.close()
return version, co
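# A minimal usage sketch (hypothetical file name; the .pyc must have been
# produced by CPython 2.5-2.7):
#
#   version, co = _load_module('example.pyc')
#   print version            # e.g. 2.7
#   print type(co)           # <type 'code'>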
def uncompyle(version, co, out=None, showasm=0, showast=0):
"""
disassembles a given code block 'co'
"""
assert type(co) == types.CodeType
# store final output stream for case of error
__real_out = out or sys.stdout
if co.co_filename:
print >>__real_out, '#Embedded file name: %s' % co.co_filename
# diff scanner
if version == 2.7:
import Scanner27 as scan
elif version == 2.6:
import Scanner26 as scan
elif version == 2.5:
import Scanner25 as scan
scanner = scan.Scanner(version)
scanner.setShowAsm(showasm, out)
tokens, customize = scanner.disassemble(co)
#sys.exit(0)
# Build AST from disassembly.
walker = Walker.Walker(out, scanner, showast=showast)
try:
ast = walker.build_ast(tokens, customize)
except Walker.ParserError, e : # parser failed, dump disassembly
print >>__real_out, e
raise
del tokens # save memory
# convert a leading '__doc__ = "..."' assignment into a docstring
assert ast == 'stmts'
try:
if ast[0][0] == Walker.ASSIGN_DOC_STRING(co.co_consts[0]):
walker.print_docstring('', co.co_consts[0])
del ast[0]
if ast[-1] == Walker.RETURN_NONE:
ast.pop() # remove last node
#todo: if empty, add 'pass'
except:
pass
walker.mod_globs = Walker.find_globals(ast, set())
walker.gen_source(ast, customize)
for g in walker.mod_globs:
walker.write('global %s ## Warning: Unused global\n' % g)
if walker.ERROR:
raise walker.ERROR
def uncompyle_file(filename, outstream=None, showasm=0, showast=0):
"""
decompile Python byte-code file (.pyc)
"""
version, co = _load_module(filename)
uncompyle(version, co, outstream, showasm, showast)
co = None
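# A minimal usage sketch (hypothetical paths):
#
#   out = open('example.pyc_dis', 'w')
#   uncompyle_file('example.pyc', out, showasm=0, showast=0)
#   out.close()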
#---- main -------
if sys.platform.startswith('linux') and os.uname()[2][:2] == '2.':
def __memUsage():
mi = open('/proc/self/stat', 'r')
mu = mi.readline().split()[22]
mi.close()
return int(mu) / 1000000
else:
def __memUsage():
return ''
def main(in_base, out_base, files, codes, outfile=None,
showasm=0, showast=0, do_verify=0):
"""
in_base base directory for input files
out_base base directory for output files (ignored when outfile is given)
files list of filenames to be uncompyled (relative to in_base)
outfile write output to this filename (overwrites out_base)
For redirecting output to
- <filename> outfile=<filename> (out_base is ignored)
- files below out_base out_base=...
- stdout out_base=None, outfile=None
"""
def _get_outstream(outfile):
dir = os.path.dirname(outfile)
failed_file = outfile + '_failed'
if os.path.exists(failed_file): os.remove(failed_file)
try:
os.makedirs(dir)
except OSError:
pass
return open(outfile, 'w')
of = outfile
tot_files = okay_files = failed_files = verify_failed_files = 0
for code in codes:
version = float(sys.version[:3]) # e.g. 2.7, must match a supported scanner
with open(code, "r") as f:
co = compile(f.read(), "", "exec")
uncompyle(version, co, sys.stdout, showasm=showasm, showast=showast)
for file in files:
infile = os.path.join(in_base, file)
#print >>sys.stderr, infile
if of: # outfile was given as parameter
outstream = _get_outstream(outfile)
elif out_base is None:
outstream = sys.stdout
else:
outfile = os.path.join(out_base, file) + '_dis'
outstream = _get_outstream(outfile)
#print >>sys.stderr, outfile
# try to decompyle the input file
try:
uncompyle_file(infile, outstream, showasm, showast)
tot_files += 1
except KeyboardInterrupt:
if outfile:
outstream.close()
os.remove(outfile)
raise
except:
failed_files += 1
sys.stderr.write("### Can't uncompyle %s\n" % infile)
if outfile:
outstream.close()
os.rename(outfile, outfile + '_failed')
import traceback
traceback.print_exc()
#raise
else: # uncompyle successful
if outfile:
outstream.close()
if do_verify:
try:
verify.compare_code_with_srcfile(infile, outfile)
print '# okay decompyling', infile, __memUsage()
okay_files += 1
except verify.VerifyCmpError, e:
verify_failed_files += 1
os.rename(outfile, outfile + '_unverified')
print >>sys.stderr, "### Error Verifiying", file
print >>sys.stderr, e
else:
okay_files += 1
print '# okay decompyling', infile, __memUsage()
return (tot_files, okay_files, failed_files, verify_failed_files)
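# A minimal invocation sketch (hypothetical directory layout):
#
#   counts = main('build/pyc', 'build/src', ['pkg/mod.pyc'], [], do_verify=1)
#   # writes build/src/pkg/mod.pyc_dis, verifies it against the input, and
#   # returns (tot_files, okay_files, failed_files, verify_failed_files)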

View File

@ -1,238 +0,0 @@
"""Disassembler of Python byte code into mnemonics."""
import sys
import types
_have_code = (types.MethodType, types.FunctionType, types.CodeType, types.ClassType, type)
def dis(x=None):
"""Disassemble classes, methods, functions, or code.
With no argument, disassemble the last traceback.
"""
if x is None:
distb()
return
if isinstance(x, types.InstanceType):
x = x.__class__
if hasattr(x, 'im_func'):
x = x.im_func
if hasattr(x, 'func_code'):
x = x.func_code
if hasattr(x, '__dict__'):
items = x.__dict__.items()
items.sort()
for name, x1 in items:
if isinstance(x1, _have_code):
print "Disassembly of %s:" % name
try:
dis(x1)
except TypeError, msg:
print "Sorry:", msg
print
elif hasattr(x, 'co_code'):
disassemble(x)
elif isinstance(x, str):
disassemble_string(x)
else:
raise TypeError, \
"don't know how to disassemble %s objects" % \
type(x).__name__
def distb(tb=None):
"""Disassemble a traceback (default: last traceback)."""
if tb is None:
try:
tb = sys.last_traceback
except AttributeError:
raise RuntimeError, "no last traceback to disassemble"
while tb.tb_next: tb = tb.tb_next
disassemble(tb.tb_frame.f_code, tb.tb_lasti)
def disassemble(co, lasti=-1):
"""Disassemble a code object."""
code = co.co_code
labels = findlabels(code)
linestarts = dict(findlinestarts(co))
n = len(code)
i = 0
extended_arg = 0
free = None
while i < n:
c = code[i]
op = ord(c)
if i in linestarts:
if i > 0:
print
print "%3d" % linestarts[i],
else:
print ' ',
if i == lasti: print '-->',
else: print ' ',
if i in labels: print '>>',
else: print ' ',
print repr(i).rjust(4),
print opname[op].ljust(20),
i = i+1
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
extended_arg = 0
i = i+2
if op == EXTENDED_ARG:
extended_arg = oparg*65536L
print repr(oparg).rjust(5),
if op in hasconst:
print '(' + repr(co.co_consts[oparg]) + ')',
elif op in hasname:
print '(' + co.co_names[oparg] + ')',
elif op in hasjrel:
print '(to ' + repr(i + oparg) + ')',
elif op in haslocal:
print '(' + co.co_varnames[oparg] + ')',
elif op in hascompare:
print '(' + cmp_op[oparg] + ')',
elif op in hasfree:
if free is None:
free = co.co_cellvars + co.co_freevars
print '(' + free[oparg] + ')',
print
def disassemble_string(code, lasti=-1, varnames=None, names=None,
constants=None):
labels = findlabels(code)
n = len(code)
i = 0
while i < n:
c = code[i]
op = ord(c)
if i == lasti: print '-->',
else: print ' ',
if i in labels: print '>>',
else: print ' ',
print repr(i).rjust(4),
print opname[op].ljust(15),
i = i+1
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
print repr(oparg).rjust(5),
if op in hasconst:
if constants:
print '(' + repr(constants[oparg]) + ')',
else:
print '(%d)'%oparg,
elif op in hasname:
if names is not None:
print '(' + names[oparg] + ')',
else:
print '(%d)'%oparg,
elif op in hasjrel:
print '(to ' + repr(i + oparg) + ')',
elif op in haslocal:
if varnames:
print '(' + varnames[oparg] + ')',
else:
print '(%d)' % oparg,
elif op in hascompare:
print '(' + cmp_op[oparg] + ')',
print
disco = disassemble # XXX For backwards compatibility
def findlabels(code):
"""Detect all offsets in a byte code which are jump targets.
Return the list of offsets.
"""
labels = []
n = len(code)
i = 0
while i < n:
c = code[i]
op = ord(c)
i = i+1
if op >= HAVE_ARGUMENT:
oparg = ord(code[i]) + ord(code[i+1])*256
i = i+2
label = -1
if op in hasjrel:
label = i+oparg
elif op in hasjabs:
label = oparg
if label >= 0:
if label not in labels:
labels.append(label)
return labels
def findlinestarts(code):
"""Find the offsets in a byte code which are start of lines in the source.
Generate pairs (offset, lineno) as described in Python/compile.c.
"""
byte_increments = [ord(c) for c in code.co_lnotab[0::2]]
line_increments = [ord(c) for c in code.co_lnotab[1::2]]
lastlineno = None
lineno = code.co_firstlineno
addr = 0
for byte_incr, line_incr in zip(byte_increments, line_increments):
if byte_incr:
if lineno != lastlineno:
yield (addr, lineno)
lastlineno = lineno
addr += byte_incr
lineno += line_incr
if lineno != lastlineno:
yield (addr, lineno)
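# Worked example: for a code object with co_firstlineno == 1 and
# co_lnotab == '\x06\x01\x08\x02', byte_increments is [6, 8] and
# line_increments is [1, 2], so this generator yields (0, 1), (6, 2), (14, 4).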
def setVersion(version):
if version == 2.7:
import uncompyle2.opcode.opcode_27 as opcodyn
elif version == 2.6:
import uncompyle2.opcode.opcode_26 as opcodyn
elif version == 2.5:
import uncompyle2.opcode.opcode_25 as opcodyn
globals().update({'cmp_op': opcodyn.cmp_op})
globals().update({'hasconst': opcodyn.hasconst})
globals().update({'hasname': opcodyn.hasname})
globals().update({'hasjrel': opcodyn.hasjrel})
globals().update({'hasjabs': opcodyn.hasjabs})
globals().update({'haslocal': opcodyn.haslocal})
globals().update({'hascompare': opcodyn.hascompare})
globals().update({'hasfree': opcodyn.hasfree})
globals().update({'opname': opcodyn.opname})
globals().update({'opmap': opcodyn.opmap})
globals().update({'HAVE_ARGUMENT': opcodyn.HAVE_ARGUMENT})
globals().update({'EXTENDED_ARG': opcodyn.EXTENDED_ARG})
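# A minimal usage sketch (assumes the uncompyle2.opcode package is importable
# and that the host interpreter is itself 2.7, so the compiled bytecode
# matches the selected opcode tables):
#
#   setVersion(2.7)     # populate opname/opmap/... for the 2.7 opcode set
#   dis(compile("x = 1 if y else 2", "<example>", "exec"))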
def _test():
"""Simple test program to disassemble a file."""
if sys.argv[1:]:
if sys.argv[2:]:
sys.stderr.write("usage: python dis.py [-|file]\n")
sys.exit(2)
fn = sys.argv[1]
if not fn or fn == "-":
fn = None
else:
fn = None
if fn is None:
f = sys.stdin
else:
f = open(fn)
source = f.read()
if fn is not None:
f.close()
else:
fn = "<stdin>"
code = compile(source, fn, "exec")
dis(code)
if __name__ == "__main__":
_test()

View File

@ -1,66 +0,0 @@
import struct
__all__ = ['magics', 'versions']
def __build_magic(magic):
return struct.pack('Hcc', magic, '\r', '\n')
def __by_version(magics):
by_version = {}
for m, v in magics.items():
by_version[v] = m
return by_version
versions = {
# taken from Python/import.c
# magic, version
__build_magic(20121): '1.5', #1.5, 1.5.1, 1.5.2
__build_magic(50428): '1.6', #1.6
__build_magic(50823): '2.0', #2.0, 2.0.1
__build_magic(60202): '2.1', #2.1, 2.1.1, 2.1.2
__build_magic(60717): '2.2', #2.2
__build_magic(62011): '2.3', #2.3a0
__build_magic(62021): '2.3', #2.3a0
__build_magic(62041): '2.4', #2.4a0
__build_magic(62051): '2.4', #2.4a3
__build_magic(62061): '2.4', #2.4b1
__build_magic(62071): '2.5', #2.5a0
__build_magic(62081): '2.5', #2.5a0 (ast-branch)
__build_magic(62091): '2.5', #2.5a0 (with)
__build_magic(62092): '2.5', #2.5a0 (changed WITH_CLEANUP opcode)
__build_magic(62101): '2.5', #2.5b3 (fix wrong code: for x, in ...)
__build_magic(62111): '2.5', #2.5b3 (fix wrong code: x += yield)
__build_magic(62121): '2.5', #2.5c1 (fix wrong lnotab with for loops and
# storing constants that should have been removed)
__build_magic(62131): '2.5', #2.5c2 (fix wrong code: for x, in ... in listcomp/genexp)
__build_magic(62151): '2.6', #2.6a0 (peephole optimizations & STORE_MAP)
__build_magic(62161): '2.6', #2.6a1 (WITH_CLEANUP optimization)
__build_magic(62171): '2.7', #2.7a0 (optimize list comprehensions/change LIST_APPEND)
__build_magic(62181): '2.7', #2.7a0 (optimize conditional branches:
# introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE)
__build_magic(62191): '2.7', #2.7a0 (introduce SETUP_WITH)
__build_magic(62201): '2.7', #2.7a0 (introduce BUILD_SET)
__build_magic(62211): '2.7' #2.7a0 (introduce MAP_ADD and SET_ADD)
}
magics = __by_version(versions)
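# Example: on a little-endian build, __build_magic(62211) packs to
# '\x03\xf3\r\n', the first four bytes of a CPython 2.7 .pyc, so
# versions['\x03\xf3\r\n'] == '2.7', while magics['2.7'] maps back to one of
# the 2.7 magic strings.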
def __show(text, magic):
print text, struct.unpack('BBBB', magic), \
struct.unpack('HBB', magic)
def test():
import imp
magic_20 = magics['2.0']
current = imp.get_magic()
current_version = versions[current]
magic_current = magics[ current_version ]
print type(magic_20), len(magic_20), repr(magic_20)
print
print 'This Python interpreter has version', current_version
__show('imp.get_magic():\t', current),
__show('magic[current_version]:\t', magic_current)
__show('magic_20:\t\t', magic_20)
if __name__ == '__main__':
test()

View File

@ -1,188 +0,0 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<' + `op` + '>'
del op
def def_op(name, op):
opname[op] = name
opmap[name] = op
def name_op(name, op):
def_op(name, op)
hasname.append(op)
def jrel_op(name, op):
def_op(name, op)
hasjrel.append(op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
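# For example, def_op('POP_TOP', 1) below sets opname[1] = 'POP_TOP' and
# opmap['POP_TOP'] = 1; name_op/jrel_op/jabs_op additionally record the opcode
# in hasname, hasjrel or hasjabs so callers can classify its argument.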
# Instruction opcodes for compiled code
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_MAP', 104) # Always zero for now
name_op('LOAD_ATTR', 105) # Index in name list
def_op('COMPARE_OP', 106) # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107) # Index in name list
name_op('IMPORT_FROM', 108) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jrel_op('JUMP_IF_FALSE', 111) # ""
jrel_op('JUMP_IF_TRUE', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
del def_op, name_op, jrel_op, jabs_op

View File

@ -1,190 +0,0 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
opname[op] = name
opmap[name] = op
def name_op(name, op):
def_op(name, op)
hasname.append(op)
def jrel_op(name, op):
def_op(name, op)
hasjrel.append(op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
# Instruction opcodes for compiled code
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('LIST_APPEND', 18)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_MAP', 104) # Always zero for now
name_op('LOAD_ATTR', 105) # Index in name list
def_op('COMPARE_OP', 106) # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107) # Index in name list
name_op('IMPORT_FROM', 108) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jrel_op('JUMP_IF_FALSE', 111) # ""
jrel_op('JUMP_IF_TRUE', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
del def_op, name_op, jrel_op, jabs_op

View File

@ -1,185 +0,0 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
opname[op] = name
opmap[name] = op
def name_op(name, op):
def_op(name, op)
hasname.append(op)
def jrel_op(name, op):
def_op(name, op)
hasjrel.append(op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0) # 0
def_op('POP_TOP', 1) # 15
def_op('ROT_TWO', 2) # 59
def_op('ROT_THREE', 3) # 60
def_op('DUP_TOP', 4) # 13
def_op('ROT_FOUR', 5) # 49
def_op('NOP', 9) # 53
def_op('UNARY_POSITIVE', 10) # 48
def_op('UNARY_NEGATIVE', 11) # 54
def_op('UNARY_NOT', 12) # 38
def_op('UNARY_CONVERT', 13) # 25
def_op('UNARY_INVERT', 15) # 34
def_op('LIST_APPEND', 18) # 68
def_op('BINARY_POWER', 19) # 28
def_op('BINARY_MULTIPLY', 20) # 36
def_op('BINARY_DIVIDE', 21) # 12
def_op('BINARY_MODULO', 22) # 41
def_op('BINARY_ADD', 23) # 52
def_op('BINARY_SUBTRACT', 24) # 55
def_op('BINARY_SUBSCR', 25) # 4
def_op('BINARY_FLOOR_DIVIDE', 26) # 43
def_op('BINARY_TRUE_DIVIDE', 27) # 5
def_op('INPLACE_FLOOR_DIVIDE', 28) # 32
def_op('INPLACE_TRUE_DIVIDE', 29) # 30
def_op('SLICE+0', 30) # 16
def_op('SLICE+1', 31) # 17
def_op('SLICE+2', 32) # 18
def_op('SLICE+3', 33) # 19
def_op('STORE_SLICE+0', 40) # 61
def_op('STORE_SLICE+1', 41) # 62
def_op('STORE_SLICE+2', 42) # 63
def_op('STORE_SLICE+3', 43) # 64
def_op('DELETE_SLICE+0', 50) # 44
def_op('DELETE_SLICE+1', 51) # 45
def_op('DELETE_SLICE+2', 52) # 46
def_op('DELETE_SLICE+3', 53) # 47
def_op('INPLACE_ADD', 55) # 6
def_op('INPLACE_SUBTRACT', 56) # 29
def_op('INPLACE_MULTIPLY', 57) # 8
def_op('INPLACE_DIVIDE', 58) # 27
def_op('INPLACE_MODULO', 59) # 3
def_op('STORE_SUBSCR', 60) # 31
def_op('DELETE_SUBSCR', 61) # 69
def_op('BINARY_LSHIFT', 62) # 7
def_op('BINARY_RSHIFT', 63) # 22
def_op('BINARY_AND', 64) # 50
def_op('BINARY_XOR', 65) # 21
def_op('BINARY_OR', 66) # 2
def_op('INPLACE_POWER', 67) # 57
def_op('GET_ITER', 68) # 39
def_op('PRINT_EXPR', 70) # 20
def_op('PRINT_ITEM', 71) # 9
def_op('PRINT_NEWLINE', 72) # 14
def_op('PRINT_ITEM_TO', 73) # 33
def_op('PRINT_NEWLINE_TO', 74) # 35
def_op('INPLACE_LSHIFT', 75) # 11
def_op('INPLACE_RSHIFT', 76) # 58
def_op('INPLACE_AND', 77) # 24
def_op('INPLACE_XOR', 78) # 23
def_op('INPLACE_OR', 79) # 10
def_op('BREAK_LOOP', 80) # 40
def_op('WITH_CLEANUP', 81) # 37
def_op('LOAD_LOCALS', 82) # 51
def_op('RETURN_VALUE', 83) # 66
def_op('IMPORT_STAR', 84) # 56
def_op('EXEC_STMT', 85) # 65
def_op('YIELD_VALUE', 86) # 26
def_op('POP_BLOCK', 87) # 1
def_op('END_FINALLY', 88) # 67
def_op('BUILD_CLASS', 89) # 42
HAVE_ARGUMENT = 90 # 70 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # 95 # Index in name list
name_op('DELETE_NAME', 91) # 94 # ""
def_op('UNPACK_SEQUENCE', 92) # 93 # Number of tuple items
jrel_op('FOR_ITER', 93) # 81
name_op('STORE_ATTR', 95) # 84 # Index in name list
name_op('DELETE_ATTR', 96) # 87 # ""
name_op('STORE_GLOBAL', 97) # 105 # ""
name_op('DELETE_GLOBAL', 98) # 98 # ""
def_op('DUP_TOPX', 99) # 104 # number of items to duplicate
def_op('LOAD_CONST', 100) # 72 # Index in const list
hasconst.append(100) # 72
name_op('LOAD_NAME', 101) # 79 # Index in name list
def_op('BUILD_TUPLE', 102) # 80 # Number of tuple items
def_op('BUILD_LIST', 103) # 107 # Number of list items
def_op('BUILD_MAP', 104) # 78 # Always zero for now
name_op('LOAD_ATTR', 105) # 86 # Index in name list
def_op('COMPARE_OP', 106) # 101 # Comparison operator
hascompare.append(106) # 101
name_op('IMPORT_NAME', 107) # 88 # Index in name list
name_op('IMPORT_FROM', 108) # 89 # Index in name list
jrel_op('JUMP_FORWARD', 110) # 73 # Number of bytes to skip
jabs_op('JUMP_IF_FALSE', 111) # 83 # ""
jabs_op('JUMP_IF_TRUE', 112) # 90 # ""
jabs_op('JUMP_ABSOLUTE', 113) # 103 # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # 70 # Index in name list
jabs_op('CONTINUE_LOOP', 119) # 96 # Target address
jrel_op('SETUP_LOOP', 120) # 74 # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # 75 # ""
jrel_op('SETUP_FINALLY', 122) # 106 # ""
def_op('LOAD_FAST', 124) # 92 # Local variable number
haslocal.append(124) # 92
def_op('STORE_FAST', 125) # 82 # Local variable number
haslocal.append(125) # 82
def_op('DELETE_FAST', 126) # 71 # Local variable number
haslocal.append(126) # 71
def_op('RAISE_VARARGS', 130) # 91 # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # 102 # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # 76 # Number of args with default values
def_op('BUILD_SLICE', 133) # 77 # Number of items
def_op('MAKE_CLOSURE', 134) # 85
def_op('LOAD_CLOSURE', 135) # 97
hasfree.append(135) # 97
def_op('LOAD_DEREF', 136) # 99
hasfree.append(136) # 99
def_op('STORE_DEREF', 137) # 100
hasfree.append(137) # 100
def_op('CALL_FUNCTION_VAR', 140) # 111 # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # 112 # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # 113 # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143) # 114
EXTENDED_ARG = 143 # 114
del def_op, name_op, jrel_op, jabs_op

View File

@ -1,186 +0,0 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
opname[op] = name
opmap[name] = op
def name_op(name, op):
def_op(name, op)
hasname.append(op)
def jrel_op(name, op):
def_op(name, op)
hasjrel.append(op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('LIST_APPEND', 18)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_MAP', 104) # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 105) # Index in name list
def_op('COMPARE_OP', 106) # Comparison operator
hascompare.append(106)
name_op('IMPORT_NAME', 107) # Index in name list
name_op('IMPORT_FROM', 108) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jabs_op('JUMP_IF_FALSE', 111) # ""
jabs_op('JUMP_IF_TRUE', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
def_op('EXTENDED_ARG', 143)
EXTENDED_ARG = 143
del def_op, name_op, jrel_op, jabs_op

View File

@ -1,192 +0,0 @@
"""
opcode module - potentially shared between dis and other modules which
operate on bytecodes (e.g. peephole optimizers).
"""
__all__ = ["cmp_op", "hasconst", "hasname", "hasjrel", "hasjabs",
"haslocal", "hascompare", "hasfree", "opname", "opmap",
"HAVE_ARGUMENT", "EXTENDED_ARG"]
cmp_op = ('<', '<=', '==', '!=', '>', '>=', 'in', 'not in', 'is',
'is not', 'exception match', 'BAD')
hasconst = []
hasname = []
hasjrel = []
hasjabs = []
haslocal = []
hascompare = []
hasfree = []
opmap = {}
opname = [''] * 256
for op in range(256): opname[op] = '<%r>' % (op,)
del op
def def_op(name, op):
opname[op] = name
opmap[name] = op
def name_op(name, op):
def_op(name, op)
hasname.append(op)
def jrel_op(name, op):
def_op(name, op)
hasjrel.append(op)
def jabs_op(name, op):
def_op(name, op)
hasjabs.append(op)
# Instruction opcodes for compiled code
# Blank lines correspond to available opcodes
def_op('STOP_CODE', 0)
def_op('POP_TOP', 1)
def_op('ROT_TWO', 2)
def_op('ROT_THREE', 3)
def_op('DUP_TOP', 4)
def_op('ROT_FOUR', 5)
def_op('NOP', 9)
def_op('UNARY_POSITIVE', 10)
def_op('UNARY_NEGATIVE', 11)
def_op('UNARY_NOT', 12)
def_op('UNARY_CONVERT', 13)
def_op('UNARY_INVERT', 15)
def_op('BINARY_POWER', 19)
def_op('BINARY_MULTIPLY', 20)
def_op('BINARY_DIVIDE', 21)
def_op('BINARY_MODULO', 22)
def_op('BINARY_ADD', 23)
def_op('BINARY_SUBTRACT', 24)
def_op('BINARY_SUBSCR', 25)
def_op('BINARY_FLOOR_DIVIDE', 26)
def_op('BINARY_TRUE_DIVIDE', 27)
def_op('INPLACE_FLOOR_DIVIDE', 28)
def_op('INPLACE_TRUE_DIVIDE', 29)
def_op('SLICE+0', 30)
def_op('SLICE+1', 31)
def_op('SLICE+2', 32)
def_op('SLICE+3', 33)
def_op('STORE_SLICE+0', 40)
def_op('STORE_SLICE+1', 41)
def_op('STORE_SLICE+2', 42)
def_op('STORE_SLICE+3', 43)
def_op('DELETE_SLICE+0', 50)
def_op('DELETE_SLICE+1', 51)
def_op('DELETE_SLICE+2', 52)
def_op('DELETE_SLICE+3', 53)
def_op('STORE_MAP', 54)
def_op('INPLACE_ADD', 55)
def_op('INPLACE_SUBTRACT', 56)
def_op('INPLACE_MULTIPLY', 57)
def_op('INPLACE_DIVIDE', 58)
def_op('INPLACE_MODULO', 59)
def_op('STORE_SUBSCR', 60)
def_op('DELETE_SUBSCR', 61)
def_op('BINARY_LSHIFT', 62)
def_op('BINARY_RSHIFT', 63)
def_op('BINARY_AND', 64)
def_op('BINARY_XOR', 65)
def_op('BINARY_OR', 66)
def_op('INPLACE_POWER', 67)
def_op('GET_ITER', 68)
def_op('PRINT_EXPR', 70)
def_op('PRINT_ITEM', 71)
def_op('PRINT_NEWLINE', 72)
def_op('PRINT_ITEM_TO', 73)
def_op('PRINT_NEWLINE_TO', 74)
def_op('INPLACE_LSHIFT', 75)
def_op('INPLACE_RSHIFT', 76)
def_op('INPLACE_AND', 77)
def_op('INPLACE_XOR', 78)
def_op('INPLACE_OR', 79)
def_op('BREAK_LOOP', 80)
def_op('WITH_CLEANUP', 81)
def_op('LOAD_LOCALS', 82)
def_op('RETURN_VALUE', 83)
def_op('IMPORT_STAR', 84)
def_op('EXEC_STMT', 85)
def_op('YIELD_VALUE', 86)
def_op('POP_BLOCK', 87)
def_op('END_FINALLY', 88)
def_op('BUILD_CLASS', 89)
HAVE_ARGUMENT = 90 # Opcodes from here have an argument:
name_op('STORE_NAME', 90) # Index in name list
name_op('DELETE_NAME', 91) # ""
def_op('UNPACK_SEQUENCE', 92) # Number of tuple items
jrel_op('FOR_ITER', 93)
def_op('LIST_APPEND', 94)
name_op('STORE_ATTR', 95) # Index in name list
name_op('DELETE_ATTR', 96) # ""
name_op('STORE_GLOBAL', 97) # ""
name_op('DELETE_GLOBAL', 98) # ""
def_op('DUP_TOPX', 99) # number of items to duplicate
def_op('LOAD_CONST', 100) # Index in const list
hasconst.append(100)
name_op('LOAD_NAME', 101) # Index in name list
def_op('BUILD_TUPLE', 102) # Number of tuple items
def_op('BUILD_LIST', 103) # Number of list items
def_op('BUILD_SET', 104) # Number of set items
def_op('BUILD_MAP', 105) # Number of dict entries (upto 255)
name_op('LOAD_ATTR', 106) # Index in name list
def_op('COMPARE_OP', 107) # Comparison operator
hascompare.append(107)
name_op('IMPORT_NAME', 108) # Index in name list
name_op('IMPORT_FROM', 109) # Index in name list
jrel_op('JUMP_FORWARD', 110) # Number of bytes to skip
jabs_op('JUMP_IF_FALSE_OR_POP', 111) # Target byte offset from beginning of code
jabs_op('JUMP_IF_TRUE_OR_POP', 112) # ""
jabs_op('JUMP_ABSOLUTE', 113) # ""
jabs_op('POP_JUMP_IF_FALSE', 114) # ""
jabs_op('POP_JUMP_IF_TRUE', 115) # ""
name_op('LOAD_GLOBAL', 116) # Index in name list
jabs_op('CONTINUE_LOOP', 119) # Target address
jrel_op('SETUP_LOOP', 120) # Distance to target address
jrel_op('SETUP_EXCEPT', 121) # ""
jrel_op('SETUP_FINALLY', 122) # ""
def_op('LOAD_FAST', 124) # Local variable number
haslocal.append(124)
def_op('STORE_FAST', 125) # Local variable number
haslocal.append(125)
def_op('DELETE_FAST', 126) # Local variable number
haslocal.append(126)
def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3)
def_op('CALL_FUNCTION', 131) # #args + (#kwargs << 8)
def_op('MAKE_FUNCTION', 132) # Number of args with default values
def_op('BUILD_SLICE', 133) # Number of items
def_op('MAKE_CLOSURE', 134)
def_op('LOAD_CLOSURE', 135)
hasfree.append(135)
def_op('LOAD_DEREF', 136)
hasfree.append(136)
def_op('STORE_DEREF', 137)
hasfree.append(137)
def_op('CALL_FUNCTION_VAR', 140) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_KW', 141) # #args + (#kwargs << 8)
def_op('CALL_FUNCTION_VAR_KW', 142) # #args + (#kwargs << 8)
jrel_op('SETUP_WITH', 143)
def_op('EXTENDED_ARG', 145)
EXTENDED_ARG = 145
def_op('SET_ADD', 146)
def_op('MAP_ADD', 147)
del def_op, name_op, jrel_op, jabs_op

View File

@ -1,700 +0,0 @@
# Copyright (c) 1998-2002 John Aycock
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
__version__ = 'SPARK-0.7 (pre-alpha-7) uncompyle trim'
def _namelist(instance):
namelist, namedict, classlist = [], {}, [instance.__class__]
for c in classlist:
for b in c.__bases__:
classlist.append(b)
for name in c.__dict__.keys():
if not namedict.has_key(name):
namelist.append(name)
namedict[name] = 1
return namelist
#
# Extracted from GenericParser and made global so that [un]picking works.
#
class _State:
def __init__(self, stateno, items):
self.T, self.complete, self.items = [], [], items
self.stateno = stateno
class GenericParser:
#
# An Earley parser, as per J. Earley, "An Efficient Context-Free
# Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
# "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
# Carnegie-Mellon University, August 1968. New formulation of
# the parser according to J. Aycock, "Practical Earley Parsing
# and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
# 2001, and J. Aycock and R. N. Horspool, "Practical Earley
# Parsing", unpublished paper, 2001.
#
def __init__(self, start):
self.rules = {}
self.rule2func = {}
self.rule2name = {}
self.collectRules()
self.augment(start)
self.ruleschanged = 1
_NULLABLE = '\e_'
_START = 'START'
_BOF = '|-'
#
# When pickling, take the time to generate the full state machine;
# some information is then extraneous, too. Unfortunately we
# can't save the rule2func map.
#
def __getstate__(self):
if self.ruleschanged:
#
# XXX - duplicated from parse()
#
self.computeNull()
self.newrules = {}
self.new2old = {}
self.makeNewRules()
self.ruleschanged = 0
self.edges, self.cores = {}, {}
self.states = { 0: self.makeState0() }
self.makeState(0, self._BOF)
#
# XXX - should find a better way to do this..
#
changes = 1
while changes:
changes = 0
for k, v in self.edges.items():
if v is None:
state, sym = k
if self.states.has_key(state):
self.goto(state, sym)
changes = 1
rv = self.__dict__.copy()
for s in self.states.values():
del s.items
del rv['rule2func']
del rv['nullable']
del rv['cores']
return rv
def __setstate__(self, D):
self.rules = {}
self.rule2func = {}
self.rule2name = {}
self.collectRules()
start = D['rules'][self._START][0][1][1] # Blech.
self.augment(start)
D['rule2func'] = self.rule2func
D['makeSet'] = self.makeSet_fast
self.__dict__ = D
#
# A hook for GenericASTBuilder and GenericASTMatcher. Mess
# thee not with this; nor shall thee toucheth the _preprocess
# argument to addRule.
#
def preprocess(self, rule, func): return rule, func
def addRule(self, doc, func, _preprocess=1):
fn = func
rules = doc.split()
index = []
for i in xrange(len(rules)):
if rules[i] == '::=':
index.append(i-1)
index.append(len(rules))
for i in xrange(len(index)-1):
lhs = rules[index[i]]
rhs = rules[index[i]+2:index[i+1]]
rule = (lhs, tuple(rhs))
if _preprocess:
rule, fn = self.preprocess(rule, func)
if self.rules.has_key(lhs):
self.rules[lhs].append(rule)
else:
self.rules[lhs] = [ rule ]
self.rule2func[rule] = fn
self.rule2name[rule] = func.__name__[2:]
self.ruleschanged = 1
def collectRules(self):
for name in _namelist(self):
if name[:2] == 'p_':
func = getattr(self, name)
doc = func.__doc__
self.addRule(doc, func)
def augment(self, start):
rule = '%s ::= %s %s' % (self._START, self._BOF, start)
self.addRule(rule, lambda args: args[1], 0)
def computeNull(self):
self.nullable = {}
tbd = []
for rulelist in self.rules.values():
lhs = rulelist[0][0]
self.nullable[lhs] = 0
for rule in rulelist:
rhs = rule[1]
if len(rhs) == 0:
self.nullable[lhs] = 1
continue
#
# We only need to consider rules which
# consist entirely of nonterminal symbols.
# This should be a savings on typical
# grammars.
#
for sym in rhs:
if not self.rules.has_key(sym):
break
else:
tbd.append(rule)
changes = 1
while changes:
changes = 0
for lhs, rhs in tbd:
if self.nullable[lhs]:
continue
for sym in rhs:
if not self.nullable[sym]:
break
else:
self.nullable[lhs] = 1
changes = 1
def makeState0(self):
s0 = _State(0, [])
for rule in self.newrules[self._START]:
s0.items.append((rule, 0))
return s0
def finalState(self, tokens):
#
# Yuck.
#
if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
return 1
start = self.rules[self._START][0][1][1]
return self.goto(1, start)
def makeNewRules(self):
worklist = []
for rulelist in self.rules.values():
for rule in rulelist:
worklist.append((rule, 0, 1, rule))
for rule, i, candidate, oldrule in worklist:
lhs, rhs = rule
n = len(rhs)
while i < n:
sym = rhs[i]
if not self.rules.has_key(sym) or \
not self.nullable[sym]:
candidate = 0
i = i + 1
continue
newrhs = list(rhs)
newrhs[i] = self._NULLABLE+sym
newrule = (lhs, tuple(newrhs))
worklist.append((newrule, i+1,
candidate, oldrule))
candidate = 0
i = i + 1
else:
if candidate:
lhs = self._NULLABLE+lhs
rule = (lhs, rhs)
if self.newrules.has_key(lhs):
self.newrules[lhs].append(rule)
else:
self.newrules[lhs] = [ rule ]
self.new2old[rule] = oldrule
def typestring(self, token):
return None
def error(self, token):
print "Syntax error at or near `%s' token" % token
raise SystemExit
def parse(self, tokens):
sets = [ [(1,0), (2,0)] ]
self.links = {}
if self.ruleschanged:
self.computeNull()
self.newrules = {}
self.new2old = {}
self.makeNewRules()
self.ruleschanged = 0
self.edges, self.cores = {}, {}
self.states = { 0: self.makeState0() }
self.makeState(0, self._BOF)
for i in xrange(len(tokens)):
sets.append([])
if sets[i] == []:
break
self.makeSet(tokens[i], sets, i)
else:
sets.append([])
self.makeSet(None, sets, len(tokens))
finalitem = (self.finalState(tokens), 0)
if finalitem not in sets[-2]:
if len(tokens) > 0:
self.error(tokens[i-1])
else:
self.error(None)
return self.buildTree(self._START, finalitem,
tokens, len(sets)-2)
def isnullable(self, sym):
#
# For symbols in G_e only. If we weren't supporting 1.5,
# could just use sym.startswith().
#
return self._NULLABLE == sym[0:len(self._NULLABLE)]
def skip(self, (lhs, rhs), pos=0):
n = len(rhs)
while pos < n:
if not self.isnullable(rhs[pos]):
break
pos = pos + 1
return pos
def makeState(self, state, sym):
assert sym is not None
#
# Compute \epsilon-kernel state's core and see if
# it exists already.
#
kitems = []
for rule, pos in self.states[state].items:
lhs, rhs = rule
if rhs[pos:pos+1] == (sym,):
kitems.append((rule, self.skip(rule, pos+1)))
tcore = tuple(sorted(kitems))
if self.cores.has_key(tcore):
return self.cores[tcore]
#
# Nope, doesn't exist. Compute it and the associated
# \epsilon-nonkernel state together; we'll need it right away.
#
k = self.cores[tcore] = len(self.states)
K, NK = _State(k, kitems), _State(k+1, [])
self.states[k] = K
predicted = {}
edges = self.edges
rules = self.newrules
for X in K, NK:
worklist = X.items
for item in worklist:
rule, pos = item
lhs, rhs = rule
if pos == len(rhs):
X.complete.append(rule)
continue
nextSym = rhs[pos]
key = (X.stateno, nextSym)
if not rules.has_key(nextSym):
if not edges.has_key(key):
edges[key] = None
X.T.append(nextSym)
else:
edges[key] = None
if not predicted.has_key(nextSym):
predicted[nextSym] = 1
for prule in rules[nextSym]:
ppos = self.skip(prule)
new = (prule, ppos)
NK.items.append(new)
#
# Problem: we know K needs generating, but we
# don't yet know about NK. Can't commit anything
# regarding NK to self.edges until we're sure. Should
# we delay committing on both K and NK to avoid this
# hacky code? This creates other problems..
#
if X is K:
edges = {}
if NK.items == []:
return k
#
# Check for \epsilon-nonkernel's core. Unfortunately we
# need to know the entire set of predicted nonterminals
# to do this without accidentally duplicating states.
#
tcore = tuple(sorted(predicted.keys()))
if self.cores.has_key(tcore):
self.edges[(k, None)] = self.cores[tcore]
return k
nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
self.edges.update(edges)
self.states[nk] = NK
return k
def goto(self, state, sym):
key = (state, sym)
if not self.edges.has_key(key):
#
# No transitions from state on sym.
#
return None
rv = self.edges[key]
if rv is None:
#
# Target state isn't generated yet. Remedy this.
#
rv = self.makeState(state, sym)
self.edges[key] = rv
return rv
def gotoT(self, state, t):
return [self.goto(state, t)]
def gotoST(self, state, st):
rv = []
for t in self.states[state].T:
if st == t:
rv.append(self.goto(state, t))
return rv
def add(self, set, item, i=None, predecessor=None, causal=None):
if predecessor is None:
if item not in set:
set.append(item)
else:
key = (item, i)
if item not in set:
self.links[key] = []
set.append(item)
self.links[key].append((predecessor, causal))
def makeSet(self, token, sets, i):
cur, next = sets[i], sets[i+1]
ttype = token is not None and self.typestring(token) or None
if ttype is not None:
fn, arg = self.gotoT, ttype
else:
fn, arg = self.gotoST, token
for item in cur:
ptr = (item, i)
state, parent = item
add = fn(state, arg)
for k in add:
if k is not None:
self.add(next, (k, parent), i+1, ptr)
nk = self.goto(k, None)
if nk is not None:
self.add(next, (nk, i+1))
if parent == i:
continue
for rule in self.states[state].complete:
lhs, rhs = rule
for pitem in sets[parent]:
pstate, pparent = pitem
k = self.goto(pstate, lhs)
if k is not None:
why = (item, i, rule)
pptr = (pitem, parent)
self.add(cur, (k, pparent),
i, pptr, why)
nk = self.goto(k, None)
if nk is not None:
self.add(cur, (nk, i))
def makeSet_fast(self, token, sets, i):
#
# Call *only* when the entire state machine has been built!
# It relies on self.edges being filled in completely, and
# then duplicates and inlines code to boost speed at the
# cost of extreme ugliness.
#
cur, next = sets[i], sets[i+1]
ttype = token is not None and self.typestring(token) or None
for item in cur:
ptr = (item, i)
state, parent = item
if ttype is not None:
k = self.edges.get((state, ttype), None)
if k is not None:
#self.add(next, (k, parent), i+1, ptr)
#INLINED --v
new = (k, parent)
key = (new, i+1)
if new not in next:
self.links[key] = []
next.append(new)
self.links[key].append((ptr, None))
#INLINED --^
#nk = self.goto(k, None)
nk = self.edges.get((k, None), None)
if nk is not None:
#self.add(next, (nk, i+1))
#INLINED --v
new = (nk, i+1)
if new not in next:
next.append(new)
#INLINED --^
else:
add = self.gotoST(state, token)
for k in add:
if k is not None:
self.add(next, (k, parent), i+1, ptr)
#nk = self.goto(k, None)
nk = self.edges.get((k, None), None)
if nk is not None:
self.add(next, (nk, i+1))
if parent == i:
continue
for rule in self.states[state].complete:
lhs, rhs = rule
for pitem in sets[parent]:
pstate, pparent = pitem
#k = self.goto(pstate, lhs)
k = self.edges.get((pstate, lhs), None)
if k is not None:
why = (item, i, rule)
pptr = (pitem, parent)
#self.add(cur, (k, pparent),
# i, pptr, why)
#INLINED --v
new = (k, pparent)
key = (new, i)
if new not in cur:
self.links[key] = []
cur.append(new)
self.links[key].append((pptr, why))
#INLINED --^
#nk = self.goto(k, None)
nk = self.edges.get((k, None), None)
if nk is not None:
#self.add(cur, (nk, i))
#INLINED --v
new = (nk, i)
if new not in cur:
cur.append(new)
#INLINED --^
def predecessor(self, key, causal):
for p, c in self.links[key]:
if c == causal:
return p
assert 0
def causal(self, key):
links = self.links[key]
if len(links) == 1:
return links[0][1]
choices = []
rule2cause = {}
for p, c in links:
rule = c[2]
choices.append(rule)
rule2cause[rule] = c
return rule2cause[self.ambiguity(choices)]
def deriveEpsilon(self, nt):
if len(self.newrules[nt]) > 1:
rule = self.ambiguity(self.newrules[nt])
else:
rule = self.newrules[nt][0]
#print rule
rhs = rule[1]
attr = [None] * len(rhs)
for i in xrange(len(rhs)-1, -1, -1):
attr[i] = self.deriveEpsilon(rhs[i])
return self.rule2func[self.new2old[rule]](attr)
def buildTree(self, nt, item, tokens, k):
state, parent = item
choices = []
for rule in self.states[state].complete:
if rule[0] == nt:
choices.append(rule)
rule = choices[0]
if len(choices) > 1:
rule = self.ambiguity(choices)
#print rule
rhs = rule[1]
attr = [None] * len(rhs)
for i in xrange(len(rhs)-1, -1, -1):
sym = rhs[i]
if not self.newrules.has_key(sym):
if sym != self._BOF:
attr[i] = tokens[k-1]
key = (item, k)
item, k = self.predecessor(key, None)
#elif self.isnullable(sym):
elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
attr[i] = self.deriveEpsilon(sym)
else:
key = (item, k)
why = self.causal(key)
attr[i] = self.buildTree(sym, why[0],
tokens, why[1])
item, k = self.predecessor(key, why)
return self.rule2func[self.new2old[rule]](attr)
def ambiguity(self, rules):
#
# XXX - problem here and in collectRules() if the same rule
# appears in >1 method. Also undefined results if rules
# causing the ambiguity appear in the same method.
#
sortlist = []
name2index = {}
for i in xrange(len(rules)):
lhs, rhs = rule = rules[i]
name = self.rule2name[self.new2old[rule]]
sortlist.append((len(rhs), name))
name2index[name] = i
sortlist.sort()
list = map(lambda (a,b): b, sortlist)
return rules[name2index[self.resolve(list)]]
def resolve(self, list):
#
# Resolve ambiguity in favor of the shortest RHS.
# Since we walk the tree from the top down, this
# should effectively resolve in favor of a "shift".
#
return list[0]
#
# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
# for a given input. The extra argument is a class (not an instance!)
# which supports the "__setslice__" and "__len__" methods.
#
# XXX - silently overrides any user code in methods.
#
class GenericASTBuilder(GenericParser):
def __init__(self, AST, start):
GenericParser.__init__(self, start)
self.AST = AST
def preprocess(self, rule, func):
rebind = lambda lhs, self=self: \
lambda args, lhs=lhs, self=self: \
self.buildASTNode(args, lhs)
lhs, rhs = rule
return rule, rebind(lhs)
def buildASTNode(self, args, lhs):
children = []
for arg in args:
if isinstance(arg, self.AST):
children.append(arg)
else:
children.append(arg)
return self.nonterminal(lhs, children)
def nonterminal(self, type, args):
rv = self.AST(type)
rv[:len(args)] = args
return rv
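# A minimal usage sketch (toy grammar, illustrative names). MyAST can be any
# class supporting __setslice__ and __len__, e.g. a UserList subclass. With
# the default typestring() returning None, the tokens are matched against the
# terminals by equality, so plain strings work here:
#
#   class ExprBuilder(GenericASTBuilder):
#       def __init__(self):
#           GenericASTBuilder.__init__(self, MyAST, 'expr')
#       def p_expr(self, args):
#           '''
#               expr ::= expr PLUS term
#               expr ::= term
#               term ::= NUMBER
#           '''
#
#   tree = ExprBuilder().parse(['NUMBER', 'PLUS', 'NUMBER'])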
#
# GenericASTTraversal is a Visitor pattern according to Design Patterns. For
# each node it attempts to invoke the method n_<node type>, falling
# back onto the default() method if the n_* can't be found. The preorder
# traversal also looks for an exit hook named n_<node type>_exit (no default
# routine is called if it's not found). To prematurely halt traversal
# of a subtree, call the prune() method -- this only makes sense for a
# preorder traversal. Node type is determined via the typestring() method.
#
class GenericASTTraversalPruningException:
pass
class GenericASTTraversal:
def __init__(self, ast):
self.ast = ast
def typestring(self, node):
return node.type
def prune(self):
raise GenericASTTraversalPruningException
def preorder(self, node=None):
if node is None:
node = self.ast
try:
name = 'n_' + self.typestring(node)
if hasattr(self, name):
func = getattr(self, name)
func(node)
else:
self.default(node)
except GenericASTTraversalPruningException:
return
for kid in node:
self.preorder(kid)
name = name + '_exit'
if hasattr(self, name):
func = getattr(self, name)
func(node)
def default(self, node):
pass
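# A minimal usage sketch (illustrative node-type names):
#
#   class FuncCounter(GenericASTTraversal):
#       def __init__(self, ast):
#           GenericASTTraversal.__init__(self, ast)
#           self.count = 0
#           self.preorder()
#       def n_funcdef(self, node):   # invoked for every node of type 'funcdef'
#           self.count += 1
#       def default(self, node):     # every other node type falls through here
#           pass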

View File

@ -1,335 +0,0 @@
#
# (C) Copyright 2000-2002 by hartmut Goebel <hartmut@goebel.noris.de>
#
# byte-code verifier for uncompyle
#
import types
import operator
import dis
import uncompyle2, Scanner
BIN_OP_FUNCS = {
'BINARY_POWER': operator.pow,
'BINARY_MULTIPLY': operator.mul,
'BINARY_DIVIDE': operator.div,
'BINARY_FLOOR_DIVIDE': operator.floordiv,
'BINARY_TRUE_DIVIDE': operator.truediv,
'BINARY_MODULO' : operator.mod,
'BINARY_ADD': operator.add,
'BINARY_SUBTRACT': operator.sub,
'BINARY_LSHIFT': operator.lshift,
'BINARY_RSHIFT': operator.rshift,
'BINARY_AND': operator.and_,
'BINARY_XOR': operator.xor,
'BINARY_OR': operator.or_,
}
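# For example, BIN_OP_FUNCS['BINARY_ADD'](2, 3) == 5; the table presumably lets
# the verifier evaluate constant binary operations when comparing code objects.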
JUMP_OPs = None
#--- exceptions ---
class VerifyCmpError(Exception):
pass
class CmpErrorConsts(VerifyCmpError):
"""Exception to be raised when consts differ."""
def __init__(self, name, index):
self.name = name
self.index = index
def __str__(self):
return 'Compare Error within Consts of %s at index %i' % \
(repr(self.name), self.index)
class CmpErrorConstsType(VerifyCmpError):
"""Exception to be raised when consts differ."""
def __init__(self, name, index):
self.name = name
self.index = index
def __str__(self):
return 'Consts type differ in %s at index %i' % \
(repr(self.name), self.index)
class CmpErrorConstsLen(VerifyCmpError):
"""Exception to be raised when length of co_consts differs."""
def __init__(self, name, consts1, consts2):
self.name = name
self.consts = (consts1, consts2)
def __str__(self):
return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % \
(repr(self.name),
len(self.consts[0]), `self.consts[0]`,
len(self.consts[1]), `self.consts[1]`)
class CmpErrorCode(VerifyCmpError):
"""Exception to be raised when code differs."""
def __init__(self, name, index, token1, token2, tokens1, tokens2):
self.name = name
self.index = index
self.token1 = token1
self.token2 = token2
self.tokens = [tokens1, tokens2]
def __str__(self):
s = reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
map(lambda a,b: (a,b),
self.tokens[0],
self.tokens[1]),
'Code differs in %s\n' % str(self.name))
return ('Code differs in %s at offset %s [%s] != [%s]\n\n' % \
(repr(self.name), self.index,
repr(self.token1), repr(self.token2))) + s
class CmpErrorCodeLen(VerifyCmpError):
"""Exception to be raised when code length differs."""
def __init__(self, name, tokens1, tokens2):
self.name = name
self.tokens = [tokens1, tokens2]
def __str__(self):
return reduce(lambda s,t: "%s%-37s\t%-37s\n" % (s, t[0], t[1]),
map(lambda a,b: (a,b),
self.tokens[0],
self.tokens[1]),
'Code len differs in %s\n' % str(self.name))
class CmpErrorMember(VerifyCmpError):
"""Exception to be raised when other members differ."""
def __init__(self, name, member, data1, data2):
self.name = name
self.member = member
self.data = (data1, data2)
def __str__(self):
return 'Member %s differs in %s:\n\t%s\n\t%s\n' % \
(repr(self.member), repr(self.name),
repr(self.data[0]), repr(self.data[1]))
#--- compare ---
# these members are ignored
__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab', 'co_stacksize', 'co_names']
def cmp_code_objects(version, code_obj1, code_obj2, name=''):
"""
Compare two code-objects.
This is the main part of this module.
"""
#print code_obj1, type(code_obj2)
assert type(code_obj1) == types.CodeType
assert type(code_obj2) == types.CodeType
#print dir(code_obj1)
if isinstance(code_obj1, object):
# new style classes (Python 2.2)
# assume _both_ code objects to be new-style classes
assert dir(code_obj1) == dir(code_obj2)
else:
# old style classes
assert dir(code_obj1) == code_obj1.__members__
assert dir(code_obj2) == code_obj2.__members__
assert code_obj1.__members__ == code_obj2.__members__
if name == '__main__':
name = code_obj1.co_name
else:
name = '%s.%s' % (name, code_obj1.co_name)
if name == '.?': name = '__main__'
if isinstance(code_obj1, object) and cmp(code_obj1, code_obj2):
# use the new style code-classes' __cmp__ method, which
# should be faster and more sophisticated
# if this compare fails, we use the old routine to
# find out what exactly is not equal
# if this compare succeeds, simply return
#return
pass
if isinstance(code_obj1, object):
members = filter(lambda x: x.startswith('co_'), dir(code_obj1))
else:
members = dir(code_obj1)
members.sort()
tokens1 = None
for member in members:
if member in __IGNORE_CODE_MEMBERS__:
pass
elif member == 'co_code':
scanner = Scanner.getscanner(version)
scanner.setShowAsm( showasm=0 )
global JUMP_OPs
JUMP_OPs = scanner.JUMP_OPs + ['JUMP_BACK']
# use changed Token class
# we (re)set this here to save exception handling,
# which would otherwise become hard to follow
scanner.setTokenClass(Token)
try:
# disassemble both code-objects
tokens1,customize = scanner.disassemble(code_obj1)
del customize # save memory
tokens2,customize = scanner.disassemble(code_obj2)
del customize # save memory
finally:
scanner.resetTokenClass() # restore Token class
targets1 = dis.findlabels(code_obj1.co_code)
tokens1 = [t for t in tokens1 if t.type != 'COME_FROM']
tokens2 = [t for t in tokens2 if t.type != 'COME_FROM']
i1 = 0; i2 = 0
offset_map = {}; check_jumps = {}
while i1 < len(tokens1):
if i2 >= len(tokens2):
if len(tokens1) == len(tokens2) + 2 \
and tokens1[-1].type == 'RETURN_VALUE' \
and tokens1[-2].type == 'LOAD_CONST' \
and tokens1[-2].pattr is None \
and tokens1[-3].type == 'RETURN_VALUE':
break
else:
raise CmpErrorCodeLen(name, tokens1, tokens2)
offset_map[tokens1[i1].offset] = tokens2[i2].offset
for idx1, idx2, offset2 in check_jumps.get(tokens1[i1].offset, []):
if offset2 != tokens2[i2].offset:
raise CmpErrorCode(name, tokens1[idx1].offset, tokens1[idx1],
tokens2[idx2], tokens1, tokens2)
if tokens1[i1] != tokens2[i2]:
if tokens1[i1].type == 'LOAD_CONST' == tokens2[i2].type:
i = 1
while tokens1[i1+i].type == 'LOAD_CONST':
i += 1
if tokens1[i1+i].type.startswith(('BUILD_TUPLE', 'BUILD_LIST')) \
and i == int(tokens1[i1+i].type.split('_')[-1]):
t = tuple([ elem.pattr for elem in tokens1[i1:i1+i] ])
if t != tokens2[i2].pattr:
raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
tokens2[i2], tokens1, tokens2)
i1 += i + 1
i2 += 1
continue
elif i == 2 and tokens1[i1+i].type == 'ROT_TWO' and tokens2[i2+1].type == 'UNPACK_SEQUENCE_2':
i1 += 3
i2 += 2
continue
elif i == 2 and tokens1[i1+i].type in BIN_OP_FUNCS:
f = BIN_OP_FUNCS[tokens1[i1+i].type]
if f(tokens1[i1].pattr, tokens1[i1+1].pattr) == tokens2[i2].pattr:
i1 += 3
i2 += 1
continue
elif tokens1[i1].type == 'UNARY_NOT':
if tokens2[i2].type == 'POP_JUMP_IF_TRUE':
if tokens1[i1+1].type == 'POP_JUMP_IF_FALSE':
i1 += 2
i2 += 1
continue
elif tokens2[i2].type == 'POP_JUMP_IF_FALSE':
if tokens1[i1+1].type == 'POP_JUMP_IF_TRUE':
i1 += 2
i2 += 1
continue
elif tokens1[i1].type in ('JUMP_FORWARD', 'JUMP_BACK') \
and tokens1[i1-1].type == 'RETURN_VALUE' \
and tokens2[i2-1].type in ('RETURN_VALUE', 'RETURN_END_IF') \
and int(tokens1[i1].offset) not in targets1:
i1 += 1
continue
elif tokens1[i1].type == 'JUMP_FORWARD' and tokens2[i2].type == 'JUMP_BACK' \
and tokens1[i1+1].type == 'JUMP_BACK' and tokens2[i2+1].type == 'JUMP_BACK' \
and int(tokens1[i1].pattr) == int(tokens1[i1].offset) + 3:
if int(tokens1[i1].pattr) == int(tokens1[i1+1].offset):
i1 += 2
i2 += 2
continue
raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
tokens2[i2], tokens1, tokens2)
elif tokens1[i1].type in JUMP_OPs and tokens1[i1].pattr != tokens2[i2].pattr:
dest1 = int(tokens1[i1].pattr)
dest2 = int(tokens2[i2].pattr)
if tokens1[i1].type == 'JUMP_BACK':
if offset_map[dest1] != dest2:
raise CmpErrorCode(name, tokens1[i1].offset, tokens1[i1],
tokens2[i2], tokens1, tokens2)
else:
#import pdb; pdb.set_trace()
if dest1 in check_jumps:
check_jumps[dest1].append((i1,i2,dest2))
else:
check_jumps[dest1] = [(i1,i2,dest2)]
i1 += 1
i2 += 1
del tokens1, tokens2 # save memory
elif member == 'co_consts':
# partial optimization can make the co_consts look different,
# so we'll just compare the code consts
codes1 = ( c for c in code_obj1.co_consts if type(c) == types.CodeType )
codes2 = ( c for c in code_obj2.co_consts if type(c) == types.CodeType )
for c1, c2 in zip(codes1, codes2):
cmp_code_objects(version, c1, c2, name=name)
else:
# all other members must be equal
if getattr(code_obj1, member) != getattr(code_obj2, member):
raise CmpErrorMember(name, member,
getattr(code_obj1,member),
getattr(code_obj2,member))
class Token(Scanner.Token):
"""Token class with changed semantics for 'cmp()'."""
def __cmp__(self, o):
t = self.type # shortcut
loads = ('LOAD_NAME', 'LOAD_GLOBAL', 'LOAD_CONST')
if t in loads and o.type in loads:
if self.pattr == 'None' and o.pattr is None:
return 0
if t == 'BUILD_TUPLE_0' and o.type == 'LOAD_CONST' and o.pattr == ():
return 0
if t == 'COME_FROM' == o.type:
return 0
if t == 'PRINT_ITEM_CONT' and o.type == 'PRINT_ITEM':
return 0
if t == 'RETURN_VALUE' and o.type == 'RETURN_END_IF':
return 0
if t == 'JUMP_IF_FALSE_OR_POP' and o.type == 'POP_JUMP_IF_FALSE':
return 0
if t in JUMP_OPs:
# ignore offset
return cmp(t, o.type)
return cmp(t, o.type) or cmp(self.pattr, o.pattr)
def __repr__(self):
return '%s %s (%s)' % (str(self.type), str(self.attr),
repr(self.pattr))
def __str__(self):
return '%s\t%-17s %r' % (self.offset, self.type, self.pattr)
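# Editor illustration of the relaxed semantics (argument values are made up,
# mirroring the Token(...) calls in the __main__ block below): a RETURN_END_IF
# emitted by the decompiler compares equal to the original RETURN_VALUE, and
# jump tokens are compared by opcode only, so shifted offsets do not count as
# differences.
#
#   cmp(Token('RETURN_VALUE', None, None, 10),
#       Token('RETURN_END_IF', None, None, 13)) == 0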
def compare_code_with_srcfile(pyc_filename, src_filename):
"""Compare a .pyc with a source code file."""
version, code_obj1 = uncompyle2._load_module(pyc_filename)
code_obj2 = uncompyle2._load_file(src_filename)
cmp_code_objects(version, code_obj1, code_obj2)
def compare_files(pyc_filename1, pyc_filename2):
"""Compare two .pyc files."""
version, code_obj1 = uncompyle2._load_module(pyc_filename1)
version, code_obj2 = uncompyle2._load_module(pyc_filename2)
cmp_code_objects(version, code_obj1, code_obj2)
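# A minimal wrapper sketch (editor addition; verify_roundtrip is a hypothetical
# name, not used elsewhere): on success cmp_code_objects returns silently, on
# the first mismatch it raises a VerifyCmpError subclass.
def verify_roundtrip(pyc_filename, src_filename):
    """Return True if the .pyc matches a fresh compile of the source file."""
    try:
        compare_code_with_srcfile(pyc_filename, src_filename)
    except VerifyCmpError, e:
        print 'verification failed:', e
        return False
    return True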
if __name__ == '__main__':
t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52)
t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55)
print repr(t1)
print repr(t2)
print cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr)