You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1500 lines
43 KiB
1500 lines
43 KiB
# Copyright (c) 1999 John Aycock |
|
# Copyright (c) 2000 by hartmut Goebel <hartmut@goebel.noris.de> |
|
# |
|
# Permission is hereby granted, free of charge, to any person obtaining |
|
# a copy of this software and associated documentation files (the |
|
# "Software"), to deal in the Software without restriction, including |
|
# without limitation the rights to use, copy, modify, merge, publish, |
|
# distribute, sublicense, and/or sell copies of the Software, and to |
|
# permit persons to whom the Software is furnished to do so, subject to |
|
# the following conditions: |
|
# |
|
# The above copyright notice and this permission notice shall be |
|
# included in all copies or substantial portions of the Software. |
|
# |
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
|
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
# |
|
# See 'CHANGES' for a list of changes |
|
# |
|
# NB. This is not a masterpiece of software, but became more like a hack. |
|
# Probably a complete write would be sensefull. hG/2000-12-27 |
|
# |
|
|
|
import re, sys, os, types |
|
import dis, imp, marshal |
|
import string |
|
import cStringIO |
|
|
|
|
|
def _load_file(filename): |
|
""" |
|
load a Python source file and compile it to byte-code |
|
_load_module(filename: string): code_object |
|
|
|
filename: name of file containing Python source code |
|
(normally a .py) |
|
code_object: code_object compiled from this source code |
|
|
|
This function does NOT write any file! |
|
""" |
|
fp = open(filename, 'rb') |
|
source = fp.read()+'\n' |
|
try: |
|
co = compile(source, filename, 'exec') |
|
except SyntaxError: |
|
sys.stderr.writelines( ['>>Syntax error in ', filename, '\n'] ) |
|
raise |
|
fp.close() |
|
return co |
|
|
|
def _load_module(filename): |
|
""" |
|
load a module without importing it |
|
_load_module(filename: string): code_object |
|
|
|
filename: name of file containing Python byte-code object |
|
(normally a .pyc) |
|
code_object: code_object from this file |
|
""" |
|
fp = open(filename, 'rb') |
|
if fp.read(4) != imp.get_magic(): |
|
raise ImportError, "Bad magic number in %s" % filename |
|
fp.read(4) |
|
co = marshal.load(fp) |
|
fp.close() |
|
return co |
|
|
|
|
|
#-- start of (de-)compiler |
|
|
|
# |
|
# Scanning |
|
# |
|
|
|
class Code: |
|
"""Class for representing code-objects. |
|
|
|
This is similar to the original code object, but additionally |
|
the diassembled code is stored in the attribute '_tokens'. |
|
""" |
|
def __init__(self, co): |
|
for i in dir(co): |
|
exec 'self.%s = co.%s' % (i, i) |
|
self._tokens, self._customize = disassemble(co) |
|
|
|
class Token: |
|
"""Class representing a byte-code token. |
|
A byte-code token is equivalent to the contents of one line |
|
as output by dis.dis(). |
|
""" |
|
def __init__(self, type, attr=None, pattr=None, offset=-1): |
|
self.type = intern(type) |
|
self.attr = attr |
|
self.pattr = pattr |
|
self.offset = offset |
|
|
|
def __cmp__(self, o): |
|
if isinstance(o, Token): |
|
# both are tokens: compare type and pattr |
|
return cmp(self.type, o.type) \ |
|
or cmp(self.pattr, o.pattr) |
|
else: |
|
return cmp(self.type, o) |
|
|
|
def __repr__(self): return str(self.type) |
|
def __str__(self): |
|
if self.pattr: pattr = self.pattr |
|
else: pattr = '' |
|
return '%s\t%-17s %s' % (self.offset, self.type, pattr) |
|
def __hash__(self): return hash(self.type) |
|
def __getitem__(self, i): raise IndexError |
|
|
|
_JUMP_OPS_ = map(lambda op: dis.opname[op], dis.hasjrel + dis.hasjabs) |
|
|
|
def disassemble(co): |
|
"""Disassemble a code object, returning a list of Token. |
|
|
|
The main part of this procedure is modelled after |
|
dis.diaassemble(). |
|
""" |
|
rv = [] |
|
customize = {} |
|
|
|
code = co.co_code |
|
cf = find_jump_targets(code) |
|
n = len(code) |
|
i = 0 |
|
while i < n: |
|
offset = i |
|
if cf.has_key(offset): |
|
for j in range(cf[offset]): |
|
rv.append(Token('COME_FROM', |
|
offset="%s_%d" % (offset, j) )) |
|
|
|
c = code[i] |
|
op = ord(c) |
|
opname = dis.opname[op] |
|
i = i+1 |
|
oparg = None; pattr = None |
|
if op >= dis.HAVE_ARGUMENT: |
|
oparg = ord(code[i]) + ord(code[i+1])*256 |
|
i = i+2 |
|
if op in dis.hasconst: |
|
const = co.co_consts[oparg] |
|
if type(const) == types.CodeType: |
|
oparg = const |
|
if const.co_name == '<lambda>': |
|
assert opname == 'LOAD_CONST' |
|
opname = 'LOAD_LAMBDA' |
|
# verify uses 'pattr' for |
|
# comparism, since 'attr' now |
|
# hold Code(const) and thus |
|
# can not be used for |
|
# comparism (todo: thinkg |
|
# about changing this) |
|
#pattr = 'code_object @ 0x%x %s->%s' %\ |
|
# (id(const), const.co_filename, const.co_name) |
|
pattr = 'code_object ' + const.co_name |
|
else: |
|
pattr = `const` |
|
elif op in dis.hasname: |
|
pattr = co.co_names[oparg] |
|
elif op in dis.hasjrel: |
|
pattr = `i + oparg` |
|
elif op in dis.haslocal: |
|
pattr = co.co_varnames[oparg] |
|
elif op in dis.hascompare: |
|
pattr = dis.cmp_op[oparg] |
|
|
|
if opname == 'SET_LINENO': |
|
continue |
|
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE', |
|
'UNPACK_LIST', 'UNPACK_TUPLE', |
|
'UNPACK_SEQUENCE', |
|
'MAKE_FUNCTION', 'CALL_FUNCTION', |
|
'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW', |
|
'CALL_FUNCTION_VAR_KW', 'DUP_TOPX', |
|
): |
|
opname = '%s_%d' % (opname, oparg) |
|
customize[opname] = oparg |
|
|
|
rv.append(Token(opname, oparg, pattr, offset)) |
|
|
|
return rv, customize |
|
|
|
|
|
def find_jump_targets(code): |
|
"""Detect all offsets in a byte code which are jump targets. |
|
|
|
Return the list of offsets. |
|
|
|
This procedure is modelled after dis.findlables(), but here |
|
for each target the number of jumps are counted. |
|
""" |
|
targets = {} |
|
n = len(code) |
|
i = 0 |
|
while i < n: |
|
c = code[i] |
|
op = ord(c) |
|
i = i+1 |
|
if op >= dis.HAVE_ARGUMENT: |
|
oparg = ord(code[i]) + ord(code[i+1])*256 |
|
i = i+2 |
|
label = -1 |
|
if op in dis.hasjrel: |
|
label = i+oparg |
|
# todo: absolut jumps |
|
#elif op in dis.hasjabs: |
|
# label = oparg |
|
if label >= 0: |
|
targets[label] = targets.get(label, 0) + 1 |
|
return targets |
|
|
|
|
|
# |
|
# Parsing |
|
# |
|
|
|
class AST: |
|
def __init__(self, type, kids=None): |
|
self.type = intern(type) |
|
if kids == None: kids = [] |
|
self._kids = kids |
|
|
|
def append(self, o): self._kids.append(o) |
|
def pop(self): return self._kids.pop() |
|
def __getitem__(self, i): return self._kids[i] |
|
def __setitem__(self, i, val): self._kids[i] = val |
|
def __delitem__(self, i): del self._kids[i] |
|
def __len__(self): return len(self._kids) |
|
def __getslice__(self, low, high): return self._kids[low:high] |
|
def __setslice__(self, low, high, seq): self._kids[low:high] = seq |
|
def __delslice__(self, low, high): del self._kids[low:high] |
|
def __cmp__(self, o): |
|
if isinstance(o, AST): |
|
return cmp(self.type, o.type) \ |
|
or cmp(self._kids, o._kids) |
|
else: |
|
return cmp(self.type, o) |
|
|
|
def __hash__(self): return hash(self.type) |
|
|
|
def __repr__(self): |
|
rv = str(self.type) |
|
for k in self._kids: |
|
rv = rv + '\n' + string.replace(str(k), '\n', '\n ') |
|
return rv |
|
|
|
# Some ASTs used for comparing code fragments (like 'return None' at |
|
# the end of functions). |
|
|
|
RETURN_LOCALS = AST('stmt', |
|
[ AST('return_stmt', |
|
[ AST('expr', [ Token('LOAD_LOCALS') ]), |
|
Token('RETURN_VALUE')]) ]) |
|
|
|
RETURN_NONE = AST('stmt', |
|
[ AST('return_stmt', |
|
[ AST('expr', [ Token('LOAD_CONST', pattr='None') ]), |
|
Token('RETURN_VALUE')]) ]) |
|
|
|
ASSIGN_DOC_STRING = lambda doc_string: \ |
|
AST('stmt', |
|
[ AST('assign', |
|
[ AST('expr', [ Token('LOAD_CONST', pattr=`doc_string`) ]), |
|
AST('designator', [ Token('STORE_NAME', pattr='__doc__')]) |
|
])]) |
|
|
|
BUILD_TUPLE_0 = AST('expr', |
|
[ Token('BUILD_TUPLE_0') ] ) |
|
|
|
from spark import GenericASTBuilder, GenericASTMatcher |
|
|
|
class Parser(GenericASTBuilder): |
|
def __init__(self): |
|
GenericASTBuilder.__init__(self, AST, 'code') |
|
self.customized = {} |
|
|
|
def cleanup(self): |
|
""" |
|
Remove recursive references to allow garbage |
|
collector to collect this object. |
|
""" |
|
for dict in (self.rule2func, self.rules, self.rule2name, self.first): |
|
for i in dict.keys(): |
|
dict[i] = None |
|
for i in dir(self): |
|
setattr(self, i, None) |
|
|
|
def error(self, token): |
|
# output offset, too |
|
print "Syntax error at or near `%s' token at offset %s" % \ |
|
(`token`, token.offset) |
|
raise SystemExit |
|
|
|
def typestring(self, token): |
|
return token.type |
|
|
|
def p_funcdef(self, args): |
|
''' |
|
stmt ::= funcdef |
|
funcdef ::= mkfunc STORE_FAST |
|
funcdef ::= mkfunc STORE_NAME |
|
''' |
|
|
|
# new for Python2.0 |
|
# |
|
# UNPACK_SEQUENCE # number of tuple items |
|
# EXTENDED_ARG |
|
|
|
def p_list_comprehension(self, args): |
|
''' |
|
expr ::= list_compr |
|
list_compr ::= lc_prep lc_for lc_cleanup |
|
lc_prep ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR STORE_NAME |
|
lc_prep ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR STORE_FAST |
|
|
|
lc_for ::= expr LOAD_CONST |
|
FOR_LOOP designator |
|
lc_for JUMP_ABSOLUTE |
|
COME_FROM |
|
lc_for ::= expr LOAD_CONST |
|
FOR_LOOP designator |
|
lc_if JUMP_ABSOLUTE |
|
COME_FROM |
|
lc_for ::= expr LOAD_CONST |
|
FOR_LOOP designator |
|
lc_body JUMP_ABSOLUTE |
|
COME_FROM |
|
lc_if ::= expr condjmp lc_body |
|
JUMP_FORWARD COME_FROM POP_TOP |
|
COME_FROM |
|
lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP |
|
lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP |
|
lc_cleanup ::= DELETE_NAME |
|
lc_cleanup ::= DELETE_FAST |
|
''' |
|
|
|
def p_augmented_assign(self, args): |
|
''' |
|
stmt ::= augassign1 |
|
stmt ::= augassign2 |
|
augassign1 ::= expr expr inplace_op designator |
|
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR |
|
augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0 |
|
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1 |
|
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2 |
|
augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3 |
|
augassign2 ::= expr DUP_TOP LOAD_ATTR expr |
|
inplace_op ROT_TWO STORE_ATTR |
|
|
|
inplace_op ::= INPLACE_ADD |
|
inplace_op ::= INPLACE_SUBTRACT |
|
inplace_op ::= INPLACE_MULTIPLY |
|
inplace_op ::= INPLACE_DIVIDE |
|
inplace_op ::= INPLACE_MODULO |
|
inplace_op ::= INPLACE_POWER |
|
inplace_op ::= INPLACE_LSHIFT |
|
inplace_op ::= INPLACE_RSHIFT |
|
inplace_op ::= INPLACE_AND |
|
inplace_op ::= INPLACE_XOR |
|
inplace_op ::= INPLACE_OR |
|
''' |
|
|
|
def p_assign(self, args): |
|
''' |
|
stmt ::= assign |
|
assign ::= expr DUP_TOP designList |
|
assign ::= expr designator |
|
''' |
|
|
|
def p_print(self, args): |
|
''' |
|
stmt ::= print_stmt |
|
stmt ::= print_stmt_nl |
|
stmt ::= print_nl_stmt |
|
print_stmt ::= expr PRINT_ITEM |
|
print_nl_stmt ::= PRINT_NEWLINE |
|
print_stmt_nl ::= print_stmt print_nl_stmt |
|
''' |
|
|
|
def p_print_to(self, args): |
|
''' |
|
stmt ::= print_to |
|
stmt ::= print_to_nl |
|
stmt ::= print_nl_to |
|
print_to ::= expr print_to_items POP_TOP |
|
print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO |
|
print_nl_to ::= expr PRINT_NEWLINE_TO |
|
print_to_items ::= print_to_items print_to_item |
|
print_to_items ::= print_to_item |
|
print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO |
|
''' |
|
# expr print_to* POP_TOP |
|
# expr { print_to* } PRINT_NEWLINE_TO |
|
|
|
def p_import15(self, args): |
|
''' |
|
stmt ::= importstmt |
|
stmt ::= importfrom |
|
|
|
importstmt ::= IMPORT_NAME STORE_FAST |
|
importstmt ::= IMPORT_NAME STORE_NAME |
|
|
|
importfrom ::= IMPORT_NAME importlist POP_TOP |
|
importlist ::= importlist IMPORT_FROM |
|
importlist ::= IMPORT_FROM |
|
''' |
|
|
|
def p_import20(self, args): |
|
''' |
|
stmt ::= importstmt2 |
|
stmt ::= importfrom2 |
|
stmt ::= importstar2 |
|
|
|
importstmt2 ::= LOAD_CONST import_as |
|
importstar2 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR |
|
|
|
importfrom2 ::= LOAD_CONST IMPORT_NAME importlist2 POP_TOP |
|
importlist2 ::= importlist2 import_as |
|
importlist2 ::= import_as |
|
import_as ::= IMPORT_NAME STORE_FAST |
|
import_as ::= IMPORT_NAME STORE_NAME |
|
import_as ::= IMPORT_NAME LOAD_ATTR STORE_FAST |
|
import_as ::= IMPORT_NAME LOAD_ATTR STORE_NAME |
|
import_as ::= IMPORT_FROM STORE_FAST |
|
import_as ::= IMPORT_FROM STORE_NAME |
|
''' |
|
# 'import_as' can't use designator, since n_import_as() |
|
# needs to compare both kids' pattr |
|
|
|
def p_grammar(self, args): |
|
''' |
|
code ::= stmts |
|
code ::= |
|
|
|
stmts ::= stmts stmt |
|
stmts ::= stmt |
|
|
|
stmts_opt ::= stmts |
|
stmts_opt ::= passstmt |
|
passstmt ::= |
|
|
|
designList ::= designator designator |
|
designList ::= designator DUP_TOP designList |
|
|
|
designator ::= STORE_FAST |
|
designator ::= STORE_NAME |
|
designator ::= STORE_GLOBAL |
|
designator ::= expr STORE_ATTR |
|
designator ::= expr STORE_SLICE+0 |
|
designator ::= expr expr STORE_SLICE+1 |
|
designator ::= expr expr STORE_SLICE+2 |
|
designator ::= expr expr expr STORE_SLICE+3 |
|
designator ::= store_subscr |
|
store_subscr ::= expr expr STORE_SUBSCR |
|
designator ::= unpack |
|
designator ::= unpack_list |
|
|
|
stmt ::= classdef |
|
stmt ::= call_stmt |
|
call_stmt ::= expr POP_TOP |
|
|
|
stmt ::= return_stmt |
|
return_stmt ::= expr RETURN_VALUE |
|
|
|
stmt ::= break_stmt |
|
break_stmt ::= BREAK_LOOP |
|
|
|
stmt ::= continue_stmt |
|
continue_stmt ::= JUMP_ABSOLUTE |
|
|
|
stmt ::= raise_stmt |
|
raise_stmt ::= exprlist RAISE_VARARGS |
|
raise_stmt ::= nullexprlist RAISE_VARARGS |
|
|
|
stmt ::= exec_stmt |
|
exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT |
|
exec_stmt ::= expr exprlist EXEC_STMT |
|
|
|
stmt ::= assert |
|
stmt ::= assert2 |
|
stmt ::= ifstmt |
|
stmt ::= ifelsestmt |
|
stmt ::= whilestmt |
|
stmt ::= whileelsestmt |
|
stmt ::= forstmt |
|
stmt ::= forelsestmt |
|
stmt ::= trystmt |
|
stmt ::= tryfinallystmt |
|
|
|
stmt ::= DELETE_FAST |
|
stmt ::= DELETE_NAME |
|
stmt ::= DELETE_GLOBAL |
|
stmt ::= expr DELETE_SLICE+0 |
|
stmt ::= expr expr DELETE_SLICE+1 |
|
stmt ::= expr expr DELETE_SLICE+2 |
|
stmt ::= expr expr expr DELETE_SLICE+3 |
|
stmt ::= delete_subscr |
|
delete_subscr ::= expr expr DELETE_SUBSCR |
|
stmt ::= expr DELETE_ATTR |
|
|
|
kwarg ::= LOAD_CONST expr |
|
|
|
classdef ::= LOAD_CONST expr mkfunc |
|
CALL_FUNCTION_0 BUILD_CLASS STORE_NAME |
|
classdef ::= LOAD_CONST expr mkfunc |
|
CALL_FUNCTION_0 BUILD_CLASS STORE_FAST |
|
|
|
condjmp ::= JUMP_IF_FALSE POP_TOP |
|
condjmp ::= JUMP_IF_TRUE POP_TOP |
|
|
|
assert ::= expr JUMP_IF_FALSE POP_TOP |
|
expr JUMP_IF_TRUE POP_TOP |
|
LOAD_GLOBAL RAISE_VARARGS |
|
COME_FROM COME_FROM POP_TOP |
|
assert2 ::= expr JUMP_IF_FALSE POP_TOP |
|
expr JUMP_IF_TRUE POP_TOP |
|
LOAD_GLOBAL expr RAISE_VARARGS |
|
COME_FROM COME_FROM POP_TOP |
|
|
|
ifstmt ::= expr condjmp stmts_opt |
|
JUMP_FORWARD COME_FROM POP_TOP |
|
COME_FROM |
|
|
|
ifelsestmt ::= expr condjmp stmts_opt |
|
JUMP_FORWARD COME_FROM |
|
POP_TOP stmts COME_FROM |
|
|
|
trystmt ::= SETUP_EXCEPT stmts_opt |
|
POP_BLOCK JUMP_FORWARD |
|
COME_FROM except_stmt |
|
|
|
try_end ::= END_FINALLY COME_FROM |
|
try_end ::= except_else |
|
except_else ::= END_FINALLY COME_FROM stmts |
|
|
|
except_stmt ::= except_cond except_stmt COME_FROM |
|
except_stmt ::= except_conds try_end COME_FROM |
|
except_stmt ::= except try_end COME_FROM |
|
except_stmt ::= try_end |
|
|
|
except_conds ::= except_cond except_conds COME_FROM |
|
except_conds ::= |
|
|
|
except_cond ::= except_cond1 |
|
except_cond ::= except_cond2 |
|
except_cond1 ::= DUP_TOP expr COMPARE_OP |
|
JUMP_IF_FALSE |
|
POP_TOP POP_TOP POP_TOP POP_TOP |
|
stmts_opt JUMP_FORWARD COME_FROM |
|
POP_TOP |
|
except_cond2 ::= DUP_TOP expr COMPARE_OP |
|
JUMP_IF_FALSE |
|
POP_TOP POP_TOP designator POP_TOP |
|
stmts_opt JUMP_FORWARD COME_FROM |
|
POP_TOP |
|
except ::= POP_TOP POP_TOP POP_TOP |
|
stmts_opt JUMP_FORWARD |
|
|
|
tryfinallystmt ::= SETUP_FINALLY stmts_opt |
|
POP_BLOCK LOAD_CONST |
|
COME_FROM stmts_opt END_FINALLY |
|
|
|
whilestmt ::= SETUP_LOOP |
|
expr JUMP_IF_FALSE POP_TOP |
|
stmts_opt JUMP_ABSOLUTE |
|
COME_FROM POP_TOP POP_BLOCK COME_FROM |
|
whileelsestmt ::= SETUP_LOOP |
|
expr JUMP_IF_FALSE POP_TOP |
|
stmts_opt JUMP_ABSOLUTE |
|
COME_FROM POP_TOP POP_BLOCK |
|
stmts COME_FROM |
|
|
|
forstmt ::= SETUP_LOOP expr LOAD_CONST |
|
FOR_LOOP designator |
|
stmts_opt JUMP_ABSOLUTE |
|
COME_FROM POP_BLOCK COME_FROM |
|
forelsestmt ::= SETUP_LOOP expr LOAD_CONST |
|
FOR_LOOP designator |
|
stmts_opt JUMP_ABSOLUTE |
|
COME_FROM POP_BLOCK stmts COME_FROM |
|
''' |
|
|
|
def p_expr(self, args): |
|
''' |
|
expr ::= mklambda |
|
expr ::= mkfunc |
|
expr ::= SET_LINENO |
|
expr ::= LOAD_FAST |
|
expr ::= LOAD_NAME |
|
expr ::= LOAD_CONST |
|
expr ::= LOAD_GLOBAL |
|
expr ::= LOAD_LOCALS |
|
expr ::= expr LOAD_ATTR |
|
expr ::= binary_expr |
|
|
|
binary_expr ::= expr expr binary_op |
|
binary_op ::= BINARY_ADD |
|
binary_op ::= BINARY_SUBTRACT |
|
binary_op ::= BINARY_MULTIPLY |
|
binary_op ::= BINARY_DIVIDE |
|
binary_op ::= BINARY_MODULO |
|
binary_op ::= BINARY_LSHIFT |
|
binary_op ::= BINARY_RSHIFT |
|
binary_op ::= BINARY_AND |
|
binary_op ::= BINARY_OR |
|
binary_op ::= BINARY_XOR |
|
binary_op ::= BINARY_POWER |
|
|
|
expr ::= binary_subscr |
|
binary_subscr ::= expr expr BINARY_SUBSCR |
|
expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR |
|
expr ::= cmp |
|
expr ::= expr UNARY_POSITIVE |
|
expr ::= expr UNARY_NEGATIVE |
|
expr ::= expr UNARY_CONVERT |
|
expr ::= expr UNARY_INVERT |
|
expr ::= expr UNARY_NOT |
|
expr ::= mapexpr |
|
expr ::= expr SLICE+0 |
|
expr ::= expr expr SLICE+1 |
|
expr ::= expr expr SLICE+2 |
|
expr ::= expr expr expr SLICE+3 |
|
expr ::= expr DUP_TOP SLICE+0 |
|
expr ::= expr expr DUP_TOPX_2 SLICE+1 |
|
expr ::= expr expr DUP_TOPX_2 SLICE+2 |
|
expr ::= expr expr expr DUP_TOPX_3 SLICE+3 |
|
expr ::= and |
|
expr ::= or |
|
or ::= expr JUMP_IF_TRUE POP_TOP expr COME_FROM |
|
and ::= expr JUMP_IF_FALSE POP_TOP expr COME_FROM |
|
|
|
cmp ::= cmp_list |
|
cmp ::= compare |
|
compare ::= expr expr COMPARE_OP |
|
cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP |
|
COME_FROM |
|
cmp_list1 ::= expr DUP_TOP ROT_THREE |
|
COMPARE_OP JUMP_IF_FALSE POP_TOP |
|
cmp_list1 COME_FROM |
|
cmp_list1 ::= expr DUP_TOP ROT_THREE |
|
COMPARE_OP JUMP_IF_FALSE POP_TOP |
|
cmp_list2 COME_FROM |
|
cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD |
|
|
|
mapexpr ::= BUILD_MAP kvlist |
|
|
|
kvlist ::= kvlist kv |
|
kvlist ::= |
|
|
|
kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR |
|
|
|
exprlist ::= exprlist expr |
|
exprlist ::= expr |
|
|
|
nullexprlist ::= |
|
''' |
|
|
|
def nonterminal(self, nt, args): |
|
collect = ('stmts', 'exprlist', 'kvlist') |
|
|
|
if nt in collect and len(args) > 1: |
|
# |
|
# Collect iterated thingies together. |
|
# |
|
rv = args[0] |
|
rv.append(args[1]) |
|
else: |
|
rv = GenericASTBuilder.nonterminal(self, nt, args) |
|
return rv |
|
|
|
def __ambiguity(self, children): |
|
# only for debugging! to be removed hG/2000-10-15 |
|
print children |
|
return GenericASTBuilder.ambiguity(self, children) |
|
|
|
def resolve(self, list): |
|
if len(list) == 2 and 'funcdef' in list and 'assign' in list: |
|
return 'funcdef' |
|
#sys.stderr.writelines( ['resolve ', str(list), '\n'] ) |
|
return GenericASTBuilder.resolve(self, list) |
|
|
|
nop = lambda self, args: None |
|
|
|
def parse(tokens, customize): |
|
p = Parser() |
|
# |
|
# Special handling for opcodes that take a variable number |
|
# of arguments -- we add a new rule for each: |
|
# |
|
# expr ::= {expr}^n BUILD_LIST_n |
|
# expr ::= {expr}^n BUILD_TUPLE_n |
|
# expr ::= {expr}^n BUILD_SLICE_n |
|
# unpack_list ::= UNPACK_LIST {expr}^n |
|
# unpack ::= UNPACK_TUPLE {expr}^n |
|
# unpack ::= UNPACK_SEQEUENE {expr}^n |
|
# mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n |
|
# expr ::= expr {expr}^n CALL_FUNCTION_n |
|
# expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP |
|
# expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP |
|
# expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP |
|
# |
|
for k, v in customize.items(): |
|
## avoid adding the same rule twice to this parser |
|
#if p.customized.has_key(k): |
|
# continue |
|
#p.customized[k] = None |
|
|
|
#nop = lambda self, args: None |
|
op = k[:string.rfind(k, '_')] |
|
if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE'): |
|
rule = 'expr ::= ' + 'expr '*v + k |
|
elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'): |
|
rule = 'unpack ::= ' + k + ' designator'*v |
|
elif op == 'UNPACK_LIST': |
|
rule = 'unpack_list ::= ' + k + ' designator'*v |
|
elif op == 'DUP_TOPX': |
|
# no need to add a rule |
|
pass |
|
#rule = 'dup_topx ::= ' + 'expr '*v + k |
|
elif op == 'MAKE_FUNCTION': |
|
p.addRule('mklambda ::= %s LOAD_LAMBDA %s' % |
|
('expr '*v, k), nop) |
|
rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k) |
|
elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR', |
|
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): |
|
na = (v & 0xff) # positional parameters |
|
nk = (v >> 8) & 0xff # keyword parameters |
|
# number of apply equiv arguments: |
|
nak = ( len(op)-len('CALL_FUNCTION') ) / 3 |
|
rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \ |
|
+ 'expr ' * nak + k |
|
else: |
|
raise 'unknown customize token %s' % k |
|
p.addRule(rule, nop) |
|
ast = p.parse(tokens) |
|
p.cleanup() |
|
return ast |
|
|
|
# |
|
# Decompilation (walking AST) |
|
# |
|
# All table-driven. Step 1 determines a table (T) and a path to a |
|
# table key (K) from the node type (N) (other nodes are shown as O): |
|
# |
|
# N N N&K |
|
# / | ... \ / | ... \ / | ... \ |
|
# O O O O O K O O O |
|
# | |
|
# K |
|
# |
|
# MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT) |
|
# |
|
# The default is a direct mapping. The key K is then extracted from the |
|
# subtree and used to find a table entry T[K], if any. The result is a |
|
# format string and arguments (a la printf()) for the formatting engine. |
|
# Escapes in the format string are: |
|
# |
|
# %c evaluate N[A] recursively* |
|
# %C evaluate N[A[0]]..N[A[1]] recursively, separate by A[2]* |
|
# %, print ',' if last %C only printed one item (for tuples) |
|
# %| tab to current indentation level |
|
# %+ increase current indentation level |
|
# %- decrease current indentation level |
|
# %{...} evaluate ... in context of N |
|
# %% literal '%' |
|
# |
|
# * indicates an argument (A) required. |
|
# |
|
# The '%' may optionally be followed by a number (C) in square brackets, which |
|
# makes the engine walk down to N[C] before evaluating the escape code. |
|
# |
|
|
|
from spark import GenericASTTraversal |
|
|
|
#TAB = '\t' # as God intended |
|
TAB = ' ' *4 # is less spacy than "\t" |
|
|
|
TABLE_R = { |
|
'build_tuple2': ( '%C', (0,-1,', ') ), |
|
'POP_TOP': ( '%|%c\n', 0 ), |
|
'STORE_ATTR': ( '%c.%[1]{pattr}', 0), |
|
# 'STORE_SUBSCR': ( '%c[%c]', 0, 1 ), |
|
'STORE_SLICE+0':( '%c[:]', 0 ), |
|
'STORE_SLICE+1':( '%c[%c:]', 0, 1 ), |
|
'STORE_SLICE+2':( '%c[:%c]', 0, 1 ), |
|
'STORE_SLICE+3':( '%c[%c:%c]', 0, 1, 2 ), |
|
'JUMP_ABSOLUTE':( '%|continue\n', ), |
|
'DELETE_SLICE+0':( '%|del %c[:]\n', 0 ), |
|
'DELETE_SLICE+1':( '%|del %c[%c:]\n', 0, 1 ), |
|
'DELETE_SLICE+2':( '%|del %c[:%c]\n', 0, 1 ), |
|
'DELETE_SLICE+3':( '%|del %c[%c:%c]\n', 0, 1, 2 ), |
|
'DELETE_ATTR': ( '%|del %c.%[-1]{pattr}\n', 0 ), |
|
#'EXEC_STMT': ( '%|exec %c in %[1]C\n', 0, (0,sys.maxint,', ') ), |
|
'BINARY_SUBSCR':( '%c[%c]', 0, 1), # required for augmented assign |
|
'UNARY_POSITIVE':( '+%c', 0 ), |
|
'UNARY_NEGATIVE':( '-%c', 0 ), |
|
'UNARY_CONVERT':( '`%c`', 0 ), |
|
'UNARY_INVERT': ( '~%c', 0 ), |
|
'UNARY_NOT': ( '(not %c)', 0 ), |
|
'SLICE+0': ( '%c[:]', 0 ), |
|
'SLICE+1': ( '%c[%c:]', 0, 1 ), |
|
'SLICE+2': ( '%c[:%c]', 0, 1 ), |
|
'SLICE+3': ( '%c[%c:%c]', 0, 1, 2 ), |
|
} |
|
TABLE_R0 = { |
|
# 'BUILD_LIST': ( '[%C]', (0,-1,', ') ), |
|
# 'BUILD_TUPLE': ( '(%C)', (0,-1,', ') ), |
|
# 'CALL_FUNCTION':( '%c(%C)', 0, (1,-1,', ') ), |
|
} |
|
TABLE_DIRECT = { |
|
'BINARY_ADD': ( '+' ,), |
|
'BINARY_SUBTRACT': ( '-' ,), |
|
'BINARY_MULTIPLY': ( '*' ,), |
|
'BINARY_DIVIDE': ( '/' ,), |
|
'BINARY_MODULO': ( '%%',), |
|
'BINARY_POWER': ( '**',), |
|
'BINARY_LSHIFT': ( '<<',), |
|
'BINARY_RSHIFT': ( '>>',), |
|
'BINARY_AND': ( '&' ,), |
|
'BINARY_OR': ( '|' ,), |
|
'BINARY_XOR': ( '^' ,), |
|
'INPLACE_ADD': ( '+=' ,), |
|
'INPLACE_SUBTRACT': ( '-=' ,), |
|
'INPLACE_MULTIPLY': ( '*=' ,), |
|
'INPLACE_DIVIDE': ( '/=' ,), |
|
'INPLACE_MODULO': ( '%%=',), |
|
'INPLACE_POWER': ( '**=',), |
|
'INPLACE_LSHIFT': ( '<<=',), |
|
'INPLACE_RSHIFT': ( '>>=',), |
|
'INPLACE_AND': ( '&=' ,), |
|
'INPLACE_OR': ( '|=' ,), |
|
'INPLACE_XOR': ( '^=' ,), |
|
'binary_expr': ( '(%c %c %c)', 0, -1, 1 ), |
|
|
|
'IMPORT_FROM': ( '%{pattr}', ), |
|
'LOAD_ATTR': ( '.%{pattr}', ), |
|
'LOAD_FAST': ( '%{pattr}', ), |
|
'LOAD_NAME': ( '%{pattr}', ), |
|
'LOAD_GLOBAL': ( '%{pattr}', ), |
|
'LOAD_LOCALS': ( 'locals()', ), |
|
#'LOAD_CONST': ( '%{pattr}', ), handled below |
|
'DELETE_FAST': ( '%|del %{pattr}\n', ), |
|
'DELETE_NAME': ( '%|del %{pattr}\n', ), |
|
'DELETE_GLOBAL':( '%|del %{pattr}\n', ), |
|
'delete_subscr':( '%|del %c[%c]\n', 0, 1,), |
|
'binary_subscr':( '%c[%c]', 0, 1), |
|
'store_subscr': ( '%c[%c]', 0, 1), |
|
'STORE_FAST': ( '%{pattr}', ), |
|
'STORE_NAME': ( '%{pattr}', ), |
|
'STORE_GLOBAL': ( '%{pattr}', ), |
|
'unpack': ( '(%C,)', (1, sys.maxint, ', ') ), |
|
'unpack_list': ( '[%C]', (1, sys.maxint, ', ') ), |
|
|
|
'list_compr': ( '[ %c ]', 1), |
|
# 'lc_for': ( ' for %c in %c', 3, 0 ), |
|
'lc_for_nest': ( ' for %c in %c%c', 3, 0, 4 ), |
|
'lc_if': ( ' if %c', 0 ), |
|
'lc_body': ( '%c', 1), |
|
'lc_body__': ( '', ), |
|
|
|
'assign': ( '%|%c = %c\n', -1, 0 ), |
|
'augassign1': ( '%|%c %c %c\n', 0, 2, 1), |
|
'augassign2': ( '%|%c%c %c %c\n', 0, 2, -3, -4), |
|
#'dup_topx': ('%c', 0), |
|
'designList': ( '%c = %c', 0, -1 ), |
|
'and': ( '(%c and %c)', 0, 3 ), |
|
'or': ( '(%c or %c)', 0, 3 ), |
|
'compare': ( '(%c %[-1]{pattr} %c)', 0, 1 ), |
|
'cmp_list': ('%c %c', 0, 1), |
|
'cmp_list1': ('%[3]{pattr} %c %c', 0, -2), |
|
'cmp_list2': ('%[1]{pattr} %c', 0), |
|
'classdef': ( '\n%|class %[0]{pattr[1:-1]}%c:\n%+%{build_class}%-', 1 ), |
|
'funcdef': ( '\n%|def %c\n', 0), |
|
'kwarg': ( '%[0]{pattr[1:-1]}=%c', 1), |
|
'importstmt': ( '%|import %[0]{pattr}\n', ), |
|
'importfrom': ( '%|from %[0]{pattr} import %c\n', 1 ), |
|
'importlist': ( '%C', (0, sys.maxint, ', ') ), |
|
'importstmt2': ( '%|import %c\n', 1), |
|
'importstar2': ( '%|from %[1]{pattr} import *\n', ), |
|
'importfrom2': ( '%|from %[1]{pattr} import %c\n', 2 ), |
|
'importlist2': ( '%C', (0, sys.maxint, ', ') ), |
|
'assert': ( '%|assert %c\n' , 3 ), |
|
'assert2': ( '%|assert %c, %c\n' , 3, -5 ), |
|
|
|
'print_stmt': ( '%|print %c,\n', 0 ), |
|
'print_stmt_nl': ( '%|print %[0]C\n', (0,1, None) ), |
|
'print_nl_stmt': ( '%|print\n', ), |
|
'print_to': ( '%|print >> %c, %c,\n', 0, 1 ), |
|
'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ), |
|
'print_nl_to': ( '%|print >> %c\n', 0 ), |
|
'print_to_items': ( '%C', (0, 2, ', ') ), |
|
|
|
'call_stmt': ( '%|%c\n', 0), |
|
'break_stmt': ( '%|break\n', ), |
|
'continue_stmt':( '%|continue\n', ), |
|
'raise_stmt': ( '%|raise %[0]C\n', (0,sys.maxint,', ') ), |
|
'return_stmt': ( '%|return %c\n', 0), |
|
'return_lambda': ( '%c', 0), |
|
|
|
'ifstmt': ( '%|if %c:\n%+%c%-', 0, 2 ), |
|
'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 2, -2 ), |
|
'ifelifstmt': ( '%|if %c:\n%+%c%-%c', 0, 2, -2 ), |
|
'elifelifstmt': ( '%|elif %c:\n%+%c%-%c', 0, 2, -2 ), |
|
'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 2 ), |
|
'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 2, -2 ), |
|
|
|
'whilestmt': ( '%|while %c:\n%+%c%-\n', 1, 4 ), |
|
'whileelsestmt':( '%|while %c:\n%+%c%-\n%|else:\n%+%c%-\n', 1, 4, 9 ), |
|
'forstmt': ( '%|for %c in %c:\n%+%c%-\n', 4, 1, 5 ), |
|
'forelsestmt': ( |
|
'%|for %c in %c:\n%+%c%-\n%|else:\n%+%c%-\n', 4, 1, 5, 9 |
|
), |
|
'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ), |
|
'except': ( '%|except:\n%+%c%-', 3 ), |
|
'except_cond1': ( '%|except %c:\n%+%c%-', 1, 8 ), |
|
'except_cond2': ( '%|except %c, %c:\n%+%c%-', 1, 6, 8 ), |
|
'except_else': ( '%|else:\n%+%c%-', 2 ), |
|
'tryfinallystmt':( '%|try:\n%+%c%-\n%|finally:\n%+%c%-\n', 1, 5 ), |
|
'passstmt': ( '%|pass\n', ), |
|
'STORE_FAST': ( '%{pattr}', ), |
|
'kv': ( '%c: %c', 3, 1 ), |
|
'mapexpr': ( '{%[1]C}', (0,sys.maxint,', ') ), |
|
} |
|
|
|
|
|
MAP_DIRECT = (TABLE_DIRECT, ) |
|
MAP_R0 = (TABLE_R0, -1, 0) |
|
MAP_R = (TABLE_R, -1) |
|
|
|
MAP = { |
|
'stmt': MAP_R, |
|
'designator': MAP_R, |
|
'expr': MAP_R, |
|
'exprlist': MAP_R0, |
|
} |
|
|
|
|
|
ASSIGN_TUPLE_PARAM = lambda param_name: \ |
|
AST('expr', [ Token('LOAD_FAST', pattr=param_name) ]) |
|
|
|
|
|
def get_tuple_parameter(ast, name): |
|
""" |
|
If the name of the formal parameter starts with dot, |
|
it's a tuple parameter, like this: |
|
def MyFunc(xx, (a,b,c), yy): |
|
print a, b*2, c*42 |
|
In byte-code, the whole tuple is assigned to parameter '.1' and |
|
then the tuple gets unpacked to 'a', 'b' and 'c'. |
|
|
|
Since identifiers starting with a dot are illegal in Python, |
|
we can search for the byte-code equivalent to '(a,b,c) = .1' |
|
""" |
|
assert ast == 'code' and ast[0] == 'stmts' |
|
for i in xrange(len(ast[0])): |
|
# search for an assign-statement |
|
assert ast[0][i] == 'stmt' |
|
node = ast[0][i][0] |
|
if node == 'assign' \ |
|
and node[0] == ASSIGN_TUPLE_PARAM(name): |
|
# okay, this assigns '.n' to something |
|
del ast[0][i] |
|
# walk lhs; this |
|
# returns a tuple of identifiers as used |
|
# within the function definition |
|
assert node[1] == 'designator' |
|
# if lhs is not a UNPACK_TUPLE (or equiv.), |
|
# add parenteses to make this a tuple |
|
if node[1][0] not in ('unpack', 'unpack_list'): |
|
return '(' + walk(node[1]) + ')' |
|
return walk(node[1]) |
|
raise "Can't find tuple parameter" % name |
|
|
|
def make_function(self, code, defparams, isLambda, nested=1): |
|
"""Dump function defintion, doc string, and function body.""" |
|
|
|
def build_param(ast, name, default): |
|
"""build parameters: |
|
- handle defaults |
|
- handle format tuple parameters |
|
""" |
|
# if formal parameter is a tuple, the paramater name |
|
# starts with a dot (eg. '.1', '.2') |
|
if name[0] == '.': |
|
# replace the name with the tuple-string |
|
name = get_tuple_parameter(ast, name) |
|
|
|
if default: |
|
if Showast: |
|
print '--', name |
|
print default |
|
print '--' |
|
result = '%s = %s' % ( name, walk(default, indent=0) ) |
|
##w = Walk(default, 0) |
|
##result = '%s = %s' % ( name, w.traverse() ) |
|
##del w # hg/2000-09-03 |
|
if result[-2:] == '= ': # default was 'LOAD_CONST None' |
|
result = result + 'None' |
|
return result |
|
else: |
|
return name |
|
|
|
def writeParams(self, params): |
|
for i in range(len(params)): |
|
if i > 0: self.f.write(', ') |
|
self.f.write(params[i]) |
|
|
|
assert type(code) == types.CodeType |
|
code = Code(code) |
|
#assert isinstance(code, Code) |
|
|
|
ast = _build_ast(self.f, code._tokens, code._customize) |
|
code._tokens = None # save memory |
|
assert ast == 'code' and ast[0] == 'stmts' |
|
if isLambda: |
|
# convert 'return' statement to expression |
|
#assert len(ast[0]) == 1 wrong, see 'lambda (r,b): r,b,g' |
|
assert ast[-1][-1] == 'stmt' |
|
assert len(ast[-1][-1]) == 1 |
|
assert ast[-1][-1][0] == 'return_stmt' |
|
ast[-1][-1][0].type = 'return_lambda' |
|
else: |
|
if ast[0][-1] == RETURN_NONE: |
|
# Python adds a 'return None' to the |
|
# end of any function; remove it |
|
ast[0].pop() # remove last node |
|
|
|
# add defaults values to parameter names |
|
argc = code.co_argcount |
|
paramnames = list(code.co_varnames[:argc]) |
|
|
|
# defaults are for last n parameters, thus reverse |
|
paramnames.reverse(); defparams.reverse() |
|
|
|
# build parameters |
|
# |
|
##This would be a nicer piece of code, but I can't get this to work |
|
## now, have to find a usable lambda constuct hG/2000-09-05 |
|
##params = map(lambda name, default: build_param(ast, name, default), |
|
## paramnames, defparams) |
|
params = [] |
|
for name, default in map(lambda a,b: (a,b), paramnames, defparams): |
|
params.append( build_param(ast, name, default) ) |
|
|
|
params.reverse() # back to correct order |
|
|
|
if 4 & code.co_flags: # flag 2 -> variable number of args |
|
params.append('*%s' % code.co_varnames[argc]) |
|
argc = argc +1 |
|
if 8 & code.co_flags: # flag 3 -> keyword args |
|
params.append('**%s' % code.co_varnames[argc]) |
|
argc = argc +1 |
|
|
|
# dump parameter list (with default values) |
|
indent = TAB * self.indent |
|
if isLambda: |
|
self.f.write('lambda ') |
|
writeParams(self, params) |
|
self.f.write(': ') |
|
else: |
|
self.f.write('(') |
|
writeParams(self, params) |
|
self.f.write('):\n') |
|
#self.f.write('%s#flags:\t%i\n' % (indent, code.co_flags)) |
|
|
|
if code.co_consts[0] != None: # docstring exists, dump it |
|
self.f.writelines([indent, `code.co_consts[0]`, '\n']) |
|
|
|
_gen_source(self.f, ast, code._customize, self.indent, |
|
isLambda=isLambda) |
|
code._tokens = None; code._customize = None # save memory |
|
|
|
|
|
def build_class(self, code): |
|
"""Dump class definition, duc string and class body.""" |
|
|
|
assert type(code) == types.CodeType |
|
code = Code(code) |
|
#assert isinstance(code, Code) |
|
|
|
indent = TAB * self.indent |
|
#self.f.write('%s#flags:\t%i\n' % (indent, code.co_flags)) |
|
ast = _build_ast(self.f, code._tokens, code._customize) |
|
code._tokens = None # save memory |
|
assert ast == 'code' and ast[0] == 'stmts' |
|
|
|
# if docstring exists, dump it |
|
if code.co_consts[0] != None \ |
|
and ast[0][0] == ASSIGN_DOC_STRING(code.co_consts[0]): |
|
#print '\n\n>>-->>doc string set\n\n' |
|
self.f.writelines( [indent,repr(code.co_consts[0]), '\n'] ) |
|
del ast[0][0] |
|
|
|
# the function defining a class normally returns locals(); we |
|
# don't want this to show up in the source, thus remove the node |
|
if ast[0][-1] == RETURN_LOCALS: |
|
ast[0].pop() # remove last node |
|
|
|
_gen_source(self.f, ast, code._customize, self.indent) |
|
code._tokens = None; code._customize = None # save memory |
|
|
|
__globals_tokens__ = ('STORE_GLOBAL', 'DELETE_GLOBAL') # 'LOAD_GLOBAL' |
|
|
|
def find_globals(node, globals): |
|
"""Find globals in this statement.""" |
|
for n in node: |
|
if isinstance(n, AST): |
|
if n != 'stmt': # skip nested statements |
|
globals = find_globals(n, globals) |
|
elif n.type in __globals_tokens__: |
|
globals[n.pattr] = None |
|
return globals |
|
|
|
|
|
class Walk(GenericASTTraversal): |
|
def __init__(self, ast, indent=0, isLambda=0): |
|
GenericASTTraversal.__init__(self, ast) |
|
self._globals = {} |
|
self.f = cStringIO.StringIO() |
|
self.f.seek(0) |
|
self.indent = indent |
|
self.isLambda = isLambda |
|
|
|
def __del__(self): |
|
self.f.close() |
|
|
|
def traverse(self, node=None): |
|
self.preorder(node) |
|
return self.f.getvalue() |
|
|
|
def n_LOAD_CONST(self, node): |
|
data = node.pattr |
|
if data == 'None': |
|
# LOAD_CONST 'None' only occurs, when None is |
|
# implicit eg. in 'return' w/o params |
|
pass |
|
elif data == 'Ellipsis': |
|
self.f.write('...') |
|
elif data[0] == '-': # assume negative integer constant |
|
# convert to hex, since decimal representation |
|
# would result in 'LOAD_CONST; UNARY_NEGATIVE' |
|
self.f.write('0x%x' % int(data)) |
|
else: |
|
self.f.write(data) |
|
|
|
def n_delete_subscr(self, node): |
|
#print >>self.f, '>#', node |
|
#print >>self.f, '---' |
|
maybe_tuple = node[-2][-1] |
|
#print >>self.f, '##', maybe_tuple, maybe_tuple.type[:11] |
|
if maybe_tuple.type[:11] == 'BUILD_TUPLE': |
|
maybe_tuple.type = 'build_tuple2' |
|
#print >>self.f, '##', node |
|
#print >>self.f, '##', maybe_tuple.type |
|
self.default(node) |
|
|
|
n_store_subscr = n_binary_subscr = n_delete_subscr |
|
|
|
def __n_stmts(self, node): |
|
# optimize "print 1, ; print" |
|
last = None; i = 0 |
|
while i < len(node): |
|
n = node[i] |
|
assert(n == 'stmt') |
|
if n[0] == 'print_nl_stmt' and \ |
|
last is not None and \ |
|
last[0] == 'print_stmt': |
|
last[0].type = 'print_stmt_nl' |
|
del node[i] |
|
last = None |
|
else: |
|
last = n |
|
i = i + 1 |
|
self.default(node) |
|
|
|
def n_stmt(self, node): |
|
if not self.isLambda: |
|
indent = TAB * self.indent |
|
for g in find_globals(node, {}).keys(): |
|
self.f.writelines( [indent, |
|
'global ', |
|
g, '\n'] ) |
|
## nice output does not work since engine() |
|
## creates a new Walk instance when recursing |
|
## TODO: reconsider this: engine() no longer |
|
## creates a new Walk instancew hG/2000-12-31 |
|
## if not self._globals.has_key(g): |
|
## self._globals[g] = None |
|
## self.f.writelines( [TAB * self.indent, |
|
## 'global ', |
|
## g, '\n'] ) |
|
self.default(node) |
|
|
|
def n_exec_stmt(self, node): |
|
""" |
|
exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT |
|
exec_stmt ::= expr exprlist EXEC_STMT |
|
""" |
|
w = Walk(node, indent=self.indent) |
|
w.engine(( '%|exec %c in %[1]C', 0, (0,sys.maxint,', ') ), |
|
node) |
|
s = w.f.getvalue() |
|
del w |
|
if s[-3:] == 'in ': |
|
s = s[:-3] |
|
self.f.writelines( [s, '\n'] ) |
|
node[:] = [] # avoid print out when recursive descenting |
|
|
|
def n_ifelsestmt(self, node, preprocess=0): |
|
if len(node[-2]) == 1: |
|
ifnode = node[-2][0][0] |
|
if ifnode == 'ifelsestmt': |
|
node.type = 'ifelifstmt' |
|
self.n_ifelsestmt(ifnode, preprocess=1) |
|
if ifnode == 'ifelifstmt': |
|
ifnode.type = 'elifelifstmt' |
|
elif ifnode == 'ifelsestmt': |
|
ifnode.type = 'elifelsestmt' |
|
elif ifnode == 'ifstmt': |
|
node.type = 'ifelifstmt' |
|
ifnode.type = 'elifstmt' |
|
if not preprocess: |
|
self.default(node) |
|
|
|
def n_import_as(self, node): |
|
iname = node[0].pattr; sname = node[-1].pattr |
|
if iname == sname \ |
|
or iname[:len(sname)+1] == (sname+'.'): |
|
self.f.write(iname) |
|
else: |
|
self.f.writelines([iname, ' as ', sname]) |
|
node[:] = [] # avoid print out when recursive descenting |
|
|
|
def n_mkfunc(self, node): |
|
defparams = node[0:-2] |
|
code = node[-2].attr |
|
node[:] = [] # avoid print out when recursive descenting |
|
self.indent = self.indent + 1 |
|
self.f.write(code.co_name) |
|
make_function(self, code, defparams, isLambda=0) |
|
self.indent = self.indent - 1 |
|
|
|
def n_mklambda(self, node): |
|
defparams = node[0:-2] |
|
code = node[-2].attr |
|
node[:] = [] # avoid print out when recursive descenting |
|
make_function(self, code, defparams, isLambda=1) |
|
|
|
def n_classdef(self, node): |
|
self.f.writelines(['\n', TAB * self.indent, 'class ']) |
|
self.f.write(node[0].pattr[1:-1]) |
|
node._code = node[-4][0].attr |
|
# avoid print out when recursive descenting |
|
if node[1] == BUILD_TUPLE_0: |
|
node[:] = [] |
|
else: |
|
node[:] = [ node[1] ] |
|
|
|
def n_classdef_exit(self, node): |
|
self.f.write(':\n') |
|
self.indent = self.indent +1 |
|
# '\n%|class %[0]{pattr[1:-1]}%c:\n%+%{build_class}%-', 1 ), |
|
# -4 -> MAKE_FUNCTION; -2 -> LOAD_CONST (code) |
|
build_class(self,node._code) |
|
self.indent = self.indent -1 |
|
node._code = None # save memory |
|
|
|
def n_lc_for(self, node): |
|
node.type = 'lc_for_nest' |
|
content = node[4] |
|
while content == 'lc_for': |
|
content.type = 'lc_for_nest' |
|
content = content[4] |
|
while content == 'lc_if': |
|
content = content[2] |
|
assert content == 'lc_body' |
|
self.preorder(content) |
|
content.type = 'lc_body__' |
|
self.default(node) |
|
|
|
def engine(self, entry, startnode): |
|
#self.f.write("-----\n") |
|
#self.f.write(str(startnode.__dict__)); self.f.write('\n') |
|
escape = re.compile(r''' |
|
% ( \[ (?P<child> -? \d+ ) \] )? |
|
((?P<type> [^{] ) | |
|
( [{] (?P<expr> [^}]* ) [}] )) |
|
''', re.VERBOSE) |
|
|
|
fmt = entry[0] |
|
n = len(fmt) |
|
lastC = 0 |
|
arg = 1 |
|
i = 0 |
|
|
|
while i < n: |
|
m = escape.match(fmt, i) |
|
if m is None: |
|
self.f.write(fmt[i]) |
|
i = i + 1 |
|
continue |
|
|
|
i = m.end() |
|
typ = m.group('type') or '{' |
|
|
|
node = startnode |
|
try: |
|
if m.group('child'): |
|
node = node[string.atoi(m.group('child'))] |
|
except: |
|
print node.__dict__ |
|
raise |
|
|
|
if typ == '%': |
|
self.f.write('%') |
|
elif typ == '+': |
|
self.indent = self.indent + 1 |
|
elif typ == '-': |
|
self.indent = self.indent - 1 |
|
elif typ == '|': |
|
self.f.write(TAB * self.indent) |
|
elif typ == ',': |
|
if lastC == 1: |
|
self.f.write(',') |
|
elif typ == 'c': |
|
self.traverse(node[entry[arg]]) |
|
##w = Walk(node[entry[arg]], self.indent) |
|
##self.f.write(w.traverse()) |
|
##del w # hg/2000-09-03 |
|
arg = arg + 1 |
|
elif typ == 'C': |
|
low, high, sep = entry[arg] |
|
lastC = remaining = len(node[low:high]) |
|
for subnode in node[low:high]: |
|
self.traverse(subnode) |
|
##w = Walk(subnode, self.indent) |
|
##self.f.write(w.traverse()) |
|
##del w # hg/2000-09-03 |
|
remaining = remaining - 1 |
|
if remaining > 0: |
|
self.f.write(sep) |
|
arg = arg + 1 |
|
elif typ == '{': |
|
d = node.__dict__ |
|
expr = m.group('expr') |
|
if expr == 'build_class': |
|
# -4 -> MAKE_FUNCTION; -2 -> LOAD_CONST (code) |
|
build_class(self,node[-4][-2].attr) |
|
else: |
|
try: |
|
self.f.write(eval(expr, d, d)) |
|
except: |
|
print node |
|
raise |
|
|
|
def default(self, node): |
|
mapping = MAP.get(node, MAP_DIRECT) |
|
table = mapping[0] |
|
key = node |
|
|
|
for i in mapping[1:]: |
|
key = key[i] |
|
|
|
if table.has_key(key): |
|
self.engine(table[key], node) |
|
self.prune() |
|
|
|
def walk(ast, customize={}, indent=0, isLambda=0): |
|
w = Walk(ast, indent, isLambda=isLambda) |
|
# |
|
# Special handling for opcodes that take a variable number |
|
# of arguments -- we add a new entry for each in TABLE_R. |
|
# |
|
for k, v in customize.items(): |
|
op = k[:string.rfind(k, '_')] |
|
if op == 'BUILD_LIST': |
|
TABLE_R[k] = ( '[%C]', (0,-1,', ') ) |
|
elif op == 'BUILD_SLICE': |
|
TABLE_R[k] = ( '%C', (0,-1,':') ) |
|
elif op == 'BUILD_TUPLE': |
|
TABLE_R[k] = ( '(%C%,)', (0,-1,', ') ) |
|
elif op == 'CALL_FUNCTION': |
|
TABLE_R[k] = ( '%c(%C)', 0, (1,-1,', ') ) |
|
elif op in ('CALL_FUNCTION_VAR', |
|
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'): |
|
if v == 0: |
|
str = '%c(%C' # '%C' is a dummy here ... |
|
p2 = (0, 0, None) # .. because of this |
|
else: |
|
str = '%c(%C, ' |
|
p2 = (1,-2, ', ') |
|
if op == 'CALL_FUNCTION_VAR': |
|
str = str + '*%c)' |
|
entry = (str, 0, p2, -2) |
|
elif op == 'CALL_FUNCTION_KW': |
|
str = str + '**%c)' |
|
entry = (str, 0, p2, -2) |
|
else: |
|
str = str + '*%c, **%c)' |
|
if p2[2]: p2 = (1,-3, ', ') |
|
entry = (str, 0, p2, -3, -2) |
|
TABLE_R[k] = entry |
|
result = w.traverse() |
|
return result |
|
|
|
#-- end of (de-)compiler --- |
|
|
|
#-- start |
|
|
|
Showasm = 0 |
|
Showast = 0 |
|
__real_out = None |
|
|
|
def _tokenize(out, co): |
|
"""Disassemble code object into a token list""" |
|
assert type(co) == types.CodeType |
|
|
|
tokens, customize = disassemble(co) |
|
# See the disassembly.. |
|
if Showasm and out is not None: |
|
for t in tokens: |
|
out.write('%s\n' % t) |
|
out.write('\n') |
|
return tokens, customize |
|
|
|
def _build_ast(out, tokens, customize): |
|
assert type(tokens) == types.ListType |
|
assert isinstance(tokens[0], Token) |
|
|
|
# Build AST from disassembly. |
|
try: |
|
ast = parse(tokens, customize) |
|
except: # parser failed, dump disassembly |
|
#if not Showasm: |
|
__real_out.write('--- This code section failed: ---\n') |
|
for t in tokens: |
|
__real_out.write('%s\n' % t) |
|
__real_out.write('\n') |
|
raise |
|
return ast |
|
|
|
def _gen_source(out, ast, customize, indent=0, isLambda=0): |
|
"""convert AST to source code""" |
|
if Showast: |
|
out.write(`ast`) |
|
|
|
# if code would be empty, append 'pass' |
|
if len(ast[0]) == 0: |
|
out.write(indent * TAB) |
|
out.write('pass\n') |
|
else: |
|
out.write(walk(ast, customize, indent, isLambda=isLambda)) |
|
|
|
|
|
def decompyle(co, out=None, indent=0, showasm=0, showast=0): |
|
""" |
|
diassembles a given code block 'co' |
|
""" |
|
assert type(co) == types.CodeType |
|
|
|
global Showasm, Showast |
|
Showasm = showasm |
|
Showast = showast |
|
|
|
if not out: |
|
out = sys.stdout |
|
global __real_out |
|
__real_out = out # store final output stream for case of error |
|
|
|
tokens, customize = _tokenize(out, co) |
|
ast = _build_ast(out, tokens, customize) |
|
tokens = None # save memory |
|
|
|
assert ast == 'code' and ast[0] == 'stmts' |
|
# convert leading '__doc__ = "..." into doc string |
|
if ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]): |
|
out.writelines( [repr(co.co_consts[0]), '\n'] ) |
|
del ast[0][0] |
|
if ast[0][-1] == RETURN_NONE: |
|
ast[0].pop() # remove last node |
|
#todo: if empty, add 'pass' |
|
|
|
_gen_source(out, ast, customize, indent) |
|
|
|
|
|
def decompyle_file(filename, outstream=None, showasm=0, showast=0): |
|
""" |
|
decompile Python byte-code file (.pyc) |
|
""" |
|
co = _load_module(filename) |
|
decompyle(co, out=outstream, showasm=showasm, showast=showast) |
|
co = None
|
|
|