1501 lines
43 KiB
1501 lines
43 KiB
14 years ago
|
# Copyright (c) 1999 John Aycock
|
||
|
# Copyright (c) 2000 by hartmut Goebel <hartmut@goebel.noris.de>
|
||
|
#
|
||
|
# Permission is hereby granted, free of charge, to any person obtaining
|
||
|
# a copy of this software and associated documentation files (the
|
||
|
# "Software"), to deal in the Software without restriction, including
|
||
|
# without limitation the rights to use, copy, modify, merge, publish,
|
||
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
||
|
# permit persons to whom the Software is furnished to do so, subject to
|
||
|
# the following conditions:
|
||
|
#
|
||
|
# The above copyright notice and this permission notice shall be
|
||
|
# included in all copies or substantial portions of the Software.
|
||
|
#
|
||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||
|
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||
|
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||
|
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||
|
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||
|
#
|
||
|
# See 'CHANGES' for a list of changes
|
||
|
#
|
||
|
# NB. This is not a masterpiece of software, but became more like a hack.
|
||
|
# Probably a complete write would be sensefull. hG/2000-12-27
|
||
|
#
|
||
|
|
||
|
import re, sys, os, types
|
||
|
import dis, imp, marshal
|
||
|
import string
|
||
|
import cStringIO
|
||
|
|
||
|
|
||
|
def _load_file(filename):
|
||
|
"""
|
||
|
load a Python source file and compile it to byte-code
|
||
|
_load_module(filename: string): code_object
|
||
|
|
||
|
filename: name of file containing Python source code
|
||
|
(normally a .py)
|
||
|
code_object: code_object compiled from this source code
|
||
|
|
||
|
This function does NOT write any file!
|
||
|
"""
|
||
|
fp = open(filename, 'rb')
|
||
|
source = fp.read()+'\n'
|
||
|
try:
|
||
|
co = compile(source, filename, 'exec')
|
||
|
except SyntaxError:
|
||
|
sys.stderr.writelines( ['>>Syntax error in ', filename, '\n'] )
|
||
|
raise
|
||
|
fp.close()
|
||
|
return co
|
||
|
|
||
|
def _load_module(filename):
|
||
|
"""
|
||
|
load a module without importing it
|
||
|
_load_module(filename: string): code_object
|
||
|
|
||
|
filename: name of file containing Python byte-code object
|
||
|
(normally a .pyc)
|
||
|
code_object: code_object from this file
|
||
|
"""
|
||
|
fp = open(filename, 'rb')
|
||
|
if fp.read(4) != imp.get_magic():
|
||
|
raise ImportError, "Bad magic number in %s" % filename
|
||
|
fp.read(4)
|
||
|
co = marshal.load(fp)
|
||
|
fp.close()
|
||
|
return co
|
||
|
|
||
|
|
||
|
#-- start of (de-)compiler
|
||
|
|
||
|
#
|
||
|
# Scanning
|
||
|
#
|
||
|
|
||
|
class Code:
|
||
|
"""Class for representing code-objects.
|
||
|
|
||
|
This is similar to the original code object, but additionally
|
||
|
the diassembled code is stored in the attribute '_tokens'.
|
||
|
"""
|
||
|
def __init__(self, co):
|
||
|
for i in dir(co):
|
||
|
exec 'self.%s = co.%s' % (i, i)
|
||
|
self._tokens, self._customize = disassemble(co)
|
||
|
|
||
|
class Token:
|
||
|
"""Class representing a byte-code token.
|
||
|
A byte-code token is equivalent to the contents of one line
|
||
|
as output by dis.dis().
|
||
|
"""
|
||
|
def __init__(self, type, attr=None, pattr=None, offset=-1):
|
||
|
self.type = intern(type)
|
||
|
self.attr = attr
|
||
|
self.pattr = pattr
|
||
|
self.offset = offset
|
||
|
|
||
|
def __cmp__(self, o):
|
||
|
if isinstance(o, Token):
|
||
|
# both are tokens: compare type and pattr
|
||
|
return cmp(self.type, o.type) \
|
||
|
or cmp(self.pattr, o.pattr)
|
||
|
else:
|
||
|
return cmp(self.type, o)
|
||
|
|
||
|
def __repr__(self): return str(self.type)
|
||
|
def __str__(self):
|
||
|
if self.pattr: pattr = self.pattr
|
||
|
else: pattr = ''
|
||
|
return '%s\t%-17s %s' % (self.offset, self.type, pattr)
|
||
|
def __hash__(self): return hash(self.type)
|
||
|
def __getitem__(self, i): raise IndexError
|
||
|
|
||
|
_JUMP_OPS_ = map(lambda op: dis.opname[op], dis.hasjrel + dis.hasjabs)
|
||
|
|
||
|
def disassemble(co):
|
||
|
"""Disassemble a code object, returning a list of Token.
|
||
|
|
||
|
The main part of this procedure is modelled after
|
||
|
dis.diaassemble().
|
||
|
"""
|
||
|
rv = []
|
||
|
customize = {}
|
||
|
|
||
|
code = co.co_code
|
||
|
cf = find_jump_targets(code)
|
||
|
n = len(code)
|
||
|
i = 0
|
||
|
while i < n:
|
||
|
offset = i
|
||
|
if cf.has_key(offset):
|
||
|
for j in range(cf[offset]):
|
||
|
rv.append(Token('COME_FROM',
|
||
|
offset="%s_%d" % (offset, j) ))
|
||
|
|
||
|
c = code[i]
|
||
|
op = ord(c)
|
||
|
opname = dis.opname[op]
|
||
|
i = i+1
|
||
|
oparg = None; pattr = None
|
||
|
if op >= dis.HAVE_ARGUMENT:
|
||
|
oparg = ord(code[i]) + ord(code[i+1])*256
|
||
|
i = i+2
|
||
|
if op in dis.hasconst:
|
||
|
const = co.co_consts[oparg]
|
||
|
if type(const) == types.CodeType:
|
||
|
oparg = const
|
||
|
if const.co_name == '<lambda>':
|
||
|
assert opname == 'LOAD_CONST'
|
||
|
opname = 'LOAD_LAMBDA'
|
||
|
# verify uses 'pattr' for
|
||
|
# comparism, since 'attr' now
|
||
|
# hold Code(const) and thus
|
||
|
# can not be used for
|
||
|
# comparism (todo: thinkg
|
||
|
# about changing this)
|
||
|
#pattr = 'code_object @ 0x%x %s->%s' %\
|
||
|
# (id(const), const.co_filename, const.co_name)
|
||
|
pattr = 'code_object ' + const.co_name
|
||
|
else:
|
||
|
pattr = `const`
|
||
|
elif op in dis.hasname:
|
||
|
pattr = co.co_names[oparg]
|
||
|
elif op in dis.hasjrel:
|
||
|
pattr = `i + oparg`
|
||
|
elif op in dis.haslocal:
|
||
|
pattr = co.co_varnames[oparg]
|
||
|
elif op in dis.hascompare:
|
||
|
pattr = dis.cmp_op[oparg]
|
||
|
|
||
|
if opname == 'SET_LINENO':
|
||
|
continue
|
||
|
elif opname in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE',
|
||
|
'UNPACK_LIST', 'UNPACK_TUPLE',
|
||
|
'UNPACK_SEQUENCE',
|
||
|
'MAKE_FUNCTION', 'CALL_FUNCTION',
|
||
|
'CALL_FUNCTION_VAR', 'CALL_FUNCTION_KW',
|
||
|
'CALL_FUNCTION_VAR_KW', 'DUP_TOPX',
|
||
|
):
|
||
|
opname = '%s_%d' % (opname, oparg)
|
||
|
customize[opname] = oparg
|
||
|
|
||
|
rv.append(Token(opname, oparg, pattr, offset))
|
||
|
|
||
|
return rv, customize
|
||
|
|
||
|
|
||
|
def find_jump_targets(code):
|
||
|
"""Detect all offsets in a byte code which are jump targets.
|
||
|
|
||
|
Return the list of offsets.
|
||
|
|
||
|
This procedure is modelled after dis.findlables(), but here
|
||
|
for each target the number of jumps are counted.
|
||
|
"""
|
||
|
targets = {}
|
||
|
n = len(code)
|
||
|
i = 0
|
||
|
while i < n:
|
||
|
c = code[i]
|
||
|
op = ord(c)
|
||
|
i = i+1
|
||
|
if op >= dis.HAVE_ARGUMENT:
|
||
|
oparg = ord(code[i]) + ord(code[i+1])*256
|
||
|
i = i+2
|
||
|
label = -1
|
||
|
if op in dis.hasjrel:
|
||
|
label = i+oparg
|
||
|
# todo: absolut jumps
|
||
|
#elif op in dis.hasjabs:
|
||
|
# label = oparg
|
||
|
if label >= 0:
|
||
|
targets[label] = targets.get(label, 0) + 1
|
||
|
return targets
|
||
|
|
||
|
|
||
|
#
|
||
|
# Parsing
|
||
|
#
|
||
|
|
||
|
class AST:
|
||
|
def __init__(self, type, kids=None):
|
||
|
self.type = intern(type)
|
||
|
if kids == None: kids = []
|
||
|
self._kids = kids
|
||
|
|
||
|
def append(self, o): self._kids.append(o)
|
||
|
def pop(self): return self._kids.pop()
|
||
|
def __getitem__(self, i): return self._kids[i]
|
||
|
def __setitem__(self, i, val): self._kids[i] = val
|
||
|
def __delitem__(self, i): del self._kids[i]
|
||
|
def __len__(self): return len(self._kids)
|
||
|
def __getslice__(self, low, high): return self._kids[low:high]
|
||
|
def __setslice__(self, low, high, seq): self._kids[low:high] = seq
|
||
|
def __delslice__(self, low, high): del self._kids[low:high]
|
||
|
def __cmp__(self, o):
|
||
|
if isinstance(o, AST):
|
||
|
return cmp(self.type, o.type) \
|
||
|
or cmp(self._kids, o._kids)
|
||
|
else:
|
||
|
return cmp(self.type, o)
|
||
|
|
||
|
def __hash__(self): return hash(self.type)
|
||
|
|
||
|
def __repr__(self):
|
||
|
rv = str(self.type)
|
||
|
for k in self._kids:
|
||
|
rv = rv + '\n' + string.replace(str(k), '\n', '\n ')
|
||
|
return rv
|
||
|
|
||
|
# Some ASTs used for comparing code fragments (like 'return None' at
|
||
|
# the end of functions).
|
||
|
|
||
|
RETURN_LOCALS = AST('stmt',
|
||
|
[ AST('return_stmt',
|
||
|
[ AST('expr', [ Token('LOAD_LOCALS') ]),
|
||
|
Token('RETURN_VALUE')]) ])
|
||
|
|
||
|
RETURN_NONE = AST('stmt',
|
||
|
[ AST('return_stmt',
|
||
|
[ AST('expr', [ Token('LOAD_CONST', pattr='None') ]),
|
||
|
Token('RETURN_VALUE')]) ])
|
||
|
|
||
|
ASSIGN_DOC_STRING = lambda doc_string: \
|
||
|
AST('stmt',
|
||
|
[ AST('assign',
|
||
|
[ AST('expr', [ Token('LOAD_CONST', pattr=`doc_string`) ]),
|
||
|
AST('designator', [ Token('STORE_NAME', pattr='__doc__')])
|
||
|
])])
|
||
|
|
||
|
BUILD_TUPLE_0 = AST('expr',
|
||
|
[ Token('BUILD_TUPLE_0') ] )
|
||
|
|
||
|
from spark import GenericASTBuilder, GenericASTMatcher
|
||
|
|
||
|
class Parser(GenericASTBuilder):
|
||
|
def __init__(self):
|
||
|
GenericASTBuilder.__init__(self, AST, 'code')
|
||
|
self.customized = {}
|
||
|
|
||
|
def cleanup(self):
|
||
|
"""
|
||
|
Remove recursive references to allow garbage
|
||
|
collector to collect this object.
|
||
|
"""
|
||
|
for dict in (self.rule2func, self.rules, self.rule2name, self.first):
|
||
|
for i in dict.keys():
|
||
|
dict[i] = None
|
||
|
for i in dir(self):
|
||
|
setattr(self, i, None)
|
||
|
|
||
|
def error(self, token):
|
||
|
# output offset, too
|
||
|
print "Syntax error at or near `%s' token at offset %s" % \
|
||
|
(`token`, token.offset)
|
||
|
raise SystemExit
|
||
|
|
||
|
def typestring(self, token):
|
||
|
return token.type
|
||
|
|
||
|
def p_funcdef(self, args):
|
||
|
'''
|
||
|
stmt ::= funcdef
|
||
|
funcdef ::= mkfunc STORE_FAST
|
||
|
funcdef ::= mkfunc STORE_NAME
|
||
|
'''
|
||
|
|
||
|
# new for Python2.0
|
||
|
#
|
||
|
# UNPACK_SEQUENCE # number of tuple items
|
||
|
# EXTENDED_ARG
|
||
|
|
||
|
def p_list_comprehension(self, args):
|
||
|
'''
|
||
|
expr ::= list_compr
|
||
|
list_compr ::= lc_prep lc_for lc_cleanup
|
||
|
lc_prep ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR STORE_NAME
|
||
|
lc_prep ::= BUILD_LIST_0 DUP_TOP LOAD_ATTR STORE_FAST
|
||
|
|
||
|
lc_for ::= expr LOAD_CONST
|
||
|
FOR_LOOP designator
|
||
|
lc_for JUMP_ABSOLUTE
|
||
|
COME_FROM
|
||
|
lc_for ::= expr LOAD_CONST
|
||
|
FOR_LOOP designator
|
||
|
lc_if JUMP_ABSOLUTE
|
||
|
COME_FROM
|
||
|
lc_for ::= expr LOAD_CONST
|
||
|
FOR_LOOP designator
|
||
|
lc_body JUMP_ABSOLUTE
|
||
|
COME_FROM
|
||
|
lc_if ::= expr condjmp lc_body
|
||
|
JUMP_FORWARD COME_FROM POP_TOP
|
||
|
COME_FROM
|
||
|
lc_body ::= LOAD_NAME expr CALL_FUNCTION_1 POP_TOP
|
||
|
lc_body ::= LOAD_FAST expr CALL_FUNCTION_1 POP_TOP
|
||
|
lc_cleanup ::= DELETE_NAME
|
||
|
lc_cleanup ::= DELETE_FAST
|
||
|
'''
|
||
|
|
||
|
def p_augmented_assign(self, args):
|
||
|
'''
|
||
|
stmt ::= augassign1
|
||
|
stmt ::= augassign2
|
||
|
augassign1 ::= expr expr inplace_op designator
|
||
|
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SUBSCR
|
||
|
augassign1 ::= expr expr inplace_op ROT_TWO STORE_SLICE+0
|
||
|
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+1
|
||
|
augassign1 ::= expr expr inplace_op ROT_THREE STORE_SLICE+2
|
||
|
augassign1 ::= expr expr inplace_op ROT_FOUR STORE_SLICE+3
|
||
|
augassign2 ::= expr DUP_TOP LOAD_ATTR expr
|
||
|
inplace_op ROT_TWO STORE_ATTR
|
||
|
|
||
|
inplace_op ::= INPLACE_ADD
|
||
|
inplace_op ::= INPLACE_SUBTRACT
|
||
|
inplace_op ::= INPLACE_MULTIPLY
|
||
|
inplace_op ::= INPLACE_DIVIDE
|
||
|
inplace_op ::= INPLACE_MODULO
|
||
|
inplace_op ::= INPLACE_POWER
|
||
|
inplace_op ::= INPLACE_LSHIFT
|
||
|
inplace_op ::= INPLACE_RSHIFT
|
||
|
inplace_op ::= INPLACE_AND
|
||
|
inplace_op ::= INPLACE_XOR
|
||
|
inplace_op ::= INPLACE_OR
|
||
|
'''
|
||
|
|
||
|
def p_assign(self, args):
|
||
|
'''
|
||
|
stmt ::= assign
|
||
|
assign ::= expr DUP_TOP designList
|
||
|
assign ::= expr designator
|
||
|
'''
|
||
|
|
||
|
def p_print(self, args):
|
||
|
'''
|
||
|
stmt ::= print_stmt
|
||
|
stmt ::= print_stmt_nl
|
||
|
stmt ::= print_nl_stmt
|
||
|
print_stmt ::= expr PRINT_ITEM
|
||
|
print_nl_stmt ::= PRINT_NEWLINE
|
||
|
print_stmt_nl ::= print_stmt print_nl_stmt
|
||
|
'''
|
||
|
|
||
|
def p_print_to(self, args):
|
||
|
'''
|
||
|
stmt ::= print_to
|
||
|
stmt ::= print_to_nl
|
||
|
stmt ::= print_nl_to
|
||
|
print_to ::= expr print_to_items POP_TOP
|
||
|
print_to_nl ::= expr print_to_items PRINT_NEWLINE_TO
|
||
|
print_nl_to ::= expr PRINT_NEWLINE_TO
|
||
|
print_to_items ::= print_to_items print_to_item
|
||
|
print_to_items ::= print_to_item
|
||
|
print_to_item ::= DUP_TOP expr ROT_TWO PRINT_ITEM_TO
|
||
|
'''
|
||
|
# expr print_to* POP_TOP
|
||
|
# expr { print_to* } PRINT_NEWLINE_TO
|
||
|
|
||
|
def p_import15(self, args):
|
||
|
'''
|
||
|
stmt ::= importstmt
|
||
|
stmt ::= importfrom
|
||
|
|
||
|
importstmt ::= IMPORT_NAME STORE_FAST
|
||
|
importstmt ::= IMPORT_NAME STORE_NAME
|
||
|
|
||
|
importfrom ::= IMPORT_NAME importlist POP_TOP
|
||
|
importlist ::= importlist IMPORT_FROM
|
||
|
importlist ::= IMPORT_FROM
|
||
|
'''
|
||
|
|
||
|
def p_import20(self, args):
|
||
|
'''
|
||
|
stmt ::= importstmt2
|
||
|
stmt ::= importfrom2
|
||
|
stmt ::= importstar2
|
||
|
|
||
|
importstmt2 ::= LOAD_CONST import_as
|
||
|
importstar2 ::= LOAD_CONST IMPORT_NAME IMPORT_STAR
|
||
|
|
||
|
importfrom2 ::= LOAD_CONST IMPORT_NAME importlist2 POP_TOP
|
||
|
importlist2 ::= importlist2 import_as
|
||
|
importlist2 ::= import_as
|
||
|
import_as ::= IMPORT_NAME STORE_FAST
|
||
|
import_as ::= IMPORT_NAME STORE_NAME
|
||
|
import_as ::= IMPORT_NAME LOAD_ATTR STORE_FAST
|
||
|
import_as ::= IMPORT_NAME LOAD_ATTR STORE_NAME
|
||
|
import_as ::= IMPORT_FROM STORE_FAST
|
||
|
import_as ::= IMPORT_FROM STORE_NAME
|
||
|
'''
|
||
|
# 'import_as' can't use designator, since n_import_as()
|
||
|
# needs to compare both kids' pattr
|
||
|
|
||
|
def p_grammar(self, args):
|
||
|
'''
|
||
|
code ::= stmts
|
||
|
code ::=
|
||
|
|
||
|
stmts ::= stmts stmt
|
||
|
stmts ::= stmt
|
||
|
|
||
|
stmts_opt ::= stmts
|
||
|
stmts_opt ::= passstmt
|
||
|
passstmt ::=
|
||
|
|
||
|
designList ::= designator designator
|
||
|
designList ::= designator DUP_TOP designList
|
||
|
|
||
|
designator ::= STORE_FAST
|
||
|
designator ::= STORE_NAME
|
||
|
designator ::= STORE_GLOBAL
|
||
|
designator ::= expr STORE_ATTR
|
||
|
designator ::= expr STORE_SLICE+0
|
||
|
designator ::= expr expr STORE_SLICE+1
|
||
|
designator ::= expr expr STORE_SLICE+2
|
||
|
designator ::= expr expr expr STORE_SLICE+3
|
||
|
designator ::= store_subscr
|
||
|
store_subscr ::= expr expr STORE_SUBSCR
|
||
|
designator ::= unpack
|
||
|
designator ::= unpack_list
|
||
|
|
||
|
stmt ::= classdef
|
||
|
stmt ::= call_stmt
|
||
|
call_stmt ::= expr POP_TOP
|
||
|
|
||
|
stmt ::= return_stmt
|
||
|
return_stmt ::= expr RETURN_VALUE
|
||
|
|
||
|
stmt ::= break_stmt
|
||
|
break_stmt ::= BREAK_LOOP
|
||
|
|
||
|
stmt ::= continue_stmt
|
||
|
continue_stmt ::= JUMP_ABSOLUTE
|
||
|
|
||
|
stmt ::= raise_stmt
|
||
|
raise_stmt ::= exprlist RAISE_VARARGS
|
||
|
raise_stmt ::= nullexprlist RAISE_VARARGS
|
||
|
|
||
|
stmt ::= exec_stmt
|
||
|
exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
|
||
|
exec_stmt ::= expr exprlist EXEC_STMT
|
||
|
|
||
|
stmt ::= assert
|
||
|
stmt ::= assert2
|
||
|
stmt ::= ifstmt
|
||
|
stmt ::= ifelsestmt
|
||
|
stmt ::= whilestmt
|
||
|
stmt ::= whileelsestmt
|
||
|
stmt ::= forstmt
|
||
|
stmt ::= forelsestmt
|
||
|
stmt ::= trystmt
|
||
|
stmt ::= tryfinallystmt
|
||
|
|
||
|
stmt ::= DELETE_FAST
|
||
|
stmt ::= DELETE_NAME
|
||
|
stmt ::= DELETE_GLOBAL
|
||
|
stmt ::= expr DELETE_SLICE+0
|
||
|
stmt ::= expr expr DELETE_SLICE+1
|
||
|
stmt ::= expr expr DELETE_SLICE+2
|
||
|
stmt ::= expr expr expr DELETE_SLICE+3
|
||
|
stmt ::= delete_subscr
|
||
|
delete_subscr ::= expr expr DELETE_SUBSCR
|
||
|
stmt ::= expr DELETE_ATTR
|
||
|
|
||
|
kwarg ::= LOAD_CONST expr
|
||
|
|
||
|
classdef ::= LOAD_CONST expr mkfunc
|
||
|
CALL_FUNCTION_0 BUILD_CLASS STORE_NAME
|
||
|
classdef ::= LOAD_CONST expr mkfunc
|
||
|
CALL_FUNCTION_0 BUILD_CLASS STORE_FAST
|
||
|
|
||
|
condjmp ::= JUMP_IF_FALSE POP_TOP
|
||
|
condjmp ::= JUMP_IF_TRUE POP_TOP
|
||
|
|
||
|
assert ::= expr JUMP_IF_FALSE POP_TOP
|
||
|
expr JUMP_IF_TRUE POP_TOP
|
||
|
LOAD_GLOBAL RAISE_VARARGS
|
||
|
COME_FROM COME_FROM POP_TOP
|
||
|
assert2 ::= expr JUMP_IF_FALSE POP_TOP
|
||
|
expr JUMP_IF_TRUE POP_TOP
|
||
|
LOAD_GLOBAL expr RAISE_VARARGS
|
||
|
COME_FROM COME_FROM POP_TOP
|
||
|
|
||
|
ifstmt ::= expr condjmp stmts_opt
|
||
|
JUMP_FORWARD COME_FROM POP_TOP
|
||
|
COME_FROM
|
||
|
|
||
|
ifelsestmt ::= expr condjmp stmts_opt
|
||
|
JUMP_FORWARD COME_FROM
|
||
|
POP_TOP stmts COME_FROM
|
||
|
|
||
|
trystmt ::= SETUP_EXCEPT stmts_opt
|
||
|
POP_BLOCK JUMP_FORWARD
|
||
|
COME_FROM except_stmt
|
||
|
|
||
|
try_end ::= END_FINALLY COME_FROM
|
||
|
try_end ::= except_else
|
||
|
except_else ::= END_FINALLY COME_FROM stmts
|
||
|
|
||
|
except_stmt ::= except_cond except_stmt COME_FROM
|
||
|
except_stmt ::= except_conds try_end COME_FROM
|
||
|
except_stmt ::= except try_end COME_FROM
|
||
|
except_stmt ::= try_end
|
||
|
|
||
|
except_conds ::= except_cond except_conds COME_FROM
|
||
|
except_conds ::=
|
||
|
|
||
|
except_cond ::= except_cond1
|
||
|
except_cond ::= except_cond2
|
||
|
except_cond1 ::= DUP_TOP expr COMPARE_OP
|
||
|
JUMP_IF_FALSE
|
||
|
POP_TOP POP_TOP POP_TOP POP_TOP
|
||
|
stmts_opt JUMP_FORWARD COME_FROM
|
||
|
POP_TOP
|
||
|
except_cond2 ::= DUP_TOP expr COMPARE_OP
|
||
|
JUMP_IF_FALSE
|
||
|
POP_TOP POP_TOP designator POP_TOP
|
||
|
stmts_opt JUMP_FORWARD COME_FROM
|
||
|
POP_TOP
|
||
|
except ::= POP_TOP POP_TOP POP_TOP
|
||
|
stmts_opt JUMP_FORWARD
|
||
|
|
||
|
tryfinallystmt ::= SETUP_FINALLY stmts_opt
|
||
|
POP_BLOCK LOAD_CONST
|
||
|
COME_FROM stmts_opt END_FINALLY
|
||
|
|
||
|
whilestmt ::= SETUP_LOOP
|
||
|
expr JUMP_IF_FALSE POP_TOP
|
||
|
stmts_opt JUMP_ABSOLUTE
|
||
|
COME_FROM POP_TOP POP_BLOCK COME_FROM
|
||
|
whileelsestmt ::= SETUP_LOOP
|
||
|
expr JUMP_IF_FALSE POP_TOP
|
||
|
stmts_opt JUMP_ABSOLUTE
|
||
|
COME_FROM POP_TOP POP_BLOCK
|
||
|
stmts COME_FROM
|
||
|
|
||
|
forstmt ::= SETUP_LOOP expr LOAD_CONST
|
||
|
FOR_LOOP designator
|
||
|
stmts_opt JUMP_ABSOLUTE
|
||
|
COME_FROM POP_BLOCK COME_FROM
|
||
|
forelsestmt ::= SETUP_LOOP expr LOAD_CONST
|
||
|
FOR_LOOP designator
|
||
|
stmts_opt JUMP_ABSOLUTE
|
||
|
COME_FROM POP_BLOCK stmts COME_FROM
|
||
|
'''
|
||
|
|
||
|
def p_expr(self, args):
|
||
|
'''
|
||
|
expr ::= mklambda
|
||
|
expr ::= mkfunc
|
||
|
expr ::= SET_LINENO
|
||
|
expr ::= LOAD_FAST
|
||
|
expr ::= LOAD_NAME
|
||
|
expr ::= LOAD_CONST
|
||
|
expr ::= LOAD_GLOBAL
|
||
|
expr ::= LOAD_LOCALS
|
||
|
expr ::= expr LOAD_ATTR
|
||
|
expr ::= binary_expr
|
||
|
|
||
|
binary_expr ::= expr expr binary_op
|
||
|
binary_op ::= BINARY_ADD
|
||
|
binary_op ::= BINARY_SUBTRACT
|
||
|
binary_op ::= BINARY_MULTIPLY
|
||
|
binary_op ::= BINARY_DIVIDE
|
||
|
binary_op ::= BINARY_MODULO
|
||
|
binary_op ::= BINARY_LSHIFT
|
||
|
binary_op ::= BINARY_RSHIFT
|
||
|
binary_op ::= BINARY_AND
|
||
|
binary_op ::= BINARY_OR
|
||
|
binary_op ::= BINARY_XOR
|
||
|
binary_op ::= BINARY_POWER
|
||
|
|
||
|
expr ::= binary_subscr
|
||
|
binary_subscr ::= expr expr BINARY_SUBSCR
|
||
|
expr ::= expr expr DUP_TOPX_2 BINARY_SUBSCR
|
||
|
expr ::= cmp
|
||
|
expr ::= expr UNARY_POSITIVE
|
||
|
expr ::= expr UNARY_NEGATIVE
|
||
|
expr ::= expr UNARY_CONVERT
|
||
|
expr ::= expr UNARY_INVERT
|
||
|
expr ::= expr UNARY_NOT
|
||
|
expr ::= mapexpr
|
||
|
expr ::= expr SLICE+0
|
||
|
expr ::= expr expr SLICE+1
|
||
|
expr ::= expr expr SLICE+2
|
||
|
expr ::= expr expr expr SLICE+3
|
||
|
expr ::= expr DUP_TOP SLICE+0
|
||
|
expr ::= expr expr DUP_TOPX_2 SLICE+1
|
||
|
expr ::= expr expr DUP_TOPX_2 SLICE+2
|
||
|
expr ::= expr expr expr DUP_TOPX_3 SLICE+3
|
||
|
expr ::= and
|
||
|
expr ::= or
|
||
|
or ::= expr JUMP_IF_TRUE POP_TOP expr COME_FROM
|
||
|
and ::= expr JUMP_IF_FALSE POP_TOP expr COME_FROM
|
||
|
|
||
|
cmp ::= cmp_list
|
||
|
cmp ::= compare
|
||
|
compare ::= expr expr COMPARE_OP
|
||
|
cmp_list ::= expr cmp_list1 ROT_TWO POP_TOP
|
||
|
COME_FROM
|
||
|
cmp_list1 ::= expr DUP_TOP ROT_THREE
|
||
|
COMPARE_OP JUMP_IF_FALSE POP_TOP
|
||
|
cmp_list1 COME_FROM
|
||
|
cmp_list1 ::= expr DUP_TOP ROT_THREE
|
||
|
COMPARE_OP JUMP_IF_FALSE POP_TOP
|
||
|
cmp_list2 COME_FROM
|
||
|
cmp_list2 ::= expr COMPARE_OP JUMP_FORWARD
|
||
|
|
||
|
mapexpr ::= BUILD_MAP kvlist
|
||
|
|
||
|
kvlist ::= kvlist kv
|
||
|
kvlist ::=
|
||
|
|
||
|
kv ::= DUP_TOP expr ROT_TWO expr STORE_SUBSCR
|
||
|
|
||
|
exprlist ::= exprlist expr
|
||
|
exprlist ::= expr
|
||
|
|
||
|
nullexprlist ::=
|
||
|
'''
|
||
|
|
||
|
def nonterminal(self, nt, args):
|
||
|
collect = ('stmts', 'exprlist', 'kvlist')
|
||
|
|
||
|
if nt in collect and len(args) > 1:
|
||
|
#
|
||
|
# Collect iterated thingies together.
|
||
|
#
|
||
|
rv = args[0]
|
||
|
rv.append(args[1])
|
||
|
else:
|
||
|
rv = GenericASTBuilder.nonterminal(self, nt, args)
|
||
|
return rv
|
||
|
|
||
|
def __ambiguity(self, children):
|
||
|
# only for debugging! to be removed hG/2000-10-15
|
||
|
print children
|
||
|
return GenericASTBuilder.ambiguity(self, children)
|
||
|
|
||
|
def resolve(self, list):
|
||
|
if len(list) == 2 and 'funcdef' in list and 'assign' in list:
|
||
|
return 'funcdef'
|
||
|
#sys.stderr.writelines( ['resolve ', str(list), '\n'] )
|
||
|
return GenericASTBuilder.resolve(self, list)
|
||
|
|
||
|
nop = lambda self, args: None
|
||
|
|
||
|
def parse(tokens, customize):
|
||
|
p = Parser()
|
||
|
#
|
||
|
# Special handling for opcodes that take a variable number
|
||
|
# of arguments -- we add a new rule for each:
|
||
|
#
|
||
|
# expr ::= {expr}^n BUILD_LIST_n
|
||
|
# expr ::= {expr}^n BUILD_TUPLE_n
|
||
|
# expr ::= {expr}^n BUILD_SLICE_n
|
||
|
# unpack_list ::= UNPACK_LIST {expr}^n
|
||
|
# unpack ::= UNPACK_TUPLE {expr}^n
|
||
|
# unpack ::= UNPACK_SEQEUENE {expr}^n
|
||
|
# mkfunc ::= {expr}^n LOAD_CONST MAKE_FUNCTION_n
|
||
|
# expr ::= expr {expr}^n CALL_FUNCTION_n
|
||
|
# expr ::= expr {expr}^n CALL_FUNCTION_VAR_n POP_TOP
|
||
|
# expr ::= expr {expr}^n CALL_FUNCTION_VAR_KW_n POP_TOP
|
||
|
# expr ::= expr {expr}^n CALL_FUNCTION_KW_n POP_TOP
|
||
|
#
|
||
|
for k, v in customize.items():
|
||
|
## avoid adding the same rule twice to this parser
|
||
|
#if p.customized.has_key(k):
|
||
|
# continue
|
||
|
#p.customized[k] = None
|
||
|
|
||
|
#nop = lambda self, args: None
|
||
|
op = k[:string.rfind(k, '_')]
|
||
|
if op in ('BUILD_LIST', 'BUILD_TUPLE', 'BUILD_SLICE'):
|
||
|
rule = 'expr ::= ' + 'expr '*v + k
|
||
|
elif op in ('UNPACK_TUPLE', 'UNPACK_SEQUENCE'):
|
||
|
rule = 'unpack ::= ' + k + ' designator'*v
|
||
|
elif op == 'UNPACK_LIST':
|
||
|
rule = 'unpack_list ::= ' + k + ' designator'*v
|
||
|
elif op == 'DUP_TOPX':
|
||
|
# no need to add a rule
|
||
|
pass
|
||
|
#rule = 'dup_topx ::= ' + 'expr '*v + k
|
||
|
elif op == 'MAKE_FUNCTION':
|
||
|
p.addRule('mklambda ::= %s LOAD_LAMBDA %s' %
|
||
|
('expr '*v, k), nop)
|
||
|
rule = 'mkfunc ::= %s LOAD_CONST %s' % ('expr '*v, k)
|
||
|
elif op in ('CALL_FUNCTION', 'CALL_FUNCTION_VAR',
|
||
|
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
|
||
|
na = (v & 0xff) # positional parameters
|
||
|
nk = (v >> 8) & 0xff # keyword parameters
|
||
|
# number of apply equiv arguments:
|
||
|
nak = ( len(op)-len('CALL_FUNCTION') ) / 3
|
||
|
rule = 'expr ::= expr ' + 'expr '*na + 'kwarg '*nk \
|
||
|
+ 'expr ' * nak + k
|
||
|
else:
|
||
|
raise 'unknown customize token %s' % k
|
||
|
p.addRule(rule, nop)
|
||
|
ast = p.parse(tokens)
|
||
|
p.cleanup()
|
||
|
return ast
|
||
|
|
||
|
#
|
||
|
# Decompilation (walking AST)
|
||
|
#
|
||
|
# All table-driven. Step 1 determines a table (T) and a path to a
|
||
|
# table key (K) from the node type (N) (other nodes are shown as O):
|
||
|
#
|
||
|
# N N N&K
|
||
|
# / | ... \ / | ... \ / | ... \
|
||
|
# O O O O O K O O O
|
||
|
# |
|
||
|
# K
|
||
|
#
|
||
|
# MAP_R0 (TABLE_R0) MAP_R (TABLE_R) MAP_DIRECT (TABLE_DIRECT)
|
||
|
#
|
||
|
# The default is a direct mapping. The key K is then extracted from the
|
||
|
# subtree and used to find a table entry T[K], if any. The result is a
|
||
|
# format string and arguments (a la printf()) for the formatting engine.
|
||
|
# Escapes in the format string are:
|
||
|
#
|
||
|
# %c evaluate N[A] recursively*
|
||
|
# %C evaluate N[A[0]]..N[A[1]] recursively, separate by A[2]*
|
||
|
# %, print ',' if last %C only printed one item (for tuples)
|
||
|
# %| tab to current indentation level
|
||
|
# %+ increase current indentation level
|
||
|
# %- decrease current indentation level
|
||
|
# %{...} evaluate ... in context of N
|
||
|
# %% literal '%'
|
||
|
#
|
||
|
# * indicates an argument (A) required.
|
||
|
#
|
||
|
# The '%' may optionally be followed by a number (C) in square brackets, which
|
||
|
# makes the engine walk down to N[C] before evaluating the escape code.
|
||
|
#
|
||
|
|
||
|
from spark import GenericASTTraversal
|
||
|
|
||
|
#TAB = '\t' # as God intended
|
||
|
TAB = ' ' *4 # is less spacy than "\t"
|
||
|
|
||
|
TABLE_R = {
|
||
|
'build_tuple2': ( '%C', (0,-1,', ') ),
|
||
|
'POP_TOP': ( '%|%c\n', 0 ),
|
||
|
'STORE_ATTR': ( '%c.%[1]{pattr}', 0),
|
||
|
# 'STORE_SUBSCR': ( '%c[%c]', 0, 1 ),
|
||
|
'STORE_SLICE+0':( '%c[:]', 0 ),
|
||
|
'STORE_SLICE+1':( '%c[%c:]', 0, 1 ),
|
||
|
'STORE_SLICE+2':( '%c[:%c]', 0, 1 ),
|
||
|
'STORE_SLICE+3':( '%c[%c:%c]', 0, 1, 2 ),
|
||
|
'JUMP_ABSOLUTE':( '%|continue\n', ),
|
||
|
'DELETE_SLICE+0':( '%|del %c[:]\n', 0 ),
|
||
|
'DELETE_SLICE+1':( '%|del %c[%c:]\n', 0, 1 ),
|
||
|
'DELETE_SLICE+2':( '%|del %c[:%c]\n', 0, 1 ),
|
||
|
'DELETE_SLICE+3':( '%|del %c[%c:%c]\n', 0, 1, 2 ),
|
||
|
'DELETE_ATTR': ( '%|del %c.%[-1]{pattr}\n', 0 ),
|
||
|
#'EXEC_STMT': ( '%|exec %c in %[1]C\n', 0, (0,sys.maxint,', ') ),
|
||
|
'BINARY_SUBSCR':( '%c[%c]', 0, 1), # required for augmented assign
|
||
|
'UNARY_POSITIVE':( '+%c', 0 ),
|
||
|
'UNARY_NEGATIVE':( '-%c', 0 ),
|
||
|
'UNARY_CONVERT':( '`%c`', 0 ),
|
||
|
'UNARY_INVERT': ( '~%c', 0 ),
|
||
|
'UNARY_NOT': ( '(not %c)', 0 ),
|
||
|
'SLICE+0': ( '%c[:]', 0 ),
|
||
|
'SLICE+1': ( '%c[%c:]', 0, 1 ),
|
||
|
'SLICE+2': ( '%c[:%c]', 0, 1 ),
|
||
|
'SLICE+3': ( '%c[%c:%c]', 0, 1, 2 ),
|
||
|
}
|
||
|
TABLE_R0 = {
|
||
|
# 'BUILD_LIST': ( '[%C]', (0,-1,', ') ),
|
||
|
# 'BUILD_TUPLE': ( '(%C)', (0,-1,', ') ),
|
||
|
# 'CALL_FUNCTION':( '%c(%C)', 0, (1,-1,', ') ),
|
||
|
}
|
||
|
TABLE_DIRECT = {
|
||
|
'BINARY_ADD': ( '+' ,),
|
||
|
'BINARY_SUBTRACT': ( '-' ,),
|
||
|
'BINARY_MULTIPLY': ( '*' ,),
|
||
|
'BINARY_DIVIDE': ( '/' ,),
|
||
|
'BINARY_MODULO': ( '%%',),
|
||
|
'BINARY_POWER': ( '**',),
|
||
|
'BINARY_LSHIFT': ( '<<',),
|
||
|
'BINARY_RSHIFT': ( '>>',),
|
||
|
'BINARY_AND': ( '&' ,),
|
||
|
'BINARY_OR': ( '|' ,),
|
||
|
'BINARY_XOR': ( '^' ,),
|
||
|
'INPLACE_ADD': ( '+=' ,),
|
||
|
'INPLACE_SUBTRACT': ( '-=' ,),
|
||
|
'INPLACE_MULTIPLY': ( '*=' ,),
|
||
|
'INPLACE_DIVIDE': ( '/=' ,),
|
||
|
'INPLACE_MODULO': ( '%%=',),
|
||
|
'INPLACE_POWER': ( '**=',),
|
||
|
'INPLACE_LSHIFT': ( '<<=',),
|
||
|
'INPLACE_RSHIFT': ( '>>=',),
|
||
|
'INPLACE_AND': ( '&=' ,),
|
||
|
'INPLACE_OR': ( '|=' ,),
|
||
|
'INPLACE_XOR': ( '^=' ,),
|
||
|
'binary_expr': ( '(%c %c %c)', 0, -1, 1 ),
|
||
|
|
||
|
'IMPORT_FROM': ( '%{pattr}', ),
|
||
|
'LOAD_ATTR': ( '.%{pattr}', ),
|
||
|
'LOAD_FAST': ( '%{pattr}', ),
|
||
|
'LOAD_NAME': ( '%{pattr}', ),
|
||
|
'LOAD_GLOBAL': ( '%{pattr}', ),
|
||
|
'LOAD_LOCALS': ( 'locals()', ),
|
||
|
#'LOAD_CONST': ( '%{pattr}', ), handled below
|
||
|
'DELETE_FAST': ( '%|del %{pattr}\n', ),
|
||
|
'DELETE_NAME': ( '%|del %{pattr}\n', ),
|
||
|
'DELETE_GLOBAL':( '%|del %{pattr}\n', ),
|
||
|
'delete_subscr':( '%|del %c[%c]\n', 0, 1,),
|
||
|
'binary_subscr':( '%c[%c]', 0, 1),
|
||
|
'store_subscr': ( '%c[%c]', 0, 1),
|
||
|
'STORE_FAST': ( '%{pattr}', ),
|
||
|
'STORE_NAME': ( '%{pattr}', ),
|
||
|
'STORE_GLOBAL': ( '%{pattr}', ),
|
||
|
'unpack': ( '(%C,)', (1, sys.maxint, ', ') ),
|
||
|
'unpack_list': ( '[%C]', (1, sys.maxint, ', ') ),
|
||
|
|
||
|
'list_compr': ( '[ %c ]', 1),
|
||
|
# 'lc_for': ( ' for %c in %c', 3, 0 ),
|
||
|
'lc_for_nest': ( ' for %c in %c%c', 3, 0, 4 ),
|
||
|
'lc_if': ( ' if %c', 0 ),
|
||
|
'lc_body': ( '%c', 1),
|
||
|
'lc_body__': ( '', ),
|
||
|
|
||
|
'assign': ( '%|%c = %c\n', -1, 0 ),
|
||
|
'augassign1': ( '%|%c %c %c\n', 0, 2, 1),
|
||
|
'augassign2': ( '%|%c%c %c %c\n', 0, 2, -3, -4),
|
||
|
#'dup_topx': ('%c', 0),
|
||
|
'designList': ( '%c = %c', 0, -1 ),
|
||
|
'and': ( '(%c and %c)', 0, 3 ),
|
||
|
'or': ( '(%c or %c)', 0, 3 ),
|
||
|
'compare': ( '(%c %[-1]{pattr} %c)', 0, 1 ),
|
||
|
'cmp_list': ('%c %c', 0, 1),
|
||
|
'cmp_list1': ('%[3]{pattr} %c %c', 0, -2),
|
||
|
'cmp_list2': ('%[1]{pattr} %c', 0),
|
||
|
'classdef': ( '\n%|class %[0]{pattr[1:-1]}%c:\n%+%{build_class}%-', 1 ),
|
||
|
'funcdef': ( '\n%|def %c\n', 0),
|
||
|
'kwarg': ( '%[0]{pattr[1:-1]}=%c', 1),
|
||
|
'importstmt': ( '%|import %[0]{pattr}\n', ),
|
||
|
'importfrom': ( '%|from %[0]{pattr} import %c\n', 1 ),
|
||
|
'importlist': ( '%C', (0, sys.maxint, ', ') ),
|
||
|
'importstmt2': ( '%|import %c\n', 1),
|
||
|
'importstar2': ( '%|from %[1]{pattr} import *\n', ),
|
||
|
'importfrom2': ( '%|from %[1]{pattr} import %c\n', 2 ),
|
||
|
'importlist2': ( '%C', (0, sys.maxint, ', ') ),
|
||
|
'assert': ( '%|assert %c\n' , 3 ),
|
||
|
'assert2': ( '%|assert %c, %c\n' , 3, -5 ),
|
||
|
|
||
|
'print_stmt': ( '%|print %c,\n', 0 ),
|
||
|
'print_stmt_nl': ( '%|print %[0]C\n', (0,1, None) ),
|
||
|
'print_nl_stmt': ( '%|print\n', ),
|
||
|
'print_to': ( '%|print >> %c, %c,\n', 0, 1 ),
|
||
|
'print_to_nl': ( '%|print >> %c, %c\n', 0, 1 ),
|
||
|
'print_nl_to': ( '%|print >> %c\n', 0 ),
|
||
|
'print_to_items': ( '%C', (0, 2, ', ') ),
|
||
|
|
||
|
'call_stmt': ( '%|%c\n', 0),
|
||
|
'break_stmt': ( '%|break\n', ),
|
||
|
'continue_stmt':( '%|continue\n', ),
|
||
|
'raise_stmt': ( '%|raise %[0]C\n', (0,sys.maxint,', ') ),
|
||
|
'return_stmt': ( '%|return %c\n', 0),
|
||
|
'return_lambda': ( '%c', 0),
|
||
|
|
||
|
'ifstmt': ( '%|if %c:\n%+%c%-', 0, 2 ),
|
||
|
'ifelsestmt': ( '%|if %c:\n%+%c%-%|else:\n%+%c%-', 0, 2, -2 ),
|
||
|
'ifelifstmt': ( '%|if %c:\n%+%c%-%c', 0, 2, -2 ),
|
||
|
'elifelifstmt': ( '%|elif %c:\n%+%c%-%c', 0, 2, -2 ),
|
||
|
'elifstmt': ( '%|elif %c:\n%+%c%-', 0, 2 ),
|
||
|
'elifelsestmt': ( '%|elif %c:\n%+%c%-%|else:\n%+%c%-', 0, 2, -2 ),
|
||
|
|
||
|
'whilestmt': ( '%|while %c:\n%+%c%-\n', 1, 4 ),
|
||
|
'whileelsestmt':( '%|while %c:\n%+%c%-\n%|else:\n%+%c%-\n', 1, 4, 9 ),
|
||
|
'forstmt': ( '%|for %c in %c:\n%+%c%-\n', 4, 1, 5 ),
|
||
|
'forelsestmt': (
|
||
|
'%|for %c in %c:\n%+%c%-\n%|else:\n%+%c%-\n', 4, 1, 5, 9
|
||
|
),
|
||
|
'trystmt': ( '%|try:\n%+%c%-%c', 1, 5 ),
|
||
|
'except': ( '%|except:\n%+%c%-', 3 ),
|
||
|
'except_cond1': ( '%|except %c:\n%+%c%-', 1, 8 ),
|
||
|
'except_cond2': ( '%|except %c, %c:\n%+%c%-', 1, 6, 8 ),
|
||
|
'except_else': ( '%|else:\n%+%c%-', 2 ),
|
||
|
'tryfinallystmt':( '%|try:\n%+%c%-\n%|finally:\n%+%c%-\n', 1, 5 ),
|
||
|
'passstmt': ( '%|pass\n', ),
|
||
|
'STORE_FAST': ( '%{pattr}', ),
|
||
|
'kv': ( '%c: %c', 3, 1 ),
|
||
|
'mapexpr': ( '{%[1]C}', (0,sys.maxint,', ') ),
|
||
|
}
|
||
|
|
||
|
|
||
|
MAP_DIRECT = (TABLE_DIRECT, )
|
||
|
MAP_R0 = (TABLE_R0, -1, 0)
|
||
|
MAP_R = (TABLE_R, -1)
|
||
|
|
||
|
MAP = {
|
||
|
'stmt': MAP_R,
|
||
|
'designator': MAP_R,
|
||
|
'expr': MAP_R,
|
||
|
'exprlist': MAP_R0,
|
||
|
}
|
||
|
|
||
|
|
||
|
ASSIGN_TUPLE_PARAM = lambda param_name: \
|
||
|
AST('expr', [ Token('LOAD_FAST', pattr=param_name) ])
|
||
|
|
||
|
|
||
|
def get_tuple_parameter(ast, name):
|
||
|
"""
|
||
|
If the name of the formal parameter starts with dot,
|
||
|
it's a tuple parameter, like this:
|
||
|
def MyFunc(xx, (a,b,c), yy):
|
||
|
print a, b*2, c*42
|
||
|
In byte-code, the whole tuple is assigned to parameter '.1' and
|
||
|
then the tuple gets unpacked to 'a', 'b' and 'c'.
|
||
|
|
||
|
Since identifiers starting with a dot are illegal in Python,
|
||
|
we can search for the byte-code equivalent to '(a,b,c) = .1'
|
||
|
"""
|
||
|
assert ast == 'code' and ast[0] == 'stmts'
|
||
|
for i in xrange(len(ast[0])):
|
||
|
# search for an assign-statement
|
||
|
assert ast[0][i] == 'stmt'
|
||
|
node = ast[0][i][0]
|
||
|
if node == 'assign' \
|
||
|
and node[0] == ASSIGN_TUPLE_PARAM(name):
|
||
|
# okay, this assigns '.n' to something
|
||
|
del ast[0][i]
|
||
|
# walk lhs; this
|
||
|
# returns a tuple of identifiers as used
|
||
|
# within the function definition
|
||
|
assert node[1] == 'designator'
|
||
|
# if lhs is not a UNPACK_TUPLE (or equiv.),
|
||
|
# add parenteses to make this a tuple
|
||
|
if node[1][0] not in ('unpack', 'unpack_list'):
|
||
|
return '(' + walk(node[1]) + ')'
|
||
|
return walk(node[1])
|
||
|
raise "Can't find tuple parameter" % name
|
||
|
|
||
|
def make_function(self, code, defparams, isLambda, nested=1):
|
||
|
"""Dump function defintion, doc string, and function body."""
|
||
|
|
||
|
def build_param(ast, name, default):
|
||
|
"""build parameters:
|
||
|
- handle defaults
|
||
|
- handle format tuple parameters
|
||
|
"""
|
||
|
# if formal parameter is a tuple, the paramater name
|
||
|
# starts with a dot (eg. '.1', '.2')
|
||
|
if name[0] == '.':
|
||
|
# replace the name with the tuple-string
|
||
|
name = get_tuple_parameter(ast, name)
|
||
|
|
||
|
if default:
|
||
|
if Showast:
|
||
|
print '--', name
|
||
|
print default
|
||
|
print '--'
|
||
|
result = '%s = %s' % ( name, walk(default, indent=0) )
|
||
|
##w = Walk(default, 0)
|
||
|
##result = '%s = %s' % ( name, w.traverse() )
|
||
|
##del w # hg/2000-09-03
|
||
|
if result[-2:] == '= ': # default was 'LOAD_CONST None'
|
||
|
result = result + 'None'
|
||
|
return result
|
||
|
else:
|
||
|
return name
|
||
|
|
||
|
def writeParams(self, params):
|
||
|
for i in range(len(params)):
|
||
|
if i > 0: self.f.write(', ')
|
||
|
self.f.write(params[i])
|
||
|
|
||
|
assert type(code) == types.CodeType
|
||
|
code = Code(code)
|
||
|
#assert isinstance(code, Code)
|
||
|
|
||
|
ast = _build_ast(self.f, code._tokens, code._customize)
|
||
|
code._tokens = None # save memory
|
||
|
assert ast == 'code' and ast[0] == 'stmts'
|
||
|
if isLambda:
|
||
|
# convert 'return' statement to expression
|
||
|
#assert len(ast[0]) == 1 wrong, see 'lambda (r,b): r,b,g'
|
||
|
assert ast[-1][-1] == 'stmt'
|
||
|
assert len(ast[-1][-1]) == 1
|
||
|
assert ast[-1][-1][0] == 'return_stmt'
|
||
|
ast[-1][-1][0].type = 'return_lambda'
|
||
|
else:
|
||
|
if ast[0][-1] == RETURN_NONE:
|
||
|
# Python adds a 'return None' to the
|
||
|
# end of any function; remove it
|
||
|
ast[0].pop() # remove last node
|
||
|
|
||
|
# add defaults values to parameter names
|
||
|
argc = code.co_argcount
|
||
|
paramnames = list(code.co_varnames[:argc])
|
||
|
|
||
|
# defaults are for last n parameters, thus reverse
|
||
|
paramnames.reverse(); defparams.reverse()
|
||
|
|
||
|
# build parameters
|
||
|
#
|
||
|
##This would be a nicer piece of code, but I can't get this to work
|
||
|
## now, have to find a usable lambda constuct hG/2000-09-05
|
||
|
##params = map(lambda name, default: build_param(ast, name, default),
|
||
|
## paramnames, defparams)
|
||
|
params = []
|
||
|
for name, default in map(lambda a,b: (a,b), paramnames, defparams):
|
||
|
params.append( build_param(ast, name, default) )
|
||
|
|
||
|
params.reverse() # back to correct order
|
||
|
|
||
|
if 4 & code.co_flags: # flag 2 -> variable number of args
|
||
|
params.append('*%s' % code.co_varnames[argc])
|
||
|
argc = argc +1
|
||
|
if 8 & code.co_flags: # flag 3 -> keyword args
|
||
|
params.append('**%s' % code.co_varnames[argc])
|
||
|
argc = argc +1
|
||
|
|
||
|
# dump parameter list (with default values)
|
||
|
indent = TAB * self.indent
|
||
|
if isLambda:
|
||
|
self.f.write('lambda ')
|
||
|
writeParams(self, params)
|
||
|
self.f.write(': ')
|
||
|
else:
|
||
|
self.f.write('(')
|
||
|
writeParams(self, params)
|
||
|
self.f.write('):\n')
|
||
|
#self.f.write('%s#flags:\t%i\n' % (indent, code.co_flags))
|
||
|
|
||
|
if code.co_consts[0] != None: # docstring exists, dump it
|
||
|
self.f.writelines([indent, `code.co_consts[0]`, '\n'])
|
||
|
|
||
|
_gen_source(self.f, ast, code._customize, self.indent,
|
||
|
isLambda=isLambda)
|
||
|
code._tokens = None; code._customize = None # save memory
|
||
|
|
||
|
|
||
|
def build_class(self, code):
|
||
|
"""Dump class definition, duc string and class body."""
|
||
|
|
||
|
assert type(code) == types.CodeType
|
||
|
code = Code(code)
|
||
|
#assert isinstance(code, Code)
|
||
|
|
||
|
indent = TAB * self.indent
|
||
|
#self.f.write('%s#flags:\t%i\n' % (indent, code.co_flags))
|
||
|
ast = _build_ast(self.f, code._tokens, code._customize)
|
||
|
code._tokens = None # save memory
|
||
|
assert ast == 'code' and ast[0] == 'stmts'
|
||
|
|
||
|
# if docstring exists, dump it
|
||
|
if code.co_consts[0] != None \
|
||
|
and ast[0][0] == ASSIGN_DOC_STRING(code.co_consts[0]):
|
||
|
#print '\n\n>>-->>doc string set\n\n'
|
||
|
self.f.writelines( [indent,repr(code.co_consts[0]), '\n'] )
|
||
|
del ast[0][0]
|
||
|
|
||
|
# the function defining a class normally returns locals(); we
|
||
|
# don't want this to show up in the source, thus remove the node
|
||
|
if ast[0][-1] == RETURN_LOCALS:
|
||
|
ast[0].pop() # remove last node
|
||
|
|
||
|
_gen_source(self.f, ast, code._customize, self.indent)
|
||
|
code._tokens = None; code._customize = None # save memory
|
||
|
|
||
|
__globals_tokens__ = ('STORE_GLOBAL', 'DELETE_GLOBAL') # 'LOAD_GLOBAL'
|
||
|
|
||
|
def find_globals(node, globals):
|
||
|
"""Find globals in this statement."""
|
||
|
for n in node:
|
||
|
if isinstance(n, AST):
|
||
|
if n != 'stmt': # skip nested statements
|
||
|
globals = find_globals(n, globals)
|
||
|
elif n.type in __globals_tokens__:
|
||
|
globals[n.pattr] = None
|
||
|
return globals
|
||
|
|
||
|
|
||
|
class Walk(GenericASTTraversal):
|
||
|
def __init__(self, ast, indent=0, isLambda=0):
|
||
|
GenericASTTraversal.__init__(self, ast)
|
||
|
self._globals = {}
|
||
|
self.f = cStringIO.StringIO()
|
||
|
self.f.seek(0)
|
||
|
self.indent = indent
|
||
|
self.isLambda = isLambda
|
||
|
|
||
|
def __del__(self):
|
||
|
self.f.close()
|
||
|
|
||
|
def traverse(self, node=None):
|
||
|
self.preorder(node)
|
||
|
return self.f.getvalue()
|
||
|
|
||
|
def n_LOAD_CONST(self, node):
|
||
|
data = node.pattr
|
||
|
if data == 'None':
|
||
|
# LOAD_CONST 'None' only occurs, when None is
|
||
|
# implicit eg. in 'return' w/o params
|
||
|
pass
|
||
|
elif data == 'Ellipsis':
|
||
|
self.f.write('...')
|
||
|
elif data[0] == '-': # assume negative integer constant
|
||
|
# convert to hex, since decimal representation
|
||
|
# would result in 'LOAD_CONST; UNARY_NEGATIVE'
|
||
|
self.f.write('0x%x' % int(data))
|
||
|
else:
|
||
|
self.f.write(data)
|
||
|
|
||
|
def n_delete_subscr(self, node):
|
||
|
#print >>self.f, '>#', node
|
||
|
#print >>self.f, '---'
|
||
|
maybe_tuple = node[-2][-1]
|
||
|
#print >>self.f, '##', maybe_tuple, maybe_tuple.type[:11]
|
||
|
if maybe_tuple.type[:11] == 'BUILD_TUPLE':
|
||
|
maybe_tuple.type = 'build_tuple2'
|
||
|
#print >>self.f, '##', node
|
||
|
#print >>self.f, '##', maybe_tuple.type
|
||
|
self.default(node)
|
||
|
|
||
|
n_store_subscr = n_binary_subscr = n_delete_subscr
|
||
|
|
||
|
def __n_stmts(self, node):
|
||
|
# optimize "print 1, ; print"
|
||
|
last = None; i = 0
|
||
|
while i < len(node):
|
||
|
n = node[i]
|
||
|
assert(n == 'stmt')
|
||
|
if n[0] == 'print_nl_stmt' and \
|
||
|
last is not None and \
|
||
|
last[0] == 'print_stmt':
|
||
|
last[0].type = 'print_stmt_nl'
|
||
|
del node[i]
|
||
|
last = None
|
||
|
else:
|
||
|
last = n
|
||
|
i = i + 1
|
||
|
self.default(node)
|
||
|
|
||
|
def n_stmt(self, node):
|
||
|
if not self.isLambda:
|
||
|
indent = TAB * self.indent
|
||
|
for g in find_globals(node, {}).keys():
|
||
|
self.f.writelines( [indent,
|
||
|
'global ',
|
||
|
g, '\n'] )
|
||
|
## nice output does not work since engine()
|
||
|
## creates a new Walk instance when recursing
|
||
|
## TODO: reconsider this: engine() no longer
|
||
|
## creates a new Walk instancew hG/2000-12-31
|
||
|
## if not self._globals.has_key(g):
|
||
|
## self._globals[g] = None
|
||
|
## self.f.writelines( [TAB * self.indent,
|
||
|
## 'global ',
|
||
|
## g, '\n'] )
|
||
|
self.default(node)
|
||
|
|
||
|
def n_exec_stmt(self, node):
|
||
|
"""
|
||
|
exec_stmt ::= expr exprlist DUP_TOP EXEC_STMT
|
||
|
exec_stmt ::= expr exprlist EXEC_STMT
|
||
|
"""
|
||
|
w = Walk(node, indent=self.indent)
|
||
|
w.engine(( '%|exec %c in %[1]C', 0, (0,sys.maxint,', ') ),
|
||
|
node)
|
||
|
s = w.f.getvalue()
|
||
|
del w
|
||
|
if s[-3:] == 'in ':
|
||
|
s = s[:-3]
|
||
|
self.f.writelines( [s, '\n'] )
|
||
|
node[:] = [] # avoid print out when recursive descenting
|
||
|
|
||
|
def n_ifelsestmt(self, node, preprocess=0):
|
||
|
if len(node[-2]) == 1:
|
||
|
ifnode = node[-2][0][0]
|
||
|
if ifnode == 'ifelsestmt':
|
||
|
node.type = 'ifelifstmt'
|
||
|
self.n_ifelsestmt(ifnode, preprocess=1)
|
||
|
if ifnode == 'ifelifstmt':
|
||
|
ifnode.type = 'elifelifstmt'
|
||
|
elif ifnode == 'ifelsestmt':
|
||
|
ifnode.type = 'elifelsestmt'
|
||
|
elif ifnode == 'ifstmt':
|
||
|
node.type = 'ifelifstmt'
|
||
|
ifnode.type = 'elifstmt'
|
||
|
if not preprocess:
|
||
|
self.default(node)
|
||
|
|
||
|
def n_import_as(self, node):
|
||
|
iname = node[0].pattr; sname = node[-1].pattr
|
||
|
if iname == sname \
|
||
|
or iname[:len(sname)+1] == (sname+'.'):
|
||
|
self.f.write(iname)
|
||
|
else:
|
||
|
self.f.writelines([iname, ' as ', sname])
|
||
|
node[:] = [] # avoid print out when recursive descenting
|
||
|
|
||
|
def n_mkfunc(self, node):
|
||
|
defparams = node[0:-2]
|
||
|
code = node[-2].attr
|
||
|
node[:] = [] # avoid print out when recursive descenting
|
||
|
self.indent = self.indent + 1
|
||
|
self.f.write(code.co_name)
|
||
|
make_function(self, code, defparams, isLambda=0)
|
||
|
self.indent = self.indent - 1
|
||
|
|
||
|
def n_mklambda(self, node):
|
||
|
defparams = node[0:-2]
|
||
|
code = node[-2].attr
|
||
|
node[:] = [] # avoid print out when recursive descenting
|
||
|
make_function(self, code, defparams, isLambda=1)
|
||
|
|
||
|
def n_classdef(self, node):
|
||
|
self.f.writelines(['\n', TAB * self.indent, 'class '])
|
||
|
self.f.write(node[0].pattr[1:-1])
|
||
|
node._code = node[-4][0].attr
|
||
|
# avoid print out when recursive descenting
|
||
|
if node[1] == BUILD_TUPLE_0:
|
||
|
node[:] = []
|
||
|
else:
|
||
|
node[:] = [ node[1] ]
|
||
|
|
||
|
def n_classdef_exit(self, node):
|
||
|
self.f.write(':\n')
|
||
|
self.indent = self.indent +1
|
||
|
# '\n%|class %[0]{pattr[1:-1]}%c:\n%+%{build_class}%-', 1 ),
|
||
|
# -4 -> MAKE_FUNCTION; -2 -> LOAD_CONST (code)
|
||
|
build_class(self,node._code)
|
||
|
self.indent = self.indent -1
|
||
|
node._code = None # save memory
|
||
|
|
||
|
def n_lc_for(self, node):
|
||
|
node.type = 'lc_for_nest'
|
||
|
content = node[4]
|
||
|
while content == 'lc_for':
|
||
|
content.type = 'lc_for_nest'
|
||
|
content = content[4]
|
||
|
while content == 'lc_if':
|
||
|
content = content[2]
|
||
|
assert content == 'lc_body'
|
||
|
self.preorder(content)
|
||
|
content.type = 'lc_body__'
|
||
|
self.default(node)
|
||
|
|
||
|
def engine(self, entry, startnode):
|
||
|
#self.f.write("-----\n")
|
||
|
#self.f.write(str(startnode.__dict__)); self.f.write('\n')
|
||
|
escape = re.compile(r'''
|
||
|
% ( \[ (?P<child> -? \d+ ) \] )?
|
||
|
((?P<type> [^{] ) |
|
||
|
( [{] (?P<expr> [^}]* ) [}] ))
|
||
|
''', re.VERBOSE)
|
||
|
|
||
|
fmt = entry[0]
|
||
|
n = len(fmt)
|
||
|
lastC = 0
|
||
|
arg = 1
|
||
|
i = 0
|
||
|
|
||
|
while i < n:
|
||
|
m = escape.match(fmt, i)
|
||
|
if m is None:
|
||
|
self.f.write(fmt[i])
|
||
|
i = i + 1
|
||
|
continue
|
||
|
|
||
|
i = m.end()
|
||
|
typ = m.group('type') or '{'
|
||
|
|
||
|
node = startnode
|
||
|
try:
|
||
|
if m.group('child'):
|
||
|
node = node[string.atoi(m.group('child'))]
|
||
|
except:
|
||
|
print node.__dict__
|
||
|
raise
|
||
|
|
||
|
if typ == '%':
|
||
|
self.f.write('%')
|
||
|
elif typ == '+':
|
||
|
self.indent = self.indent + 1
|
||
|
elif typ == '-':
|
||
|
self.indent = self.indent - 1
|
||
|
elif typ == '|':
|
||
|
self.f.write(TAB * self.indent)
|
||
|
elif typ == ',':
|
||
|
if lastC == 1:
|
||
|
self.f.write(',')
|
||
|
elif typ == 'c':
|
||
|
self.traverse(node[entry[arg]])
|
||
|
##w = Walk(node[entry[arg]], self.indent)
|
||
|
##self.f.write(w.traverse())
|
||
|
##del w # hg/2000-09-03
|
||
|
arg = arg + 1
|
||
|
elif typ == 'C':
|
||
|
low, high, sep = entry[arg]
|
||
|
lastC = remaining = len(node[low:high])
|
||
|
for subnode in node[low:high]:
|
||
|
self.traverse(subnode)
|
||
|
##w = Walk(subnode, self.indent)
|
||
|
##self.f.write(w.traverse())
|
||
|
##del w # hg/2000-09-03
|
||
|
remaining = remaining - 1
|
||
|
if remaining > 0:
|
||
|
self.f.write(sep)
|
||
|
arg = arg + 1
|
||
|
elif typ == '{':
|
||
|
d = node.__dict__
|
||
|
expr = m.group('expr')
|
||
|
if expr == 'build_class':
|
||
|
# -4 -> MAKE_FUNCTION; -2 -> LOAD_CONST (code)
|
||
|
build_class(self,node[-4][-2].attr)
|
||
|
else:
|
||
|
try:
|
||
|
self.f.write(eval(expr, d, d))
|
||
|
except:
|
||
|
print node
|
||
|
raise
|
||
|
|
||
|
def default(self, node):
|
||
|
mapping = MAP.get(node, MAP_DIRECT)
|
||
|
table = mapping[0]
|
||
|
key = node
|
||
|
|
||
|
for i in mapping[1:]:
|
||
|
key = key[i]
|
||
|
|
||
|
if table.has_key(key):
|
||
|
self.engine(table[key], node)
|
||
|
self.prune()
|
||
|
|
||
|
def walk(ast, customize={}, indent=0, isLambda=0):
|
||
|
w = Walk(ast, indent, isLambda=isLambda)
|
||
|
#
|
||
|
# Special handling for opcodes that take a variable number
|
||
|
# of arguments -- we add a new entry for each in TABLE_R.
|
||
|
#
|
||
|
for k, v in customize.items():
|
||
|
op = k[:string.rfind(k, '_')]
|
||
|
if op == 'BUILD_LIST':
|
||
|
TABLE_R[k] = ( '[%C]', (0,-1,', ') )
|
||
|
elif op == 'BUILD_SLICE':
|
||
|
TABLE_R[k] = ( '%C', (0,-1,':') )
|
||
|
elif op == 'BUILD_TUPLE':
|
||
|
TABLE_R[k] = ( '(%C%,)', (0,-1,', ') )
|
||
|
elif op == 'CALL_FUNCTION':
|
||
|
TABLE_R[k] = ( '%c(%C)', 0, (1,-1,', ') )
|
||
|
elif op in ('CALL_FUNCTION_VAR',
|
||
|
'CALL_FUNCTION_VAR_KW', 'CALL_FUNCTION_KW'):
|
||
|
if v == 0:
|
||
|
str = '%c(%C' # '%C' is a dummy here ...
|
||
|
p2 = (0, 0, None) # .. because of this
|
||
|
else:
|
||
|
str = '%c(%C, '
|
||
|
p2 = (1,-2, ', ')
|
||
|
if op == 'CALL_FUNCTION_VAR':
|
||
|
str = str + '*%c)'
|
||
|
entry = (str, 0, p2, -2)
|
||
|
elif op == 'CALL_FUNCTION_KW':
|
||
|
str = str + '**%c)'
|
||
|
entry = (str, 0, p2, -2)
|
||
|
else:
|
||
|
str = str + '*%c, **%c)'
|
||
|
if p2[2]: p2 = (1,-3, ', ')
|
||
|
entry = (str, 0, p2, -3, -2)
|
||
|
TABLE_R[k] = entry
|
||
|
result = w.traverse()
|
||
|
return result
|
||
|
|
||
|
#-- end of (de-)compiler ---
|
||
|
|
||
|
#-- start
|
||
|
|
||
|
Showasm = 0
|
||
|
Showast = 0
|
||
|
__real_out = None
|
||
|
|
||
|
def _tokenize(out, co):
|
||
|
"""Disassemble code object into a token list"""
|
||
|
assert type(co) == types.CodeType
|
||
|
|
||
|
tokens, customize = disassemble(co)
|
||
|
# See the disassembly..
|
||
|
if Showasm and out is not None:
|
||
|
for t in tokens:
|
||
|
out.write('%s\n' % t)
|
||
|
out.write('\n')
|
||
|
return tokens, customize
|
||
|
|
||
|
def _build_ast(out, tokens, customize):
|
||
|
assert type(tokens) == types.ListType
|
||
|
assert isinstance(tokens[0], Token)
|
||
|
|
||
|
# Build AST from disassembly.
|
||
|
try:
|
||
|
ast = parse(tokens, customize)
|
||
|
except: # parser failed, dump disassembly
|
||
|
#if not Showasm:
|
||
|
__real_out.write('--- This code section failed: ---\n')
|
||
|
for t in tokens:
|
||
|
__real_out.write('%s\n' % t)
|
||
|
__real_out.write('\n')
|
||
|
raise
|
||
|
return ast
|
||
|
|
||
|
def _gen_source(out, ast, customize, indent=0, isLambda=0):
|
||
|
"""convert AST to source code"""
|
||
|
if Showast:
|
||
|
out.write(`ast`)
|
||
|
|
||
|
# if code would be empty, append 'pass'
|
||
|
if len(ast[0]) == 0:
|
||
|
out.write(indent * TAB)
|
||
|
out.write('pass\n')
|
||
|
else:
|
||
|
out.write(walk(ast, customize, indent, isLambda=isLambda))
|
||
|
|
||
|
|
||
|
def decompyle(co, out=None, indent=0, showasm=0, showast=0):
|
||
|
"""
|
||
|
diassembles a given code block 'co'
|
||
|
"""
|
||
|
assert type(co) == types.CodeType
|
||
|
|
||
|
global Showasm, Showast
|
||
|
Showasm = showasm
|
||
|
Showast = showast
|
||
|
|
||
|
if not out:
|
||
|
out = sys.stdout
|
||
|
global __real_out
|
||
|
__real_out = out # store final output stream for case of error
|
||
|
|
||
|
tokens, customize = _tokenize(out, co)
|
||
|
ast = _build_ast(out, tokens, customize)
|
||
|
tokens = None # save memory
|
||
|
|
||
|
assert ast == 'code' and ast[0] == 'stmts'
|
||
|
# convert leading '__doc__ = "..." into doc string
|
||
|
if ast[0][0] == ASSIGN_DOC_STRING(co.co_consts[0]):
|
||
|
out.writelines( [repr(co.co_consts[0]), '\n'] )
|
||
|
del ast[0][0]
|
||
|
if ast[0][-1] == RETURN_NONE:
|
||
|
ast[0].pop() # remove last node
|
||
|
#todo: if empty, add 'pass'
|
||
|
|
||
|
_gen_source(out, ast, customize, indent)
|
||
|
|
||
|
|
||
|
def decompyle_file(filename, outstream=None, showasm=0, showast=0):
|
||
|
"""
|
||
|
decompile Python byte-code file (.pyc)
|
||
|
"""
|
||
|
co = _load_module(filename)
|
||
|
decompyle(co, out=outstream, showasm=showasm, showast=showast)
|
||
|
co = None
|