182 lines
5.7 KiB

#
# (C) Copyright 2000 by hartmut Goebel <hartmut@goebel.noris.de>
#
# byte-code verifier for decompyle
#
import types
import decompyle
#--- exceptions ---
class VerifyCmpError(Exception):
    """Common base class for every comparison error raised by this module."""
class CmpErrorConsts(VerifyCmpError):
    """Raised when two constants at the same index compare unequal.

    Attributes:
      name  -- name of the code object the constants belong to
      index -- position of the differing entry within co_consts
    """

    def __init__(self, name, index):
        self.name, self.index = name, index

    def __str__(self):
        template = 'Compare Error within Consts of %s at index %i'
        return template % (repr(self.name), self.index)
class CmpErrorConstsLen(VerifyCmpError):
    """Raised when the two co_consts tuples have different lengths.

    Attributes:
      name   -- name of the code object being compared
      consts -- pair (consts1, consts2) holding both constant sequences
    """

    def __init__(self, name, consts1, consts2):
        self.name = name
        self.consts = (consts1, consts2)

    def __str__(self):
        first, second = self.consts
        # Each sequence is shown as "<length>:<TAB><repr>".
        return 'Consts length differs in %s:\n\n%i:\t%s\n\n%i:\t%s\n\n' % (
            repr(self.name),
            len(first), repr(first),
            len(second), repr(second))
class CmpErrorCode(VerifyCmpError):
    """Raised when the token streams of two code objects diverge.

    Attributes:
      name   -- name of the code object being compared
      index  -- position in the token list of the first mismatch
      token1 -- token found in the first code object
      token2 -- token found in the second code object
    """

    def __init__(self, name, index, token1, token2):
        self.name = name
        self.index = index
        self.token1 = token1
        self.token2 = token2

    def __str__(self):
        fields = (repr(self.name), self.index,
                  repr(self.token1), repr(self.token2))
        return 'Code differs in %s at offset %i [%s] != [%s]' % fields
class CmpErrorCodeLen(VerifyCmpError):
    """Exception to be raised when code length differs.

    Attributes:
      name   -- name of the code object being compared
      tokens -- list [tokens1, tokens2] holding both token sequences
    """

    def __init__(self, name, tokens1, tokens2):
        self.name = name
        self.tokens = [tokens1, tokens2]

    def __str__(self):
        """Return a header line followed by both token lists side by side.

        Each row shows one token from each list in a 37-character,
        left-aligned column.  The lists have different lengths by
        definition, so the shorter one is padded with None; that keeps
        the surplus tokens of the longer list visible in the report.
        """
        left, right = self.tokens
        rows = ['Code len differs in %s\n' % str(self.name)]
        for i in range(max(len(left), len(right))):
            # Pad explicitly rather than relying on implicit map() padding.
            tok1 = None
            tok2 = None
            if i < len(left):
                tok1 = left[i]
            if i < len(right):
                tok2 = right[i]
            rows.append("%-37s\t%-37s\n" % (tok1, tok2))
        # Join once at the end: avoids re-copying the growing report
        # string for every row.
        return ''.join(rows)
class CmpErrorMember(VerifyCmpError):
    """Raised when any other member of the two code objects differs.

    Attributes:
      name   -- name of the code object being compared
      member -- name of the differing member
      data   -- pair (data1, data2) with the member's value in each object
    """

    def __init__(self, name, member, data1, data2):
        self.name = name
        self.member = member
        self.data = (data1, data2)

    def __str__(self):
        value1, value2 = self.data
        return 'Member %s differs in %s:\n\t%s\n\t%s\n' % (
            repr(self.member), repr(self.name),
            repr(value1), repr(value2))
#--- compare ---
# Code-object members that cmp_code_objects() skips entirely: a difference
# in any of these never raises a comparison error.
# NOTE(review): presumably skipped because they only record where the code
# came from (file name, line numbers) -- confirm.
__IGNORE_CODE_MEMBERS__ = ['co_filename', 'co_firstlineno', 'co_lnotab']
def cmp_code_objects(code_obj1, code_obj2, name=''):
    """
    Compare two code-objects.

    This is the main part of this module.

    Every member of the code objects is compared, except those listed
    in __IGNORE_CODE_MEMBERS__:

      co_code   -- both objects are tokenized with decompyle._tokenize()
                   while decompyle.Token is temporarily replaced by this
                   module's Token class; the token lists must have the
                   same length and compare equal element by element.
      co_consts -- must have the same length; entries are compared
                   pairwise, and nested code objects are compared
                   recursively.
      others    -- must compare equal.

    Arguments:
      code_obj1, code_obj2 -- the code objects to compare
      name -- dotted name of the enclosing scope, used as a prefix in
              error messages ('' for a top-level call)

    Returns None when no difference is found.

    Raises:
      CmpErrorCodeLen, CmpErrorCode -- the token streams differ
      CmpErrorConstsLen, CmpErrorConstsType, CmpErrorConsts --
          the constants differ
      CmpErrorMember -- any other member differs
      AssertionError -- an argument is not a code object, or the
          member lists fail the sanity checks below
    """
    assert type(code_obj1) == types.CodeType
    assert type(code_obj2) == types.CodeType
    assert dir(code_obj1) == code_obj1.__members__
    assert dir(code_obj2) == code_obj2.__members__
    assert code_obj1.__members__ == code_obj2.__members__

    # Build the dotted name that is reported in error messages.
    if name == '__main__':
        name = code_obj1.co_name
    else:
        name = '%s.%s' % (name, code_obj1.co_name)
        if name == '.?':
            name = '__main__'

    members = code_obj1.__members__
    members.sort()
    for member in members:
        if member in __IGNORE_CODE_MEMBERS__:
            pass
        elif member == 'co_code':
            # Tokenize both code objects using the changed Token class.
            # The original class is restored in a 'finally' clause so
            # that a failure inside _tokenize() cannot leave decompyle
            # patched.
            saved_token_class = decompyle.Token
            decompyle.Token = Token
            try:
                tokens1, customize = decompyle._tokenize(None, code_obj1)
                tokens2, customize = decompyle._tokenize(None, code_obj2)
            finally:
                decompyle.Token = saved_token_class
            del customize
            # compare length
            if len(tokens1) != len(tokens2):
                raise CmpErrorCodeLen(name, tokens1, tokens2)
            # compare contents
            for i in xrange(len(tokens1)):
                if tokens1[i] != tokens2[i]:
                    raise CmpErrorCode(name, i, tokens1[i], tokens2[i])
        elif member == 'co_consts':
            consts1 = code_obj1.co_consts
            consts2 = code_obj2.co_consts
            # compare length
            if len(consts1) != len(consts2):
                raise CmpErrorConstsLen(name, consts1, consts2)
            # compare contents
            for idx in xrange(len(consts1)):
                const1 = consts1[idx]
                const2 = consts2[idx]
                # same type?
                if type(const1) != type(const2):
                    raise CmpErrorConstsType(name, idx)
                if type(const1) == types.CodeType:
                    # code object -> recursive compare
                    cmp_code_objects(const1, const2, name)
                elif cmp(const1, const2) != 0:
                    # content differs
                    raise CmpErrorConsts(name, idx)
        else:
            # all other members must be equal
            data1 = getattr(code_obj1, member)
            data2 = getattr(code_obj2, member)
            if data1 != data2:
                raise CmpErrorMember(name, member, data1, data2)


class CmpErrorConstsType(VerifyCmpError):
    """Exception to be raised when the types of two consts differ.

    Attributes:
      name  -- name of the code object the constants belong to
      index -- position of the offending entry within co_consts
    """

    def __init__(self, name, index):
        self.name = name
        self.index = index

    def __str__(self):
        return 'Consts type differs in %s at index %i' % \
               (repr(self.name), self.index)
class Token(decompyle.Token):
    """Token variant whose cmp() ordering suits code verification."""

    def __cmp__(self, o):
        """Order by type first, then by pattr.

        For tokens whose type is listed in decompyle._JUMP_OPS_ only the
        type is compared; pattr is ignored for them.
        """
        by_type = cmp(self.type, o.type)
        if by_type != 0 or self.type in decompyle._JUMP_OPS_:
            # Types differ, or a jump op: the type alone decides.
            return by_type
        return cmp(self.pattr, o.pattr)

    def __repr__(self):
        """Return 'type attr (pattr)'."""
        kind = str(self.type)
        attr = str(self.attr)
        pattr = str(self.pattr)
        return '%s %s (%s)' % (kind, attr, pattr)
def compare_code_with_srcfile(pyc_filename, src_filename):
    """Check a .pyc file against a source code file.

    The first code object is obtained with decompyle._load_module() and
    the second with decompyle._load_file(); both are then handed to
    cmp_code_objects(), whose exceptions propagate to the caller.
    """
    cmp_code_objects(decompyle._load_module(pyc_filename),
                     decompyle._load_file(src_filename))
def compare_files(pyc_filename1, pyc_filename2):
    """Check two .pyc files against each other.

    Both files are loaded with decompyle._load_module() and the
    resulting code objects are handed to cmp_code_objects(), whose
    exceptions propagate to the caller.
    """
    cmp_code_objects(decompyle._load_module(pyc_filename1),
                     decompyle._load_module(pyc_filename2))
if __name__ == '__main__':
    # Small self-test: build two tokens that differ only in their second
    # and fourth constructor arguments, show them, then show how they
    # (and their type and attr members) compare.
    t1 = Token('LOAD_CONST', None, 'code_object _expandLang', 52)
    t2 = Token('LOAD_CONST', -421, 'code_object _expandLang', 55)
    for token in (t1, t2):
        print(repr(token))
    results = (cmp(t1, t2), cmp(t1.type, t2.type), cmp(t1.attr, t2.attr))
    print('%s %s %s' % results)