pyc花指令
常见的python花指令形式有两种:单重叠指令和多重叠指令。
以下以python3.8为例,指令长度为2字节。
单重叠指令:
例如pyc经过反编译后得到的东西为
0 JUMP_ABSOLUTE [71 04] 5
2 PRINT_ITEM [47 --]
4 LOAD_CONST [64 10] 16
6 STOP_CODE [00 --]
实际在执行时,并不会执行 2 PRINT_ITEM [47 --],真正执行到的指令是:
0 JUMP_ABSOLUTE [71 04] 5
4 LOAD_CONST [64 10] 16
单重叠指令多是分支的跳转,导致一些反编译工具如pycdc、uncompyle6出错。
多重叠指令:
0 EXTENDED_ARG [91 64]
2 EXTENDED_ARG [91 53]
4 JUMP_ABSOLUTE [71 01]
实际执行时
0 EXTENDED_ARG [91 64]
2 EXTENDED_ARG [91 53]
4 JUMP_ABSOLUTE [71 01]
1 LOAD_CONST [64 91]
3 RETURN_VALUE [53 --]
多重叠指令是将指令的数据部分当作下一条指令的opcode部分执行,在跳转基础上进一步混淆控制流的技术手段,可以有效对抗逆向者。
NOP花指令:
NOP为junk code,只要不影响正常执行逻辑,其他的指令可自由发挥,含有NOP的pyc均不可以被现有的反编译工具反编译成py代码。
去除花指令
pyc去除花指令后,很大可能仍然不能被现有工具反编译成源码,因为现有反编译工具对pyc的要求比较严格,不能含有nop以及其他junk指令,而python虚拟机在运行时却没有这些限制。
因此不同于用ida patch汇编代码,想把patch过的pyc反编译回原来的源码,工作量还是蛮大的。
下面以[2022年安洵杯]flower.pyc为例
626 LOAD_GLOBAL 6: ord
628 LOAD_GLOBAL 18: Base64Table
630 LOAD_FAST 3: i
632 LOAD_CONST 22: 22
634 BINARY_XOR
636 BINARY_SUBSCR
638 CALL_FUNCTION 1
640 STORE_FAST 15: tmp2
这一段是把base64[i]改为了base64[i^22]
258 LOAD_NAME 16: ret
260 LOAD_NAME 18: i
262 LOAD_NAME 18: i
264 LOAD_CONST 40: 4
266 BINARY_ADD
268 BUILD_SLICE 2
270 BINARY_SUBSCR
272 LOAD_NAME 19: Key1
274 LOAD_NAME 17: j
276 STORE_SUBSCR
278 LOAD_NAME 17: j
280 LOAD_CONST 41: 1
282 BINARY_ADD
284 STORE_NAME 17: j
286 LOAD_NAME 18: i
288 LOAD_CONST 40: 4
290 BINARY_ADD
292 STORE_NAME 18: i
294 LOAD_NAME 17: j
296 LOAD_CONST 42: 10
298 COMPARE_OP 2 (==)
300 POP_JUMP_IF_FALSE 258
304 JUMP_ABSOLUTE 312
308 JUMP_ABSOLUTE 258
转化成py代码就是
input_str = input()
ret = My_base64_encode(input_str)
j = 0
i = 0
Key1 = "1234512345"
len_ret = len(ret) // 4
while j != 10:
    Key1[j] = ret[i:i+4]
    j = j + 1
    i = i + 4
keyCheck = ''
if keyCheck[0] == keyInputCom[8]:
然后后面有一堆重复的,提取出来就是
0 == 8
1 == 9
2 == 1
3 == 7
4 == 5
5 == 0
6 == 6
7 == 4
8 == 3
9 == 2
然后再写题解的代码就可以了。
利用脚本去除花指令:用python模拟执行python的opcode,遇到分支就跳转,直到遇到RETURN_VALUE停止本次执行,采用的是简单的DFS递归算法。下面第一个脚本针对python2的字节码(指令长度为1或3字节),第二个脚本针对python3.8的字节码(指令长度固定为2字节)。
# Junk-instruction remover for Python 2 bytecode.
#
# Instruction layout handled here: one byte for opcodes below HAVE_ARGUMENT,
# otherwise three bytes (opcode followed by a little-endian 16-bit argument).
# EXTENDED_ARG prefixes are not interpreted by this script.
import dis
import marshal
import opcode
import sys
import types

# Opcode numbers, hard-coded so they do not depend on the interpreter that
# runs this tool.
NOP = 9
HAVE_ARGUMENT = 90
JUMP_FORWARD = 110
JUMP_IF_FALSE_OR_POP = 111
JUMP_IF_TRUE_OR_POP = 112
JUMP_ABSOLUTE = 113
POP_JUMP_IF_FALSE = 114
POP_JUMP_IF_TRUE = 115
CONTINUE_LOOP = 119
FOR_ITER = 93
RETURN_VALUE = 83

# Branches whose argument is an absolute offset; execution may also fall
# through to the next instruction.
_ABSOLUTE_BRANCHES = frozenset((
    JUMP_IF_TRUE_OR_POP,
    JUMP_IF_FALSE_OR_POP,
    POP_JUMP_IF_TRUE,
    POP_JUMP_IF_FALSE,
    CONTINUE_LOOP,
))

# Branches whose argument is relative to the next instruction.  Besides
# FOR_ITER this covers SETUP_LOOP (120), SETUP_EXCEPT (121), SETUP_FINALLY
# (122) and SETUP_WITH (143): their targets are handler blocks that are only
# entered through the block stack, so they must be followed explicitly or the
# handlers would be overwritten with NOPs.
_RELATIVE_BRANCHES = frozenset((FOR_ITER, 120, 121, 122, 143))

# Byte offsets of the code string that belong to reachable instructions.
# Reset by deconf() for every code object.
used_set = set()


def deconf_inner(code, now):
    """Record in ``used_set`` every byte reachable from offset ``now``.

    ``code`` is a sequence of integer byte values.  Control flow is followed
    with an explicit work list (depth-first) instead of recursion, so large
    functions cannot exhaust the interpreter's recursion limit.

    Instruction starts are tracked separately from used bytes: an offset that
    was so far only seen as the *argument* byte of another instruction is
    still decoded when a jump lands on it (overlapping instructions).

    A path ends at RETURN_VALUE, at an instruction start that was already
    decoded, or when it leaves the code string.
    """
    size = len(code)
    visited = set()  # instruction start offsets already decoded
    pending = [now]
    while pending:
        pos = pending.pop()
        while 0 <= pos < size and pos not in visited:
            visited.add(pos)
            op = code[pos]
            used_set.add(pos)
            if op == RETURN_VALUE:
                break
            if op < HAVE_ARGUMENT:
                pos += 1
                continue
            if pos + 2 >= size:
                # Truncated instruction at the very end: keep what is there.
                used_set.update(range(pos + 1, size))
                break
            used_set.add(pos + 1)
            used_set.add(pos + 2)
            arg = code[pos + 2] << 8 | code[pos + 1]
            following = pos + 3
            if op == JUMP_ABSOLUTE:
                pos = arg
            elif op == JUMP_FORWARD:
                pos = following + arg
            else:
                if op in _ABSOLUTE_BRANCHES:
                    pending.append(arg)
                elif op in _RELATIVE_BRANCHES:
                    pending.append(following + arg)
                pos = following


def deconf(code):
    """Return ``code`` with every unreachable byte replaced by NOP.

    ``code`` is a raw code string (``co_code``).  The result has the same
    length and the same type as ``co_code`` has on the running interpreter
    (``str`` on Python 2, ``bytes`` on Python 3).  An empty input yields an
    empty result.
    """
    global used_set
    used_set = set()  # must be cleared for every code object
    cod = list(bytearray(code))
    deconf_inner(cod, 0)
    cleaned = bytearray(
        value if offset in used_set else NOP
        for offset, value in enumerate(cod)
    )
    return bytes(cleaned)


if __name__ == "__main__":
    with open(sys.argv[1], 'rb') as f:
        header = f.read(8)  # pyc header, copied unchanged to the output
        code = marshal.load(f)
    print(code.co_consts, type(code))
# Debug aid from the original script (disabled):
# print(dis.dis(deconf(code.co_consts[3].co_code)))


def _clean_code(co):
    """Return a copy of code object ``co`` with junk bytes replaced by NOP.

    Nested code objects found in ``co.co_consts`` are cleaned recursively, so
    functions defined at any depth are processed, not just those directly in
    the module's constants.  Uses the Python 2 ``types.CodeType`` signature
    (Python 3 additionally needs ``co_kwonlyargcount`` after ``co_argcount``).
    """
    cleaned_consts = tuple(
        _clean_code(const) if hasattr(const, 'co_code') else const
        for const in co.co_consts
    )
    return types.CodeType(
        co.co_argcount,
        co.co_nlocals,
        co.co_stacksize,
        co.co_flags,
        deconf(co.co_code),
        cleaned_consts,
        co.co_names,
        co.co_varnames,
        co.co_filename,
        co.co_name,
        co.co_firstlineno,
        # Junk bytes are overwritten in place, so offsets are unchanged and
        # the line table is passed through as is.
        co.co_lnotab,
        co.co_freevars,
        co.co_cellvars,
    )


if __name__ == "__main__":
    mode = _clean_code(code)
    consts = list(mode.co_consts)
    # Write "<input>.mod": the original header followed by the cleaned code.
    with open(sys.argv[1] + ".mod", 'wb') as f:
        f.write(header)
        marshal.dump(mode, f)
# Junk-instruction remover for CPython 3.8 bytecode (every instruction is two
# bytes: opcode, argument; jump arguments are byte offsets).
import dis
import marshal
import opcode
import sys
import types

# Opcode numbers of the *target* bytecode (CPython 3.8).  They are pinned here
# because looking them up in the running interpreter's table fails on
# interpreters that no longer define e.g. JUMP_ABSOLUTE.
_PY38_OPCODES = {
    'NOP': 9,
    'RETURN_VALUE': 83,
    'FOR_ITER': 93,
    'JUMP_FORWARD': 110,
    'JUMP_IF_FALSE_OR_POP': 111,
    'JUMP_IF_TRUE_OR_POP': 112,
    'JUMP_ABSOLUTE': 113,
    'POP_JUMP_IF_FALSE': 114,
    'POP_JUMP_IF_TRUE': 115,
    'SETUP_FINALLY': 122,
    'SETUP_WITH': 143,
    'EXTENDED_ARG': 144,
    'SETUP_ASYNC_WITH': 154,
    'CALL_FINALLY': 162,
}


def getopcode(opname):
    """Return the numeric opcode for ``opname``.

    Names used by this script resolve to their CPython 3.8 numbers; any other
    name is looked up in the running interpreter's ``opcode.opname`` table
    (raising ValueError when it is unknown there).
    """
    try:
        return _PY38_OPCODES[opname]
    except KeyError:
        return opcode.opname.index(opname)


# HAVE_ARGUMENT and CONTINUE_LOOP were only needed by the Python 2.7 variant.
NOP = getopcode('NOP')
JUMP_FORWARD = getopcode('JUMP_FORWARD')
JUMP_IF_FALSE_OR_POP = getopcode('JUMP_IF_FALSE_OR_POP')
JUMP_IF_TRUE_OR_POP = getopcode('JUMP_IF_TRUE_OR_POP')
JUMP_ABSOLUTE = getopcode('JUMP_ABSOLUTE')
POP_JUMP_IF_FALSE = getopcode('POP_JUMP_IF_FALSE')
POP_JUMP_IF_TRUE = getopcode('POP_JUMP_IF_TRUE')
EXTENDED_ARG = getopcode('EXTENDED_ARG')
FOR_ITER = getopcode('FOR_ITER')
RETURN_VALUE = getopcode('RETURN_VALUE')

# Branches with an absolute target; execution may also fall through.
_ABSOLUTE_BRANCHES = frozenset((
    JUMP_IF_TRUE_OR_POP,
    JUMP_IF_FALSE_OR_POP,
    POP_JUMP_IF_TRUE,
    POP_JUMP_IF_FALSE,
))

# Branches with a target relative to the next instruction.  The SETUP_* and
# CALL_FINALLY targets are handler blocks that are never reached by plain
# fall-through, so they have to be followed or they would be NOP'd out.
_RELATIVE_BRANCHES = frozenset((
    FOR_ITER,
    getopcode('SETUP_FINALLY'),
    getopcode('SETUP_WITH'),
    getopcode('SETUP_ASYNC_WITH'),
    getopcode('CALL_FINALLY'),
))

# Byte offsets that belong to reachable instructions; reset by deconf().
used_set = set()


def deconf_inner(code, now):
    """Record in ``used_set`` every byte reachable from offset ``now``.

    ``code`` is a sequence of integer byte values.  Branches are followed
    with an explicit work list (depth-first) rather than recursion.

    * EXTENDED_ARG prefixes are folded into the argument of the instruction
      they precede, and that instruction is then handled according to its
      own opcode (only real jumps are followed).
    * Instruction starts are tracked separately from used bytes, so a jump
      into the argument byte of an already decoded instruction (overlapping
      instructions) is still decoded.
    * An unprefixed JUMP_FORWARD whose argument is 0, 1, 2 or 4, or whose
      target is another JUMP_FORWARD, is treated as junk: its own two bytes
      are dropped from ``used_set`` while its target is still followed.

    A path ends at RETURN_VALUE (only its opcode byte is recorded), at an
    already decoded instruction start, or when it leaves the code string.
    """
    size = len(code)
    visited = set()  # instruction start offsets already decoded
    pending = [now]
    while pending:
        pos = pending.pop()
        while 0 <= pos < size and pos not in visited:
            visited.add(pos)
            start = pos
            op = code[pos]
            used_set.add(pos)
            if op == RETURN_VALUE or pos + 1 >= size:
                break
            used_set.add(pos + 1)
            arg = code[pos + 1]
            # Fold any chain of EXTENDED_ARG prefixes into one argument.
            while op == EXTENDED_ARG and pos + 3 < size:
                pos += 2
                op = code[pos]
                arg = arg << 8 | code[pos + 1]
                used_set.add(pos)
                used_set.add(pos + 1)
            if op == RETURN_VALUE:
                break
            following = pos + 2
            if op == JUMP_ABSOLUTE:
                pos = arg
            elif op == JUMP_FORWARD:
                target = following + arg
                lands_on_jump = target < size and code[target] == JUMP_FORWARD
                # NOTE(review): heuristic kept from the original script; it
                # can misfire on a legitimate short jump whose skipped bytes
                # are reachable from elsewhere.
                if pos == start and (lands_on_jump or arg in (0, 1, 2, 4)):
                    used_set.discard(pos)
                    used_set.discard(pos + 1)
                pos = target
            else:
                if op in _ABSOLUTE_BRANCHES:
                    pending.append(arg)
                elif op in _RELATIVE_BRANCHES:
                    pending.append(following + arg)
                pos = following


def deconf(code):
    """Return ``code`` (bytes) with every unreachable byte replaced by NOP.

    The result has the same length as the input; an empty input yields
    ``b''``.
    """
    global used_set
    used_set = set()  # must be cleared for every code object
    cod = list(code)
    deconf_inner(cod, 0)
    return bytes(
        value if offset in used_set else NOP
        for offset, value in enumerate(cod)
    )


# Input file: first command-line argument when given, else the original
# default name.
filename = sys.argv[1] if len(sys.argv) > 1 else 'PYC.pyc'

if __name__ == "__main__":
    with open(filename, 'rb') as f:
        header = f.read(16)  # pyc header, copied unchanged to the output
        code = marshal.load(f)
    print(code.co_consts)
# Debug aid from the original script (disabled):
# print(dis.dis(deconf(code.co_consts[3].co_code)))


def _clean_code(co):
    """Return a copy of code object ``co`` with junk bytes replaced by NOP.

    Nested code objects in ``co.co_consts`` are cleaned recursively.  Only
    ``co_code`` and ``co_consts`` are replaced; every other field (including
    each nested object's own ``co_posonlyargcount`` / ``co_kwonlyargcount``
    and its line table) is carried over from ``co`` itself.  Junk bytes are
    overwritten in place, so offsets and the line table stay valid.
    """
    cleaned_consts = tuple(
        _clean_code(const) if hasattr(const, 'co_code') else const
        for const in co.co_consts
    )
    return co.replace(co_code=deconf(co.co_code), co_consts=cleaned_consts)


if __name__ == "__main__":
    mode = _clean_code(code)
    consts = list(mode.co_consts)
    # Write "<input>.mod": the original header followed by the cleaned code.
    with open(filename + ".mod", 'wb') as f:
        f.write(header)
        marshal.dump(mode, f)