Cython二进制逆向系列(三)运算符
在开始前,先给出本文用到的py源代码
def test1(x, y):# 数学运算符a = x + yb = x - yc = x * yd = x / ye = x // yf = x % yg = x ** y# 位运算符h = x & yi = x | yj = x ^ yk = ~xl = x >> 4m = x << 2print(a, b, c, d, e, f, g, h, i, j, k, l, m)def test2(x, y):# in/not in 运算符if x in y:x = yelif x not in y:y = xprint(x, y)def test3(x, y):# ==运算符与逻辑运算符print(x == 0 and y == 0)print(y == 0 or x == 0)print(not x==0)if __name__ == '__main__':test1(1, 2)test2(1, 2)test3(1, 2)
在这篇文章里,我们会讨论Cython是如何处理运算符的(数学运算符、位运算符、in/not in 运算符、 ==运算符与逻辑运算符)。总的来说,其中大部分是调用虚拟机api来实现的。
数学运算符与位运算符
可以看得出来全是调用虚拟机的api
下面给出运算符与api的对应表(其实看名字大概都能猜出来):
符号 | 含义 | 函数名 |
---|---|---|
+ | 加 | PyNumber_Add |
- | 减 | PyNumber_Subtract |
* | 乘 | PyNumber_Multiply |
/ | 除 | __Pyx_PyNumber_Divide(Cython的宏,Python 3下展开为PyNumber_TrueDivide,ida中看到的是后者) |
// | 整除 | PyNumber_FloorDivide |
% | 取模 | PyNumber_Remainder |
** | 乘方 | PyNumber_Power |
& | 按位与 | PyNumber_And |
\| | 按位或 | PyNumber_Or |
^ | 按位异或 | PyNumber_Xor |
~ | 按位取非 | PyNumber_Invert |
>> | 右移 | PyNumber_Rshift |
<< | 左移 | PyNumber_Lshift |
这里单独看一下位移在ida中的体现
v24 = off_1800095B8[32];if ( *(_QWORD *)(v4 + 8) != PyLong_Type[0] ){v27 = PyNumber_Rshift(v4, off_1800095B8[32]);
LABEL_35:v4 = v27;goto LABEL_36;}v25 = *(_QWORD *)(v4 + 16);if ( v25 ){if ( ((v25 + 1) & 0xFFFFFFFFFFFFFFFDui64) != 0 ){v26 = v25 + 4;switch ( v26 ){case 2i64:v27 = PyLong_FromLongLong(-(__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,v26,v24,0x180000000ui64);break;case 6i64:v27 = PyLong_FromLongLong((__int64)(*(unsigned int *)(v4 + 24) | ((unsigned __int64)*(unsigned int *)(v4 + 28) << 30)) >> 4,v26,v24,0x180000000ui64);break;default:v27 = (*(__int64 (__fastcall **)(__int64, _QWORD *))(PyLong_Type[12] + 96i64))(v4, off_1800095B8[32]);break;}}else{v28 = -*(_DWORD *)(v4 + 24);if ( v25 >= 0 )v28 = *(_DWORD *)(v4 + 24);v27 = PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);}goto LABEL_35;}++*(_QWORD *)v4;
LABEL_36:if ( !v4 ){v12 = 2534i64;v13 = 13i64;goto LABEL_58;}v10 = (_QWORD *)v4;
off_1800095B8[32]
中储存的就是4。这里python为了安全性,还对整数的处理做了安全措施,我们可以看到在else后面PyLong_FromLong((unsigned int)(v28 >> 4), v25, v24, 0x180000000ui64);
这里也可以看到是右移多少。
问题是,在上面处理整数的分支里好像没看到表格中的PyNumber_Rshift
?因为py源代码中位移的位数是立即数,所以当x是整数时会直接转换为c语言的位移运算符(只有x不是整数时,才会回退到开头那个PyNumber_Rshift调用)。但是如果是x>>y
这样的两个都是变量,就会调用api PyNumber_Rshift
in/not in 运算符
/* "test.py":21* def test2(x, y):* # in/not in* if x in y: # <<<<<<<<<<<<<<* x = y* elif x not in y:*/__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_EQ)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 21, __pyx_L1_error)。。。。。。/* "test.py":23* if x in y:* x = y* elif x not in y: # <<<<<<<<<<<<<<* y = x* print(x, y)*/__pyx_t_1 = (__Pyx_PySequence_ContainsTF(__pyx_v_x, __pyx_v_y, Py_NE)); if (unlikely((__pyx_t_1 < 0))) __PYX_ERR(0, 23, __pyx_L1_error)
这里涉及到一些条件语句的转换,不过没关系,照样能看懂
在c代码中可以看到无论是in还是 not in 调用的都是函数__Pyx_PySequence_ContainsTF
。其前两个参数是前后两个参与运算的变量,而第三个参数Py_EQ
/Py_NE
则决定当前运算到底是in还是 not in
不幸的是,无论是in还是not in ,在ida中都是PySequence_Contains
,具体是哪个要结合上下文分析。比如这里v5 = PySequence_Contains(a3)
判断的是 a3
中是否包含 a2
。如果 v5 == 1
,表示 a2
在 a3
中,则进入接下来的操作(++*v3
和调整 v4
和 v3
的指向)。
而下面那个v9 = PySequence_Contains(v3)
判断的是 v3
中是否包含 v4
(即 v4 not in v3
)。这里,如果 v9 == 0
,表示 v4
不在 v3
中,符合 not in
的语义。因为当 v9 == 0
时表示 v4
不在 v3
中。
说人话就是看后续是对PySequence_Contains
的返回值和谁比较(1或者0)。
==运算符与逻辑运算符
逻辑与运算符的处理
/* "test.py":30* def test3(x, y):* # ==* print(x == 0 and y == 0) # <<<<<<<<<<<<<<* print(y == 0 or x == 0)* print(not x==0)*/__pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)__Pyx_GOTREF(__pyx_t_2);__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 30, __pyx_L1_error)if (__pyx_t_3) {__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;} else {__Pyx_INCREF(__pyx_t_2);__pyx_t_1 = __pyx_t_2;__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;goto __pyx_L3_bool_binop_done;}__pyx_t_2 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 30, __pyx_L1_error)__Pyx_GOTREF(__pyx_t_2);__Pyx_INCREF(__pyx_t_2);__pyx_t_1 = __pyx_t_2;__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;__pyx_L3_bool_binop_done:;
__Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0)
: 这行代码将 x == 0
的比较操作转换为 C 语言函数。它检查 x
是否等于 0
。(猜测不同类型的==有对应的函数,暂未验证)。
ida中比较==0的部分,看得出来它把变量分为int float 和其他三种情况,除了整数和浮点,一概用PyObject_RichCompare
比较。
在 C 代码中,and
逻辑运算符的处理通常是短路的。即,如果第一个条件为 False
,那么第二个条件不会被计算。在这里也一样:编译后的代码并不总是执行 y == 0
的检查,只有在 x == 0
为 True
时才会检查 y == 0
。
然后__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0)
检查 y == 0
,并根据结果将 __pyx_t_2
设置为布尔值。
ida中对and的处理也差不多。看着有点恶心,全是if else条件分支和各种goto
逻辑或运算符的处理
/* "test.py":31* # ==* print(x == 0 and y == 0)* print(y == 0 or x == 0) # <<<<<<<<<<<<<<* print(not x==0)* */__pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)__Pyx_GOTREF(__pyx_t_1);__pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 31, __pyx_L1_error)if (!__pyx_t_3) {__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;} else {__Pyx_INCREF(__pyx_t_1);__pyx_t_2 = __pyx_t_1;__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;goto __pyx_L5_bool_binop_done;}__pyx_t_1 = __Pyx_PyInt_EqObjC(__pyx_v_x, __pyx_int_0, 0, 0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 31, __pyx_L1_error)__Pyx_GOTREF(__pyx_t_1);__Pyx_INCREF(__pyx_t_1);__pyx_t_2 = __pyx_t_1;__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;__pyx_L5_bool_binop_done:;
前面都是在处理== :__Pyx_PyInt_EqObjC(__pyx_v_y, __pyx_int_0, 0, 0)
: 检查 y == 0
,即比较 y
是否等于 0
。__Pyx_PyObject_IsTrue(__pyx_t_1)
: 将 __pyx_t_1
转换为布尔值。如果 y == 0
(即 __pyx_t_3
为 True
),就直接跳到 __pyx_L5_bool_binop_done
,并将 __pyx_t_1
(存储 y == 0
结果)传递给下一个操作。
在执行 or
运算时,短路操作符同样会起作用:如果 y == 0
为 True
,则 x == 0
的比较不会被执行,结果会直接为 True
。__pyx_t_2
保存了 y == 0
或 x == 0
的结果,它将作为最终的结果传递给 print
函数。
逻辑非运算符的处理
/* "test.py":32* print(x == 0 and y == 0)* print(y == 0 or x == 0)* print(not x==0) # <<<<<<<<<<<<<<* * */__pyx_t_3 = (__Pyx_PyInt_BoolEqObjC(__pyx_v_x, __pyx_int_0, 0, 0)); if (unlikely((__pyx_t_3 < 0))) __PYX_ERR(0, 32, __pyx_L1_error)__pyx_t_1 = __Pyx_PyBool_FromLong((!__pyx_t_3)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 32, __pyx_L1_error)__Pyx_GOTREF(__pyx_t_1);__pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_builtin_print, __pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 32, __pyx_L1_error)__Pyx_GOTREF(__pyx_t_2);__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;__Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
!__pyx_t_3
:这行代码计算 not x == 0
。由于 __pyx_t_3
是 x == 0
的布尔值,!__pyx_t_3
就是其逻辑取反。__Pyx_PyBool_FromLong((!__pyx_t_3))
将 !__pyx_t_3
转换为 Python 的布尔对象。如果 !__pyx_t_3
为 0
,则返回 False
;如果 !__pyx_t_3
为 1
,则返回 True
。
如果以后逆向在这里出题,考察逻辑运算符,那就认命吧,这里反编译出的代码很绕。
下面粘上test3函数的反编译代码。
// write access to const memory has been detected, the output may be wrong!
__int64 __fastcall sub_180001E30(__int64 a1, __int64 a2, __int64 a3)
{v5 = *((_QWORD *)off_18000B688 + 35);if ( a2 == v5 )goto LABEL_2;v7 = *(_QWORD *)(a2 + 8);if ( v7 == PyLong_Type ){if ( *(_QWORD *)(a2 + 16) ){
LABEL_5:v6 = (_QWORD *)++Py_FalseStruct;goto LABEL_10;}
LABEL_2:v6 = (_QWORD *)++Py_TrueStruct;goto LABEL_10;}if ( v7 == PyFloat_Type ){if ( *(double *)(a2 + 16) != 0.0 )goto LABEL_5;goto LABEL_2;}v6 = (_QWORD *)PyObject_RichCompare(a2, v5, 2LL);
LABEL_10:if ( !v6 ){v8 = 30;v9 = 3136;
LABEL_75:sub_180005F50("test.test3", v9, v8, (__int64)"test.py");return 0LL;}IsTrue = v6 == (_QWORD *)Py_TrueStruct;v11 = v6 == (_QWORD *)Py_NoneStruct;v12 = IsTrue | v11 | (unsigned int)(v6 == (_QWORD *)Py_FalseStruct);if ( !(IsTrue | (v11 || v6 == (_QWORD *)Py_FalseStruct)) )IsTrue = PyObject_IsTrue(v6);if ( IsTrue < 0 ){v8 = 30;v9 = 3138;goto LABEL_73;}v13 = *v6;if ( !IsTrue ){*v6 = v13;v16 = v6;if ( v13 )goto LABEL_26;v18 = v6;goto LABEL_25;}v14 = v13 - 1;*v6 = v14;if ( !v14 )Py_Dealloc(v6);v15 = (_QWORD *)sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));v16 = v15;if ( !v15 ){v8 = 30;v9 = 3147;goto LABEL_75;}v17 = *v15;*v16 = v17;if ( !v17 ){v18 = v16;
LABEL_25:Py_Dealloc(v18);}
LABEL_26:v6 = v16;v19 = (_QWORD *)sub_1800048D0(v12, v16);if ( !v19 ){v8 = 30;v9 = 3153;if ( !v6 )goto LABEL_75;
LABEL_73:v20 = (*v6)-- == 1LL;if ( v20 )Py_Dealloc(v6);goto LABEL_75;}v20 = (*v16)-- == 1LL;if ( v20 )Py_Dealloc(v16);v20 = (*v19)-- == 1LL;if ( v20 )Py_Dealloc(v19);v21 = sub_180004780(a3, *((_QWORD *)off_18000B688 + 35));v6 = (_QWORD *)v21;if ( !v21 ){v8 = 31;v9 = 3165;goto LABEL_75;}v22 = sub_180006570(v21);v23 = (unsigned int)v22;if ( v22 < 0 ){v8 = 31;v9 = 3167;goto LABEL_73;}v24 = *v6;if ( !(_DWORD)v23 ){v25 = v24 - 1;*v6 = v25;if ( !v25 )Py_Dealloc(v6);v26 = (_QWORD *)sub_180004780(a2, *((_QWORD *)off_18000B688 + 35));v6 = v26;if ( !v26 ){v8 = 31;v9 = 3176;goto LABEL_75;}v24 = *v26;}*v6 = v24;if ( !v24 )Py_Dealloc(v6);v28 = (_QWORD *)sub_1800048D0(v23, v6);if ( !v28 ){v8 = 31;v9 = 3182;if ( !v6 )goto LABEL_75;goto LABEL_73;}v20 = (*v6)-- == 1LL;if ( v20 )Py_Dealloc(v6);v20 = (*v28)-- == 1LL;if ( v20 )Py_Dealloc(v28);v29 = *((_QWORD *)off_18000B688 + 35);if ( a2 == v29 )goto LABEL_68;v30 = *(_QWORD *)(a2 + 8);if ( v30 == PyLong_Type ){v31 = *(_QWORD *)(a2 + 16) == 0LL;}else if ( v30 == PyFloat_Type ){if ( *(double *)(a2 + 16) == 0.0 )goto LABEL_68;v31 = 0;}else{v32 = PyObject_RichCompare(a2, v29, 2LL);v33 = (_QWORD *)v32;if ( v32 ){v31 = v32 == Py_TrueStruct;v34 = v32 == Py_NoneStruct;v27 = v31 | v34 | (unsigned int)(v33 == (_QWORD *)Py_FalseStruct);if ( !(v31 | (v34 || v33 == (_QWORD *)Py_FalseStruct)) )v31 = PyObject_IsTrue(v33);v20 = (*v33)-- == 1LL;if ( v20 )Py_Dealloc(v33);}else{v31 = -1;}}if ( v31 < 0 ){v8 = 32;v9 = 3194;goto LABEL_75;}if ( !v31 ){v6 = (_QWORD *)++Py_TrueStruct;goto LABEL_69;}
LABEL_68:v6 = (_QWORD *)++Py_FalseStruct;
LABEL_69:if ( !v6 ){v8 = 32;v9 = 3195;goto LABEL_75;}v35 = (_QWORD *)sub_1800048D0(v27, v6);if ( !v35 ){v8 = 32;v9 = 3197;goto LABEL_73;}v20 = (*v6)-- == 1LL;if ( v20 )Py_Dealloc(v6);v20 = (*v35)-- == 1LL;if ( v20 )Py_Dealloc(v35);return Py_NoneStruct++;
}