为完整起见，我的 std::string 数据类型定义（导出）：
struct std_string {
char * data;
char * field_1;
ulonglong size;
ulonglong capacity;
};
反汇编:
**************************************************************
* FUNCTION *
**************************************************************
std_string * __fastcall std_string_operator+(std_string
std_string * RAX:8 <RETURN>
std_string * RCX:8 thisOut
undefined8 RDX:8 thisIn
char * R8:8 stringIn XREF[1]: 140106dcc(W)
longlong R8:8 size XREF[1]: 140106dcc(W)
undefined8 RAX:8 thisIn_ XREF[1]: 140106de6(W)
std_string_operator+
140106dc0 40 53 PUSH RBX
140106dc2 48 83 ec 20 SUB RSP,0x20
140106dc6 49 8b c0 MOV RAX,stringIn
140106dc9 4c 8b ca MOV R9,thisIn
140106dcc 49 c7 c0 MOV size,-0x1
ff ff ff ff
140106dd3 48 8b d9 MOV RBX,thisOut
LAB_140106dd6 XREF[1]: 140106dde(j)
140106dd6 49 ff c0 INC size
140106dd9 42 80 3c CMP byte ptr [RAX + size*0x1],0x0
00 00
140106dde 75 f6 JNZ LAB_140106dd6
140106de0 48 8b d0 MOV thisIn,RAX
140106de3 49 8b c9 MOV thisOut,R9
140106de6 e8 75 fe CALL std_string_append std_string * std_string_append(s
ff ff
140106deb 33 c9 XOR thisOut,thisOut
140106ded 48 89 4b 10 MOV qword ptr [RBX + 0x10],thisOut
140106df1 48 89 4b 18 MOV qword ptr [RBX + 0x18],thisOut
140106df5 0f 10 00 MOVUPS XMM0,xmmword ptr [thisIn_]
140106df8 0f 11 03 MOVUPS xmmword ptr [RBX],XMM0
140106dfb 0f 10 48 10 MOVUPS XMM1,xmmword ptr [thisIn_ + 0x10]
140106dff 0f 11 4b 10 MOVUPS xmmword ptr [RBX + 0x10],XMM1
140106e03 48 89 48 10 MOV qword ptr [thisIn_ + 0x10],thisOut
140106e07 48 c7 40 MOV qword ptr [thisIn_ + 0x18],0xf
18 0f 00
00 00
140106e0f 88 08 MOV byte ptr [thisIn_],thisOut
140106e11 48 8b c3 MOV thisIn_,RBX
140106e14 48 83 c4 20 ADD RSP,0x20
140106e18 5b POP RBX
140106e19 c3 RET
反编译:
如果反编译结果是逐个复制四个 8 字节的字段，或者（以某种形式表示的）两次 128 位复制或一次 256 位复制，我都能理解。我猜上面这些 4 字节的块是 Ghidra 内部表示 MOVUPS 的方式，但这对我似乎没有太大帮助。这种（通过 SSE 进行的）复制在各处都相当频繁地出现，因此每次都多出 16 行噪音是很烦人的。
memset 中使用的 PUNPCKLBW（这里通过复制 XMM0 的低 8 个字节来填充它，实际上是把要设置的单个字节广播到 XMM0 的全部 16 个字节）会膨胀成几十行乱码（我相信它忠实地模拟了该指令的效果，但这并没有帮助）：
反汇编:
**************************************************************
* FUNCTION *
**************************************************************
longlong * __fastcall memset(void * location, byte byteT
longlong * RAX:8 <RETURN>
void * RCX:8 location
byte DL:1 byteToSet XREF[1]: 1411960a8(W)
ulonglong R8:8 count
undefined8 R9:8 inputByteRepeated8 XREF[1]: 1411960a0(W)
undefined2 DX:2 inputByteRepeated2 XREF[1]: 1411960a8(W)
memset XREF[518]: [...]
141196090 4c 8b d9 MOV R11,location
141196093 0f b6 d2 MOVZX EDX,DL
141196096 49 b9 01 MOV R9,0x101010101010101
01 01 01
01 01 01 01
1411960a0 4c 0f af ca IMUL R9,RDX
1411960a4 49 83 f8 10 CMP R8,0x10
1411960a8 0f 86 f2 JBE LAB_1411961a0
00 00 00
1411960ae 66 49 0f MOVQ XMM0,R9
6e c1
1411960b3 66 0f 60 c0 PUNPCKLBW XMM0,XMM0
[...]
std_string * std_string_operator+(std_string *thisOut,std_string *thisIn,char *stringIn)
{
undefined4 uVar1;
undefined4 uVar2;
undefined4 uVar3;
std_string *thisIn_;
longlong size;
size = -1;
do {
size = size + 1;
} while (stringIn[size] != '\0');
thisIn_ = std_string_append(thisIn,stringIn,size);
thisOut->size = 0;
thisOut->capacity = 0;
uVar1 = *(undefined4 *)((longlong)&thisIn_->data + 4);
uVar2 = *(undefined4 *)&thisIn_->field_1;
uVar3 = *(undefined4 *)((longlong)&thisIn_->field_1 + 4);
*(undefined4 *)&thisOut->data = *(undefined4 *)&thisIn_->data;
*(undefined4 *)((longlong)&thisOut->data + 4) = uVar1;
*(undefined4 *)&thisOut->field_1 = uVar2;
*(undefined4 *)((longlong)&thisOut->field_1 + 4) = uVar3;
uVar1 = *(undefined4 *)((longlong)&thisIn_->size + 4);
uVar2 = *(undefined4 *)&thisIn_->capacity;
uVar3 = *(undefined4 *)((longlong)&thisIn_->capacity + 4);
*(undefined4 *)&thisOut->size = *(undefined4 *)&thisIn_->size;
*(undefined4 *)((longlong)&thisOut->size + 4) = uVar1;
*(undefined4 *)&thisOut->capacity = uVar2;
*(undefined4 *)((longlong)&thisOut->capacity + 4) = uVar3;
thisIn_->size = 0;
thisIn_->capacity = 0xf;
*(undefined *)&thisIn_->data = 0;
return thisOut;
}
反编译:
longlong * memset(void *location,byte byteToSet,ulonglong count)
{
// [...]
ushort inputByteRepeated2;
ulonglong inputByteRepeated8;
undefined4 uVar5;
undefined4 uVar7;
undefined4 uVar8;
undefined auVar6 [13];
inputByteRepeated8 = (ulonglong)byteToSet * 0x101010101010101;
inputByteRepeated2 = (ushort)inputByteRepeated8;
_inputByteRepeated2 = (uint)inputByteRepeated8;
if (count < 0x11) {
// [...]
}
auVar6[6] = SUB141(ZEXT814(inputByteRepeated8) >> 0x30,0);
auVar6 = ZEXT813(inputByteRepeated8);
register0x0000120c =
SUB164(CONCAT313(SUB163(CONCAT214(SUB162(CONCAT115(SUB161(ZEXT816(inputByteRepeated8) >> 0x38
,0),
CONCAT114(SUB151(ZEXT815(
inputByteRepeated8) >> 0x38,0),
ZEXT814(inputByteRepeated8))) >> 0x70,0),
CONCAT113(auVar6[6],auVar6)) >> 0x68,0),
CONCAT112(auVar6[6],ZEXT812(inputByteRepeated8))) >> 0x60,0);
_auVar6 = CONCAT79(SUB167(CONCAT610(SUB166(CONCAT511(SUB165(CONCAT412(register0x0000120c,
CONCAT111(SUB131(auVar6 >>
0x28,0),ZEXT811(inputByteRepeated8))) >> 0x58,0),
CONCAT110(SUB121(ZEXT812(inputByteRepeated8) >>
0x28,0),
(unkuint10)inputByteRepeated8)) >> 0x50,
0),
CONCAT19(SUB131(auVar6 >> 0x20,0),(unkuint9)inputByteRepeated8
)) >> 0x48,0),
(unkuint9)inputByteRepeated8 & 0xffffffffffffffff | 0 << 0x40);
register0x00001208 = SUB168(_auVar6 >> 0x40,0);
_auVar6 = CONCAT115(SUB1611(CONCAT106(SUB1610(CONCAT97(SUB169(CONCAT88(register0x00001208,
(inputByteRepeated8 >> 0x18
) << 0x38) >> 0x38,0),
((uint7)inputByteRepeated8 >> 0x18) << 0x30
) >> 0x30,0),
((uint6)inputByteRepeated8 >> 0x10) << 0x28) >> 0x28,0),
((uint5)inputByteRepeated8 >> 0x10) << 0x20);
_auVar6 = CONCAT142(SUB1614(CONCAT133(SUB1613(CONCAT124(SUB1612(_auVar6 >> 0x20,0),
(_inputByteRepeated2 >> 8) << 0x18) >>
0x18,0),((uint3)inputByteRepeated8 >> 8) << 0x10) >>
0x10,0),inputByteRepeated2 & 0xff | inputByteRepeated2 << 8);
uVar7 = SUB164(_auVar6 >> 0x20,0);
uVar5 = SUB164(_auVar6,0);
uVar8 = SUB164(_auVar6 >> 0x40,0);
// [...]
然后，尽管代码只是执行 MOVAPS xmmword ptr [location],XMM0 或类似操作，其余的反编译结果也都使用这些笨拙的 4 字节块。结果我花了好一阵子才把整个函数识别为 memset
！[…]（从一开始）？如果相应的标签已经存在，我会用
memcpy
标记此问题；但由于我对这个社区还不够了解，所以不愿意自己创建标签。当然：欢迎提出改进建议！
#1 楼
解决此问题的一种方法是使用函数识别（Function ID）功能，它可以自动检测出这类函数，并相应地将其重命名为 memcpy。问题在于，与 IDA 不同，Ghidra 并没有附带丰富的签名库；不过同样与 IDA 不同的是，它至少看起来可以生成自己的签名 [0]。您可以查看 https://blog.threatrack.de/2019/09/20/ghidra-fid-generator 以及相关的 GitHub 存储库，其中包含用于生成签名的代码和已生成的签名，看看它们是否适合您的情况。
[0] 至少我个人从未弄清楚在 IDA 中该如何做到这一点。
评论
感谢您对改进工作流程的建议——确实，我也在想将来还要多少次去逆向标准库函数。不过我更好奇的是如何改善反编译结果本身。例如，在试图弄清某个较大函数的作用时，字符串移动产生的噪声确实很分散注意力，对此有什么办法吗？
– NurTuring
5月12日19:45
您指的是被内联的显式 memcpy，还是 C++ 初始化的各种花招？对于前者，有一项功能可以把函数的某一部分声明为被内联进来的另一个函数（我不太记得具体怎么做了，这可能值得单独提一个问题）。对于后者，您可能需要单独的脚本或插件。在这种情况下，您能否编辑问题，补充一个反编译代码示例以及理想的结果代码（也可以附上实际源代码）？
–弗洛里安·马金(Florian Magin)
5月13日9:42