我已经看到了大约十二个应用程序,它们能够使用我尝试提取的相同功能来实现此目的并且可以在反编译所有其他可执行文件时确认它们看起来完全相同,无法找出执行相同操作的途径。
任何指点都将不胜感激。
#1 楼
假设您有一些未知的目标文件。它的源代码是:
#include <stdio.h>
#include <string.h>
/* Number of entries in the substitution table (one per letter a-z). */
#define NCHAR 26
/* Fills keybuf from an integer key; main() calls it before encrypt(). */
void setkey(int i);
/* Substitutes the ASCII letters of s in place and returns s. */
char *encrypt(char *s);
/* Substitution table: written by setkey(), read by codepoint(). */
char keybuf[NCHAR];
/* Returns the keybuf entry stored at index c. */
char codepoint(char c);
/*
 * Demo driver: installs key 5, then prints the substituted form of
 * "Hello World" followed by a newline.
 */
int main(void) {
    char *secret;
    const char *ciphertext;

    setkey(5);
    secret = strdup("Hello World");
    /* encrypt() rewrites the heap copy in place and hands the pointer back. */
    ciphertext = encrypt(secret);
    printf("%s\n", ciphertext);
}
/*
 * Fill the substitution table from an integer key.
 *
 * Entry n of keybuf receives ((n + 1) * key) % NCHAR, computed with a
 * running sum so that no multiplication is needed.
 */
void setkey(int key) {
    int running = key;
    int slot = 0;

    while (slot < NCHAR) {
        keybuf[slot] = running % NCHAR;
        running += key;
        slot++;
    }
}
/*
 * Substitute every ASCII letter of s in place via codepoint().
 *
 * Lower-case and upper-case letters are mapped within their own case;
 * all other characters are left untouched. Returns the start of s.
 */
char *encrypt(char *s) {
    char *cursor;

    for (cursor = s; *cursor != '\0'; cursor++) {
        char ch = *cursor;

        if (ch >= 'a' && ch <= 'z') {
            *cursor = 'a' + codepoint(ch - 'a');
        } else if (ch >= 'A' && ch <= 'Z') {
            *cursor = 'A' + codepoint(ch - 'A');
        }
    }
    return s;
}
/* Look up the substitution for letter index c in keybuf. */
char codepoint(char c) {
    int index = c;

    return keybuf[index];
}
您唯一拥有的就是目标代码。作为参考,
objdump -d original
产生:
Disassembly of section .text:
0000000000400490 <_start>:
400490: 31 ed xor %ebp,%ebp
...
000000000040057d <main>:
40057d: 55 push %rbp
40057e: 48 89 e5 mov %rsp,%rbp
...
4005b2: c3 retq
00000000004005b3 <setkey>:
4005b3: 55 push %rbp
4005b4: 48 89 e5 mov %rsp,%rbp
...
4005ed: 48 98 cltq
4005ef: 88 90 60 10 60 00 mov %dl,0x601060(%rax)
4005f5: 8b 45 ec mov -0x14(%rbp),%eax
...
400606: c3 retq
0000000000400607 <encrypt>:
400607: 55 push %rbp
400608: 48 89 e5 mov %rsp,%rbp
...
400677: 89 c7 mov %eax,%edi
400679: e8 21 00 00 00 callq 40069f <codepoint>
40067e: 83 c0 41 add $0x41,%eax
$ objdump -s -j .text original | grep '^ 4' | cut -d' ' -f3-6 | tr -d ' ' | sed 's/../0x&,/g'
0x31,0xed,0x49,0x89,0xd1,0x5e,0x48,0x89,0xe2,0x48,0x83,0xe4,0xf0,0x50,0x54,0x49,
...
0x41,0x5e,0x41,0x5f,0xc3,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0xf3,0xc3,
...
40069e: c3 retq
000000000040069f <codepoint>:
40069f: 55 push %rbp
4006a0: 48 89 e5 mov %rsp,%rbp
...
4006ac: 48 98 cltq
4006ae: 0f b6 80 60 10 60 00 movzbl 0x601060(%rax),%eax
4006b5: 5d pop %rbp
4006b6: c3 retq
您想在程序中使用该代码。
为此,您并不需要真正理解或分解它,也不必使其可编译。
您只需要使用 IDA、调试器或您最喜欢的任何工具弄清楚以下几点:
代码中有趣的部分是从0x4005B3到0x4006B6的部分。
此代码在两个不同位置(0x4005ef 和 0x4006ae 处的指令)引用了位于 0x601060 的某个变量。(这两条指令的操作码分别占 2 个和 3 个字节,因此地址操作数分别位于 0x4005f1 和 0x4006b1。)要使用它,必须先调用 0x4005b3 处的函数并传入一个整数,然后调用 0x400607 处的函数并传入一个字符串,以获取加密后的字符串。
让我们将整个.text部分转换为字符数组,并准备将其嵌入到C代码中:
#include <stdio.h>
#include <string.h>
/*
 * Raw bytes of the original binary's .text section. In the original
 * program this section starts at 0x400490, so code[0] corresponds to that
 * address; setkey begins at 0x4005b3, encrypt at 0x400607 and codepoint at
 * 0x40069f. Two instructions in here carry the absolute address 0x601060 of
 * the original key table and are patched at run time.
 */
char code[]={
0x31,0xed,0x49,0x89,0xd1,0x5e,0x48,0x89,0xe2,0x48,0x83,0xe4,0xf0,0x50,0x54,0x49,
0xc7,0xc0,0x30,0x07,0x40,0x00,0x48,0xc7,0xc1,0xc0,0x06,0x40,0x00,0x48,0xc7,0xc7,
0x7d,0x05,0x40,0x00,0xe8,0xa7,0xff,0xff,0xff,0xf4,0x66,0x0f,0x1f,0x44,0x00,0x00,
0xb8,0x4f,0x10,0x60,0x00,0x55,0x48,0x2d,0x48,0x10,0x60,0x00,0x48,0x83,0xf8,0x0e,
0x48,0x89,0xe5,0x77,0x02,0x5d,0xc3,0xb8,0x00,0x00,0x00,0x00,0x48,0x85,0xc0,0x74,
0xf4,0x5d,0xbf,0x48,0x10,0x60,0x00,0xff,0xe0,0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0xb8,0x48,0x10,0x60,0x00,0x55,0x48,0x2d,0x48,0x10,0x60,0x00,0x48,0xc1,0xf8,0x03,
0x48,0x89,0xe5,0x48,0x89,0xc2,0x48,0xc1,0xea,0x3f,0x48,0x01,0xd0,0x48,0xd1,0xf8,
0x75,0x02,0x5d,0xc3,0xba,0x00,0x00,0x00,0x00,0x48,0x85,0xd2,0x74,0xf4,0x5d,0x48,
0x89,0xc6,0xbf,0x48,0x10,0x60,0x00,0xff,0xe2,0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0x80,0x3d,0x19,0x0b,0x20,0x00,0x00,0x75,0x11,0x55,0x48,0x89,0xe5,0xe8,0x7e,0xff,
0xff,0xff,0x5d,0xc6,0x05,0x06,0x0b,0x20,0x00,0x01,0xf3,0xc3,0x0f,0x1f,0x40,0x00,
0x48,0x83,0x3d,0xc8,0x08,0x20,0x00,0x00,0x74,0x1e,0xb8,0x00,0x00,0x00,0x00,0x48,
0x85,0xc0,0x74,0x14,0x55,0xbf,0x20,0x0e,0x60,0x00,0x48,0x89,0xe5,0xff,0xd0,0x5d,
0xe9,0x7b,0xff,0xff,0xff,0x0f,0x1f,0x00,0xe9,0x73,0xff,0xff,0xff,0x55,0x48,0x89,
0xe5,0x48,0x83,0xec,0x10,0xbf,0x05,0x00,0x00,0x00,0xe8,0x24,0x00,0x00,0x00,0xbf,
0x44,0x07,0x40,0x00,0xe8,0xe7,0xfe,0xff,0xff,0x48,0x89,0x45,0xf8,0x48,0x8b,0x45,
0xf8,0x48,0x89,0xc7,0xe8,0x5e,0x00,0x00,0x00,0x48,0x89,0xc7,0xe8,0x9f,0xfe,0xff,
0xff,0xc9,0xc3,0x55,0x48,0x89,0xe5,0x89,0x7d,0xec,0x8b,0x45,0xec,0x89,0x45,0xfc,
0xc7,0x45,0xf8,0x00,0x00,0x00,0x00,0xeb,0x36,0x8b,0x4d,0xfc,0xba,0x4f,0xec,0xc4,
0x4e,0x89,0xc8,0xf7,0xea,0xc1,0xfa,0x03,0x89,0xc8,0xc1,0xf8,0x1f,0x29,0xc2,0x89,
0xd0,0x6b,0xc0,0x1a,0x29,0xc1,0x89,0xc8,0x89,0xc2,0x8b,0x45,0xf8,0x48,0x98,0x88,
0x90,0x60,0x10,0x60,0x00,0x8b,0x45,0xec,0x01,0x45,0xfc,0x83,0x45,0xf8,0x01,0x83,
0x7d,0xf8,0x19,0x7e,0xc4,0x5d,0xc3,0x55,0x48,0x89,0xe5,0x48,0x83,0xec,0x20,0x48,
0x89,0x7d,0xe8,0x48,0x8b,0x45,0xe8,0x48,0x89,0x45,0xf8,0xeb,0x71,0x48,0x8b,0x45,
0xe8,0x0f,0xb6,0x00,0x3c,0x60,0x7e,0x2c,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,
0x7a,0x7f,0x21,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x83,0xe8,0x61,0x0f,0xbe,0xc0,
0x89,0xc7,0xe8,0x58,0x00,0x00,0x00,0x83,0xc0,0x61,0x89,0xc2,0x48,0x8b,0x45,0xe8,
0x88,0x10,0xeb,0x35,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,0x40,0x7e,0x2a,0x48,
0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,0x5a,0x7f,0x1f,0x48,0x8b,0x45,0xe8,0x0f,0xb6,
0x00,0x83,0xe8,0x41,0x0f,0xbe,0xc0,0x89,0xc7,0xe8,0x21,0x00,0x00,0x00,0x83,0xc0,
0x41,0x89,0xc2,0x48,0x8b,0x45,0xe8,0x88,0x10,0x48,0x83,0x45,0xe8,0x01,0x48,0x8b,
0x45,0xe8,0x0f,0xb6,0x00,0x84,0xc0,0x75,0x84,0x48,0x8b,0x45,0xf8,0xc9,0xc3,0x55,
0x48,0x89,0xe5,0x89,0xf8,0x88,0x45,0xfc,0x0f,0xbe,0x45,0xfc,0x48,0x98,0x0f,0xb6,
0x80,0x60,0x10,0x60,0x00,0x5d,0xc3,0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0x41,0x57,0x41,0x89,0xff,0x41,0x56,0x49,0x89,0xf6,0x41,0x55,0x49,0x89,0xd5,0x41,
0x54,0x4c,0x8d,0x25,0x38,0x07,0x20,0x00,0x55,0x48,0x8d,0x2d,0x38,0x07,0x20,0x00,
0x53,0x4c,0x29,0xe5,0x31,0xdb,0x48,0xc1,0xfd,0x03,0x48,0x83,0xec,0x08,0xe8,0x25,
0xfd,0xff,0xff,0x48,0x85,0xed,0x74,0x1e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0x4c,0x89,0xea,0x4c,0x89,0xf6,0x44,0x89,0xff,0x41,0xff,0x14,0xdc,0x48,0x83,0xc3,
0x01,0x48,0x39,0xeb,0x75,0xea,0x48,0x83,0xc4,0x08,0x5b,0x5d,0x41,0x5c,0x41,0x5d,
0x41,0x5e,0x41,0x5f,0xc3,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0xf3,0xc3,
};
// Stand-in for the data the embedded code addresses at 0x601060, which is
// not part of the copied bytes. Its true size is not known from the binary
// alone; the disassembly suggests 26 bytes are enough, so it is made much
// larger to be on the safe side.
char buffer[1000];
/*
 * Runs the setkey and encrypt routines embedded in code[].
 *
 * The two instructions that reference the original key table at 0x601060
 * are patched to point at buffer instead, then setkey(5) is called and two
 * sample strings are encrypted and printed, one per line.
 *
 * Returns 0 on success, 1 if buffer's address cannot be expressed in the
 * 32-bit operand being patched.
 */
int main(void) {
    /* Addresses read off the disassembly of the original binary. */
    enum {
        TEXT_START            = 0x400490, /* original address of code[0] */
        SETKEY_ENTRY          = 0x4005b3,
        ENCRYPT_ENTRY         = 0x400607,
        SETKEY_BUF_OPERAND    = 0x4005f1, /* address field of the store at 0x4005ef */
        CODEPOINT_BUF_OPERAND = 0x4006b1  /* address field of the load at 0x4006ae */
    };

    /*
     * Subtract the section base from each address first, so every pointer
     * computed here stays inside code[]. Writing code - TEXT_START + addr
     * would form an out-of-bounds intermediate pointer.
     */
    void (*f1)(int) = (void *)(code + (SETKEY_ENTRY - TEXT_START));
    char *(*f2)(char *) = (void *)(code + (ENCRYPT_ENTRY - TEXT_START));

    /*
     * The patched operand is only 32 bits wide and is sign-extended by the
     * CPU, so the buffer must live at an address that survives the round
     * trip through int.
     * NOTE(review): position-independent executables are typically loaded
     * above 4 GiB, in which case this check fails - confirm the link flags.
     */
    int buffer_addr = (int)(long)&buffer;
    if ((long)buffer_addr != (long)&buffer) {
        fprintf(stderr, "buffer address does not fit in the 32-bit operand being patched\n");
        return 1;
    }

    /* The operands sit at odd offsets; memcpy avoids a misaligned int store. */
    memcpy(code + (SETKEY_BUF_OPERAND - TEXT_START), &buffer_addr, sizeof buffer_addr);
    memcpy(code + (CODEPOINT_BUF_OPERAND - TEXT_START), &buffer_addr, sizeof buffer_addr);

    /* encrypt works in place, so writable local copies are sufficient. */
    char first[] = "Hello World";
    char second[] = "Some other String";

    (*f1)(5);
    printf("%s\n", (*f2)(first));
    printf("%s\n", (*f2)(second));
    return 0;
}
并编写一个新程序来使用它。
$ cc -zexecstack -g -o copy copy.c
$ ./copy
Oziix Lxmiu
Rxnz xwozm Rwmtsj
由于原始文本段始于 0x400490,而我们的函数始于 0x4005b3 和 0x400607,我们从 code 数组的开头计算出了新的偏移量。并且由于我们在这段代码中找到了两处对代码之外的缓冲区的引用,因此我们将它们打补丁以指向我们自己的缓冲区。请注意,对于 encrypt 到 codepoint 的调用,我们无需做任何处理,因为 call 指令使用的是相对偏移(调用距此处 0x21 字节的函数)。编译程序时,别忘了使数据/堆栈段可执行:
$ cc -zexecstack -g -o copy copy.c
当然,还有一些问题需要解决:
您必须找到可能从函数中调用的所有内容,并将其全部包含在您的代码中。
您必须在代码外找到对数据的所有引用,并相应地对指针进行修补。如果数据包含指针,也要对其进行修补。
如果代码调用任何库函数,则必须进行重定位。幸运的是,(ELF / PE)二进制文件的重定位表会告诉您需要重定位的位置。不过,如果 ABI 不同,您可能需要围绕这些调用编写包装函数。
如果您想花哨的话,请不要直接调用这些函数。在程序中嵌入仿真器引擎,并让仿真器执行代码。这可以帮助您检测是否缺少某些代码,或者在哪里引用了外部数据。显然,一旦它在仿真器上运行,您就可以将其留在最终程序中,甚至可以使您可以在新的PC程序中使用某些ARM例程。反之亦然。
您可以从以下位置下载文件:https://mega.co.nz/#!8dR0TZhA!Z4DdQ07JCUzV5nJiJ79PZhHbiKDu9QZEw10IXr7ssuI
评论
0x4005ef使用的缓冲区地址应该是0x4005f1而不是0x4005b1
–Scy
17年4月3日在15:13
#2 楼
唯一声称有这种可能的项目(需要大量的半自动化和手动工作)是McSema。请参阅参考资料和示例,这可能会很有用。
评论
会检查出来的...所以结果仍然是我可以针对其进行编译的二进制/可链接代码,而不是正确的反编译源代码?有什么技巧可以使代码跨平台?我能否简单地为链接到此单个提取二进制文件的不同平台/体系结构构建源代码?
–罗伯托·安德拉德(Roberto Andrade)
2014-12-4 14:42
如果您使用机器代码/汇编,则您的代码没有移植的可能。您必须自己转换指令或使用二进制转换器(例如:英特尔的Houdini,它将NEON转换为SSE)。或者,您将不得不改编反编译的代码,这听起来像是很多工作。
– Yaspr
2014年12月4日在17:08
@yaspr观看他们的演示。看起来非常令人印象深刻。
– w s
2014年12月4日在18:31
评论
好的,一种方法是将功能的反汇编代码保存在.s或.asm文件中,然后将应用程序编译/链接到这些文件。这有40%的机会可以使用,因为大多数反汇编程序不会生成可汇编/可用的代码(请注意错误和带有指令的混合数据)。也可以在手动编写的C函数中将反汇编代码用作内联汇编。这可能会使全局变量/函数的处理更加容易。