我想从一个针对特定平台和体系结构编译的二进制文件中提取一系列函数,并把它们包含到另一个独立的程序中,以便在新编译的程序中调用同样的功能。除了先反汇编/反编译、再重新汇编/重新编译之外,还有哪些选择?我要处理的代码经过了混淆,很难靠人工去理解并重写/复制。

我见过大约十几个应用程序都能做到这一点,它们使用的正是我想要提取的那些函数;反编译这些可执行文件后,我可以确认其中的这些函数看起来完全相同,但我仍然找不到实现同样效果的方法。

任何指点都将不胜感激。

评论

好的,一种方法是将功能的反汇编代码保存在.s或.asm文件中,然后将应用程序编译/链接到这些文件。这有40%的机会可以使用,因为大多数反汇编程序不会生成可汇编/可用的代码(请注意错误和带有指令的混合数据)。

也可以在手动编写的C函数中将反汇编代码用作内联汇编。这可能会使全局变量/函数的处理更加容易。

#1 楼

假设您有一些未知的目标文件。它的源代码是

#include <stdio.h>
#include <string.h>

// Number of entries in the substitution table (one per letter of the alphabet).
#define NCHAR   26

// Fills keybuf from an integer key.
void setkey(int i);
// Substitutes the letters of s in place and returns s.
char *encrypt(char *s);
// Substitution table: written by setkey(), read by codepoint().
char keybuf[NCHAR];
// Returns the keybuf entry at index c.
char codepoint(char c);

/*
 * Demo driver: builds the substitution table with key 5, then encrypts a
 * heap copy of "Hello World" in place and prints the result.
 */
int main(void) {
    char *secret;
    setkey(5);
    // encrypt() writes through its argument, so work on a writable copy of the literal.
    secret=strdup("Hello World");
    // Note: the strdup() result is neither checked for NULL nor freed.
    printf("%s\n", encrypt(secret));
}

/*
 * Fills keybuf with successive multiples of key reduced modulo NCHAR:
 * keybuf[i] becomes ((i + 1) * key) % NCHAR (barring int overflow of the
 * running sum).
 */
void setkey(int key) {
    int i, val;
    val=key;
    for (i=0; i<NCHAR; i++) {
        keybuf[i]=val%NCHAR;
        // Advance to the next multiple of key.
        val+=key;
    }
}

/*
 * Substitutes every ASCII letter of the NUL-terminated string s in place.
 * Lowercase and uppercase letters are mapped separately through codepoint(),
 * keeping their case; all other characters are left untouched.
 * Returns the original pointer s.
 */
char *encrypt(char *s) {
    // Remember the start of the string; s itself is advanced below.
    char *t=s;
    while (*s) {
        if (*s>='a' && *s<='z') {
            // Look up the letter's 0-based alphabet index, then rebase to 'a'.
            *s='a'+codepoint(*s-'a');
        } else if (*s>='A' && *s<='Z') {
            // Same lookup for uppercase, rebased to 'A'.
            *s='A'+codepoint(*s-'A');
        }
        s++;
    }
    return t;
}

/*
 * Returns the substitution table entry for index c.
 * No bounds check is performed; encrypt() only passes values 0..25.
 */
char codepoint(char c) {
    return keybuf[c];
}


您唯一拥有的就是目标代码。作为参考,objdump -d original产生

Disassembly of section .text:

0000000000400490 <_start>:
  400490:   31 ed                   xor    %ebp,%ebp
...
000000000040057d <main>:
  40057d:   55                      push   %rbp
  40057e:   48 89 e5                mov    %rsp,%rbp
...
  4005b2:   c3                      retq   

00000000004005b3 <setkey>:
  4005b3:   55                      push   %rbp
  4005b4:   48 89 e5                mov    %rsp,%rbp
...
  4005ed:   48 98                   cltq   
  4005ef:   88 90 60 10 60 00       mov    %dl,0x601060(%rax)
  4005f5:   8b 45 ec                mov    -0x14(%rbp),%eax
...
  400606:   c3                      retq   

0000000000400607 <encrypt>:
  400607:   55                      push   %rbp
  400608:   48 89 e5                mov    %rsp,%rbp
...
  400677:   89 c7                   mov    %eax,%edi
  400679:   e8 21 00 00 00          callq  40069f <codepoint>
  40067e:   83 c0 41                add    $0x41,%eax
...
  40069e:   c3                      retq

000000000040069f <codepoint>:
  40069f:   55                      push   %rbp
  4006a0:   48 89 e5                mov    %rsp,%rbp
...
  4006ac:   48 98                   cltq
  4006ae:   0f b6 80 60 10 60 00    movzbl 0x601060(%rax),%eax
  4006b5:   5d                      pop    %rbp
  4006b6:   c3                      retq

.text节的原始字节可以用下面的命令导出为十六进制列表:

$ objdump -s -j .text original | grep '^ 4' | cut -d' ' -f3-6 | tr -d ' ' | sed 's/../0x&,/g'
0x31,0xed,0x49,0x89,0xd1,0x5e,0x48,0x89,0xe2,0x48,0x83,0xe4,0xf0,0x50,0x54,0x49,
...
0x41,0x5e,0x41,0x5f,0xc3,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0xf3,0xc3,


您想在程序中使用该代码。

为此,您并不需要真正理解或分解它,也不必使其可编译。

您只需借助IDA、调试器或您喜欢的其他工具弄清以下几点:


代码中有趣的部分是从0x4005B3到0x4006B6的部分。
此代码在0x4005ef和0x4006ae两处引用了位于0x601060的变量(这两条指令的操作码分别占2个和3个字节,因此地址本身分别存放在0x4005f1和0x4006b1)。要使用它,必须先以一个整数为参数调用0x4005b3处的函数,然后以一个字符串为参数调用0x400607处的函数,以获得加密后的字符串。

让我们将整个.text部分转换为字符数组,并准备将其嵌入到C代码中:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Verbatim copy of the original program's .text section.  code[0] is the
 * byte that was loaded at address 0x400490, so the byte from original
 * address A is code[A - 0x400490].
 *
 * The element type is unsigned char because many bytes are above 0x7f and
 * would not be representable in a signed char.
 */
unsigned char code[]={
0x31,0xed,0x49,0x89,0xd1,0x5e,0x48,0x89,0xe2,0x48,0x83,0xe4,0xf0,0x50,0x54,0x49,
0xc7,0xc0,0x30,0x07,0x40,0x00,0x48,0xc7,0xc1,0xc0,0x06,0x40,0x00,0x48,0xc7,0xc7,
0x7d,0x05,0x40,0x00,0xe8,0xa7,0xff,0xff,0xff,0xf4,0x66,0x0f,0x1f,0x44,0x00,0x00,
0xb8,0x4f,0x10,0x60,0x00,0x55,0x48,0x2d,0x48,0x10,0x60,0x00,0x48,0x83,0xf8,0x0e,
0x48,0x89,0xe5,0x77,0x02,0x5d,0xc3,0xb8,0x00,0x00,0x00,0x00,0x48,0x85,0xc0,0x74,
0xf4,0x5d,0xbf,0x48,0x10,0x60,0x00,0xff,0xe0,0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0xb8,0x48,0x10,0x60,0x00,0x55,0x48,0x2d,0x48,0x10,0x60,0x00,0x48,0xc1,0xf8,0x03,
0x48,0x89,0xe5,0x48,0x89,0xc2,0x48,0xc1,0xea,0x3f,0x48,0x01,0xd0,0x48,0xd1,0xf8,
0x75,0x02,0x5d,0xc3,0xba,0x00,0x00,0x00,0x00,0x48,0x85,0xd2,0x74,0xf4,0x5d,0x48,
0x89,0xc6,0xbf,0x48,0x10,0x60,0x00,0xff,0xe2,0x0f,0x1f,0x80,0x00,0x00,0x00,0x00,
0x80,0x3d,0x19,0x0b,0x20,0x00,0x00,0x75,0x11,0x55,0x48,0x89,0xe5,0xe8,0x7e,0xff,
0xff,0xff,0x5d,0xc6,0x05,0x06,0x0b,0x20,0x00,0x01,0xf3,0xc3,0x0f,0x1f,0x40,0x00,
0x48,0x83,0x3d,0xc8,0x08,0x20,0x00,0x00,0x74,0x1e,0xb8,0x00,0x00,0x00,0x00,0x48,
0x85,0xc0,0x74,0x14,0x55,0xbf,0x20,0x0e,0x60,0x00,0x48,0x89,0xe5,0xff,0xd0,0x5d,
0xe9,0x7b,0xff,0xff,0xff,0x0f,0x1f,0x00,0xe9,0x73,0xff,0xff,0xff,0x55,0x48,0x89,
0xe5,0x48,0x83,0xec,0x10,0xbf,0x05,0x00,0x00,0x00,0xe8,0x24,0x00,0x00,0x00,0xbf,
0x44,0x07,0x40,0x00,0xe8,0xe7,0xfe,0xff,0xff,0x48,0x89,0x45,0xf8,0x48,0x8b,0x45,
0xf8,0x48,0x89,0xc7,0xe8,0x5e,0x00,0x00,0x00,0x48,0x89,0xc7,0xe8,0x9f,0xfe,0xff,
0xff,0xc9,0xc3,0x55,0x48,0x89,0xe5,0x89,0x7d,0xec,0x8b,0x45,0xec,0x89,0x45,0xfc,
0xc7,0x45,0xf8,0x00,0x00,0x00,0x00,0xeb,0x36,0x8b,0x4d,0xfc,0xba,0x4f,0xec,0xc4,
0x4e,0x89,0xc8,0xf7,0xea,0xc1,0xfa,0x03,0x89,0xc8,0xc1,0xf8,0x1f,0x29,0xc2,0x89,
0xd0,0x6b,0xc0,0x1a,0x29,0xc1,0x89,0xc8,0x89,0xc2,0x8b,0x45,0xf8,0x48,0x98,0x88,
0x90,0x60,0x10,0x60,0x00,0x8b,0x45,0xec,0x01,0x45,0xfc,0x83,0x45,0xf8,0x01,0x83,
0x7d,0xf8,0x19,0x7e,0xc4,0x5d,0xc3,0x55,0x48,0x89,0xe5,0x48,0x83,0xec,0x20,0x48,
0x89,0x7d,0xe8,0x48,0x8b,0x45,0xe8,0x48,0x89,0x45,0xf8,0xeb,0x71,0x48,0x8b,0x45,
0xe8,0x0f,0xb6,0x00,0x3c,0x60,0x7e,0x2c,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,
0x7a,0x7f,0x21,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x83,0xe8,0x61,0x0f,0xbe,0xc0,
0x89,0xc7,0xe8,0x58,0x00,0x00,0x00,0x83,0xc0,0x61,0x89,0xc2,0x48,0x8b,0x45,0xe8,
0x88,0x10,0xeb,0x35,0x48,0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,0x40,0x7e,0x2a,0x48,
0x8b,0x45,0xe8,0x0f,0xb6,0x00,0x3c,0x5a,0x7f,0x1f,0x48,0x8b,0x45,0xe8,0x0f,0xb6,
0x00,0x83,0xe8,0x41,0x0f,0xbe,0xc0,0x89,0xc7,0xe8,0x21,0x00,0x00,0x00,0x83,0xc0,
0x41,0x89,0xc2,0x48,0x8b,0x45,0xe8,0x88,0x10,0x48,0x83,0x45,0xe8,0x01,0x48,0x8b,
0x45,0xe8,0x0f,0xb6,0x00,0x84,0xc0,0x75,0x84,0x48,0x8b,0x45,0xf8,0xc9,0xc3,0x55,
0x48,0x89,0xe5,0x89,0xf8,0x88,0x45,0xfc,0x0f,0xbe,0x45,0xfc,0x48,0x98,0x0f,0xb6,
0x80,0x60,0x10,0x60,0x00,0x5d,0xc3,0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0x41,0x57,0x41,0x89,0xff,0x41,0x56,0x49,0x89,0xf6,0x41,0x55,0x49,0x89,0xd5,0x41,
0x54,0x4c,0x8d,0x25,0x38,0x07,0x20,0x00,0x55,0x48,0x8d,0x2d,0x38,0x07,0x20,0x00,
0x53,0x4c,0x29,0xe5,0x31,0xdb,0x48,0xc1,0xfd,0x03,0x48,0x83,0xec,0x08,0xe8,0x25,
0xfd,0xff,0xff,0x48,0x85,0xed,0x74,0x1e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0x4c,0x89,0xea,0x4c,0x89,0xf6,0x44,0x89,0xff,0x41,0xff,0x14,0xdc,0x48,0x83,0xc3,
0x01,0x48,0x39,0xeb,0x75,0xea,0x48,0x83,0xc4,0x08,0x5b,0x5d,0x41,0x5c,0x41,0x5d,
0x41,0x5e,0x41,0x5f,0xc3,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00,
0xf3,0xc3,
};

// Stand-in for the variable the original code referenced at 0x601060;
// main() patches the copied instructions to point here instead.
// We don't know for certain how large it has to be. From the disassembly,
// we learned 26 bytes should be enough. Better make it larger and be
// on the safe side.
char buffer[1000];

/*
 * Runs the transplanted machine code stored in code[].
 *
 * code[0] corresponds to original address 0x400490 (the start of .text),
 * so the byte from original address A lives at code[A - 0x400490].  The two
 * instructions that referenced the original variable at 0x601060 are
 * patched to reference buffer instead; then the function from 0x4005b3 is
 * called with the key and the function from 0x400607 with each string.
 *
 * code[] must be mapped executable for the calls to work (the build uses
 * -zexecstack for that).
 *
 * Returns 0 on success, 1 if buffer cannot be addressed by the patched
 * instructions or if a string copy cannot be allocated.
 */
int main(void) {
    /* Original load address of code[0]. */
    enum { TEXT_BASE = 0x400490 };

    /*
     * Subtract the base from the original address first: forming
     * code-0x400490 as an intermediate pointer would point far outside
     * the array, which is undefined behavior.
     */
    void (*f1)(int)=(void *)(code+(0x4005b3-TEXT_BASE));
    char* (*f2)(char *)=(void *)(code+(0x400607-TEXT_BASE));

    /*
     * The patched operands are 32-bit displacements which the CPU
     * sign-extends, so buffer must live in the low 2 GiB of the address
     * space.  Fail loudly instead of silently truncating the pointer.
     */
    uintptr_t addr=(uintptr_t)buffer;
    if (addr>(uintptr_t)INT32_MAX) {
        fprintf(stderr,
                "buffer at %p is not reachable through a 32-bit displacement; "
                "build as a non-PIE executable\n", (void *)buffer);
        return 1;
    }
    int32_t disp=(int32_t)addr;

    /* memcpy avoids unaligned, type-punned int stores into the byte array. */
    memcpy(code+(0x4005f1-TEXT_BASE), &disp, sizeof disp);
    memcpy(code+(0x4006b1-TEXT_BASE), &disp, sizeof disp);

    /* The encryption routine works in place, so it needs writable copies. */
    char *first=strdup("Hello World");
    char *second=strdup("Some other String");
    if (first==NULL || second==NULL) {
        free(first);
        free(second);
        return 1;
    }

    (*f1)(5);
    printf("%s\n", (*f2)(first));
    printf("%s\n", (*f2)(second));

    free(first);
    free(second);
    return 0;
}


并编写一个新程序来使用它。

$ cc -zexecstack -g -o copy copy.c
$ ./copy
Oziix Lxmiu
Rxnz xwozm Rwmtsj


由于原始文本段始于0x400490,而我们的函数始于0x4005b3和0x400607,我们据此计算出它们相对于code数组开头的新偏移量。另外,由于我们发现代码中有两处引用了位于所复制代码之外的缓冲区,我们对这两处打了补丁,使其指向我们自己的缓冲区。请注意,对于从encrypt到codepoint的调用,我们无需做任何处理,因为该调用使用的是相对寻址(即调用距当前位置0x21字节处的函数)。编译程序时,别忘了使数据/堆栈段可执行:

$ cc -zexecstack -g -o copy copy.c

当然,还有一些问题需要解决:


您必须找到可能从函数中调用的所有内容,并将其全部包含在您的代码中。
您必须在代码外找到对数据的所有引用,并相应地对指针进行修补。如果数据包含指针,也要对其进行修补。
如果代码调用了任何库函数,则必须进行重定位。幸运的是,(ELF / PE)二进制文件的重定位表会告诉您需要在哪些位置进行处理。不过,如果ABI不同,您可能需要为这些调用编写包装函数。

如果您想做得更讲究一些,可以不直接调用这些函数,而是在程序中嵌入一个仿真器引擎,让仿真器来执行这段代码。这可以帮助您检测是否遗漏了某些代码,或者代码在哪里引用了外部数据。显然,一旦它能在仿真器上运行,您也可以把仿真器保留在最终程序中,这甚至能让您在新的PC程序中使用某些ARM例程,反之亦然。

您可以从以下位置下载文件:https://mega.co.nz/#!8dR0TZhA!Z4DdQ07JCUzV5nJiJ79PZhHbiKDu9QZEw10IXr7ssuI

评论


0x4005ef使用的缓冲区地址应该是0x4005f1而不是0x4005b1

–Scy
17年4月3日在15:13



#2 楼

唯一声称有这种可能的项目(需要大量的半自动化和手动工作)是McSema。
请参阅参考资料和示例,这可能会很有用。

评论


会检查出来的...所以结果仍然是我可以针对其进行编译的二进制/可链接代码,而不是正确的反编译源代码?有什么技巧可以使代码跨平台?我能否简单地为链接到此单个提取二进制文件的不同平台/体系结构构建源代码?

–罗伯托·安德拉德(Roberto Andrade)
2014-12-4 14:42

如果您使用机器代码/汇编,则您的代码没有移植的可能。您必须自己转换指令或使用二进制转换器(例如:英特尔的Houdini,它将NEON转换为SSE)。或者,您将不得不改编反编译的代码,这听起来像是很多工作。

– Yaspr
2014年12月4日在17:08

@yaspr观看他们的演示。看起来非常令人印象深刻。

– w s
2014年12月4日在18:31