我正尝试从ARMv5t驱动的Gameboy Advance ROM解压缩非常长的文本块,该文本块已使用某种自定义LZ风格的压缩算法进行了压缩。 >
据我所知,解压缩算法如下(有人想出来了): 157个字符,永远不会发生[bits == 11]。然后,一旦完成写操作,我对后6位的用途完全一无所知,如果我跳过它们,则下一个字符将正确显示且仅此而已,这简直就是乱码。另外,可能值得注意的是,压缩字符串的前17位的来源似乎是无用的零,所以我对它们什么也没做。

Read data in 32 bit pieces 
position = 0
output = initialized buffer
read 1 bit

if (bit == 1):
    read next 7 bits, save at current position (ASCII character)

if (bit == 0):
    read 2 bits:
        if (bits == 00):
            read 7 bits (X)
            read 2 bits (Y)
            take Y + 2 characters from (position - X) 
            write Y + 2 characters at current position to output
            save X as save_X
        if (bits == 01):
            read 4 bits (X)
            take the character at (position - X + 1) 
            write 1 character at current position to output
        if (bits == 10):
            read 3 bits (Y)
            take Y characters at (position - save_X) 
            write Y characters at current position to output 
        if (bits == 11):
            read 9 bits (X)
            read 2 bits (Y)
            Z = (Y + 1) * 2 + 1;
            take Z characters at (position - X)
            write Z characters at (position - X) to output


压缩的字符串可以在这里找到(部分),我被卡在08D0CCD0上。
请记住,这些值需要读为20006491 A023C247等...

typedef struct tag_input_t {
    uint32_t data;
    FILE* file;
    unsigned buffer;
    int buffer_bits;
} input_t;


uint32_t data = get_next_block(file);  // 32 bit block

input_t input; // input is a struct with a buffer, so that if I need to read X bits and there are only Y (Y<X) bits left, I can load the next 32 bit block.
init_input(&input, data, file); 

new_read_bits(&input, 17);

int save_X = 0;
int position = 0;

while (position < 256) {
    unsigned is_char = new_read_bits(&input, 1);
    if (is_char) output[position++] = new_read_bits(&input, 7);
    else {
        unsigned mode = new_read_bits(&input, 2);
        unsigned X, Y, Z;
        switch (mode) {
            case 0:
                save_X = X = new_read_bits(&input, 7);
                Y = new_read_bits(&input, 2);
                memmove(output + position, output + position - X, Y + 2);
                position += Y + 2;
                break;
            case 1:
                X = new_read_bits(&input, 4);
                output[position] = output[position - X + 1];
                position++;
                break;
            case 2:
                Y = new_read_bits(&input, 3);
                memmove(output + position, output + position - save_X, Y + 1);
                position += Y + 1;
                break;
            default: //case 3:
                X = new_read_bits(&input, 9);
                Y = new_read_bits(&input, 2);
                Z = (Y + 1) * 2 + 1;
                memmove(output + position, output + position - X, Z);
                position += Z;
                break;
        }
    }
}


因此,例如,如果我们采用前8个字节91640020和A023C247,我们将得到:

91640020 47C223A0 1E698F90 79443E92 8FE4E740 31501F79 C43D0736 25F4087D 203C0FE6 C0151332 3E1190C3 02ECF3E0 09584121 DCF3A03D A7C1261D 6B430698 E6422195 41701E0F 34BF0BF4 0EECA58E 3332407C 0BE49541 0262203D DEC7E15C 08D0CCD0 3C8D1C09 7004C4C5 66EEF099


ASM中的例程可以在这里找到:

‭00100000000000000        11001001 0010001‬ 10100000 0010001 11100001 0010001 11__
first 17 are ignored     ^------- ^  ---- ^------- ^  ---- ^------- ^  ---- ^
                             I        *    (space)      *      a         *

* - results in 
[0300:0040] e92d4ff0 stmfd sp!, {r4-r11,lr}                 
[0300:0044] e3a05008 mov r5, #0x8                           
[0300:0048] e3a0a000 mov r10, #0x0                          
[0300:004c] e3a0c001 mov r12, #0x1                          
[0300:0050] ea000003 b 000064       
[0300:0054] e1a08003 mov r8, r3
[0300:0058] eb00008b bl 00028c       
[0300:005c] e0877004 add r7, r7, r4                         
[0300:0060] e4c07001 strb r7, [r0], #0x1                    
[0300:0064] e25aa001 subs r10, r10, #0x1                    
[0300:0068] 44916004 ldrmi r6, [r1], #0x4                   
[0300:006c] 43a0a01f movmi r10, #0x1f                       
[0300:0070] e0966006 adds r6, r6, r6                        
[0300:0074] 2afffff6 bcs 000054       
[0300:0078] e25aa001 subs r10, r10, #0x1                    
[0300:007c] 44916004 ldrmi r6, [r1], #0x4                   
[0300:0080] 43a0a01f movmi r10, #0x1f                       
[0300:0084] e0966006 adds r6, r6, r6                        
[0300:0088] 2a00003b bcs 00017c                          
[0300:008c] e25aa001 subs r10, r10, #0x1                    
[0300:0090] 44916004 ldrmi r6, [r1], #0x4                   
[0300:0094] 43a0a01f movmi r10, #0x1f                       
[0300:0098] e0966006 adds r6, r6, r6                        
[0300:009c] 3a000023 bcc 000130
[0300:00a0] e3a08004 mov r8, #0x4
[0300:00a4] eb000078 bl 00028c       
[0300:00a8] e2577001 subs r7, r7, #0x1                      
[0300:00ac] 0affffeb beq 000060
[0300:00b0] 5a00001c bpl 000128      
[0300:00b4] e25aa001 subs r10, r10, #0x1                    
[0300:00b8] 44916004 ldrmi r6, [r1], #0x4                   
[0300:00bc] 43a0a01f movmi r10, #0x1f                       
[0300:00c0] e0966006 adds r6, r6, r6                        
[0300:00c4] 3a00000b bcc 0000f8                          
[0300:00c8] e3a09f40 mov r9, #0x100                         
[0300:00cc] e3a08008 mov r8, #0x8
[0300:00d0] eb00006d bl 00028c
[0300:00d4] e4c07001 strb r7, [r0], #0x1
[0300:00d8] e2599001 subs r9, r9, #0x1
[0300:00dc] 1afffffa bne 0000cc
[0300:00e0] e25aa001 subs r10, r10, #0x1
[0300:00e4] 44916004 ldrmi r6, [r1], #0x4
[0300:00e8] 43a0a01f movmi r10, #0x1f
[0300:00ec] e0966006 adds r6, r6, r6
[0300:00f0] 2afffff4 bcs 0000c8
[0300:00f4] eaffffda b 000064
[0300:00f8] e3a04000 mov r4, #0x0                           
[0300:00fc] e25aa001 subs r10, r10, #0x1                    
[0300:0100] 44916004 ldrmi r6, [r1], #0x4                   
[0300:0104] 43a0a01f movmi r10, #0x1f                       
[0300:0108] e0966006 adds r6, r6, r6                        
[0300:010c] e2a43007 adc r3, r4, #0x7                       
[0300:0110] e3530008 cmps r3, #0x8                          
[0300:0114] 0affffd2 beq 000064                          
[0300:0118] e3a08008 mov r8, #0x8
[0300:011C] eb00005a bl 00028c       
[0300:0120] e1a04007 mov r4, r7                             
[0300:0124] eaffffce b 000064          
[0300:0128] e7507007 ldrb r7, [r0, -r7]                     
[0300:012c] eaffffcb b 000060                
[0300:0130] e3a08007 mov r8, #0x7
[0300:0134] eb000054 bl 00028c       
[0300:0138] e3570000 cmps r7, #0x0                          
[0300:013c] 0a000006 beq 00015c                          
[0300:0140] e1a0c007 mov r12, r7
[0300:0144] e3a08002 mov r8, #0x2
[0300:0148] eb00004f bl 00028c        
[0300:014C] e2879002 add r9, r7, #0x2
[0300:0150] e35c0001 cmps r12, #0x1                         
[0300:0154] 1a000042 bne 000264
[0300:0158] ea000046 b 000278
[0300:015c] e3a08002 mov r8, #0x2
[0300:0160] eb000049 bl 00028c       
[0300:0164] e3570000 cmps r7, #0x0                          
[0300:0168] 0a000059 beq 0002d4
[0300:016C] e2878003 add r8, r7, #0x03
[0300:0170] eb000045 bl 00028c       
[0300:0174] e1a05007 mov r5, r7                             
[0300:0178] eaffffb9 b 000064                                                                      
[0300:017c] e3a09001 mov r9, #0x1                           
[0300:0180] e25aa001 subs r10, r10, #0x1                    
[0300:0184] 44916004 ldrmi r6, [r1], #0x4                   
[0300:0188] 43a0a01f movmi r10, #0x1f                       
[0300:018c] e0966006 adds r6, r6, r6                        
[0300:0190] e0a99009 adc r9, r9, r9                         
[0300:0194] e25aa001 subs r10, r10, #0x1                    
[0300:0198] 44916004 ldrmi r6, [r1], #0x4                   
[0300:019c] 43a0a01f movmi r10, #0x1f                       
[0300:01a0] e0966006 adds r6, r6, r6                        
[0300:01a4] 2afffff5 bcs 000180                          
[0300:01a8] e3590002 cmps r9, #0x2                          
[0300:01ac] 1a00000d bne 0001e8                          
[0300:01b0] e3a09001 mov r9, #0x1                           
[0300:01b4] e25aa001 subs r10, r10, #0x1                    
[0300:01b8] 44916004 ldrmi r6, [r1], #0x4                   
[0300:01bc] 43a0a01f movmi r10, #0x1f                       
[0300:01c0] e0966006 adds r6, r6, r6                        
[0300:01c4] e0a99009 adc r9, r9, r9                         
[0300:01c8] e25aa001 subs r10, r10, #0x1                    
[0300:01cc] 44916004 ldrmi r6, [r1], #0x4                   
[0300:01d0] 43a0a01f movmi r10, #0x1f                       
[0300:01d4] e0966006 adds r6, r6, r6                        
[0300:01d8] 2afffff5 bcs 0001b4                          
[0300:01dc] e35c0001 cmps r12, #0x1                         
[0300:01e0] 1a00001f bne 000264                          
[0300:01e4] ea000023 b 000278        
[0300:01e8] e2499003 sub r9, r9, #0x3                       
[0300:01ec] e1a08005 mov r8, r5
[0300:01f0] eb000025 bl 00028c        
[0300:01f4] e087c519 add r12, r7, r9, lsl r5                
[0300:01f8] e3a09001 mov r9, #0x1                           
[0300:01fc] e25aa001 subs r10, r10, #0x1                    
[0300:0200] 44916004 ldrmi r6, [r1], #0x4                   
[0300:0204] 43a0a01f movmi r10, #0x1f                       
[0300:0208] e0966006 adds r6, r6, r6                        
[0300:020c] e0a99009 adc r9, r9, r9                         
[0300:0210] e25aa001 subs r10, r10, #0x1                    
[0300:0214] 44916004 ldrmi r6, [r1], #0x4                   
[0300:0218] 43a0a01f movmi r10, #0x1f                       
[0300:021c] e0966006 adds r6, r6, r6                        
[0300:0220] 2afffff5 bcs 0001fc                          
[0300:0224] e35c0b40 cmps r12, #0x10000
[0300:0228] 22899003 addcs r9, r9, #0x3                     
[0300:022c] 2a00000c bcs 000264                          
[0300:0230] e59f80f0 ldr r8, [000328] (=q4312078q0037ff)       
[0300:0234] e15c0008 cmps r12, r8                           
[0300:0238] 22899002 addcs r9, r9, #0x2                     
[0300:023c] 2a000008 bcs 000264                          
[0300:0240] e59f80e4 ldr r8, [00032c] (=q4312078q00027f)       
[0300:0244] e15c0008 cmps r12, r8                           
[0300:0248] 22899001 addcs r9, r9, #0x1                     
[0300:024c] 2a000004 bcs 000264                          
[0300:0250] e35c007f cmps r12, #0x7f                        
[0300:0254] 8a000002 bhi 000264                          
[0300:0258] e2899004 add r9, r9, #0x4                       
[0300:025c] e35c0001 cmps r12, #0x1                         
[0300:0260] 0a000004 beq 000278
[0300:0264] e750800c ldrb r8, [r0, -r12]                    
[0300:0268] e4c08001 strb r8, [r0], #0x1                    
[0300:026c] e2599001 subs r9, r9, #0x1                      
[0300:0270] 1afffffb bne 000264                          
[0300:0274] eaffff7a b 000064
[0300:0278] e750800c ldrb r8, [r0, -r12]                    
[0300:027c] e4c08001 strb r8, [r0], #0x1                    
[0300:0280] e2599001 subs r9, r9, #0x1                      
[0300:0284] 1afffffc bne 00027c                          
[0300:0288] eaffff75 b 000064                          
[0300:028c] e35a0000 cmps r10, #0x0                         
[0300:0290] 04916004 ldreq r6, [r1], #0x4                   
[0300:0294] 03a0a020 moveq r10, #0x20                       
[0300:0298] e1a0b008 mov r11, r8                            
[0300:029c] e15b000a cmps r11, r10                          
[0300:02a0] 81a0b00a movhi r11, r10                         
[0300:02a4] e26b7020 rsb r7, r11, #0x20                     
[0300:02a8] e1a07736 mov r7, r6, lsr r7                     
[0300:02ac] e1a06b16 mov r6, r6, lsl r11                    
[0300:02b0] e04aa00b sub r10, r10, r11                      
[0300:02b4] e058800b subs r8, r8, r11                       
[0300:02b8] 012fff1e bxeq lr                  
[0300:02bc] e4916004 ldr r6, [r1], #0x4                     
[0300:02c0] e268a020 rsb r10, r8, #0x20                     
[0300:02c4] e1a0ba36 mov r11, r6, lsr r10                   
[0300:02c8] e08b7817 add r7, r11, r7, lsl r8                
[0300:02cc] e1a06816 mov r6, r6, lsl r8                     
[0300:02d0] e12fff1e bx lr
[0300:02d4] e8bd4ff0 ldmfd sp!, {r4-r11,lr}                 
[0300:02d8] e12fff1e bx lr                         
^ - first read bit The result will be I. a.d.m.i.r.e. .y.o.u.r. .e.n.t.h.u.s.i.a.s.m (. is 00)


#1 楼

该代码不容易阅读,因为它必须静态地执行,而且似乎不遵循标准的EABI。常量,然后查看代码是否与以下内容匹配:

原来的算法是JCALG1。

我从Internet和我得到:

I admire your enthusiasm. However, many of these fighters have been training for hundreds, evene fufuf


结尾已损坏,可能是因为结尾数据丢失了。请记住,您必须执行bswap32(更改32位值的字节序),并且似乎没有任何签名/标头(删除+10)

评论


谢谢!我实际上找到了开发人员,而他对我说的也一样。此解决方案有效,而且正是我想要找到的解决方案。未来的后续问题:您如何“搜索使用的常数”?他们只是数字。这是一些非常强大的Google搜寻。

– Alex Osheter
19年3月18日在21:53

其实没什么好看的。我只是在寻找“ compression 0x27f 0x37ff”(这两个值对我来说并不常见),这是第一个结果。我比较了代码和汇编,以确保它是正确的算法。最后,我迅速修改了源代码以集成您的测试值,并且输出正确。 :)

– wisk
19 Mar 18 '19 at 22:40

哇,真聪明。荣誉!

– Alex Osheter
19年3月18日在22:53