nasm -f obj bf-interpreter.asm
bcc32 bf-interpreter.obj
我以性能为重点编写了解释器,但是我还没有阅读很多关于优化的文章,所以我没有如果结果证明这是非常糟糕的代码,则感到很惊讶。当代码与C stdlib链接时,我可以从C stdlib调用方法,并且基本上只有
EBX
,EDI
和ESI
寄存器被认为是“安全的”,这意味着它们不会被对C的调用所覆盖。 stdlib,我的实现经常使用该约定。此版本的BF解释器读取作为参数传递给程序的文件。 lang-none prettyprint-override“>
extern _malloc, _calloc, _printf, _fdopen, _fprintf, _scanf, _getchar, _putchar, _fopen, _fseek, _ftell, _rewind, _fgetc, _fclose
%define STDERR 2
%define SEEK_END 2
%define EOF -1
%define BF_MEMORY_CELL_AMOUNT 30000
%define BF_PROGRAM_END 255
%define JUMP_PAST_CODE 91
%define JUMP_BACK_CODE 93
segment _DATA public align=4 class=DATA use32
format_int db "%d", 0
write_mode db "w", 0
read_mode db "r", 0
bfprogram_jump_table times 43 dd run_program_loop_end,
dd bfprogram_memory_inc,
dd bfprogram_input,
dd bfprogram_memory_dec,
dd bfprogram_output,
times 13 dd run_program_loop_end,
dd bfprogram_pointer_left,
dd run_program_loop_end,
dd bfprogram_pointer_right,
times 28 dd run_program_loop_end,
dd bfprogram_jump_past,
dd run_program_loop_end,
dd bfprogram_jump_back,
times 34 dd run_program_loop_end,
times 127 dd run_program_loop_end, ; 128 (127 + next line) invalid ASCII characters
dd run_program_done ; if jump address is 255 (BF_PROGRAM_END), then we're done'
error_noargument db "Fatal: No argument was provided.", 0
error_notexist db "Fatal: The file does not exist.", 0
error_outofmemory db "Fatal: The Operating System does not have enough memory available.", 0
segment _BSS public align=4 class=BSS use32
file_name resd 1
bf_program_size resd 1
bf_program resd 1
bf_memory resd 1
group DGROUP _BSS _DATA
segment _TEXT public align=1 class=CODE use32
global _main
_main:
mov ebp, esp ; save original stack pointer
;
; read command line arguments
;
mov eax, [ebp + 4] ; argc
cmp eax, 1
je error_exit_noargument
mov eax, [ebp + 8] ; *argv
mov eax, [eax + 4] ; argv[1]
mov [file_name], eax
;
; open file
;
push read_mode
push eax
call _fopen
add esp, 8
test eax, eax
jz error_exit_notexist
mov edi, eax ; store file pointer
;
; get file size
;
push SEEK_END
push 0
push edi
call _fseek
add esp, 12
push edi
call _ftell
add esp, 4
inc eax ; reserve one extra byte for the BF_PROGRAM_END code
mov [bf_program_size], eax
;
; rewind file
;
push edi
call _rewind
add esp, 4
;
; read Brainfuck program from file
;
push dword [bf_program_size]
call _malloc
add esp, 4
test eax, eax
jz error_exit_outofmemory
mov [bf_program], eax
mov esi, eax
store_program_loop:
push edi
call _fgetc
add esp, 4
cmp eax, EOF ; stop reading when end of file reached
jz short store_program_done
mov [esi], al
inc esi
jmp short store_program_loop
store_program_done:
mov [esi], byte BF_PROGRAM_END ; store program end special code
;
; close file
;
push edi
call _fclose
add esp, 4
;
; zero-initialize BF memory cells
;
push dword 1
push BF_MEMORY_CELL_AMOUNT
call _calloc
add esp, 8
test eax, eax
jz error_exit_outofmemory
mov [bf_memory], eax
;
; run the BF program
;
mov esi, eax ; current memory address
mov edi, [bf_program] ; current program address
run_program_loop:
movzx eax, byte [edi]
jmp [bfprogram_jump_table + 4*eax] ; addresses are dword, ASCII is translated to byte offsets
run_program_loop_end:
inc edi
jmp short run_program_loop
run_program_done:
jmp normal_exit
bfprogram_pointer_right:
inc esi
jmp run_program_loop_end
bfprogram_pointer_left:
dec esi
jmp run_program_loop_end
bfprogram_memory_inc:
mov al, [esi]
inc al
mov [esi], al
jmp run_program_loop_end
bfprogram_memory_dec:
mov al, [esi]
dec al
mov [esi], al
jmp run_program_loop_end
bfprogram_output:
mov al, [esi]
push eax ; safe to do because eax is 000000xxh before the prior mov
call _putchar
add esp, 4
jmp run_program_loop_end
bfprogram_input:
call _getchar
mov [esi], al
jmp run_program_loop_end
bfprogram_jump_past:
mov al, [esi]
test al, al ; check if memory cell is zero
jnz run_program_loop_end ; if not zero, move to next instruction
;
; find matching ]
;
mov ebx, 1 ; when counter reaches zero the ] is found where we need to jump past
bfprogram_jump_past_loop:
inc edi
mov al, [edi]
cmp al, JUMP_PAST_CODE
jz short bfprogram_jump_past_loop_found_jump_past
cmp al, JUMP_BACK_CODE
jz short bfprogram_jump_past_loop_found_jump_back
jmp short bfprogram_jump_past_loop
bfprogram_jump_past_loop_found_jump_past:
inc ebx
jmp short bfprogram_jump_past_loop
bfprogram_jump_past_loop_found_jump_back:
dec ebx
test ebx, ebx
jz run_program_loop_end ; jumped over matching ]
jmp short bfprogram_jump_past_loop
bfprogram_jump_back:
mov al, [esi]
test al, al ; check if memory cell is zero
jz run_program_loop_end ; if zero, move to next instruction
;
; find matching [
;
mov ebx, 1 ; when counter reaches zero the [ is found where we need to jump back to
bfprogram_jump_back_loop:
dec edi
mov al, [edi]
cmp al, JUMP_BACK_CODE
jz short bfprogram_jump_back_loop_found_jump_back
cmp al, JUMP_PAST_CODE
jz short bfprogram_jump_back_loop_found_jump_past
jmp short bfprogram_jump_back_loop
bfprogram_jump_back_loop_found_jump_back:
inc ebx
jmp short bfprogram_jump_back_loop
bfprogram_jump_back_loop_found_jump_past:
dec ebx
test ebx, ebx
jz run_program_loop_end ; jumped back to matching [
jmp short bfprogram_jump_back_loop
error_exit_noargument:
push write_mode
push 2
call _fdopen
add esp, 8
push error_noargument
push eax
call _fprintf
add esp, 8
mov eax, -1
jmp short exit
error_exit_notexist:
push write_mode
push 2
call _fdopen
add esp, 8
push error_notexist
push eax
call _fprintf
add esp, 8
mov eax, -2
jmp short exit
error_exit_outofmemory:
push write_mode
push 2
call _fdopen
add esp, 8
push error_outofmemory
push eax
call _fprintf
add esp, 8
mov eax, -3
jmp short exit
normal_exit:
mov eax, 0
exit:
ret
完整的源代码存储库可以在GitHub上找到。在线。
您可以在以下我录制的YouTube视频中看到两者的运行情况。
#1 楼
注释
; *argv
应该为; argv
,因为您尚未取消引用指针。在执行
cmp
指令后,您应该首选je
而不是jz
,因为它哦,在过去,您不得不告诉汇编器
jmp short
,因为汇编程序无法自行解决。 :) 在
run the BF program
部分,我将esi
和edi
更改为s
寄存器指向源代码,而d
寄存器指向数据。但这只是为了好玩。在
bfprogram_memory_inc
中,您可以说inc byte [esi]
。 inc
指令具有r/m8
编码,该编码允许直接在内存中增加值,而无需间接调用。safe to do
优化很不错。由于无论如何都依赖ASCII编码,因此,如果可能,应该将
JUMP_PAST_CODE
定义为'['
而不是91。并非每个读者都非常了解ASCII码。NASM是否支持本地标签?这样会使
bfprogram_jump_past_loop
中的标签名称更短一些,从而更易于阅读。_fdopen
而不是stderr
,而是链接器可见的符号,以便您可以直接访问它吗? br /> 由于错误消息中不包含百分比字符,因此应致电
fputs
而不是fprintf
。 。(或者它们是否丢失了,因为Windows无论如何都会添加此换行符?如果是这样,那没关系,因为在针对特定平台时不必编写严格符合ANSI C的代码。) />
bfprogram_jump_table
非常大。您可以通过相对于已知位置对跳转目标进行编码来使其更小,每个位置只需要占用一个字节。 times 43 db 0, db bfprogram_memory_inc - run_program_loop_end, ...
。format_int
似乎未使用。STDERR
应该称为STDERR_FILENO
(尽管该名称来自POSIX,而不是Windows,但仍广为人知。)由于0xFF
是Brainfuck程序中的有效指令,因此将其用作EOF标记不是一个好主意。从文件加载代码时,可以将93以上或43以下的每个字符替换为44。此更改还可以使跳转表变小。总体来说,这是一个漂亮的小程序,代码完成了显而易见的事情。很高兴阅读。
评论
\ $ \ begingroup \ $
如果用本地标签表示.foo_bar :,则为是; NASM确实支持他们。
\ $ \ endgroup \ $
– SirPython
16年11月14日在22:53
#2 楼
跳转指令很慢,应将其最小化。可以按以下方式删除解释器循环中的一条跳转指令: dec edi ; on first pass, compensate for inc edi
run_program_loop: ; jump here after executing an opcode
run_program_loop_end: ; old label for reference
inc edi
movzx eax, byte [edi]
jmp [bfprogram_jump_table + 4*eax] ; addresses are dword, ASCII is translated to byte offsets
评论
接下来:用Brainfuck编写的x86解释器!仅支持第一句话。
“我以性能为重点编写了解释器” –您可能想知道,优化实现通常不会一一解释和执行字符,而是将整个程序转换为机器代码,并在转换过程中进行优化(例如将++转换为2,而不是2个独立的增量指令,将[-]转换为仅将单元格设置为0,而没有任何循环,等等),然后执行生成的代码。但这需要大量重写,对于您所追求的,您拥有的已绰绰有余。
多亏您友好的PPCG邻居,x86机器代码中的解释器已经存在。如果您可以将其缩短,请随时在此处发布。
我们想要一个bf jit引擎来生成程序集。这个项目简单易行