From 69111bd86275a110f6c5e2d26e99ee53c69fe832 Mon Sep 17 00:00:00 2001 From: nmlgc Date: Thu, 18 Feb 2021 19:40:37 +0100 Subject: [PATCH] [Separate translation units] .PI: Row loop for masked blitting (undecompilable) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reason: Pascal calling convention with function parameters but no stack frame. Theoretically we can __emit__() everything inside this function, but there's no way we can get a `RETN 8` this way. Oh, and it also accesses SI and DI without backing them up to the stack. And thanks to TLINK apparently not reporting fixup overflows when segments are small enough (?), it took quite a while to get that CALL correct and not weirdly offset by 32 bytes. 😕 Part of P0134, funded by [Anonymous]. --- Makefile.mak | 4 +- Tupfile | 1 + Tupfile.bat | 1 + libs/master.lib/master.inc | 1 + th03/formats/pi_put_masked[data].asm | 2 + th05/formats/pi_asm_1.asm | 100 +++++++++++++++++++++++++++ th05/formats/pi_put_masked.asm | 76 +------------------- th05/pi_asm_1.asm | 1 + 8 files changed, 109 insertions(+), 77 deletions(-) create mode 100644 th05/formats/pi_asm_1.asm create mode 100644 th05/pi_asm_1.asm diff --git a/Makefile.mak b/Makefile.mak index e7c37a79..fe1aed04 100644 --- a/Makefile.mak +++ b/Makefile.mak @@ -152,7 +152,7 @@ bin\th05\res_kso.com: th05\res_kso.cpp $** | masters.lib -bin\th05\op.exe: th05\op010.cpp bin\th05\op.obj th05\op011.cpp th05\m_char.cpp bin\th05\pi_cpp_2.obj bin\th05\pi_asm_2.obj bin\th05\initop.obj bin\th05\input_s.obj bin\th05\inp_h_w.obj bin\th05\snd_dlym.obj th05\cdg_p_nc.cpp bin\th05\frmdelay.obj bin\th04\cdg_load.obj bin\th05\egcrect.obj bin\hfliplut.obj +bin\th05\op.exe: th05\op010.cpp bin\th05\op.obj th05\op011.cpp th05\m_char.cpp bin\th05\pi_asm_1.obj bin\th05\pi_cpp_2.obj bin\th05\pi_asm_2.obj bin\th05\initop.obj bin\th05\input_s.obj bin\th05\inp_h_w.obj bin\th05\snd_dlym.obj th05\cdg_p_nc.cpp bin\th05\frmdelay.obj bin\th04\cdg_load.obj 
bin\th05\egcrect.obj bin\hfliplut.obj $(CC) $(CFLAGS) $(LARGE_LFLAGS) -DGAME=5 -DBINARY='O' -3 -Z -nbin\th05\ -eOP.EXE @&&| $** | @@ -162,7 +162,7 @@ bin\th05\main.exe: bin\th05\main.obj th05\main010.cpp th05\main011.cpp th05\p_co $** | -bin\th05\maine.exe: bin\th05\maine.obj th05\maine011.cpp th05\regist.cpp th05\staff.cpp bin\th05\pi_cpp_2.obj bin\th05\pi_asm_2.obj bin\th05\initmain.obj bin\th05\input_s.obj bin\th05\inp_h_w.obj bin\th05\snd_dlym.obj bin\th05\frmdelay.obj bin\th04\cdg_load.obj bin\th05\egcrect.obj bin\hfliplut.obj +bin\th05\maine.exe: bin\th05\maine.obj th05\maine011.cpp th05\regist.cpp th05\staff.cpp bin\th05\pi_asm_1.obj bin\th05\pi_cpp_2.obj bin\th05\pi_asm_2.obj bin\th05\initmain.obj bin\th05\input_s.obj bin\th05\inp_h_w.obj bin\th05\snd_dlym.obj bin\th05\frmdelay.obj bin\th04\cdg_load.obj bin\th05\egcrect.obj bin\hfliplut.obj $(CC) $(CFLAGS) $(LARGE_LFLAGS) -DGAME=5 -DBINARY='E' -Z -nbin\th05\ -eMAINE.EXE @&&| $** | diff --git a/Tupfile b/Tupfile index acdbd901..ffe314aa 100644 --- a/Tupfile +++ b/Tupfile @@ -109,6 +109,7 @@ BMP2ARR = bin\\Pipeline\\bmp2arr.exe : th05\\player.asm |> !as5 |> : th05\\hud_bar.asm |> !as5 |> : th05\\bullet.asm |> !as5 |> +: th05\\pi_asm_1.asm |> !as5 |> : th05\\pi_asm_2.asm |> !as5 |> : th05_op.asm | \ th05/sprites/piano_l.asp \ diff --git a/Tupfile.bat b/Tupfile.bat index d3396f18..af8e9c67 100644 --- a/Tupfile.bat +++ b/Tupfile.bat @@ -50,6 +50,7 @@ tasm32 /m /mx /kh32768 /t /dGAME=5 th04\scoreupd.asm bin\th05\scoreupd.obj tasm32 /m /mx /kh32768 /t /dGAME=5 th05\player.asm bin\th05\player.obj tasm32 /m /mx /kh32768 /t /dGAME=5 th05\hud_bar.asm bin\th05\hud_bar.obj tasm32 /m /mx /kh32768 /t /dGAME=5 th05\bullet.asm bin\th05\bullet.obj +tasm32 /m /mx /kh32768 /t /dGAME=5 th05\pi_asm_1.asm bin\th05\pi_asm_1.obj tasm32 /m /mx /kh32768 /t /dGAME=5 th05\pi_asm_2.asm bin\th05\pi_asm_2.obj tasm32 /m /mx /kh32768 /t th05_op.asm bin\th05\op.obj tasm32 /m /mx /kh32768 /t th05_main.asm bin\th05\main.obj diff --git 
a/libs/master.lib/master.inc b/libs/master.lib/master.inc index 0191f636..8f40d3c7 100644 --- a/libs/master.lib/master.inc +++ b/libs/master.lib/master.inc @@ -5,6 +5,7 @@ include libs/master.lib/macros.inc +EGC_OFF procdesc pascal far GAIJI_PUTSA procdesc pascal far \ x:word, y:word, strp_seg:word, strp_off:word, atrb:word GRAPH_PACK_PUT_8_NOCLIP procdesc pascal far \ diff --git a/th03/formats/pi_put_masked[data].asm b/th03/formats/pi_put_masked[data].asm index 6058a293..121b6bdc 100644 --- a/th03/formats/pi_put_masked[data].asm +++ b/th03/formats/pi_put_masked[data].asm @@ -1,3 +1,5 @@ +PI_MASK_H = 4 + public _PI_MASKS _PI_MASKS label word ; 0 diff --git a/th05/formats/pi_asm_1.asm b/th05/formats/pi_asm_1.asm new file mode 100644 index 00000000..8f953b5c --- /dev/null +++ b/th05/formats/pi_asm_1.asm @@ -0,0 +1,100 @@ +; First TH05 .PI assembly translation unit. + + .386 + .model large + locals + +include pc98.inc +include libs/master.lib/master.inc + + extrn _PI_MASKS:word + extrn _pi_mask_ptr:word + extrn _pi_mask_y:word + extrn _pi_put_masked_vram_offset:word + +; Apparently necessary to avoid fixup overflows. In this case, the call to +; pi_masked_egc_setup_copy() would be bizarrely offset otherwise, which TLINK +; wouldn't even report?! I did very much not enjoy debugging this. NOTE(review): the only near call visible in this file targets _pi_mask_setup_egc_and_advance; presumably that is the function meant here (confirm). +g_SHARED group SHARED, SHARED_ +SHARED segment byte public 'CODE' use16 +SHARED ends + +SHARED_ segment word public 'CODE' use16 + assume cs:g_SHARED + + _pi_mask_setup_egc_and_advance procdesc near + +public PI_PUT_MASKED_8_ROWLOOP +pi_put_masked_8_rowloop proc near +; Can't use ARG, because the function doesn't `PUSH BP`! The four stack parameters are therefore addressed relative to BP after the `MOV BP, SP` below, with [bp+0] being the near return address. The remaining inputs arrive in registers and are used without being loaded first: the mask ID in AX, the number of rows in DI, and the current .PI source row pointer in ES:SI. +@@stride_packed = word ptr [bp+2] ; added to SI after every row +@@w = word ptr [bp+4] ; forwarded to graph_pack_put_8_noclip; w / 16 words are copied per row +@@top = word ptr [bp+6] ; together with @@left, determines the initial VRAM offset +@@left = word ptr [bp+8] ; highest of the four stack words removed by `RETN 8` +@@mask_id equ ax +@@h equ di + +; Each original row is first blitted to this fixed row in VRAM, before being +; blitted to its actual destination with the EGC active. 
+TEMP_ROW = RES_Y + + shl @@mask_id, 3 ; *= PI_MASK_H * word + add @@mask_id, offset _PI_MASKS + mov _pi_mask_ptr, @@mask_id ; near offset of the first word of the selected mask inside _PI_MASKS + mov bp, sp ; no frame is pushed: [bp] is the return address, and the caller's BP is overwritten without being saved + mov dx, @@left + shr dx, 3 ; left / 8 + mov ax, @@top + shl ax, 6 ; top * 64 + add dx, ax + shr ax, 2 ; top * 16 + add dx, ax ; DX = (left / 8) plus (top * 80); presumably 80 == ROW_SIZE (confirm in pc98.inc) + mov _pi_put_masked_vram_offset, dx + mov _pi_mask_y, 0 + +@@put_row: ; one iteration per row, DI (@@h) holds the number of rows left + push es ; ES:SI is the source row; ES gets repointed to SEG_PLANE_B further down + call graph_pack_put_8_noclip pascal, 0, TEMP_ROW, es, si, @@w ; blits the source row to the staging row TEMP_ROW + push ds + push @@h ; DI is reused as the copy destination, so the row counter is parked on the stack + push si + mov di, _pi_put_masked_vram_offset ; offset for this row; serves as the REP MOVSW destination, assuming the helper called below leaves DI alone (not visible here) + add _pi_put_masked_vram_offset, ROW_SIZE ; advance the stored offset for the next iteration + cmp _pi_put_masked_vram_offset, PLANE_SIZE + jb short @@next_row + sub _pi_put_masked_vram_offset, PLANE_SIZE ; wrap around once the offset reaches PLANE_SIZE + +@@next_row: + call _pi_mask_setup_egc_and_advance ; defined in another translation unit; going by its name, it programs the EGC for the current mask row and steps _pi_mask_y (confirm) + mov ax, SEG_PLANE_B + mov es, ax + assume es:nothing + mov ds, ax + assume ds:nothing + mov si, (ROW_SIZE * TEMP_ROW) ; DS:SI = start of the staging row + mov cx, @@w ; BP-based operand, so it is read through SS even though DS now points to SEG_PLANE_B + shr cx, 4 ; CX = w / 16 word transfers + rep movsw ; staging row to destination row, both inside SEG_PLANE_B + call egc_off + pop si + pop @@h + pop ds + pop es ; ES:SI is the source row pointer again + add si, @@stride_packed ; step to the next source row + + ; .PI pointer normalization, see pi_buffer_p_normalize() + mov ax, si + shr ax, 4 + mov dx, es + add dx, ax + mov es, dx + and si, 0Fh ; ES gained SI / 16 paragraphs and SI keeps its low 4 bits, so ES:SI still addresses the same byte + + dec @@h + jnz short @@put_row + retn 8 ; near return that also removes the four stack words (left, top, w, stride_packed) +pi_put_masked_8_rowloop endp + +SHARED_ ends + + end diff --git a/th05/formats/pi_put_masked.asm b/th05/formats/pi_put_masked.asm index c9d2f873..48f530eb 100644 --- a/th05/formats/pi_put_masked.asm +++ b/th05/formats/pi_put_masked.asm @@ -1,4 +1,4 @@ -_pi_mask_setup_egc_and_advance procdesc near +PI_PUT_MASKED_8_ROWLOOP procdesc near public PI_PUT_MASKED_8 pi_put_masked_8 proc far @@ -81,77 +81,3 @@ pi_put_quarter_masked_8 proc far retf 0Ah pi_put_quarter_masked_8 endp even - -; --------------------------------------------------------------------------- - -; void pascal pi_put_masked_8_rowloop( -; int mask_id, -; void far *pi_buf, -; pixel_t h, -; screen_x_t left, vram_y_t top, pixel_t w, size_t stride_packed -; ); -pi_put_masked_8_rowloop proc near -@@stride_packed = word ptr [bp+2] -@@w = word ptr [bp+4] -@@top = word ptr [bp+6] -@@left = word ptr [bp+8] -@@mask_id equ ax -@@h equ di - -TEMP_ROW = RES_Y - - 
shl @@mask_id, 3 - add @@mask_id, offset _PI_MASKS - mov _pi_mask_ptr, @@mask_id - mov bp, sp - mov dx, @@left - shr dx, 3 - mov ax, @@top - shl ax, 6 - add dx, ax - shr ax, 2 - add dx, ax - mov _pi_put_masked_vram_offset, dx - mov _pi_mask_y, 0 - -@@put_row: - push es - call graph_pack_put_8_noclip pascal, 0, TEMP_ROW, es, si, @@w - push ds - push @@h - push si - mov di, _pi_put_masked_vram_offset - add _pi_put_masked_vram_offset, ROW_SIZE - cmp _pi_put_masked_vram_offset, PLANE_SIZE - jb short @@next_row - sub _pi_put_masked_vram_offset, PLANE_SIZE - -@@next_row: - call _pi_mask_setup_egc_and_advance - mov ax, GRAM_400 - mov es, ax - assume es:nothing - mov ds, ax - assume ds:nothing - mov si, (ROW_SIZE * TEMP_ROW) - mov cx, @@w - shr cx, 4 - rep movsw - call egc_off - pop si - pop @@h - pop ds - assume ds:_DATA - pop es - assume es:nothing - add si, @@stride_packed - mov ax, si - shr ax, 4 - mov dx, es - add dx, ax - mov es, dx - and si, 0Fh - dec @@h - jnz short @@put_row - retn 8 -pi_put_masked_8_rowloop endp diff --git a/th05/pi_asm_1.asm b/th05/pi_asm_1.asm new file mode 100644 index 00000000..6bf3e0cd --- /dev/null +++ b/th05/pi_asm_1.asm @@ -0,0 +1 @@ +include th05/formats/pi_asm_1.asm