[Decompilation] [th03] .MRS: Byte-aligned, opaque blitting

Containing not one, but two decompilation innovations, one of which
works around a compiler bug using C++ template functions…

Completes P0126, funded by [Anonymous] and Blue Bolt.
This commit is contained in:
nmlgc 2020-11-08 21:04:36 +01:00
parent 5965930bd0
commit 8b0165738a
5 changed files with 250 additions and 161 deletions

View File

@ -105,6 +105,10 @@ case it's part of an arithmetic expression that was promoted to `int`.
## Assignments
| | |
|-|-|
| `MOV ???, [SI+????]` | Only achievable through pointer arithmetic? |
* When assigning to an array element at a variable or non-0 index, the array
element address is typically evaluated before the expression to be assigned.
But when assigning
@ -452,3 +456,12 @@ contains one of the following:
**Certainty:** Confirmed through reverse-engineering `TCC.EXE`, no way
around it.
### Compiler bugs
* Dereferencing a `far` pointer constructed from the `_FS` and `_GS`
pseudoregisters emits wrong segment prefix opcodes 0x46 (`INC SI`) and
0x4E (`DEC SI`) rather than the correct 0x64 and 0x65, respectively.
**Workaround**: Does not happen when compiling via TASM (`-B` on the command
line, or `#pragma inline`).

View File

@ -9,6 +9,7 @@
// if(FLAGS_*) { goto some_label; | return; }
// these assemble into the single given instruction. Apply the ! operator to
// get the N versions.
#define FLAGS_CARRY (_FLAGS & 0x01) /* JC / JNAE / JB (negated: JNC / JAE / JNB) */
#define FLAGS_ZERO (_FLAGS & 0x40) /* JZ */
#define FLAGS_SIGN (_FLAGS & 0x80) /* JS */
// ----------------
@ -26,5 +27,66 @@
out dx, ax; \
}
// Versions that actually inline with pseudoregisters
// poke() versions that actually inline with pseudoregisters
// ---------------------------------------------------------
// Writes the 16-bit [val] to the far address [sgm]:[off]. Expands to a
// braced block, so it is safe to use as the sole body of an if/else/loop.
#define pokew(sgm, off, val) { *(uint16_t far *)(MK_FP(sgm, off)) = val; }
// Turbo C++ 4.0 generates wrong segment prefix opcodes for the _FS and _GS
// pseudoregisters - 0x46 (INC SI) and 0x4E (DEC SI) rather than the correct
// 0x64 and 0x65, respectively. These prefixes are also not supported in
// inline assembly, which is limited to pre-386 anyway. Compiling via assembly
// (`#pragma inline`) would work and generate the correct instructions here,
// but that would incur yet another dependency on a 16-bit TASM, for something
// honestly quite insignificant.
//
// So, can we somehow work around this issue while retaining the readability
// of the usage code and pretending that this bug doesn't exist? Comparisons
// with segment registers unfortunately don't inline, so something like
// if(sgm == _FS)
// wouldn't work, even inside a macro that replaces [sgm] with _FS. But since
// __emit__() *does* inline, we can use function templates! The default
// versions provide the regularly intended C code for all other registers,
// while explicit specializations for _FS and _GS __emit__() the correct
// instruction opcodes for all offset registers needed. Then, we only need to
// somehow move the pseudoregisters up into the type system... which can
// simply be done by turning them into class names via preprocessor token
// pasting. Sure, this limits this approach to raw registers with no immediate
// offsets, but let's hope we won't ever need those...
//
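// Also, hey, no need for the MK_FP() macro if we directly return the correct
// types.
//
// Templates can't have C linkage, so the lone closing brace below leaves the
// `extern "C"` block this header is expected to be included from, and the
// `extern "C" {` after the overloads reopens it.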
#ifdef __cplusplus
}
// Tag types that lift a pseudoregister into the type system, so that
// poked_eax() can be overloaded on the register it is supposed to address.
// Their names must be `Decomp` followed by the pseudoregister name, since
// poked() builds them via token pasting. value() returns the current contents
// of the register as the matching pointer component (segment or near offset)
// and does not access any members.
struct Decomp_FS { void __seg* value() { return (void __seg *)(_FS); } };
struct Decomp_GS { void __seg* value() { return (void __seg *)(_GS); } };
struct Decomp_DI { void __near* value() { return (void __near *)(_DI); } };
// Writes the 32-bit [val] to [sgm]:[off]. Both [sgm] and [off] must be raw
// pseudoregister names (_FS, _DI, ...) for which a Decomp_* tag type exists:
// they are token-pasted into that type's name to select the matching
// poked_eax() overload. The NULL pointers only carry the type.
//
// [val] is handed over in _EAX rather than as a function argument: Removing
// [val] from the parameter lists of the template functions below perfects the
// inlining.
//
// Wrapped in a block, like pokew(), so that both statements stay together
// when the macro is used as the body of an unbraced if/else/loop.
#define poked(sgm, off, val) { \
	_EAX = (val); \
	poked_eax((Decomp##sgm *)NULL, (Decomp##off *)NULL); \
}
// Generic fallback for every register combination the compiler encodes
// correctly: combines the segment and offset returned by the two tag types
// into an address and writes _EAX to it as a 32-bit value.
// [sgm] and [off] only select the types; poked() passes NULL for both.
template <class Segment, class Offset> inline void poked_eax(
Segment *sgm, Offset *off
) {
*reinterpret_cast<uint32_t far *>(sgm->value() + off->value()) = _EAX;
}
// FS:[DI] overload. Sidesteps the compiler's wrong FS segment prefix by
// emitting the instruction bytes directly. [sgm] and [off] are unused; they
// only exist so that overload resolution picks this version.
inline void poked_eax(Decomp_FS *sgm, Decomp_DI *off) {
// 0x66 = 32-bit operand size prefix, 0x64 = FS segment prefix
__emit__(0x66, 0x64, 0x89, 0x05); // MOV FS:[DI], EAX
}
// GS:[DI] overload. Sidesteps the compiler's wrong GS segment prefix by
// emitting the instruction bytes directly. [sgm] and [off] are unused; they
// only exist so that overload resolution picks this version.
inline void poked_eax(Decomp_GS *sgm, Decomp_DI *off) {
// 0x66 = 32-bit operand size prefix, 0x65 = GS segment prefix
__emit__(0x66, 0x65, 0x89, 0x05); // MOV GS:[DI], EAX
}
extern "C" {
#endif
// ---------------------------------------------------------
// 32-bit ASM instructions not supported by Turbo C++ 4.0J's built-in
// assembler. Makes no sense to compile with `#pragma inline` (and thus,
// require a 16-bit TASM) just for those.
// Both macros already end in a semicolon, so they can be chained without a
// separator, as in `REP MOVSD`.
// MOVSD: the MOVSW opcode (0xA5) behind a 32-bit operand size prefix (0x66).
#define MOVSD __emit__(0x66, 0xA5);
// REP: bare repeat prefix (0xF3). Only meaningful if the very next emitted
// instruction is the string instruction to be repeated.
#define REP __emit__(0xF3);

View File

@ -1,9 +1,15 @@
#pragma option -3
#pragma codeseg SHARED
extern "C" {
#include <stddef.h>
#include "platform.h"
#include "pc98.h"
#include "planar.h"
#include "decomp.h"
#include "th03/formats/hfliplut.h"
}
#include "th03/formats/mrs.hpp"
static const vram_byte_amount_t MRS_BYTE_W = (MRS_W / BYTE_DOTS);
@ -21,6 +27,9 @@ struct mrs_t {
extern mrs_t far *mrs_images[MRS_SLOT_COUNT];
// Decompilation workarounds
// -------------------------
// Points [reg_sgm]:[reg_off] to the alpha plane of the .MRS image in the
// given [slot].
#define mrs_slot_assign(reg_sgm, reg_off, slot) { \
@ -29,6 +38,102 @@ extern mrs_t far *mrs_images[MRS_SLOT_COUNT];
__asm { l##reg_sgm reg_off, mrs_images[bx]; } \
}
// Runs [body] once for every row of a .MRS image, from bottom to top. _CX is
// set to [row_dword_w], the row width in 32-dot units, before each run of
// [body]. _DI is assumed to point at the bottom left target position, while
// [body] is responsible to increment _DI by [MRS_BYTE_W].
//
// After [body], _DI is moved to the start of the row above. The loop ends as
// soon as this subtraction underflows and thereby sets the carry flag, so
// nothing may touch the flags between the subtraction and the loop condition.
// The macro already ends in a semicolon.
#define mrs_put_rows(row_dword_w, body) \
do { \
_CX = row_dword_w; \
body \
_DI -= (ROW_SIZE + MRS_BYTE_W); \
} while(!FLAGS_CARRY);
// ZUN optimized mrs_put_noalpha_8() to blit 3 out of the 4 bitplanes within a
// single loop. Annoyingly, he does this by first moving the source pointer to
// the beginning of the G plane within a mrs_t instance, and then accesses the
// earlier planes with *negative* offsets, rather than, y'know, just using
// positive ones like a sane person.
// These offsets are encoded as immediates within the instructions that read
// the dot patterns. Subtracting the raw values wouldn't decompile correctly,
// but thankfully, pointer arithmetic does, and is also a lot cleaner...
// conceptually, at least. It also inlines perfectly, allowing us to give some
// meaningful names to these horrifying expressions.
// Accessor type for a position inside the G plane of a mrs_t. [this] is the
// near offset of the current 32-dot unit within the G plane; stepping back by
// whole planes yields the same unit within one of the earlier planes.
// (Presumably adds no members, so that the pointer arithmetic moves in steps
// of sizeof(mrs_plane_t) – verify against the mrs_plane_t declaration.)
struct mrs_at_G_t : public mrs_plane_t {
// 3 planes before G
dots32_t dots_from_alpha(void) const { return *(*((this - 3)->dots)); }
// 2 planes before G
dots32_t dots_from_B(void) const { return *(*((this - 2)->dots)); }
// 1 plane before G
dots32_t dots_from_R(void) const { return *(*((this - 1)->dots)); }
};
// Returns the byte offset of the G plane within a mrs_t, typed as a near
// pointer to the accessor structure above.
// NOTE(review): Using this offset as-is only addresses the G plane if the
// image starts at offset 0 of its segment – confirm against the allocation
// of [mrs_images].
static inline mrs_at_G_t near* mrs_at_G(void) {
return reinterpret_cast<mrs_at_G_t near *>(offsetof(mrs_t, planes.G));
}
// -------------------------
// Returns the VRAM byte offset of the bottom-left corner of a .MRS image
// whose top row starts at offset 0 and whose left edge is at the screen-space
// X coordinate [left]. [left] is rounded down to a multiple of 8 pixels.
inline uint16_t to_bottom_left_8(const screen_x_t &left) {
return ((left >> 3) + ((MRS_H - 1) * ROW_SIZE));
}
// Converts the screen-space Y coordinate [top] into a segment distance:
// halves it to get the VRAM row, then multiplies that row by 5.
// (Presumably 5 = ROW_SIZE / 16, i.e. the number of 16-byte paragraphs per
// VRAM row – TODO confirm.) The result is relative; the caller still has to
// add the segment of the target plane.
// Written in terms of pseudoregisters: overwrites _AX and _DX.
inline seg_t to_segment(const uscreen_y_t &top) {
_AX = (top / 2); // screen_y_t -> vram_y_t...
_DX = _AX;
return ((_AX << 2) + _DX); // ... and -> segment
}
// Blits the .MRS image in the given [slot] to VRAM at (⌊left/8⌋*8, top),
// ignoring its alpha plane for transparency. If [altered_colors] is set, the
// B plane is written as (~alpha | B) instead of plain B.
//
// Register usage throughout the function:
// • DS:SI: source, walking through the G plane (and then the E plane) of the
//   image. DS is saved on entry and restored on exit.
// • FS / GS / ES: destination segments for the B / R / G planes
// • BX: destination segment for the E plane
// • DI: destination offset, DX: copy of its initial value
// Every statement maps to specific instructions of the original binary; do
// not reorder or merge anything in here.
void pascal mrs_put_noalpha_8(
	screen_x_t left, uscreen_y_t top, int slot, bool altered_colors
)
{
	#define _SI reinterpret_cast<mrs_at_G_t near *>(_SI)
	#define at_bottom_left _DX // *Not* rooted at (0, 0)!

	__asm { push ds; }
	_DI = to_bottom_left_8(left);
	_AX = to_segment(top);
	mrs_slot_assign(ds, si, slot);
	// Only DS from the assignment above is kept; SI is replaced with the
	// offset of the G plane.
	_SI = mrs_at_G();

	// "I've spent good money on that Intel 386 CPU, so let's actually use
	// *all* its segment registers!" :zunpet: :zunpet: :zunpet:
	_FS = (_AX += SEG_PLANE_B); // = B
	_GS = (_AX += SEG_PLANE_DIST_BRG); // = R
	_ES = (_AX += SEG_PLANE_DIST_BRG); // = G
	// At this point though, we're out of segment registers. That's why this
	// approach of not changing destination segments within a blitting loop
	// only works for 3 out of the 4 bitplanes, and why we need a second loop
	// for the final one after all.
	_BX = (_AX += SEG_PLANE_DIST_E); // = E

	at_bottom_left = _DI;
	if(altered_colors) {
		// B and R are written through FS and GS without moving SI or DI;
		// the final MOVSD copies G and advances both.
		mrs_put_rows(MRS_DWORD_W, { put_altered:
			poked(_FS, _DI, (~_SI->dots_from_alpha() | _SI->dots_from_B()));
			poked(_GS, _DI, _SI->dots_from_R());
			MOVSD;
			__asm { loop put_altered; }
		});
		// SI is now at the beginning of the E plane. Blit it in its own loop
		_DI = at_bottom_left;
		_ES = _BX;
		mrs_put_rows(MRS_DWORD_W, REP MOVSD);
	} else {
		// Same as above, with the unmodified B plane.
		mrs_put_rows(MRS_DWORD_W, { put_regular:
			poked(_FS, _DI, _SI->dots_from_B());
			poked(_GS, _DI, _SI->dots_from_R());
			MOVSD;
			__asm { loop put_regular; }
		});
		// SI is now at the beginning of the E plane. Blit it in its own loop
		_DI = at_bottom_left;
		_ES = _BX;
		mrs_put_rows(MRS_DWORD_W, REP MOVSD);
	}
	__asm { pop ds; }

	#undef at_bottom_left
	#undef _SI
}
#pragma codestring "\x90"
void pascal mrs_hflip(int slot)
{
_CX = sizeof(mrs_t);

View File

@ -7,6 +7,12 @@ static const int MRS_SLOT_COUNT = 8;
static const pixel_t MRS_W = 288;
static const pixel_t MRS_H = 184;
// Displays the .MRS image in the given [slot] at (⌊left/8⌋*8, top),
// disregarding its alpha plane, and optionally altering its colors slightly.
void pascal mrs_put_noalpha_8(
screen_x_t left, uscreen_y_t top, int slot, bool altered_colors
);
// Persistently flips the image in [slot] horizontally, using the [hflip_lut].
void pascal mrs_hflip(int slot);
/// ---------------------------------------------------------------------------

View File

@ -9052,104 +9052,7 @@ sub_EF46 endp
; ---------------------------------------------------------------------------
nop
; =============== S U B R O U T I N E =======================================
; Attributes: bp-based frame
; Disassembly of mrs_put_noalpha_8(left, top, slot, altered_colors), as it
; looked before being replaced by the C++ version (@MRS_PUT_NOALPHA_8$QIUIIC).
; Pascal calling convention: the callers push left, top, slot and
; altered_colors in that order, and the 8 bytes of arguments are removed by
; the RETF.
; Register roles: DS:SI = source (G plane, later E plane), FS/GS/ES =
; destination segments for B/R/G, BX = destination segment for E, DI =
; destination offset, DX = copy of the initial DI.
sub_EFF4 proc far
arg_0 = byte ptr 6 ; altered_colors
arg_2 = word ptr 8 ; slot
arg_4 = word ptr 0Ah ; top
arg_6 = word ptr 0Ch ; left
push bp
mov bp, sp
push si
push di
push ds ; DS gets repointed to the image below
mov ax, [bp+arg_6]
sar ax, 3 ; left / 8 = byte column
add ax, 3930h ; + ((MRS_H - 1) * ROW_SIZE) = bottom row of the image
mov di, ax
mov ax, [bp+arg_4]
shr ax, 1 ; top / 2
mov dx, ax
shl ax, 2
add ax, dx ; AX = (top / 2) * 5
mov bx, [bp+arg_2]
shl bx, 2 ; slot * 4 = index into an array of far pointers
lds si, _mrs_images[bx]
mov si, 4DA0h ; offsetof(mrs_t, planes.G); only DS is kept from the LDS
add ax, 0A800h ; SEG_PLANE_B
mov fs, ax ; FS = B
add ax, 800h ; SEG_PLANE_DIST_BRG
mov gs, ax ; GS = R
add ax, 800h ; SEG_PLANE_DIST_BRG
mov es, ax ; ES = G
add ax, 2800h ; SEG_PLANE_DIST_E
mov bx, ax ; BX = E, since no segment register is left
mov dx, di ; remember the bottom left offset for the E pass
cmp [bp+arg_0], 0
jz short loc_F071 ; altered_colors == 0: regular variant
; Altered variant, B/R/G planes: one pass of loc_F03A per row, bottom to top
loc_F03A:
mov cx, 9 ; MRS_DWORD_W
loc_F03D:
mov eax, [si-4DA0h] ; alpha plane, 3 planes before G
not eax
or eax, [si-33C0h] ; B plane, 2 planes before G
mov fs:[di], eax ; B = (~alpha | B)
mov eax, [si-19E0h] ; R plane, 1 plane before G
mov gs:[di], eax
movsd ; G plane: DS:[SI] -> ES:[DI], then moves SI and DI on by 4
loop loc_F03D
sub di, 74h ; 't' = ROW_SIZE + MRS_BYTE_W: start of the row above
jnb short loc_F03A ; repeat until DI underflows
; E plane; SI has moved past the G plane by now
mov di, dx
mov es, bx
loc_F064:
mov cx, 9 ; MRS_DWORD_W
rep movsd
sub di, 74h ; 't' = ROW_SIZE + MRS_BYTE_W: start of the row above
jnb short loc_F064 ; repeat until DI underflows
jmp short loc_F09E
; ---------------------------------------------------------------------------
; Regular variant, B/R/G planes: same as above, but with the unmodified B plane
loc_F071:
mov cx, 9 ; MRS_DWORD_W
loc_F074:
mov eax, [si-33C0h] ; B plane, 2 planes before G
mov fs:[di], eax
mov eax, [si-19E0h] ; R plane, 1 plane before G
mov gs:[di], eax
movsd ; G plane: DS:[SI] -> ES:[DI], then moves SI and DI on by 4
loop loc_F074
sub di, 74h ; 't' = ROW_SIZE + MRS_BYTE_W: start of the row above
jnb short loc_F071 ; repeat until DI underflows
; E plane; SI has moved past the G plane by now
mov di, dx
mov es, bx
loc_F093:
mov cx, 9 ; MRS_DWORD_W
rep movsd
sub di, 74h ; 't' = ROW_SIZE + MRS_BYTE_W: start of the row above
jnb short loc_F093 ; repeat until DI underflows
loc_F09E:
pop ds
pop di
pop si
pop bp
retf 8 ; pops the 4 word-sized arguments
sub_EFF4 endp
; ---------------------------------------------------------------------------
nop
extern @MRS_PUT_NOALPHA_8$QIUIIC:proc
extern @MRS_HFLIP$QI:proc
SPRITE16_SPRITES_COMMIT procdesc pascal far
SPRITE16_PUT procdesc pascal far \
@ -19177,19 +19080,19 @@ loc_1494A:
loc_1495C:
cmp _pid_current, 0
jz short loc_14967
add si, 140h
add si, PLAYFIELD_W_BORDERED
loc_14967:
push si
push 10h
push si ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
mov al, [bp+@@frame]
mov ah, 0
mov bx, 8
@ -20313,18 +20216,18 @@ loc_152D7:
mov _palette_changed, 1
mov al, _pid_current
mov ah, 0
imul ax, 140h
add ax, 10h
push ax
push 10h
imul ax, PLAYFIELD_W_BORDERED
add ax, PLAYFIELD_LEFT
push ax ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
jmp short loc_15323
; ---------------------------------------------------------------------------
@ -21393,18 +21296,18 @@ loc_15C23:
loc_15C32:
mov al, _pid_current
mov ah, 0
imul ax, 140h
add ax, 10h
push ax
push 10h
imul ax, PLAYFIELD_W_BORDERED
add ax, PLAYFIELD_LEFT
push ax ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
jmp short loc_15CB0
; ---------------------------------------------------------------------------
@ -24121,23 +24024,23 @@ loc_172F0:
loc_172FF:
mov al, _pid_current
mov ah, 0
imul ax, 140h
imul ax, PLAYFIELD_W_BORDERED
mov dl, _pid_current
mov dh, 0
add dx, dx
mov bx, dx
add ax, _playfield_fg_shift_x[bx]
add ax, 10h
push ax
push 10h
add ax, PLAYFIELD_LEFT
push ax ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
jmp short loc_1737D
; ---------------------------------------------------------------------------
@ -26392,22 +26295,22 @@ loc_184A5:
call sub_CE5B
loc_18518:
mov si, 10h
mov si, PLAYFIELD_LEFT
cmp _pid_current, 0
jz short loc_18526
add si, 140h
add si, PLAYFIELD_W_BORDERED
loc_18526:
push si
push 10h
push si ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
jmp short loc_185A3
; ---------------------------------------------------------------------------
@ -26781,22 +26684,22 @@ loc_18895:
mov _playfield_fg_shift_x[bx], -4
loc_188A4:
mov si, 10h
mov si, PLAYFIELD_LEFT
cmp _pid_current, 0
jz short loc_188B2
add si, 140h
add si, PLAYFIELD_W_BORDERED
loc_188B2:
push si
push 10h
push si ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
mov al, [bp+@@frame]
mov ah, 0
mov bx, 4
@ -27027,22 +26930,22 @@ loc_18A68:
mov angle_1FBD4, al
loc_18B2F:
mov si, 10h
mov si, PLAYFIELD_LEFT
cmp _pid_current, 0
jz short loc_18B3D
add si, 140h
add si, PLAYFIELD_W_BORDERED
loc_18B3D:
push si
push 10h
push si ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
call egc_on
jmp short @@ret
; ---------------------------------------------------------------------------
@ -27247,22 +27150,22 @@ loc_18D28:
sub word_1FE56, 2E0h
loc_18D34:
mov si, 10h
mov si, PLAYFIELD_LEFT
cmp _pid_current, 0
jz short loc_18D42
add si, 140h
add si, PLAYFIELD_W_BORDERED
loc_18D42:
push si
push 10h
push si ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
jmp short loc_18DBF
; ---------------------------------------------------------------------------
@ -27384,22 +27287,22 @@ loc_18E5F:
mov _playfield_fg_shift_x[bx], -4
loc_18E6E:
mov si, 10h
mov si, PLAYFIELD_LEFT
cmp _pid_current, 0
jz short loc_18E7C
add si, 140h
add si, PLAYFIELD_W_BORDERED
loc_18E7C:
push si
push 10h
push si ; left
push PLAYFIELD_TOP ; top
mov al, _pid_current
mov ah, 0
add ax, 2
push ax
push ax ; slot
mov al, _pid_current
mov ah, 0
push ax
call sub_EFF4
push ax ; altered_colors
call @mrs_put_noalpha_8$qiuiic
call grcg_setcolor pascal, (GC_RMW shl 16) + 15
mov ax, 900h
sub ax, word_220EC