#pragma option -zCSHARED -3 extern "C" { #include #include "platform.h" #include "pc98.h" #include "planar.h" #include "decomp.hpp" #include "master.hpp" #include "th03/formats/hfliplut.h" } #include "th03/formats/mrs.hpp" #undef grcg_off #define grcg_off() { \ _AL ^= _AL; \ __asm out 0x7C, al; \ } static const vram_byte_amount_t MRS_BYTE_W = (MRS_W / BYTE_DOTS); static const vram_dword_amount_t MRS_DWORD_W = (MRS_BYTE_W / sizeof(dots32_t)); struct mrs_plane_t { dots32_t dots[MRS_H][MRS_DWORD_W]; }; // On-disk per-image structure struct mrs_t { mrs_plane_t alpha; Planar planes; }; extern mrs_t far *mrs_images[MRS_SLOT_COUNT]; // Decompilation workarounds // ------------------------- #define mrs_slot_offset_to(var, slot) { \ var = slot; \ var <<= 2; \ } // Points [reg_sgm]:[reg_off] to the alpha plane of the .MRS image in the // given [slot]. #define mrs_slot_assign(reg_sgm, reg_off, slot) { \ mrs_slot_offset_to(_BX, slot); \ __asm { l##reg_sgm reg_off, mrs_images[bx]; } \ } // Single iteration across [row_dword_w] 32-dot units of a .MRS image, from // bottom to top. _DI is assumed to point at the bottom left target position, // while [body] is responsible to increment _DI by [MRS_BYTE_W]. #define mrs_put_rows(row_dword_w, body) \ do { \ _CX = row_dword_w; \ body \ _DI -= (ROW_SIZE + MRS_BYTE_W); \ } while(!FLAGS_CARRY); // ZUN optimized mrs_put_noalpha_8() to blit 3 out of the 4 bitplanes within a // single loop. Annoyingly, he does this by first moving the source pointer to // the beginning of the G plane within a mrs_t instance, and then accesses the // earlier planes with *negative* offsets, rather than, y'know, just using // positive ones like a sane person. // These offsets are encoded as immediates within the instructions that read // the dot patterns. Subtracting the raw values wouldn't decompile correctly, // but thankfully, pointer arithmetic does, and is also a lot cleaner... // conceptually, at least. It also inlines perfectly, allowing us to give some // meaningful names to these horrifying expressions. struct mrs_at_G_t : public mrs_plane_t { dots32_t dots_from_alpha(void) const { return *(*((this - 3)->dots)); } dots32_t dots_from_B(void) const { return *(*((this - 2)->dots)); } dots32_t dots_from_R(void) const { return *(*((this - 1)->dots)); } }; static inline mrs_at_G_t near* mrs_at_G(void) { return reinterpret_cast(offsetof(mrs_t, planes.G)); } // At least mrs_put_8() is somewhat sane. struct mrs_at_B_t : public mrs_plane_t { dots32_t dots_from_alpha(void) const { return *(*((this + 0)->dots)); } dots32_t dots_from_B(void) const { return *(*((this + 1)->dots)); } dots32_t dots_from_R(void) const { return *(*((this + 2)->dots)); } dots32_t dots_from_G(void) const { return *(*((this + 3)->dots)); } dots32_t dots_from_E(void) const { return *(*((this + 4)->dots)); } }; // ------------------------- void pascal mrs_load(int slot, const char *fn) { file_ropen(fn); mrs_images[slot] = reinterpret_cast( hmem_allocbyte(sizeof(mrs_t)) ); // MODDERS: Relies on the above expression setting _BX to the byte offset // of [slot]... file_read(*reinterpret_cast( reinterpret_cast(mrs_images) + _BX ), sizeof(mrs_t)); file_close(); } #pragma option -Z- void pascal mrs_free(int slot) { #define mrs_image_pointer_word(slot_offset, off_or_sgm) \ *(reinterpret_cast( \ reinterpret_cast(mrs_images) + slot_offset \ ) + off_or_sgm) mrs_slot_offset_to(_BX, slot); _AX = mrs_image_pointer_word(_BX, 1); // Yes, |=, not =, to an uninitialized register. The entire reason why we // can't decompile this into one sane expression... _DX |= mrs_image_pointer_word(_BX, 0); _DX |= _AX; if(!FLAGS_ZERO) { hmem_free(reinterpret_cast(_AX)); mrs_image_pointer_word(_BX, 0) = mrs_image_pointer_word(_BX, 1) = NULL; } #undef mrs_image_pointer_word } #pragma codestring "\x90" inline uint16_t to_bottom_left_8(const screen_x_t &left) { return ((left >> 3) + ((MRS_H - 1) * ROW_SIZE)); } inline seg_t to_segment(const uscreen_y_t &top) { _AX = (top / 2); // screen_y_t -> vram_y_t... _DX = _AX; return ((_AX << 2) + _DX); // ... and -> segment } void pascal mrs_put_8(screen_x_t left, uscreen_y_t top, int slot) { #define _SI reinterpret_cast(_SI) grcg_setcolor(GC_RMW, 0); _DI = to_bottom_left_8(left); _AX = to_segment(top); // "I've spent good money on that Intel 386 CPU, so let's actually use // *all* its segment registers!" :zunpet: :zunpet: :zunpet: _ES = (_AX += SEG_PLANE_B); // = B _FS = (_AX += SEG_PLANE_DIST_BRG); // = R _GS = (_AX += SEG_PLANE_DIST_BRG); // = G __asm { push ds; } mrs_slot_assign(ds, si, slot); _DX = MRS_DWORD_W; __asm { nop; } mrs_put_rows(_DX, REP MOVSD); grcg_off(); // Reset to bottom left _SI = 0; _DI += (MRS_H * ROW_SIZE); _BX = (_GS + SEG_PLANE_DIST_E); // = E _DX = MRS_DWORD_W; mrs_put_rows(_DX, { put: _EAX = _SI->dots_from_alpha(); _EAX |= _EAX; if(!FLAGS_ZERO) { poke_or_d(_ES, _DI, _SI->dots_from_B()); poke_or_d(_FS, _DI, _SI->dots_from_R()); poke_or_d(_GS, _DI, _SI->dots_from_G()); _GS = _BX; poke_or_d(_GS, _DI, _SI->dots_from_E()); _GS = (_BX - SEG_PLANE_DIST_E); } reinterpret_cast(_SI) += sizeof(dots32_t); _DI += sizeof(dots32_t); __asm { loop put; } }); __asm { pop ds; } #undef _SI } #pragma codestring "\x90" void pascal mrs_put_noalpha_8( screen_x_t left, uscreen_y_t top, int slot, bool altered_colors ) { #define _SI reinterpret_cast(_SI) #define at_bottom_left _DX // *Not* rooted at (0, 0)! __asm { push ds; } _DI = to_bottom_left_8(left); _AX = to_segment(top); mrs_slot_assign(ds, si, slot); _SI = mrs_at_G(); _FS = (_AX += SEG_PLANE_B); // = B _GS = (_AX += SEG_PLANE_DIST_BRG); // = R _ES = (_AX += SEG_PLANE_DIST_BRG); // = G // At this point though, we're out of segment registers. That's why this // approach of not changing destination segments within a blitting loop // only works for 3 out of the 4 bitplanes, and why we need a second loop // for the final one after all. _BX = (_AX += SEG_PLANE_DIST_E); // = E at_bottom_left = _DI; if(altered_colors) { mrs_put_rows(MRS_DWORD_W, { put_altered: poked(_FS, _DI, (~_SI->dots_from_alpha() | _SI->dots_from_B())); poked(_GS, _DI, _SI->dots_from_R()); MOVSD; __asm { loop put_altered; } }); // SI is now at the beginning of the E plane. Blit it in its own loop _DI = at_bottom_left; _ES = _BX; mrs_put_rows(MRS_DWORD_W, REP MOVSD); } else { mrs_put_rows(MRS_DWORD_W, { put_regular: poked(_FS, _DI, _SI->dots_from_B()); poked(_GS, _DI, _SI->dots_from_R()); MOVSD; _asm { loop put_regular; } }); // SI is now at the beginning of the E plane. Blit it in its own loop _DI = at_bottom_left; _ES = _BX; mrs_put_rows(MRS_DWORD_W, REP MOVSD); } __asm { pop ds; } #undef at_bottom_left #undef _SI } #pragma codestring "\x90" void pascal mrs_hflip(int slot) { _CX = sizeof(mrs_t); mrs_slot_assign(es, di, slot); reinterpret_cast(_BX) = hflip_lut; flip_dots_within_bytes: __asm { mov al, es:[di]; xlat; mov es:[di], al; inc di; loop flip_dots_within_bytes; } _CX = (sizeof(mrs_t) / MRS_BYTE_W); /* vram_byte_amount_t offset_y */ _BX = 0; flip_bytes: { // Assumes that the offset part of all image pointers is 0. Storing // segment pointers in [mrs_images] would have been clearer in this // case... /* vram_byte_amount_t offset_left */ _DI = 0; /* vram_byte_amount_t offset_right */ _SI = (MRS_BYTE_W - 1); do { __asm { mov al, es:[bx+di] mov dl, es:[bx+si] mov es:[bx+si], al mov es:[bx+di], dl } _SI--; _DI++; } while(_DI <= ((MRS_BYTE_W / 2) - 1)); _BX += MRS_BYTE_W; __asm { loop flip_bytes; } } } #pragma codestring "\x90"