ReC98/th01/end/pic.cpp

52 lines
1.6 KiB
C++
Raw Normal View History

#pragma option -O-
#include "th01/end/pic.hpp"
// Switches the accessed VRAM page to 1 and then hands [fn] to
// grp_put_palette_show().
// NOTE(review): Going by its name, grp_put_palette_show() presumably blits the
// image file [fn] to the accessed page and applies its palette, which would
// place the picture sheet on the page that end_pic_show() later reads from.
// Confirm against the grp_put_palette_show() declaration.
void pascal end_pics_load_palette_show(const char *fn)
{
// Must come first, so that the page selection is already in effect for
// whatever VRAM access the following call performs.
graph_accesspage_func(1);
grp_put_palette_show(fn);
}
// Avoid symbol duplication: From here on, every `egc_start_copy` token in this
// translation unit, including the ones inside the file included below and the
// call in end_pic_show(), is replaced with `egc_start_copy_1`.
// NOTE(review): Presumably, the included file defines egc_start_copy(), and
// the same file is also compiled into other translation units of the same
// binary, which would clash at link time without a unique name. Verify
// against the other users of egcstart.cpp.
#define egc_start_copy egc_start_copy_1
#include "th01/hardware/egcstart.cpp"
void end_pic_show(int quarter)
{
egc_start_copy();
pixel_t src_left = ((quarter % 2) * PIC_W);
pixel_t src_top = ((quarter / 2) * PIC_H);
uvram_offset_t vram_offset_src = vram_offset_shift(src_left, src_top);
uvram_offset_t vram_offset_dst = vram_offset_shift(PIC_LEFT, PIC_TOP);
vram_word_amount_t vram_x;
pixel_t y;
// ZUN quirk: This EGC-"accelerated" copy operation ends up performing a
// total of ((320 / 16) × 200 × 2) = 8000 VRAM page switches, which are
// everything but instant. Even the optimal assembly instructions for a
// *single* page switch, `MOV AL, (0|1)` followed by `OUT 0xA6, AL`, take
// 12 cycles on a 386 and 17 cycles on a 486, and ZUN adds the bloat of a
// standard function call on top of even that.
// Optimizations aside, using the EGC can't give you a better algorithm,
// as its tile registers are limited to 16 dots. Expanding to at least 32
// dots would have really been nice for ≥386 CPUs...
for(y = 0; y < PIC_H; y++) {
for(vram_x = 0; vram_x < (PIC_VRAM_W / EGC_REGISTER_SIZE); vram_x++) {
egc_temp_t d;
graph_accesspage_func(1); d = egc_chunk(vram_offset_src);
graph_accesspage_func(0); egc_chunk(vram_offset_dst) = d;
vram_offset_src += EGC_REGISTER_SIZE;
vram_offset_dst += EGC_REGISTER_SIZE;
}
vram_offset_src += (ROW_SIZE - PIC_VRAM_W);
vram_offset_dst += (ROW_SIZE - PIC_VRAM_W);
}
egc_off();
}
#pragma option -O.