From 79e9ab77dc6a69d24473ac3d11c11b03fa0ffbe3 Mon Sep 17 00:00:00 2001 From: nmlgc Date: Tue, 9 Aug 2022 19:43:22 +0200 Subject: [PATCH] [Maintenance] Simplify EGC macros and hardcode the chip's register size No reason to continue pretending it supports anything else. Part of P0212, funded by GhostRiderCog, Lmocinemod, and LeyDud. --- pc98.h | 8 ++++++++ planar.h | 15 ++++++++++----- th01/end/pic.cpp | 14 +++++++------- th01/hardware/egcrect.cpp | 22 +++++++++++++--------- th01/hardware/egcrows.cpp | 10 +++++----- th01/op_01.cpp | 2 +- th04/end/box.cpp | 12 +++++------- 7 files changed, 49 insertions(+), 34 deletions(-) diff --git a/pc98.h b/pc98.h index 53d19a29..4d01df6b 100644 --- a/pc98.h +++ b/pc98.h @@ -174,3 +174,11 @@ typedef int8_t uint4_t; // Segment distance between G↔E #define SEG_PLANE_DIST_E 0x2800 /// --------------- + +/// EGC +/// --- +/// The PC-98 EGC always operates on 16 dots at a time. + +static const int EGC_REGISTER_DOTS = 16; +static const int EGC_REGISTER_SIZE = (EGC_REGISTER_DOTS / BYTE_DOTS); +/// --- diff --git a/planar.h b/planar.h index c3202590..cae04989 100644 --- a/planar.h +++ b/planar.h @@ -189,9 +189,14 @@ static inline vram_offset_t vram_offset_divshift_wtf(screen_x_t x, vram_y_t y) { /* Nope, pokeb() doesn't generate the same code */ \ *reinterpret_cast(MK_FP(SEG_PLANE_B, offset)) = src -#define grcg_snap(dst, offset, bit_count) \ - VRAM_SNAP(dst, B, offset, bit_count) +// EGC +// --- -#define egc_put grcg_put -#define egc_put_emptyopt grcg_put_emptyopt -#define egc_snap grcg_snap +// ZUN bloat: Dummy value returned from an EGC copy read. Can be replaced with +// a pseudoregister to avoid one unnecessary store (for snapping) or load (for +// blitting) per EGC operation. +typedef dots16_t egc_temp_t; + +#define egc_chunk(offset) \ + VRAM_CHUNK(B, offset, 16) +// ---------- diff --git a/th01/end/pic.cpp b/th01/end/pic.cpp index 9a59c646..5df55e69 100644 --- a/th01/end/pic.cpp +++ b/th01/end/pic.cpp @@ -30,17 +30,17 @@ void end_pic_show(int quarter) // 386 and 17 cycles on a 486, and ZUN adds the bloat of a standard // function call on top of even that. // Optimizations aside, using the EGC can't give you a better algorithm, - // as its tile egisters are limited to 16 dots. Expanding to at least 32 + // as its tile registers are limited to 16 dots. Expanding to at least 32 // dots would have really been nice for ≥386 CPUs... for(y = 0; y < PIC_H; y++) { - for(vram_x = 0; vram_x < (PIC_VRAM_W / sizeof(dots16_t)); vram_x++) { - dots16_t d; + for(vram_x = 0; vram_x < (PIC_VRAM_W / EGC_REGISTER_SIZE); vram_x++) { + egc_temp_t d; - graph_accesspage_func(1); egc_snap(d, vram_offset_src, 16); - graph_accesspage_func(0); egc_put(vram_offset_dst, d, 16); + graph_accesspage_func(1); d = egc_chunk(vram_offset_src); + graph_accesspage_func(0); egc_chunk(vram_offset_dst) = d; - vram_offset_src += sizeof(dots16_t); - vram_offset_dst += sizeof(dots16_t); + vram_offset_src += EGC_REGISTER_SIZE; + vram_offset_dst += EGC_REGISTER_SIZE; } vram_offset_src += (ROW_SIZE - PIC_VRAM_W); vram_offset_dst += (ROW_SIZE - PIC_VRAM_W); diff --git a/th01/hardware/egcrect.cpp b/th01/hardware/egcrect.cpp index 1fef927b..560daafd 100644 --- a/th01/hardware/egcrect.cpp +++ b/th01/hardware/egcrect.cpp @@ -5,21 +5,25 @@ void egc_copy_rect_1_to_0_16(screen_x_t x, vram_y_t y, pixel_t w, pixel_t h) register screen_x_t x_end = x; register screen_x_t x_floor = x_end; pixel_t row; - screen_x_t col; - vram_offset_t row_p; - dots16_t dots; - vram_offset_t p; + screen_x_t column; + vram_offset_t vo_row; + egc_temp_t tmp; + vram_offset_t vo; x_end += w; x_floor &= 0xFFF0; - row_p = vram_offset_shift(x_floor, y); + vo_row = vram_offset_shift(x_floor, y); egc_start_copy(); for(row = 0; row < h; row++) { - for(col = x_floor, p = row_p; col < x_end; p += 2, col += 16) { - graph_accesspage_func(1); egc_snap(dots, p, 16); - graph_accesspage_func(0); egc_put(p, dots, 16); + for( + (column = x_floor, vo = vo_row); + column < x_end; + (vo += EGC_REGISTER_SIZE, column += EGC_REGISTER_DOTS) + ) { + graph_accesspage_func(1); tmp = egc_chunk(vo); + graph_accesspage_func(0); egc_chunk(vo) = tmp; } - row_p += ROW_SIZE; + vo_row += ROW_SIZE; } egc_off(); } diff --git a/th01/hardware/egcrows.cpp b/th01/hardware/egcrows.cpp index 5173cd5d..fa02f3e6 100644 --- a/th01/hardware/egcrows.cpp +++ b/th01/hardware/egcrows.cpp @@ -19,12 +19,12 @@ void egc_copy_rows_1_to_0(vram_y_t top, pixel_t h) for(pixel_t y = 0; y < h; y++) { vram_word_amount_t x = 0; - while(x < (ROW_SIZE / 2)) { - dots16_t page1; - graph_accesspage_func(1); egc_snap(page1, vram_offset, 16); - graph_accesspage_func(0); egc_put(vram_offset, page1, 16); + while(x < (ROW_SIZE / EGC_REGISTER_SIZE)) { + egc_temp_t tmp; + graph_accesspage_func(1); tmp = egc_chunk(vram_offset); + graph_accesspage_func(0); egc_chunk(vram_offset) = tmp; x++; - vram_offset += static_cast(sizeof(page1)); + vram_offset += EGC_REGISTER_SIZE; } } egc_off(); diff --git a/th01/op_01.cpp b/th01/op_01.cpp index fd6c7d62..0f4d4aa9 100644 --- a/th01/op_01.cpp +++ b/th01/op_01.cpp @@ -227,7 +227,7 @@ void whiteline_put(screen_y_t y) grcg_setcolor_rmw(15); x = 0; while(x < (ROW_SIZE / sizeof(dots32_t))) { - egc_put(vram_offset, 0xFFFFFFFF, 32); + grcg_put(vram_offset, 0xFFFFFFFF, 32); x++; vram_offset += static_cast(sizeof(dots32_t)); } diff --git a/th04/end/box.cpp b/th04/end/box.cpp index 92864305..b8e128d7 100644 --- a/th04/end/box.cpp +++ b/th04/end/box.cpp @@ -31,8 +31,7 @@ typedef enum { void pascal near box_1_to_0_masked(box_mask_t mask) { extern const dot_rect_t(16, 4) BOX_MASKS[BOX_MASK_COUNT]; - dots_t(16) dots; - #define CHUNK_W static_cast(sizeof(dots) * BYTE_DOTS) + egc_temp_t tmp; for(screen_y_t y = BOX_TOP; y < BOX_BOTTOM; y++) { outport2(EGC_READPLANEREG, 0x00ff); @@ -44,11 +43,10 @@ void pascal near box_1_to_0_masked(box_mask_t mask) vram_offset_t vram_offset = vram_offset_shift(BOX_LEFT, y); pixel_t x = 0; while(x < BOX_W) { - graph_accesspage(1); egc_snap(dots, vram_offset, 16); - graph_accesspage(0); egc_put(vram_offset, dots, 16); - x += CHUNK_W; - vram_offset += (CHUNK_W / BYTE_DOTS); + graph_accesspage(1); tmp = egc_chunk(vram_offset); + graph_accesspage(0); egc_chunk(vram_offset) = tmp; + x += EGC_REGISTER_DOTS; + vram_offset += EGC_REGISTER_SIZE; } } - #undef CHUNK_W }