From 2c52bb5ef35e28895cc4ef81f98e660272c43497 Mon Sep 17 00:00:00 2001 From: nmlgc Date: Thu, 24 Jun 2021 19:41:56 +0200 Subject: [PATCH] [Decompilation] [th04] Bullets: Velocity and angle calculation `switch` statements compiling to binary searches if the range of values is nasty enough? That's so cool. Apart from a few places in TH02, this is the only place in PC-98 Touhou to show off that Turbo C++ optimization. That code's still unexpectedly janky for what you'd expect from the 4th game in the series, though. Part of P0150, funded by Blue Bolt. --- Research/Borland C++ decompilation.md | 56 +++++ th01/math/subpixel.hpp | 4 + th04/main/bullet/add.cpp | 153 ++++++++++++- th04/main032.cpp | 11 +- th04_main.asm | 300 ++------------------------ 5 files changed, 237 insertions(+), 287 deletions(-) diff --git a/Research/Borland C++ decompilation.md b/Research/Borland C++ decompilation.md index fd1f6be6..8ebc0953 100644 --- a/Research/Borland C++ decompilation.md +++ b/Research/Borland C++ decompilation.md @@ -244,6 +244,62 @@ Calling conventions can be added before the `*`. * Sequence of the individual cases is identical in both C and ASM * Multiple cases with the same offset in the table, to code that doesn't return? 
Code was compiled with `-O` +* With the `-G` (Generate for speed) option, complicated `switch` statements + that require both value and jump tables are compiled to a binary search with + regular conditional branches: + + ```c + switch(foo) { + case 0x4B: /* […] */ break; + case 0x4D: /* […] */ break; + case 0x11: /* […] */ break; + case 0x1F: /* […] */ break; + case 0x20: /* […] */ break; + case 0x17: /* […] */ break; + case 0x26: /* […] */ break; + case 0x19: /* […] */ break; + case 0x01: /* […] */ break; + case 0x1C: /* […] */ break; + } + ``` + + Resulting ASM: + + ```asm + @@switch: + MOV AX, foo + CMP AX, 1Fh + JZ @@case_1Fh + JG @@GT_1Fh + CMP AX, 17h + JZ @@case_17h + JG @@GT_17h_LT_1Fh + CMP AX, 01h + JZ @@case_01h + CMP AX, 11h + JZ @@case_11h + JMP @@no_case_found + + @@GT_17h_LT_1Fh: + CMP AX, 1Ch + JZ @@case_1Ch + JMP @@no_case_found + + @@GT_1Fh: + CMP AX, 4Bh + JZ @@case_4Bh + JG @@GT_4Bh + CMP AX, 20h + JZ @@case_20h + CMP AX, 26h + JZ @@case_26h + JMP @@no_case_found + + @@GT_4Bh: + CMP AX, 4Dh + JZ @@case_4Dh + JMP @@no_case_found + ``` ## Function calls diff --git a/th01/math/subpixel.hpp b/th01/math/subpixel.hpp index eca3f4f6..9c37e44f 100644 --- a/th01/math/subpixel.hpp +++ b/th01/math/subpixel.hpp @@ -21,6 +21,10 @@ inline subpixel_t to_sp(float pixel_v) { return static_cast<subpixel_t>(pixel_v * 16.0f); } +inline unsigned char to_sp8(float pixel_v) { + return static_cast<unsigned char>(to_sp(pixel_v)); +} + template class SubpixelBase { public: typedef SubpixelBase SelfType; diff --git a/th04/main/bullet/add.cpp b/th04/main/bullet/add.cpp index 1783174a..fbba239e 100644 --- a/th04/main/bullet/add.cpp +++ b/th04/main/bullet/add.cpp @@ -1,6 +1,3 @@ -#include "th04/main/playperf.hpp" -#include "th04/main/bullet/bullet.hpp" - void pascal near bullets_add_regular_raw(void); void pascal near bullets_add_special_raw(void); @@ -9,6 +6,12 @@ void pascal near bullets_add_special_raw(void); /// Has no reason to be global. 
extern bool group_fixedspeed;
+
+// "(group_i * bullet_template.delta.spread_angle) is probably too expensive,
+// let's rather do an addition for each additional spawned bullet :zunpet:"
+extern unsigned char group_i_spread_angle;
+
+extern unsigned char group_i_absolute_angle;
 /// ---------------
 
 #define tmpl bullet_template
@@ -257,3 +260,157 @@ void near bullets_add_special_fixedspeed(void)
 	bullets_add_special();
 	group_fixedspeed = false;
 }
+
+// Evaluates to true once [group_i] has reached the index of the last bullet
+// requested by [bullet_template.count]. Uses >= rather than ==, so indices
+// past the end of the group are treated as "last" as well.
+#define last_bullet_in_group(group_i) \
+	((group_i) >= (bullet_template.count - 1))
+
+// Necessary to compile the switch statement in bullet_velocity_and_angle_set()
+// to a binary search. Strangely, it's not used for the functions above?
+#pragma option -G
+
+// Sets the bullet template's velocity for bullet #[group_i] in the template's
+// current group, as well as [group_i_absolute_angle]. Returns true if this
+// was the last bullet for this group.
+//
+// BG_SPREAD and BG_SPREAD_AIMED reset [group_i_spread_angle] at [group_i] 0
+// and accumulate into it on later calls, so they only produce a symmetric
+// spread if this function is called with ascending [group_i], starting at 0.
+bool16 pascal near bullet_velocity_and_angle_set(int group_i)
+{
+	int angle = 0x00;
+	unsigned char speed;
+	bool done;
+
+	// Due to this default, invalid group values lead to the spawn functions
+	// repeatedly calling this function, until they completely filled the
+	// pellet / 16×16 part of the array with identical bullets using the given
+	// angle and speed.
+	// (Not really a ZUN bug until we can discover a game state where this can
+	// actually happen.)
+	done = false;
+	speed = bullet_template.speed.v;
+
+	switch(bullet_template.group) {
+	case BG_SPREAD:
+	case BG_SPREAD_AIMED:
+		if(bullet_template.count & 1) {
+			// Odd-numbered spreads always contain a bullet in the center.
+			if(group_i == 0) {
+				group_i_spread_angle = 0x00;
+				angle = 0x00;
+			} else if(group_i & 1) {
+				// Symmetric version of even-numbered bullets
+				group_i_spread_angle += bullet_template.delta.spread_angle;
+				angle = (0x100 - group_i_spread_angle);
+			} else {
+				angle = group_i_spread_angle;
+			}
+		} else {
+			// Even-numbered spreads are aimed around the 0° point, and
+			// therefore need to be shifted by half of the angle. Yes, this
+			// whole separate branch, with its whole pointlessly mirrored
+			// logic, wouldn't have been necessary, and ZUN could have just
+			// added the angle offset after the fact...
+			if(group_i == 0) {
+				group_i_spread_angle = (bullet_template.delta.spread_angle / 2);
+				angle = group_i_spread_angle;
+			} else if(group_i & 1) {
+				// Symmetric version of even-numbered bullets
+				angle = (0x100 - group_i_spread_angle);
+			} else {
+				group_i_spread_angle += bullet_template.delta.spread_angle;
+				angle = group_i_spread_angle;
+			}
+		}
+		if(last_bullet_in_group(group_i)) {
+			done = true;
+		}
+		if(bullet_template.group == BG_SPREAD) {
+			goto no_aim;
+		}
+		goto aim;
+
+	case BG_RING:
+		angle = ((group_i * 0x100) / bullet_template.count);
+		if(last_bullet_in_group(group_i)) {
+			done = true;
+		}
+		goto no_aim;
+	case BG_RING_AIMED:
+		angle = ((group_i * 0x100) / bullet_template.count);
+		if(last_bullet_in_group(group_i)) {
+			done = true;
+		}
+		goto aim;
+
+	// All these 16-bit randring operations seem to waste 8 bits of randomness,
+	// but each next16 call only advances the pointer by one byte anyway.
+	case BG_FORCESINGLE_RANDOM_ANGLE:
+		angle = randring2_next16();
+		done = true;
+		goto no_aim;
+
+	case BG_FORCESINGLE:
+	case BG_SINGLE:
+		done = true;
+		goto no_aim;
+
+	case BG_RANDOM_ANGLE:
+		angle = randring2_next16();
+		if(last_bullet_in_group(group_i)) {
+			done = true;
+		}
+		goto no_aim;
+	case BG_RANDOM_ANGLE_AND_SPEED:
+		angle = randring2_next16();
+		speed += randring2_next16_and(to_sp(2.0f) - 1);
+		if(last_bullet_in_group(group_i)) {
+			done = true;
+		}
+		goto no_aim;
+	case BG_RANDOM_CONSTRAINED_ANGLE_AIMED:
+		angle = randring2_next16_and(0x1F);
+		angle -= 0x10;
+		if(last_bullet_in_group(group_i)) {
+			done = true;
+		}
+		goto aim;
+
+	case BG_FORCESINGLE_AIMED:
+	case BG_SINGLE_AIMED:
+		done = true;
+		goto aim;
+
+	case BG_STACK:
+	case BG_STACK_AIMED:
+		speed += (bullet_template.delta.stack_speed * group_i);
+		// The speed cap reads the template's base speed, not the [speed]
+		// of this specific bullet that was just calculated above.
+		if(
+			last_bullet_in_group(group_i) ||
+			(bullet_template.speed >= to_sp8(10.0f))
+		) {
+			done = true;
+		}
+		if(bullet_template.group == BG_STACK) {
+			goto no_aim;
+		}
+		goto aim;
+	}
+// Also the target for any group value without a matching case above.
+aim:
+	angle += iatan2(
+		(player_pos.cur.y - bullet_template.origin.y),
+		(player_pos.cur.x - bullet_template.origin.x)
+	);
+
+no_aim:
+	vector2_near(
+		bullet_template.velocity, (angle + bullet_template.angle), speed
+	);
+	group_i_absolute_angle = (angle + bullet_template.angle);
+	return done;
+}
diff --git a/th04/main032.cpp b/th04/main032.cpp
index ba0eba61..c3ae1d0e 100644
--- a/th04/main032.cpp
+++ b/th04/main032.cpp
@@ -6,12 +6,19 @@
 #pragma option -zCmain_032_TEXT -zPmain_03
 
 extern "C" {
-#include "ReC98.h"
+#include "platform.h"
+#include "pc98.h"
+#include "planar.h"
+#include "master.hpp"
 #include "th01/math/subpixel.hpp"
 #include "th04/math/motion.hpp"
+#include "th04/math/randring.h"
+#include "th04/math/vector.hpp"
 #include "th04/main/playfld.hpp"
+#include "th04/main/playperf.hpp"
+#include "th04/main/player/player.hpp"
+#include "th04/main/bullet/bullet.hpp"
 
 #pragma option -a2
-
 #include "th04/main/bullet/add.cpp"
 }
diff --git a/th04_main.asm 
b/th04_main.asm index cb39a7d7..3d5d0c8e 100644 --- a/th04_main.asm +++ b/th04_main.asm @@ -28225,272 +28225,8 @@ main_033_TEXT segment byte public 'CODE' use16 BULLETS_ADD_SPECIAL_HARD_LUNATIC procdesc pascal near _bullets_add_regular_fixedspeed procdesc near _bullets_add_special_fixedspeed procdesc near - -; =============== S U B R O U T I N E ======================================= - -; Attributes: bp-based frame - -sub_1CFC8 proc near - -var_4 = byte ptr -4 -@@speed = byte ptr -3 -var_2 = word ptr -2 -arg_0 = word ptr 4 - - push bp - mov bp, sp - sub sp, 4 - push si - mov si, [bp+arg_0] - mov [bp+var_2], 0 - mov [bp+var_4], 0 - mov al, _bullet_template.speed - mov [bp+@@speed], al - mov al, _bullet_template.BT_group - mov ah, 0 - cmp ax, BG_RING_AIMED - jz @@ring_aimed - jg short loc_1D027 - cmp ax, BG_RANDOM_ANGLE - jz @@random_angle - jg short loc_1D00F - or ax, ax - jz @@single - cmp ax, BG_SINGLE_AIMED - jz @@single_aimed - cmp ax, BG_FORCESINGLE_RANDOM_ANGLE - jz @@single_random_angle - jmp @@aim -; --------------------------------------------------------------------------- - -loc_1D00F: - cmp ax, BG_RANDOM_ANGLE_AND_SPEED - jz @@random_angle_and_speed - cmp ax, BG_RANDOM_CONSTRAINED_ANGLE_AIMED - jz @@spread_random_angle_aimed - cmp ax, BG_RING - jz @@ring - jmp @@aim -; --------------------------------------------------------------------------- - -loc_1D027: - cmp ax, BG_STACK_AIMED - jz @@stack - jg short loc_1D044 - cmp ax, BG_SPREAD - jz short @@spread - cmp ax, BG_SPREAD_AIMED - jz short @@spread - cmp ax, BG_STACK - jz @@stack - jmp @@aim -; --------------------------------------------------------------------------- - -loc_1D044: - cmp ax, BG_FORCESINGLE - jz @@single - cmp ax, BG_FORCESINGLE_AIMED - jz @@single_aimed - jmp @@aim -; --------------------------------------------------------------------------- - -@@spread: - test _bullet_template.count, 1 - jz short loc_1D088 - or si, si - jnz short loc_1D06C - mov byte_2CFF6, 0 - mov [bp+var_2], 0 - 
jmp short loc_1D0B0 -; --------------------------------------------------------------------------- - -loc_1D06C: - test si, 1 - jz short loc_1D0A8 - mov al, _bullet_template.BT_delta.spread_angle - add byte_2CFF6, al - -loc_1D079: - mov al, byte_2CFF6 - mov ah, 0 - mov dx, 100h - sub dx, ax - mov [bp+var_2], dx - jmp short loc_1D0B0 -; --------------------------------------------------------------------------- - -loc_1D088: - or si, si - jnz short loc_1D09B - mov al, _bullet_template.BT_delta.spread_angle - mov ah, 0 - cwd - sub ax, dx - sar ax, 1 - mov byte_2CFF6, al - jmp short loc_1D0A8 -; --------------------------------------------------------------------------- - -loc_1D09B: - test si, 1 - jnz short loc_1D079 - mov al, _bullet_template.BT_delta.spread_angle - add byte_2CFF6, al - -loc_1D0A8: - mov al, byte_2CFF6 - mov ah, 0 - mov [bp+var_2], ax - -loc_1D0B0: - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jg short loc_1D0BE - mov [bp+var_4], 1 - -loc_1D0BE: - cmp _bullet_template.BT_group, BG_SPREAD - jnz @@aim - jmp @@static -; --------------------------------------------------------------------------- - -@@ring: - mov ax, si - shl ax, 8 - mov dl, _bullet_template.count - mov dh, 0 - push dx - cwd - pop bx - idiv bx - mov [bp+var_2], ax - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jg @@static - jmp short @@single -; --------------------------------------------------------------------------- - -@@ring_aimed: - mov ax, si - shl ax, 8 - mov dl, _bullet_template.count - mov dh, 0 - push dx - cwd - pop bx - idiv bx - mov [bp+var_2], ax - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jg @@aim - jmp short @@single_aimed -; --------------------------------------------------------------------------- - -@@single_random_angle: - call randring2_next16 - mov [bp+var_2], ax - -@@single: - mov [bp+var_4], 1 - jmp @@static -; --------------------------------------------------------------------------- - 
-@@random_angle: - call randring2_next16 - mov [bp+var_2], ax - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jg short @@static - jmp short @@single -; --------------------------------------------------------------------------- - -@@random_angle_and_speed: - call randring2_next16 - mov [bp+var_2], ax - call randring2_next16_and pascal, 1Fh - add al, [bp+@@speed] - mov [bp+@@speed], al - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jg short @@static - jmp short @@single -; --------------------------------------------------------------------------- - -@@spread_random_angle_aimed: - call randring2_next16_and pascal, 1Fh - mov [bp+var_2], ax - sub [bp+var_2], 10h - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jg short @@aim - -@@single_aimed: - mov [bp+var_4], 1 - jmp short @@aim -; --------------------------------------------------------------------------- - -@@stack: - mov al, _bullet_template.BT_delta.stack_speed - mov ah, 0 - imul si - add al, [bp+@@speed] - mov [bp+@@speed], al - mov al, _bullet_template.count - mov ah, 0 - dec ax - cmp ax, si - jle short loc_1D182 - cmp _bullet_template.speed, (10 shl 4) - jb short loc_1D186 - -loc_1D182: - mov [bp+var_4], 1 - -loc_1D186: - cmp _bullet_template.BT_group, BG_STACK - jz short @@static - -@@aim: - mov ax, _player_pos.cur.y - sub ax, _bullet_template.BT_origin.y - push ax - mov ax, _player_pos.cur.x - sub ax, _bullet_template.BT_origin.x - push ax - call iatan2 - add [bp+var_2], ax - -@@static: - push offset _bullet_template.BT_velocity - mov al, byte ptr [bp+var_2] - add al, _bullet_template.BT_angle - push ax - mov al, [bp+@@speed] - mov ah, 0 - push ax - call vector2_near - mov al, byte ptr [bp+var_2] - add al, _bullet_template.BT_angle - mov angle_2D008, al - mov al, [bp+var_4] - mov ah, 0 - pop si - leave - retn 2 -sub_1CFC8 endp - + BULLET_VELOCITY_AND_ANGLE_SET procdesc pascal near \ + i:word ; =============== S U B R O U T I N E 
======================================= @@ -28596,7 +28332,7 @@ public BULLETS_ADD_REGULAR_RAW bullets_add_regular_raw proc near @@spawn_state = byte ptr -5 -var_4 = byte ptr -4 +@@done = byte ptr -4 @@move_state = byte ptr -3 @@i = word ptr -2 @@ -28692,24 +28428,23 @@ loc_1D33C: mov [si+bullet_t.BULLET_patnum], ax mov al, [bp+@@spawn_state] mov [si+bullet_t.spawn_state], al - push di - call sub_1CFC8 - mov [bp+var_4], al + call BULLET_VELOCITY_AND_ANGLE_SET pascal, di + mov [bp+@@done], al cmp _bullet_template.patnum, PAT_BULLET16_D jb short loc_1D391 - call bullet_patnum_for_angle pascal, word ptr angle_2D008 + call bullet_patnum_for_angle pascal, word ptr _group_i_absolute_angle mov ah, 0 add [si+bullet_t.BULLET_patnum], ax loc_1D391: mov eax, _bullet_template.BT_velocity mov dword ptr [si+bullet_t.pos.velocity], eax - mov al, angle_2D008 + mov al, _group_i_absolute_angle mov [si+bullet_t.BULLET_angle], al mov al, _bullet_template.speed mov [si+bullet_t.speed_final], al mov [si+bullet_t.speed_cur], al - cmp [bp+var_4], 0 + cmp [bp+@@done], 0 jnz short loc_1D3BB inc di @@ -28736,7 +28471,7 @@ public BULLETS_ADD_SPECIAL_RAW bullets_add_special_raw proc near @@spawn_state = byte ptr -4 -var_3 = byte ptr -3 +@@done = byte ptr -3 @@i = word ptr -2 push bp @@ -28802,24 +28537,23 @@ loc_1D40A: mov [si+bullet_t.BULLET_patnum], ax mov al, [bp+@@spawn_state] mov [si+bullet_t.spawn_state], al - push di - call sub_1CFC8 - mov [bp+var_3], al + call BULLET_VELOCITY_AND_ANGLE_SET pascal, di + mov [bp+@@done], al cmp _bullet_template.patnum, PAT_BULLET16_D jb short loc_1D460 - call bullet_patnum_for_angle pascal, word ptr angle_2D008 + call bullet_patnum_for_angle pascal, word ptr _group_i_absolute_angle mov ah, 0 add [si+bullet_t.BULLET_patnum], ax loc_1D460: mov eax, _bullet_template.BT_velocity mov dword ptr [si+bullet_t.pos.velocity], eax - mov al, angle_2D008 + mov al, _group_i_absolute_angle mov [si+bullet_t.BULLET_angle], al mov al, _bullet_template.speed mov 
[si+bullet_t.speed_final], al mov [si+bullet_t.speed_cur], al - cmp [bp+var_3], 0 + cmp [bp+@@done], 0 jnz short loc_1D48A inc di @@ -34969,7 +34703,8 @@ include th04/formats/scoredat[bss].asm byte_2CFF2 db ? db ? word_2CFF4 dw ? -byte_2CFF6 db ? +public _group_i_spread_angle +_group_i_spread_angle db ? include th04/main/bullet/update[bss].asm db ? public _stage_graze @@ -34980,7 +34715,8 @@ _bullets_add_regular dw ? _bullets_add_special dw ? include th04/main/bullet/tune[bss].asm include th04/main/bullet/pellet_r[bss].asm -angle_2D008 db ? +public _group_i_absolute_angle +_group_i_absolute_angle db ? evendata public _bombing_disabled _bombing_disabled db ?