[Decompilation] [th04] Bullets: Velocity and angle calculation

`switch` statements compiling to binary searches if the range of values
is nasty enough? That's so cool. Apart from a few places in TH02,
this is the only place in PC-98 Touhou to show off that Turbo C++
optimization.

That code's still unexpectedly janky for what you'd expect from the 4th
game in the series, though.

Part of P0150, funded by Blue Bolt.
This commit is contained in:
nmlgc 2021-06-24 19:41:56 +02:00
parent f99aa1ae29
commit 2c52bb5ef3
5 changed files with 237 additions and 287 deletions

View File

@ -244,6 +244,62 @@ Calling conventions can be added before the `*`.
* Sequence of the individual cases is identical in both C and ASM
* Multiple cases with the same offset in the table, to code that doesn't
return? Code was compiled with `-O`
* With the `-G` (Generate for speed) option, complicated `switch` statements
that require both value and jump tables are compiled to a binary search with
regular conditional branches:
```c
switch(foo) {
case 0x4B: /* […] */ break;
case 0x4D: /* […] */ break;
case 0x11: /* […] */ break;
case 0x1F: /* […] */ break;
case 0x20: /* […] */ break;
case 0x17: /* […] */ break;
case 0x26: /* […] */ break;
case 0x19: /* […] */ break;
case 0x01: /* […] */ break;
case 0x1C: /* […] */ break;
}
```
Resulting ASM:
```asm
@@switch:
MOV AX, foo
CMP AX, 1Fh
JZ @@case_1Fh
JG @@GT_1Fh
CMP AX, 17h
JZ @@case_17h
JG @@GT_17h_LT_1Fh
CMP AX, 01h
JZ @@case_01h
CMP AX, 11h
JZ @@case_11h
JMP @@no_case_found
@@GT_17h_LT_1Fh:
CMP AX, 19h
JZ @@case_19h
CMP AX, 1Ch
JZ @@case_1Ch
JMP @@no_case_found
@@GT_1Fh:
CMP AX, 4Bh
JZ @@case_4Bh
JG @@GT_4Bh
CMP AX, 20h
JZ @@case_20h
CMP AX, 26h
JZ @@case_26h
JMP @@no_case_found
@@GT_4Bh:
CMP AX, 4Dh
JZ @@case_4Dh
JMP @@no_case_found
```
## Function calls

View File

@ -21,6 +21,10 @@ inline subpixel_t to_sp(float pixel_v) {
return static_cast<subpixel_t>(pixel_v * 16.0f);
}
// Converts [pixel_v] to subpixels via to_sp() and narrows the result to an
// unsigned 8-bit value. Any bits above the lowest 8 are discarded by the cast.
inline unsigned char to_sp8(float pixel_v) {
return static_cast<unsigned char>(to_sp(pixel_v));
}
template <class SubpixelType, class PixelType> class SubpixelBase {
public:
typedef SubpixelBase<SubpixelType, PixelType> SelfType;

View File

@ -1,6 +1,3 @@
#include "th04/main/playperf.hpp"
#include "th04/main/bullet/bullet.hpp"
void pascal near bullets_add_regular_raw(void);
void pascal near bullets_add_special_raw(void);
@ -9,6 +6,12 @@ void pascal near bullets_add_special_raw(void);
/// Has no reason to be global.
extern bool group_fixedspeed;
// "(group_i * bullet_template.delta.spread_angle) is probably too expensive,
// let's rather do an addition for each additional spawned bullet :zunpet:"
extern unsigned char group_i_spread_angle;
extern unsigned char group_i_absolute_angle;
/// ---------------
#define tmpl bullet_template
@ -257,3 +260,147 @@ void near bullets_add_special_fixedspeed(void)
bullets_add_special();
group_fixedspeed = false;
}
// Evaluates to true if [group_i] is at or past the index of the final bullet
// in the current [bullet_template] group, i.e. (bullet_template.count - 1).
// The macro parameter must be the name used in the body; otherwise the
// argument is silently ignored and the expansion picks up whatever variable
// named `group_i` happens to be in scope at the call site.
#define last_bullet_in_group(group_i) \
	((group_i) >= (bullet_template.count - 1))
// Necessary to compile the switch statement in bullet_velocity_and_angle_set()
// to a binary search. Strangely, it's not used for the functions above?
#pragma option -G
// Sets the bullet template's velocity for bullet #[group_i] in the template's
// current group, as well as [group_i_absolute_angle]. Returns true if this
// was the last bullet for this group.
//
// For the spread groups, [group_i_spread_angle] is initialized when
// [group_i] is 0 and then advanced on later calls, so the results are only
// meaningful if this function is called with [group_i] counting up from 0.
bool16 pascal near bullet_velocity_and_angle_set(int group_i)
{
// Angle of this bullet relative to the group's base direction. Only the
// sum with [bullet_template.angle] is used at the end.
int angle = 0x00;
unsigned char speed;
bool done;
// Due to this default, invalid group values lead to the spawn functions
// repeatedly calling this function, until they completely filled the
// pellet / 16×16 part of the array with identical bullets using the given
// angle and speed.
// (Not really a ZUN bug until we can discover a game state where this can
// actually happen.)
done = false;
speed = bullet_template.speed.v;
// Every case leaves via `goto aim` or `goto no_aim`. The switch has no
// `default`, so a group value without a case falls out of the switch and
// into the `aim` label below, with [angle] still at 0x00.
switch(bullet_template.group) {
case BG_SPREAD:
case BG_SPREAD_AIMED:
if(bullet_template.count & 1) {
// Odd-numbered spreads always contain a bullet in the center.
if(group_i == 0) {
group_i_spread_angle = 0x00;
angle = 0x00;
} else if(group_i & 1) {
// Symmetric version of even-numbered bullets
group_i_spread_angle += bullet_template.delta.spread_angle;
angle = (0x100 - group_i_spread_angle);
} else {
angle = group_i_spread_angle;
}
} else {
// Even-numbered spreads are aimed around the 0° point, and
// therefore need to be shifted by half of the angle. Yes, this
// whole separate branch, with its whole pointlessly mirrored
// logic, wouldn't have been necessary, and ZUN could have just
// added the angle offset after the fact...
if(group_i == 0) {
group_i_spread_angle = (bullet_template.delta.spread_angle / 2);
angle = group_i_spread_angle;
} else if(group_i & 1) {
// Symmetric version of even-numbered bullets
angle = (0x100 - group_i_spread_angle);
} else {
group_i_spread_angle += bullet_template.delta.spread_angle;
angle = group_i_spread_angle;
}
}
if(last_bullet_in_group(group_i)) {
done = true;
}
if(bullet_template.group == BG_SPREAD) {
goto no_aim;
}
goto aim;
case BG_RING:
// Divides the 0x100 angle units evenly among [count] bullets.
angle = ((group_i * 0x100) / bullet_template.count);
if(last_bullet_in_group(group_i)) {
done = true;
}
goto no_aim;
case BG_RING_AIMED:
// Same distribution as BG_RING, but offset by the aim angle below.
angle = ((group_i * 0x100) / bullet_template.count);
if(last_bullet_in_group(group_i)) {
done = true;
}
goto aim;
// All these 16-bit randring operations seem to waste 8 bits of randomness,
// but each next16 call only advances the pointer by one byte anyway.
case BG_FORCESINGLE_RANDOM_ANGLE:
angle = randring2_next16();
done = true;
goto no_aim;
case BG_FORCESINGLE:
case BG_SINGLE:
done = true;
goto no_aim;
case BG_RANDOM_ANGLE:
angle = randring2_next16();
if(last_bullet_in_group(group_i)) {
done = true;
}
goto no_aim;
case BG_RANDOM_ANGLE_AND_SPEED:
angle = randring2_next16();
// The mask is (to_sp(2.0f) - 1), so the random value added to [speed] is
// masked to stay below to_sp(2.0f).
speed += randring2_next16_and(to_sp(2.0f) - 1);
if(last_bullet_in_group(group_i)) {
done = true;
}
goto no_aim;
case BG_RANDOM_CONSTRAINED_ANGLE_AIMED:
// Random value masked to [0x00, 0x1F], then shifted to [-0x10, +0x0F]
// around the aim angle.
angle = randring2_next16_and(0x1F);
angle -= 0x10;
if(last_bullet_in_group(group_i)) {
done = true;
}
goto aim;
case BG_FORCESINGLE_AIMED:
case BG_SINGLE_AIMED:
done = true;
goto aim;
case BG_STACK:
case BG_STACK_AIMED:
// [speed] is an unsigned char, so this sum is truncated to 8 bits.
speed += (bullet_template.delta.stack_speed * group_i);
// Note that the second condition looks at the template's base speed, not
// at the per-bullet [speed] that was just increased above.
if(
last_bullet_in_group(group_i) ||
(bullet_template.speed >= to_sp8(10.0f))
) {
done = true;
}
if(bullet_template.group == BG_STACK) {
goto no_aim;
}
goto aim;
}
// Aimed groups: adds the result of iatan2() for the offset from the
// template's origin to the player's current position (presumably the angle
// towards the player) on top of the group-relative [angle].
aim:
angle += iatan2(
(player_pos.cur.y - bullet_template.origin.y),
(player_pos.cur.x - bullet_template.origin.x)
);
// Common tail: passes the template's velocity, the final angle
// ([angle] plus the template's base angle) and [speed] to vector2_near(),
// and stores that same final angle, truncated to 8 bits, in
// [group_i_absolute_angle].
no_aim:
vector2_near(
bullet_template.velocity, (angle + bullet_template.angle), speed
);
group_i_absolute_angle = (angle + bullet_template.angle);
return done;
}

View File

@ -6,12 +6,19 @@
#pragma option -zCmain_032_TEXT -zPmain_03
extern "C" {
#include "ReC98.h"
#include "platform.h"
#include "pc98.h"
#include "planar.h"
#include "master.hpp"
#include "th01/math/subpixel.hpp"
#include "th04/math/motion.hpp"
#include "th04/math/randring.h"
#include "th04/math/vector.hpp"
#include "th04/main/playfld.hpp"
#include "th04/main/playperf.hpp"
#include "th04/main/player/player.hpp"
#include "th04/main/bullet/bullet.hpp"
#pragma option -a2
#include "th04/main/bullet/add.cpp"
}

View File

@ -28225,272 +28225,8 @@ main_033_TEXT segment byte public 'CODE' use16
BULLETS_ADD_SPECIAL_HARD_LUNATIC procdesc pascal near
_bullets_add_regular_fixedspeed procdesc near
_bullets_add_special_fixedspeed procdesc near
; =============== S U B R O U T I N E =======================================
; Attributes: bp-based frame
;
; Near procedure taking one word argument on the stack, which is removed on
; return via `retn 2`. Depending on _bullet_template.BT_group, it picks an
; angle, a speed and a "done" flag, passes the result to vector2_near, stores
; the final 8-bit angle in angle_2D008, and returns the done flag in AX
; (AL = flag, AH = 0).
;
;   SI         = arg_0, kept for the whole procedure (saved and restored)
;   var_2      = 16-bit angle accumulator; only its low byte is used at the end
;   var_4      = done flag (0 or 1), the return value
;   @@speed    = 8-bit speed, initialized from _bullet_template.speed
;   byte_2CFF6 = spread angle that is kept between calls
sub_1CFC8 proc near
var_4 = byte ptr -4
@@speed = byte ptr -3
var_2 = word ptr -2
arg_0 = word ptr 4
push bp
mov bp, sp
sub sp, 4
push si
mov si, [bp+arg_0]
mov [bp+var_2], 0
mov [bp+var_4], 0
mov al, _bullet_template.speed
mov [bp+@@speed], al
; Compare-and-branch dispatch on the zero-extended group value. Values that
; match none of the compared constants end up at @@aim.
mov al, _bullet_template.BT_group
mov ah, 0
cmp ax, BG_RING_AIMED
jz @@ring_aimed
jg short loc_1D027
cmp ax, BG_RANDOM_ANGLE
jz @@random_angle
jg short loc_1D00F
or ax, ax
jz @@single
cmp ax, BG_SINGLE_AIMED
jz @@single_aimed
cmp ax, BG_FORCESINGLE_RANDOM_ANGLE
jz @@single_random_angle
jmp @@aim
; ---------------------------------------------------------------------------
; Group values greater than BG_RANDOM_ANGLE and less than BG_RING_AIMED
loc_1D00F:
cmp ax, BG_RANDOM_ANGLE_AND_SPEED
jz @@random_angle_and_speed
cmp ax, BG_RANDOM_CONSTRAINED_ANGLE_AIMED
jz @@spread_random_angle_aimed
cmp ax, BG_RING
jz @@ring
jmp @@aim
; ---------------------------------------------------------------------------
; Group values greater than BG_RING_AIMED
loc_1D027:
cmp ax, BG_STACK_AIMED
jz @@stack
jg short loc_1D044
cmp ax, BG_SPREAD
jz short @@spread
cmp ax, BG_SPREAD_AIMED
jz short @@spread
cmp ax, BG_STACK
jz @@stack
jmp @@aim
; ---------------------------------------------------------------------------
; Group values greater than BG_STACK_AIMED
loc_1D044:
cmp ax, BG_FORCESINGLE
jz @@single
cmp ax, BG_FORCESINGLE_AIMED
jz @@single_aimed
jmp @@aim
; ---------------------------------------------------------------------------
; Shared by BG_SPREAD and BG_SPREAD_AIMED. Bit 0 of the count selects between
; the odd-count path (falls through) and the even-count path (loc_1D088).
@@spread:
test _bullet_template.count, 1
jz short loc_1D088
; Odd count, SI == 0: reset the kept spread angle, angle = 0
or si, si
jnz short loc_1D06C
mov byte_2CFF6, 0
mov [bp+var_2], 0
jmp short loc_1D0B0
; ---------------------------------------------------------------------------
; Odd count, SI != 0: odd SI advances the kept spread angle and mirrors it,
; even SI reuses the kept spread angle as is (loc_1D0A8).
loc_1D06C:
test si, 1
jz short loc_1D0A8
mov al, _bullet_template.BT_delta.spread_angle
add byte_2CFF6, al
; var_2 = 100h - byte_2CFF6
loc_1D079:
mov al, byte_2CFF6
mov ah, 0
mov dx, 100h
sub dx, ax
mov [bp+var_2], dx
jmp short loc_1D0B0
; ---------------------------------------------------------------------------
; Even count, SI == 0: byte_2CFF6 = spread_angle / 2. This is the signed
; divide-by-2 sequence (cwd / sub / sar); AH is 0, so DX is always 0 here.
loc_1D088:
or si, si
jnz short loc_1D09B
mov al, _bullet_template.BT_delta.spread_angle
mov ah, 0
cwd
sub ax, dx
sar ax, 1
mov byte_2CFF6, al
jmp short loc_1D0A8
; ---------------------------------------------------------------------------
; Even count, SI != 0: odd SI mirrors the kept spread angle (loc_1D079),
; even SI advances it first and then uses it as is.
loc_1D09B:
test si, 1
jnz short loc_1D079
mov al, _bullet_template.BT_delta.spread_angle
add byte_2CFF6, al
; var_2 = byte_2CFF6, zero-extended
loc_1D0A8:
mov al, byte_2CFF6
mov ah, 0
mov [bp+var_2], ax
; var_4 = 1 unless (count - 1) > SI (signed comparison)
loc_1D0B0:
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jg short loc_1D0BE
mov [bp+var_4], 1
; Only BG_SPREAD skips the iatan2 call at @@aim
loc_1D0BE:
cmp _bullet_template.BT_group, BG_SPREAD
jnz @@aim
jmp @@static
; ---------------------------------------------------------------------------
; var_2 = (SI << 8) / count, signed division. Continues at @@single (sets
; var_4) unless (count - 1) > SI.
@@ring:
mov ax, si
shl ax, 8
mov dl, _bullet_template.count
mov dh, 0
push dx
cwd
pop bx
idiv bx
mov [bp+var_2], ax
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jg @@static
jmp short @@single
; ---------------------------------------------------------------------------
; Same calculation as @@ring, but continues at @@aim / @@single_aimed.
@@ring_aimed:
mov ax, si
shl ax, 8
mov dl, _bullet_template.count
mov dh, 0
push dx
cwd
pop bx
idiv bx
mov [bp+var_2], ax
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jg @@aim
jmp short @@single_aimed
; ---------------------------------------------------------------------------
; var_2 = randring2_next16(), then falls through to set var_4
@@single_random_angle:
call randring2_next16
mov [bp+var_2], ax
; var_4 = 1, without the iatan2 call
@@single:
mov [bp+var_4], 1
jmp @@static
; ---------------------------------------------------------------------------
@@random_angle:
call randring2_next16
mov [bp+var_2], ax
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jg short @@static
jmp short @@single
; ---------------------------------------------------------------------------
; var_2 = randring2_next16(); @@speed += low byte of randring2_next16_and(1Fh)
@@random_angle_and_speed:
call randring2_next16
mov [bp+var_2], ax
call randring2_next16_and pascal, 1Fh
add al, [bp+@@speed]
mov [bp+@@speed], al
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jg short @@static
jmp short @@single
; ---------------------------------------------------------------------------
; var_2 = randring2_next16_and(1Fh) - 10h
@@spread_random_angle_aimed:
call randring2_next16_and pascal, 1Fh
mov [bp+var_2], ax
sub [bp+var_2], 10h
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jg short @@aim
; var_4 = 1, followed by the iatan2 call
@@single_aimed:
mov [bp+var_4], 1
jmp short @@aim
; ---------------------------------------------------------------------------
; Shared by BG_STACK and BG_STACK_AIMED.
; @@speed += low byte of (stack_speed * SI)
@@stack:
mov al, _bullet_template.BT_delta.stack_speed
mov ah, 0
imul si
add al, [bp+@@speed]
mov [bp+@@speed], al
; var_4 = 1 if (count - 1) <= SI, or if the template's speed (not @@speed)
; is at least (10 shl 4), compared as unsigned
mov al, _bullet_template.count
mov ah, 0
dec ax
cmp ax, si
jle short loc_1D182
cmp _bullet_template.speed, (10 shl 4)
jb short loc_1D186
loc_1D182:
mov [bp+var_4], 1
; Only BG_STACK skips the iatan2 call
loc_1D186:
cmp _bullet_template.BT_group, BG_STACK
jz short @@static
; var_2 += iatan2(player Y - origin Y, player X - origin X); the Y difference
; is pushed first.
@@aim:
mov ax, _player_pos.cur.y
sub ax, _bullet_template.BT_origin.y
push ax
mov ax, _player_pos.cur.x
sub ax, _bullet_template.BT_origin.x
push ax
call iatan2
add [bp+var_2], ax
; Common tail: calls vector2_near with the offset of the template's velocity,
; the low byte of (var_2 + BT_angle), and the zero-extended @@speed. The same
; 8-bit angle sum is then stored in angle_2D008.
@@static:
push offset _bullet_template.BT_velocity
mov al, byte ptr [bp+var_2]
add al, _bullet_template.BT_angle
push ax
mov al, [bp+@@speed]
mov ah, 0
push ax
call vector2_near
mov al, byte ptr [bp+var_2]
add al, _bullet_template.BT_angle
mov angle_2D008, al
; Return value: AX = var_4, zero-extended
mov al, [bp+var_4]
mov ah, 0
pop si
leave
retn 2
sub_1CFC8 endp
BULLET_VELOCITY_AND_ANGLE_SET procdesc pascal near \
i:word
; =============== S U B R O U T I N E =======================================
@ -28596,7 +28332,7 @@ public BULLETS_ADD_REGULAR_RAW
bullets_add_regular_raw proc near
@@spawn_state = byte ptr -5
var_4 = byte ptr -4
@@done = byte ptr -4
@@move_state = byte ptr -3
@@i = word ptr -2
@ -28692,24 +28428,23 @@ loc_1D33C:
mov [si+bullet_t.BULLET_patnum], ax
mov al, [bp+@@spawn_state]
mov [si+bullet_t.spawn_state], al
push di
call sub_1CFC8
mov [bp+var_4], al
call BULLET_VELOCITY_AND_ANGLE_SET pascal, di
mov [bp+@@done], al
cmp _bullet_template.patnum, PAT_BULLET16_D
jb short loc_1D391
call bullet_patnum_for_angle pascal, word ptr angle_2D008
call bullet_patnum_for_angle pascal, word ptr _group_i_absolute_angle
mov ah, 0
add [si+bullet_t.BULLET_patnum], ax
loc_1D391:
mov eax, _bullet_template.BT_velocity
mov dword ptr [si+bullet_t.pos.velocity], eax
mov al, angle_2D008
mov al, _group_i_absolute_angle
mov [si+bullet_t.BULLET_angle], al
mov al, _bullet_template.speed
mov [si+bullet_t.speed_final], al
mov [si+bullet_t.speed_cur], al
cmp [bp+var_4], 0
cmp [bp+@@done], 0
jnz short loc_1D3BB
inc di
@ -28736,7 +28471,7 @@ public BULLETS_ADD_SPECIAL_RAW
bullets_add_special_raw proc near
@@spawn_state = byte ptr -4
var_3 = byte ptr -3
@@done = byte ptr -3
@@i = word ptr -2
push bp
@ -28802,24 +28537,23 @@ loc_1D40A:
mov [si+bullet_t.BULLET_patnum], ax
mov al, [bp+@@spawn_state]
mov [si+bullet_t.spawn_state], al
push di
call sub_1CFC8
mov [bp+var_3], al
call BULLET_VELOCITY_AND_ANGLE_SET pascal, di
mov [bp+@@done], al
cmp _bullet_template.patnum, PAT_BULLET16_D
jb short loc_1D460
call bullet_patnum_for_angle pascal, word ptr angle_2D008
call bullet_patnum_for_angle pascal, word ptr _group_i_absolute_angle
mov ah, 0
add [si+bullet_t.BULLET_patnum], ax
loc_1D460:
mov eax, _bullet_template.BT_velocity
mov dword ptr [si+bullet_t.pos.velocity], eax
mov al, angle_2D008
mov al, _group_i_absolute_angle
mov [si+bullet_t.BULLET_angle], al
mov al, _bullet_template.speed
mov [si+bullet_t.speed_final], al
mov [si+bullet_t.speed_cur], al
cmp [bp+var_3], 0
cmp [bp+@@done], 0
jnz short loc_1D48A
inc di
@ -34969,7 +34703,8 @@ include th04/formats/scoredat[bss].asm
byte_2CFF2 db ?
db ?
word_2CFF4 dw ?
byte_2CFF6 db ?
public _group_i_spread_angle
_group_i_spread_angle db ?
include th04/main/bullet/update[bss].asm
db ?
public _stage_graze
@ -34980,7 +34715,8 @@ _bullets_add_regular dw ?
_bullets_add_special dw ?
include th04/main/bullet/tune[bss].asm
include th04/main/bullet/pellet_r[bss].asm
angle_2D008 db ?
public _group_i_absolute_angle
_group_i_absolute_angle db ?
evendata
public _bombing_disabled
_bombing_disabled db ?