mirror of https://github.com/nmlgc/ReC98.git
330 lines
11 KiB
C++
330 lines
11 KiB
C++
#pragma option -zPmain_01
|
||
|
||
#include <stddef.h>
|
||
#include "platform.h"
|
||
#include "x86real.h"
|
||
#include "codegen.hpp"
|
||
#include "pc98.h"
|
||
#include "master.hpp"
|
||
#include "th01/math/overlap.hpp"
|
||
#include "th01/math/polar.hpp"
|
||
#include "th01/math/subpixel.hpp"
|
||
#include "th04/math/motion.hpp"
|
||
#include "th04/main/playfld.hpp"
|
||
#include "th04/main/player/player.hpp"
|
||
#include "th05/main/bullet/laser.hpp"
|
||
|
||
#pragma option -k-
|
||
|
||
// Points that change from subpixel screen space to pixel screen space within
|
||
// the same variable...
|
||
union SpaceChangingPoint {
|
||
SPPoint sp;
|
||
screen_point_t pixel;
|
||
};
|
||
|
||
// Register parameters for vector2_at_opt().
|
||
#define v_length_ _BX
|
||
#define v_length static_cast<subpixel_t>(v_length_)
|
||
#define v_length_high _BH // ZUN bloat
|
||
#define v_length_low _BL // ZUN bloat
|
||
#define v_angle _CX
|
||
#define v_angle_low _CL
|
||
#define v_angle_high _CH
|
||
#define v_center_ _BP
|
||
#define v_center reinterpret_cast<SPPoint __ss *>(v_center_)
|
||
#define v_ret_ _DI
|
||
#define v_ret reinterpret_cast<SpaceChangingPoint __ss *>(v_ret_)
|
||
|
||
// Register parameters for build_line_in_pixels().
|
||
#define p_0_ _DI
|
||
#define p_0 reinterpret_cast<SpaceChangingPoint __ss *>(p_0_)
|
||
#define p_1_ _BX
|
||
#define p_1 reinterpret_cast<SpaceChangingPoint __ss *>(p_1_)
|
||
#define p_distance_ _BP
|
||
#define p_distance reinterpret_cast<SPPoint __ss *>(p_distance_)
|
||
|
||
// ZUN bloat: To work around the need for shifting BP even further once control
|
||
// flow reaches build_line_in_pixels(), ZUN hardcoded the offset from the
|
||
// current value of BP to [p_distance] within the function itself. This offset
|
||
// actually matches [p_distance]'s position if build_line_in_pixels() were a
|
||
// regular function… which means that this madness saves a grand total of one
|
||
// single PUSH instruction. Wow.
|
||
#define p_distance_offset 1
|
||
|
||
// A regular vector2_at() function with [v_center], [v_angle], and [v_length].
|
||
// Returns the Y coordinate of the resulting vector.
|
||
subpixel_t near vector2_at_opt()
|
||
{
|
||
uint16_t table_offset = v_angle;
|
||
table_offset += table_offset; // *= sizeof(short)
|
||
_AX = polar_by_offset(v_center->x, v_length, CosTable8, table_offset);
|
||
mov_to_mem(_SS, v_ret_, offsetof(point_t, x), _AX);
|
||
_AX = polar_by_offset(v_center->y, v_length, SinTable8, table_offset);
|
||
mov_to_mem(_SS, v_ret_, offsetof(point_t, y), _AX);
|
||
return _AX;
|
||
}
|
||
|
||
// Calculates:
|
||
// p_0 = p_0.to_pixel();
|
||
// p_1 = (p_0 + p_distance).to_pixel();
|
||
void near build_line_in_pixels()
|
||
{
|
||
#define tmp_0 _DX
|
||
#define tmp_1 _AX
|
||
|
||
tmp_1 = p_distance[p_distance_offset].x;
|
||
mov_to_reg(tmp_0, _SS, p_0_, offsetof(SPPoint, x));
|
||
tmp_1 += tmp_0;
|
||
static_cast<pixel_t>(tmp_1) >>= SUBPIXEL_BITS;
|
||
static_cast<pixel_t>(tmp_0) >>= SUBPIXEL_BITS;
|
||
p_1->pixel.x = tmp_1;
|
||
mov_to_mem(_SS, p_0_, offsetof(screen_point_t, x), tmp_0);
|
||
|
||
tmp_1 = p_distance[p_distance_offset].y;
|
||
mov_to_reg(tmp_0, _SS, p_0_, offsetof(SPPoint, y));
|
||
tmp_1 += tmp_0;
|
||
static_cast<pixel_t>(tmp_1) >>= SUBPIXEL_BITS;
|
||
static_cast<pixel_t>(tmp_0) >>= SUBPIXEL_BITS;
|
||
p_1->pixel.y = tmp_1;
|
||
mov_to_mem(_SS, p_0_, offsetof(screen_point_t, y), tmp_0);
|
||
|
||
#undef tmp_1
|
||
#undef tmp_0
|
||
}
|
||
|
||
bool16 pascal near laser_render_ray(const laser_coords_t near& coords)
|
||
{
|
||
// ZUN bloat: Micro-optimized based on the wrong assumption that using more
|
||
// stack space is bad? This assumption had dramatic effects on the
|
||
// complexity of the resulting monstrosity, for three main reasons:
|
||
//
|
||
// 1) Using BP as a parameter to the helper functions above, within a
|
||
// function that declares local variables which are accessed *relative*
|
||
// to BP. This shifts the function's view of its own variables, which in
|
||
// turn forces us to provide these shifted views as additional structs
|
||
// within the main union just to keep the code comprehensible.
|
||
//
|
||
// 2) The misguided assumption that pointer arithmetic with addresses of
|
||
// local variables is superior to just fully recalculating the pointers.
|
||
// On a 486, both `LEA r16, m` and `ADD r16, imm` take 1 cycle. Sure,
|
||
// thanks to 1), these addresses are constantly shifting, but it's still
|
||
// better than us having to annotate the exact movement to the target
|
||
// variable.
|
||
//
|
||
// 3) The necessary conversion from subpixel space to screen space is done
|
||
// within the same variables, requiring further unions within unions to
|
||
// describe which type is active at any given time.
|
||
//
|
||
// The /// comments spell out the core algorithm in pseudocode, and are
|
||
// probably easier to follow than the actual compiled mess of code.
|
||
|
||
#define _BP reinterpret_cast<SpaceChangingPoint __ss *>(_BP)
|
||
#define point_count static_cast<int>(_AX)
|
||
|
||
union {
|
||
// Subpixel points used for initial construction.
|
||
// Since the meaning of "left" and "right" would be flipped depending
|
||
// on whether the laser points up and down, we use `_ccw` and `cw` to
|
||
// indicate the ray's corner points as a counter-clockwise or clockwise
|
||
// quarter-circle rotation away from the centered origin.
|
||
struct {
|
||
SPPoint start_ccw;
|
||
/* -------------------- */ point_t _unused_1;
|
||
/* -------------------- */ point_t _unused_2;
|
||
SPPoint end_ccw;
|
||
/* -------------------- */ point_t _unused_3;
|
||
SPPoint origin_ccw;
|
||
SPPoint ccw_to_cw_delta;
|
||
SPPoint origin;
|
||
} c;
|
||
|
||
// Variant of [c] for correct addressing with BP shifted by one point.
|
||
struct {
|
||
/* -------------------- */ point_t _shift_dummy;
|
||
SPPoint start_ccw;
|
||
/* -------------------- */ point_t _unused_1;
|
||
/* -------------------- */ point_t _unused_2;
|
||
SPPoint end_ccw;
|
||
/* -------------------- */ point_t _unused_3;
|
||
SPPoint origin_ccw;
|
||
SPPoint ccw_to_cw_delta;
|
||
} c_bp_shifted;
|
||
|
||
// Screen-space points, converted from [c].
|
||
struct {
|
||
SpaceChangingPoint start_ccw;
|
||
SpaceChangingPoint start_cw;
|
||
SpaceChangingPoint end_cw;
|
||
SpaceChangingPoint end_ccw;
|
||
} p;
|
||
|
||
// Final clipped points
|
||
screen_point_t clipped[8];
|
||
|
||
// BP reference points for stack offset calculation
|
||
SpaceChangingPoint bp[8];
|
||
} ps;
|
||
|
||
// Same transformation as in lasers_render(), for LF_SHOOTOUT_DECAY.
|
||
v_length_low = coords.width.nonshrink;
|
||
v_length_high = 0;
|
||
v_length <<= (SUBPIXEL_BITS - 1);
|
||
|
||
ps.c.origin.x.v = (to_sp(PLAYFIELD_LEFT) + coords.origin.x);
|
||
ps.c.origin.y.v = (to_sp(PLAYFIELD_TOP) + coords.origin.y);
|
||
|
||
v_angle_low = coords.angle;
|
||
v_angle_low += static_cast<uint8_t>(0x40);
|
||
v_angle_high = 0;
|
||
|
||
/// ps.c.ccw_to_cw_delta = vector2_at(
|
||
/// ps.c.origin, (coords.width / 2), (coords.angle + 0x40)
|
||
/// );
|
||
lea_local_to_reg(v_ret_, &ps.bp[8], &ps.c.ccw_to_cw_delta);
|
||
_BP -= (&ps.bp[8].sp /* current */ - &ps.c.origin /* target */);
|
||
v_center = &_BP->sp;
|
||
vector2_at_opt();
|
||
|
||
/// ps.c.origin_ccw = vector2_at(
|
||
/// ps.c.origin, (coords.width / 2), (coords.angle - 0x40)
|
||
/// );
|
||
/// ps.c.ccw_to_cw_delta -= ps.c.origin_ccw;
|
||
v_angle_low += static_cast<uint8_t>(0x80);
|
||
v_ret -= (
|
||
&ps.c.ccw_to_cw_delta /* current */ - &ps.c.origin_ccw /* target */
|
||
);
|
||
ps.c_bp_shifted.ccw_to_cw_delta.y.v -= vector2_at_opt();
|
||
ps.c_bp_shifted.ccw_to_cw_delta.x.v -= ps.c_bp_shifted.origin_ccw.x.v;
|
||
|
||
/// ps.c.start_ccw = vector2_at(
|
||
/// ps.c.origin_ccw, coords.starts_at_distance, coords.angle
|
||
/// );
|
||
v_angle_low = coords.angle;
|
||
v_length = coords.starts_at_distance;
|
||
v_ret -= (&ps.c.origin_ccw /* current */ - &ps.c.start_ccw /* target */);
|
||
_BP -= (&ps.c.origin /* current */ - &ps.c.origin_ccw /* target */);
|
||
v_center = &_BP->sp;
|
||
vector2_at_opt();
|
||
|
||
/// ps.c.end_ccw = vector2_at(
|
||
/// ps.c.origin_ccw, coords.ends_at_distance, coords.angle
|
||
/// );
|
||
v_length = coords.ends_at_distance;
|
||
v_ret += (&ps.c.end_ccw /* target */ - &ps.c.start_ccw /* current */);
|
||
vector2_at_opt();
|
||
|
||
/// ps.p.start_ccw = ps.c.start_ccw.to_pixel();
|
||
/// ps.p.start_cw = (ps.c.start_ccw + ps.c.ccw_to_cw_delta).to_pixel();
|
||
lea_local_to_reg(p_0_, &ps.bp[5], &ps.p.start_ccw);
|
||
lea_local_to_reg(p_1_, &ps.bp[5], &ps.p.start_cw);
|
||
p_distance = (&_BP->sp + p_distance_offset + (
|
||
&ps.c.origin_ccw /* current */ - &ps.c.ccw_to_cw_delta /* target */
|
||
));
|
||
build_line_in_pixels();
|
||
|
||
/// ps.p.end_ccw = ps.c.end_ccw.to_pixel();
|
||
/// ps.p.end_cw = (ps.c.end_ccw + ps.c.ccw_to_cw_delta).to_pixel();
|
||
p_0 += (&ps.p.end_ccw /* target */ - &ps.p.start_ccw /* current */);
|
||
p_1 += (&ps.p.end_cw /* target */ - &ps.p.start_cw /* current */);
|
||
build_line_in_pixels();
|
||
|
||
// Return BP back to where you'd expect it to be
|
||
_BP += (&ps.bp[8].sp /* target */ - &ps.c.origin_ccw /* current */);
|
||
|
||
lea_local_to_reg(v_ret_, &ps.bp[8], &ps.clipped[0]);
|
||
point_count = grc_clip_polygon_n(&v_ret->pixel, 8, &v_ret->pixel, 4);
|
||
if(point_count == 0) {
|
||
goto offscreen;
|
||
}
|
||
asm { jge draw; } /// if(point_count < 0) {
|
||
point_count = 4;
|
||
/// }
|
||
|
||
draw:
|
||
grcg_polygon_cx(&v_ret->pixel, point_count);
|
||
return false;
|
||
|
||
offscreen:
|
||
return true;
|
||
|
||
#undef point_count
|
||
#undef _BP
|
||
}
|
||
|
||
void pascal near laser_hittest(Laser near& laser__)
|
||
{
|
||
#define laser_ _DI
|
||
#define laser reinterpret_cast<Laser near *>(laser_)
|
||
#define distance_start_ _SI
|
||
#define distance_start static_cast<subpixel_t>(distance_start_)
|
||
|
||
union {
|
||
struct {
|
||
SPPoint test_center;
|
||
SPPoint origin;
|
||
} c;
|
||
|
||
// BP reference points for stack offset calculation
|
||
SPPoint bp[2];
|
||
} ps;
|
||
|
||
// ZUN bloat: A reversed prolog and epilog means that we've got to
|
||
// __emit__() *every* instruction that touches these registers... Luckily,
|
||
// this function is simple enough, and doesn't need lots of comments to
|
||
// describe what it does: generating boxes at fixed intervals along the
|
||
// laser ray and hit-testing every one of them.
|
||
__emit__(0x57); // PUSH DI
|
||
__emit__(0x56); // PUSH SI
|
||
|
||
mov_param_to_reg(laser_, 4); (laser__); /// laser_ = &laser__;
|
||
|
||
mov_to_reg(
|
||
v_length_, _DS, laser_, offsetof(Laser, coords.ends_at_distance)
|
||
);
|
||
mov_to_reg(
|
||
distance_start_, _DS, laser_, offsetof(Laser, coords.starts_at_distance)
|
||
);
|
||
reinterpret_cast<SPPoint __ss *>(_BP) -= (
|
||
&ps.bp[2] /* current */ - &ps.c.origin /* target */
|
||
);
|
||
v_center = reinterpret_cast<SPPoint __ss *>(_BP);
|
||
mov_to_reg(_EAX, _DS, laser_, offsetof(Laser, coords.origin));
|
||
mov_to_mem(_SS, v_center_, 0, _EAX);
|
||
mov_to_reg(v_angle_low, _DS, laser_, offsetof(Laser, coords.angle));
|
||
v_angle_high ^= v_angle_high;
|
||
lea_local_to_reg(v_ret_, &ps.c.origin, &ps.c.test_center);
|
||
|
||
loop: {
|
||
_AX = vector2_at_opt();
|
||
|
||
// ZUN quirk: 12×12-pixel boxes in 16-pixel intervals = 4-pixel gaps
|
||
// between the boxes that allow the player to pass through the laser.
|
||
if(overlap_1d_fast(_AX, player_pos.cur.y, to_sp(12.0f))) {
|
||
mov_to_reg(_AX, _SS, v_ret_, offsetof(SPPoint, x));
|
||
if(overlap_1d_fast(_AX, player_pos.cur.x, to_sp(12.0f))) {
|
||
player_is_hit = true;
|
||
goto ret;
|
||
}
|
||
}
|
||
v_length -= to_sp(16.0f);
|
||
cmp_reg_reg(v_length_, distance_start_);
|
||
asm { jge loop; }
|
||
}
|
||
|
||
ret:
|
||
// Return BP back to where you'd expect it to be, allowing the function to
|
||
// properly return.
|
||
reinterpret_cast<SPPoint __ss *>(_BP) += (
|
||
&ps.bp[2] /* target */ - &ps.c.origin /* current */
|
||
);
|
||
|
||
__emit__(0x5E); // POP SI
|
||
__emit__(0x5F); // POP DI
|
||
|
||
#undef distance_start
|
||
#undef distance_start_
|
||
#undef laser
|
||
#undef laser_
|
||
}
|