[Decompilation] Find out how to bypass TCC's optimization of 0 immediates

By deferring that immediate 0 to link time. 🤦

Part of P0146, funded by -Tom- and Ember2528.
This commit is contained in:
nmlgc 2021-05-30 20:09:31 +02:00
parent 8426f3e13f
commit aae96aec45
4 changed files with 43 additions and 7 deletions

View File

@ -47,6 +47,10 @@ setfarfp macro farfp, func
mov word ptr farfp, offset func
endm
; Public symbol with the constant value 0. Referencing it from C++ defers an
; immediate 0 to link time, which keeps the compiler from optimizing that 0
; away. See decomp.hpp for the C++ side.
public _address_0
_address_0 = 0
; master.lib extensions
; ---------------------
; super_roll_put_1plane() plane_put constants

View File

@ -70,6 +70,22 @@ must be spelled out to silence the `Possibly incorrect assignment` warning.
`SUB` means that `??` is unsigned. Might require suffixing `imm` with `u` in
case it's part of an arithmetic expression that was promoted to `int`.
### Comparisons
* Any comparison of a register with a literal 0 is optimized to `OR reg, reg`
followed by a conditional jump, no matter how many calculations and inlined
functions are involved. Any `CMP reg, 0` instructions must have either come
from assembly, or referred to a *pointer* at address 0:
```c++
extern void near *address_0; // Public symbol at near address 0
register int i;
if(i != reinterpret_cast<int>(&address_0)) {
// ↑ Will emit `CMP reg, 0`
}
```
## Floating-point arithmetic
* Since the x87 FPU can only load from memory, all temporary results of

View File

@ -27,6 +27,8 @@
out dx, ax; \
}
} // extern "C" was a mistake
// poke() versions that actually inline with pseudoregisters
// ---------------------------------------------------------
// Writes the 16-bit [val] to the far address [sgm]:[off]. Expands to a braced
// block, so it can only be used as a statement, not as an expression. [val]
// is pasted unparenthesized as the right-hand side of the assignment.
#define pokew(sgm, off, val) { *(uint16_t far *)(MK_FP(sgm, off)) = val; }
@ -56,8 +58,6 @@
// Also, hey, no need for the MK_FP() macro if we directly return the correct
// types.
} // extern "C" was a mistake
#if defined(__TURBOC__) && defined(__MSDOS__)
// Declared in <dos.h> in these compilers.
void __emit__(uint8_t __byte, ...);
@ -95,9 +95,26 @@ inline void poked_eax(Decomp_FS *sgm, Decomp_DI *off, uint8_t op) {
// GS variant of poked_eax(): emits the raw instruction bytes for
// `[op] GS:[DI], EAX`, with [op] being the opcode byte of the desired
// instruction. [sgm] and [off] are not referenced in the body; presumably
// their types only serve to select this overload (TODO confirm).
inline void poked_eax(Decomp_GS *sgm, Decomp_DI *off, uint8_t op) {
// 0x66: operand-size prefix, 0x65: GS segment override, 0x05: ModR/M byte
__emit__(0x66, 0x65, op, 0x05); // [op] GS:[DI], EAX
}
// ---------------------------------------------------------
#if defined(__TURBOC__) && defined(__MSDOS__)
// Use this function wherever the original code used an immediate 0 literal
// that Turbo C++ would optimize away, e.g. in register assignments
// (_AX = 0 → XOR AX, AX) or comparisons (_AX == 0 → OR AX, AX). This way,
// the compiler is forced to leave space for any potential offset, with the
// literal 0 then being spelled out by the linker.
//
// Returns [x] unchanged if it is nonzero. For a zero [x], returns the
// address of the public `address_0` symbol instead, converted to T.
template <class T> inline T keep_0(T x) {
	if(x == 0) {
		extern void *near address_0;
		// Cast to T itself rather than to one fixed integer type, so that
		// the result matches the declared return type of every
		// instantiation without a further implicit conversion.
		return reinterpret_cast<T>(&address_0);
	}
	return x;
}
#else
// No workaround on other compilers; the argument is passed through as is.
// Parenthesized so that the expansion stays a single operand when the macro
// is used inside a larger expression, just like the function call above.
#define keep_0(x) (x)
#endif
extern "C" {
// ---------------------------------------------------------
// 32-bit ASM instructions not supported by Turbo C++ 4.0J's built-in
// assembler. Makes no sense to compile with `#pragma inline` (and thus,

View File

@ -35,14 +35,13 @@ void near pi_mask_setup_egc_and_advance(void)
outport2(EGC_READPLANEREG, 0xFF);
// EGC_COMPAREREAD | EGC_WS_PATREG | EGC_RL_MEMREAD
outport2(EGC_MODE_ROP_REG, 0x3100);
// Turbo C++ is too smart to emit this instruction with pseudo-registers!
__asm { mov ax, 0; }
outport(EGC_ADDRRESSREG, _AX);
outport(EGC_ADDRRESSREG, keep_0(0));
outport2(EGC_BITLENGTHREG, 0xF);
mask_ptr = reinterpret_cast<uint16_t>(pi_mask_ptr);
_AX = (pi_mask_y & (PI_MASK_H - 1));
__asm { shl ax, 1; } // And again!
// Turbo C++ is too smart to emit this instruction with pseudo-registers!
__asm { shl ax, 1; }
mask_ptr += _AX;
outport(EGC_MASKREG, *reinterpret_cast<dots16_t near *>(mask_ptr));
pi_mask_y++;