[Decompilation] Find out how to bypass TCC's optimization of 0 immediates

By deferring that immediate 0 to link time. 🤦 Part of P0146, funded by -Tom- and Ember2528.
2021-05-30 20:09:31 +02:00 · 2021-05-30 20:09:31 +02:00 · aae96aec45
parent 8426f3e13f
commit aae96aec45
4 changed files with 43 additions and 7 deletions
--- a/ReC98.inc
+++ b/ReC98.inc
@@ -47,6 +47,10 @@ setfarfp macro farfp, func
 	mov word ptr farfp, offset func
 endm

+; See decomp.hpp
+public _address_0
+_address_0 = 0
+
 ; master.lib extensions
 ; ---------------------
 ; super_roll_put_1plane() plane_put constants
--- a/decompilation.md
+++ b/decompilation.md
@@ -70,6 +70,22 @@ must be spelled out to silence the `Possibly incorrect assignment` warning.
 `SUB` means that `??` is unsigned. Might require suffixing `imm` with `u` in
 case it's part of an arithmetic expression that was promoted to `int`.

+### Comparisons
+
+* Any comparison of a register with a literal 0 is optimized to `OR reg, reg`
+  followed by a conditional jump, no matter how many calculations and inlined
+  functions are involved. Any `CMP reg, 0` instructions must have either come
+  from assembly, or referred to a *pointer* at address 0:
+
+  ```c++
+  extern void near *address_0; // Public symbol at near address 0
+  register int i;
+
+  if(i != reinterpret_cast<int>(address_0)) {
+    // ↑ Will emit `CMP reg, 0`
+  }
+  ```
+
 ## Floating-point arithmetic

 * Since the x87 FPU can only load from memory, all temporary results of
--- a/decomp.hpp
+++ b/decomp.hpp
@@ -27,6 +27,8 @@
 	out dx, ax; \
 }

+} // extern "C" was a mistake
+
 // poke() versions that actually inline with pseudoregisters
 // ---------------------------------------------------------
 #define pokew(sgm, off, val) { *(uint16_t far *)(MK_FP(sgm, off)) = val; }
@@ -56,8 +58,6 @@
 // Also, hey, no need for the MK_FP() macro if we directly return the correct
 // types.

-} // extern "C" was a mistake
-
 #if defined(__TURBOC__) && defined(__MSDOS__)
 	// Declared in <dos.h> in these compilers.
 	void __emit__(uint8_t __byte, ...);
@@ -95,9 +95,26 @@ inline void poked_eax(Decomp_FS *sgm, Decomp_DI *off, uint8_t op) {
 inline void poked_eax(Decomp_GS *sgm, Decomp_DI *off, uint8_t op) {
 	__emit__(0x66, 0x65, op, 0x05); // [op] GS:[DI], EAX
 }
+// ---------------------------------------------------------
+
+#if defined(__TURBOC__) && defined(__MSDOS__)
+	// Use this function wherever the original code used an immediate 0
+	// literal that Turbo C++ would optimize away, e.g. in register assignments
+	// (_AX = 0 → XOR AX, AX) or comparisons (_AX == 0 → OR AX, AX). This way,
+	// the compiler is forced to leave space for any potential offset, with the
+	// literal 0 then being spelled out by the linker.
+	template <class T> inline T keep_0(T x) {
+		if(x == 0) {
+			extern void *near address_0; // Public symbol, defined as 0 in ASM
+			return reinterpret_cast<T>(&address_0); // Must cast to [T] itself
+		}
+		return x; // Nonzero values are passed through unchanged
+	}
+#else // Other compilers: plain pass-through, parenthesized for safety
+	#define keep_0(x) (x)
+#endif

 extern "C" {
-// ---------------------------------------------------------

 // 32-bit ASM instructions not supported by Turbo C++ 4.0J's built-in
 // assembler. Makes no sense to compile with `#pragma inline` (and thus,
--- a/th05/formats/pi_cpp_2.cpp
+++ b/th05/formats/pi_cpp_2.cpp
@@ -35,14 +35,13 @@ void near pi_mask_setup_egc_and_advance(void)
 	outport2(EGC_READPLANEREG, 0xFF);
 	// EGC_COMPAREREAD | EGC_WS_PATREG | EGC_RL_MEMREAD
 	outport2(EGC_MODE_ROP_REG, 0x3100);
-	// Turbo C++ is too smart to emit this instruction with pseudo-registers!
-	__asm { mov ax, 0; }
-	outport(EGC_ADDRRESSREG, _AX);
+	outport(EGC_ADDRRESSREG, keep_0(0));
 	outport2(EGC_BITLENGTHREG, 0xF);

 	mask_ptr = reinterpret_cast<uint16_t>(pi_mask_ptr);
 	_AX = (pi_mask_y & (PI_MASK_H - 1));
-	__asm { shl ax, 1; } // And again!
+	// Turbo C++ is too smart to emit this instruction with pseudo-registers!
+	__asm { shl ax, 1; }
 	mask_ptr += _AX;
 	outport(EGC_MASKREG, *reinterpret_cast<dots16_t near *>(mask_ptr));
 	pi_mask_y++;