From b8ca607c385d0ddc2a776cfca910b263ebb069cb Mon Sep 17 00:00:00 2001 From: nmlgc Date: Fri, 21 Feb 2020 22:21:20 +0100 Subject: [PATCH] [Decompilation] [th02] Get zun_res1.c right MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4½ years after aa56a7c, it turns out that the correct decompilation involves… no-ops generated by assigning variables that just happen to be in registers to themselves?! Which does get optimized out, but only after TCC folded identical tail code in all branches of a function, thus effectively functioning as an optimization barrier. The initial attempt used register pseudovariables, but this definitely is the best possible way this could work – portable, and doesn't unnecessarily shred the code into tiny inlined functions pieces. The mindblowing thing here is that ZUN could have actually written this to have additional, albeit unnecessary, lines to place breakpoints on. But that means he must have chosen those two local variables in SI and DI completely by chance… 🤯 The best thing though? ~#pragma inline is gone~ Part of P0076, funded by [Anonymous] and -Tom-. --- Research/Borland C++ decompilation.md | 41 +++++++++++--------- th02/zun_res1.c | 56 ++++++++------------------- 2 files changed, 40 insertions(+), 57 deletions(-) diff --git a/Research/Borland C++ decompilation.md b/Research/Borland C++ decompilation.md index 0506e186..5d855b59 100644 --- a/Research/Borland C++ decompilation.md +++ b/Research/Borland C++ decompilation.md @@ -81,24 +81,29 @@ certain local variables as `word`s when they aren't. ### `-O` (Optimize jumps) -Inhibited by identical variable declarations within more than one scope – the -optimizer will only merge the code *after* the last ASM reference to that -declared variable. Yes, even though the emitted ASM would be identical: -```c -if(a) { - int v = set_v(); - do_something_else(); - use(v); -} else if(underline) { - // Second declaration of [v].
Even though it's assigned to the same stack - // offset, the second `PUSH w` call will still be emitted separately. - // Thus, jump optimization only reuses the `CALL use` instruction. - // Move the `int v;` declaraion to the beginning of the function to avoid - // this. - int v = set_v(); - use(v); -} -``` +Inhibited by: + +* identical variable declarations within more than one scope – the + optimizer will only merge the code *after* the last ASM reference to that + declared variable. Yes, even though the emitted ASM would be identical: + + ```c + if(a) { + int v = set_v(); + do_something_else(); + use(v); + } else if(underline) { + // Second declaration of [v]. Even though it's assigned to the same stack + // offset, the second `PUSH w` call will still be emitted separately. + // Thus, jump optimization only reuses the `CALL use` instruction. + // Move the `int v;` declaraion to the beginning of the function to avoid + // this. + int v = set_v(); + use(v); + } + ``` + +* distinct instances of assignments of local variables in registers to itself ## Inlining diff --git a/th02/zun_res1.c b/th02/zun_res1.c index dbc8f33e..893900c0 100644 --- a/th02/zun_res1.c +++ b/th02/zun_res1.c @@ -4,8 +4,6 @@ * configuration file required in order to run TH02. 
*/ -#pragma inline - #include #include "th02/th02.h" #include "th02/snd/snd.h" @@ -35,43 +33,24 @@ void cfg_write(seg_t resident_sgm) dos_close(handle); } +#define LOGO \ + "“Œ•ϋ••–‚˜^—p@ ν’“ƒvƒƒOƒ‰ƒ€@ZUN_RES.com Version1.01 (c)zun 1997" + int main(int argc, const char **argv) { int pascal scoredat_verify(void); - static const char MIKOConfig[] = RES_ID; - static const char LOGO[] = - "\n" - "\n" - "“Œ•ϋ••–‚˜^—p@ ν’“ƒvƒƒOƒ‰ƒ€@ZUN_RES.com Version1.01 (c)zun 1997\n"; - static const char ERROR_SCOREDAT[] = - "ƒnƒCƒXƒRƒAƒtƒ@ƒCƒ‹‚ͺ‚¨‚©‚΅‚’‚́A‚ΰ‚€ˆκ“xŽΐs‚΅‚ĂˁB\n"; - static const char ERROR_NOT_RESIDENT[] = - "‚ν‚½‚΅A‚ά‚Ύ‚’‚ά‚Ή‚ρ‚悧\n\n"; - static const char REMOVED[] = - "‚³‚ζ‚Θ‚ηA‚ά‚½‰ο‚¦‚½‚η‚’‚’‚Θ\n\n"; - static const char ERROR_UNKNOWN_OPTION[] = - "‚»‚ρ‚ΘƒIƒvƒVƒ‡ƒ“•t‚―‚η‚κ‚Δ‚ΰA’‚ι‚ρ‚Ε‚·‚―‚Η\n\n"; - static const char ERROR_ALREADY_RESIDENT[] = - "‚ν‚½‚΅A‚·‚Ε‚Ι‚’‚ά‚·‚悧\n\n"; - static const char ERROR_OUT_OF_MEMORY[] = - "μ‚κ‚ά‚Ή‚ρA‚ν‚½‚΅‚Μ‹κŠ‚ͺ‚Θ‚’‚́I\n\n"; - static const char INITIALIZED[] = - "‚»‚κ‚ł́A‚ζ‚λ‚΅‚­‚¨Šθ‚’‚΅‚ά‚·\n\n"; - seg_t sgm; - const char *res_id = MIKOConfig; + const char *res_id = RES_ID; int i; char far *resident; sgm = resdata_exist(res_id, RES_ID_STRLEN, RES_PARASIZE); - dos_puts2(LOGO); + dos_puts2("\n\n" LOGO "\n"); graph_clear(); - // No, I tried all permutations of command-line switches, - // gotos and returns, and no pure C solution seems to work! 
- if(scoredat_verify() == 1) __asm { - push offset ds:ERROR_SCOREDAT - jmp error_puts + if(scoredat_verify() == 1) { + dos_puts2("ƒnƒCƒXƒRƒAƒtƒ@ƒCƒ‹‚ͺ‚¨‚©‚΅‚’‚́A‚ΰ‚€ˆκ“xŽΐs‚΅‚ĂˁB\n"); + return 1; } if(argc == 2) { #define arg1_is(capital, small) \ @@ -79,33 +58,32 @@ int main(int argc, const char **argv) && (argv[1][1] == (capital) || argv[1][1] == (small)) if(arg1_is('R', 'r')) { if(!sgm) { - dos_puts2(ERROR_NOT_RESIDENT); -asm jmp error_ret + dos_puts2("‚ν‚½‚΅A‚ά‚Ύ‚’‚ά‚Ή‚ρ‚悧\n\n"); + return 1; } dos_free(sgm); - dos_puts2(REMOVED); + dos_puts2("‚³‚ζ‚Θ‚ηA‚ά‚½‰ο‚¦‚½‚η‚’‚’‚Θ\n\n"); return 0; } else if(arg1_is('D', 'd')) { debug = 1; } else { - dos_puts2(ERROR_UNKNOWN_OPTION); + dos_puts2("‚»‚ρ‚ΘƒIƒvƒVƒ‡ƒ“•t‚―‚η‚κ‚Δ‚ΰA’‚ι‚ρ‚Ε‚·‚―‚Η\n\n"); + sgm = sgm; /* optimization barrier #1 */ return 1; } } if(sgm) { - dos_puts2(ERROR_ALREADY_RESIDENT); + dos_puts2("‚ν‚½‚΅A‚·‚Ε‚Ι‚’‚ά‚·‚悧\n\n"); + argv = argv; /* optimization barrier #2 */ return 1; } sgm = resdata_create(res_id, RES_ID_STRLEN, RES_PARASIZE); if(!sgm) { -asm push offset ds:ERROR_OUT_OF_MEMORY -error_puts: -asm call near ptr dos_puts2 -error_ret: + dos_puts2("μ‚κ‚ά‚Ή‚ρA‚ν‚½‚΅‚Μ‹κŠ‚ͺ‚Θ‚’‚́I\n\n"); return 1; } resident = MK_FP(sgm, 0); - dos_puts2(INITIALIZED); + dos_puts2("‚»‚κ‚ł́A‚ζ‚λ‚΅‚­‚¨Šθ‚’‚΅‚ά‚·\n\n"); for(i = offsetof(resident_t, stage); i < sizeof(resident_t); i++) { resident[i] = 0; }