pokeemerald/.travis/calcrom/calcrom.pl

#!/usr/bin/perl

use IPC::Cmd qw[ run ];

(@ARGV == 1)
    or die "ERROR: no map file specified.\n";
open(my $file, $ARGV[0])
    or die "ERROR: could not open file '$ARGV[0]'.\n";

my $src = 0;
my $asm = 0;
my $srcdata = 0;
my $data = 0;
while (my $line = <$file>)
{
    if ($line =~ /^ \.(\w+)\s+0x[0-9a-f]+\s+(0x[0-9a-f]+) (\w+)\/.+\.o/)
    {
        my $section = $1;
        my $size = hex($2);
        my $dir = $3;

        if ($section =~ /text/)
        {
            if ($dir eq 'src')
            {
                $src += $size;
            }
            elsif ($dir eq 'asm')
            {
                $asm += $size;
            }
        }
        elsif ($section =~ /rodata/)
        {
            if ($dir eq 'src')
            {
                $srcdata += $size;
            }
            elsif ($dir eq 'data')
            {
                $data += $size;
            }
        }
    }
}

(my $elffname = $ARGV[0]) =~ s/\.map/.elf/;

# Note that the grep filters out all branch labels. It also requires a minimum
# line length of 5, to filter out a ton of generated symbols (like AcCn). No
# settings to nm seem to remove these symbols. Finally, nm prints out a separate
# entry for whenever a name appears in a file, not just where it's defined. uniq
# removes all the duplicate entries.
#
#
# You'd expect this to take a while, because of uniq. It runs in under a second,
# though. Uniq is pretty fast!
my $base_cmd = "nm $elffname | awk '{print \$3}' | grep '^[^_].\\{4\\}' | uniq";

# This looks for Unknown_, Unknown_, or sub_, followed by just numbers. Note that
# it matches even if stuff precedes the unknown, like sUnknown/gUnknown.
my $undoc_cmd = "grep '[Uu]nknown_[0-9a-fA-F]*\\|sub_[0-9a-fA-F]*'";

# This looks for every symbol with an address at the end of it. Some things are
# given a name based on their type / location, but still have an unknown purpose.
# For example, FooMap_EventScript_FFFFFFF.
my $partial_doc_cmd = "grep '_[0-28][0-9a-fA-F]\\{5,6\\}'";

my $count_cmd = "wc -l";

# It sucks that we have to run this three times, but I can't figure out how to get
# stdin working for subcommands in perl while still having a timeout. It's decently
# fast anyway.
my $total_syms_as_string;
(run (
    command => "$base_cmd | $count_cmd",
    buffer => \$total_syms_as_string,
    timeout => 60
))
    or die "ERROR: Error while getting all symbols: $?";

my $undocumented_as_string;
(run (
    command => "$base_cmd | $undoc_cmd | $count_cmd",
    buffer => \$undocumented_as_string,
    timeout => 60
))
    or die "ERROR: Error while filtering for undocumented symbols: $?";

my $partial_documented_as_string;
(run (
    command => "$base_cmd | $partial_doc_cmd | $count_cmd",
    buffer => \$partial_documented_as_string,
    timeout => 60
))
    or die "ERROR: Error while filtering for partial symbols: $?";

# Performing addition on a string converts it to a number. Any string that fails
# to convert to a number becomes 0. So if our converted number is 0, but our string
# is nonzero, then the conversion was an error.
my $undocumented = $undocumented_as_string + 0;
(($undocumented != 0) and ($undocumented_as_string ne "0"))
    or die "ERROR: Cannot convert string to num: '$undocumented_as_string'";

my $partial_documented = $partial_documented_as_string + 0;
(($partial_documented != 0) and ($partial_documented_as_string ne "0"))
	or die "ERROR: Cannot convert string to num: '$partial_documented_as_string'";

my $total_syms = $total_syms_as_string + 0;
(($total_syms != 0) and ($total_syms_as_string ne "0"))
    or die "ERROR: Cannot convert string to num: '$total_syms_as_string'";

($total_syms != 0)
    or die "ERROR: No symbols found.";

my $total = $src + $asm;
my $srcPct = sprintf("%.4f", 100 * $src / $total);
my $asmPct = sprintf("%.4f", 100 * $asm / $total);

# partial_documented is double-counting the unknown_* and sub_* symbols.
$partial_documented = $partial_documented - $undocumented;

my $documented = $total_syms - ($undocumented + $partial_documented);
my $docPct = sprintf("%.4f", 100 * $documented / $total_syms);
my $partialPct = sprintf("%.4f", 100 * $partial_documented / $total_syms);
my $undocPct = sprintf("%.4f", 100 * $undocumented / $total_syms);

if ($asm == 0)
{
    print "Code decompilation is 100% complete\n"
}
else
{
    print "$total total bytes of code\n";
    print "$src bytes of code in src ($srcPct%)\n";
    print "$asm bytes of code in asm ($asmPct%)\n";
}
print "\n";

if ($partial_documented == 0 && $undocumented == 0)
{
    print "Documentation is 100% complete\n"
}
else
{
    print "$total_syms total symbols\n";
    print "$documented symbols documented ($docPct%)\n";
    print "$partial_documented symbols partially documented ($partialPct%)\n";
    print "$undocumented symbols undocumented ($undocPct%)\n";
}

print "\n";
my $dataTotal = $srcdata + $data;
my $srcDataPct = sprintf("%.4f", 100 * $srcdata / $dataTotal);
my $dataPct = sprintf("%.4f", 100 * $data / $dataTotal);

if ($data == 0)
{
    print "Data porting to C is 100% complete\n"
}
else
{
    print "$dataTotal total bytes of data\n";
    print "$srcdata bytes of data in src ($srcDataPct%)\n";
    print "$data bytes of data in data ($dataPct%)\n";
}
Add calcrom discord webhook to travis 2018-06-16 16:39:54 +00:00			`#!/usr/bin/perl`

Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`use IPC::Cmd qw[ run ];`

Add calcrom discord webhook to travis 2018-06-16 16:39:54 +00:00			`(@ARGV == 1)`
			`or die "ERROR: no map file specified.\n";`
			`open(my $file, $ARGV[0])`
			`or die "ERROR: could not open file '$ARGV[0]'.\n";`

			`my $src = 0;`
			`my $asm = 0;`
Report data decomp in calcrom 2019-08-06 17:00:03 +00:00			`my $srcdata = 0;`
			`my $data = 0;`
Add calcrom discord webhook to travis 2018-06-16 16:39:54 +00:00			`while (my $line = <$file>)`
			`{`
			`if ($line =~ /^ \.(\w+)\s+0x[0-9a-f]+\s+(0x[0-9a-f]+) (\w+)\/.+\.o/)`
			`{`
			`my $section = $1;`
			`my $size = hex($2);`
			`my $dir = $3;`

			`if ($section =~ /text/)`
			`{`
			`if ($dir eq 'src')`
			`{`
			`$src += $size;`
			`}`
			`elsif ($dir eq 'asm')`
			`{`
			`$asm += $size;`
			`}`
			`}`
Report data decomp in calcrom 2019-08-06 17:00:03 +00:00			`elsif ($section =~ /rodata/)`
			`{`
			`if ($dir eq 'src')`
			`{`
			`$srcdata += $size;`
			`}`
			`elsif ($dir eq 'data')`
			`{`
			`$data += $size;`
			`}`
			`}`
Add calcrom discord webhook to travis 2018-06-16 16:39:54 +00:00			`}`
			`}`

Report data decomp in calcrom 2019-08-06 17:00:03 +00:00			`(my $elffname = $ARGV[0]) =~ s/\.map/.elf/;`

Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`# Note that the grep filters out all branch labels. It also requires a minimum`
			`# line length of 5, to filter out a ton of generated symbols (like AcCn). No`
			`# settings to nm seem to remove these symbols. Finally, nm prints out a separate`
			`# entry for whenever a name appears in a file, not just where it's defined. uniq`
			`# removes all the duplicate entries.`
			`#`
			`#`
			`# You'd expect this to take a while, because of uniq. It runs in under a second,`
			`# though. Uniq is pretty fast!`
Report data decomp in calcrom 2019-08-06 17:00:03 +00:00			`my $base_cmd = "nm $elffname \| awk '{print \$3}' \| grep '^[^_].\\{4\\}' \| uniq";`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00
			`# This looks for Unknown_, Unknown_, or sub_, followed by just numbers. Note that`
			`# it matches even if stuff precedes the unknown, like sUnknown/gUnknown.`
			`my $undoc_cmd = "grep '[Uu]nknown_[0-9a-fA-F]\\\|sub_[0-9a-fA-F]'";`

			`# This looks for every symbol with an address at the end of it. Some things are`
			`# given a name based on their type / location, but still have an unknown purpose.`
			`# For example, FooMap_EventScript_FFFFFFF.`
Remove false positives from partial doc calcrom 2020-01-13 18:52:15 +00:00			`my $partial_doc_cmd = "grep '_[0-28][0-9a-fA-F]\\{5,6\\}'";`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00
			`my $count_cmd = "wc -l";`

			`# It sucks that we have to run this three times, but I can't figure out how to get`
			`# stdin working for subcommands in perl while still having a timeout. It's decently`
			`# fast anyway.`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`my $total_syms_as_string;`
			`(run (`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`command => "$base_cmd \| $count_cmd",`
Fix tabs 2019-03-01 06:09:53 +00:00			`buffer => \$total_syms_as_string,`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`timeout => 60`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`))`
			`or die "ERROR: Error while getting all symbols: $?";`

			`my $undocumented_as_string;`
			`(run (`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`command => "$base_cmd \| $undoc_cmd \| $count_cmd",`
Fix tabs 2019-03-01 06:09:53 +00:00			`buffer => \$undocumented_as_string,`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`timeout => 60`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`))`
			`or die "ERROR: Error while filtering for undocumented symbols: $?";`

Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`my $partial_documented_as_string;`
			`(run (`
			`command => "$base_cmd \| $partial_doc_cmd \| $count_cmd",`
			`buffer => \$partial_documented_as_string,`
			`timeout => 60`
			`))`
			`or die "ERROR: Error while filtering for partial symbols: $?";`

			`# Performing addition on a string converts it to a number. Any string that fails`
			`# to convert to a number becomes 0. So if our converted number is 0, but our string`
			`# is nonzero, then the conversion was an error.`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`my $undocumented = $undocumented_as_string + 0;`
			`(($undocumented != 0) and ($undocumented_as_string ne "0"))`
			`or die "ERROR: Cannot convert string to num: '$undocumented_as_string'";`

Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`my $partial_documented = $partial_documented_as_string + 0;`
			`(($partial_documented != 0) and ($partial_documented_as_string ne "0"))`
			`or die "ERROR: Cannot convert string to num: '$partial_documented_as_string'";`

Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`my $total_syms = $total_syms_as_string + 0;`
			`(($total_syms != 0) and ($total_syms_as_string ne "0"))`
			`or die "ERROR: Cannot convert string to num: '$total_syms_as_string'";`

			`($total_syms != 0)`
			`or die "ERROR: No symbols found.";`

Add calcrom discord webhook to travis 2018-06-16 16:39:54 +00:00			`my $total = $src + $asm;`
			`my $srcPct = sprintf("%.4f", 100 * $src / $total);`
			`my $asmPct = sprintf("%.4f", 100 * $asm / $total);`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`# partial_documented is double-counting the unknown_* and sub_* symbols.`
			`$partial_documented = $partial_documented - $undocumented;`

			`my $documented = $total_syms - ($undocumented + $partial_documented);`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`my $docPct = sprintf("%.4f", 100 * $documented / $total_syms);`
Fix symbol overcounting, add partial symbols 2019-03-03 20:02:26 +00:00			`my $partialPct = sprintf("%.4f", 100 * $partial_documented / $total_syms);`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`my $undocPct = sprintf("%.4f", 100 * $undocumented / $total_syms);`

Celebratory change to calcrom.pl 2020-03-03 21:29:57 +00:00			`if ($asm == 0)`
			`{`
			`print "Code decompilation is 100% complete\n"`
			`}`
			`else`
			`{`
			`print "$total total bytes of code\n";`
			`print "$src bytes of code in src ($srcPct%)\n";`
			`print "$asm bytes of code in asm ($asmPct%)\n";`
			`}`
Improve OK bot's symbol counting 2019-03-01 06:06:27 +00:00			`print "\n";`
Celebratory change to calcrom.pl 2020-03-03 21:29:57 +00:00
			`if ($partial_documented == 0 && $undocumented == 0)`
			`{`
			`print "Documentation is 100% complete\n"`
			`}`
			`else`
			`{`
			`print "$total_syms total symbols\n";`
			`print "$documented symbols documented ($docPct%)\n";`
			`print "$partial_documented symbols partially documented ($partialPct%)\n";`
			`print "$undocumented symbols undocumented ($undocPct%)\n";`
			`}`
Report data decomp in calcrom 2019-08-06 17:00:03 +00:00
			`print "\n";`
			`my $dataTotal = $srcdata + $data;`
			`my $srcDataPct = sprintf("%.4f", 100 * $srcdata / $dataTotal);`
			`my $dataPct = sprintf("%.4f", 100 * $data / $dataTotal);`
Celebratory change to calcrom.pl 2020-03-03 21:29:57 +00:00
			`if ($data == 0)`
			`{`
			`print "Data porting to C is 100% complete\n"`
			`}`
			`else`
			`{`
			`print "$dataTotal total bytes of data\n";`
			`print "$srcdata bytes of data in src ($srcDataPct%)\n";`
			`print "$data bytes of data in data ($dataPct%)\n";`
			`}`