From 2c6c6d9785de4d60b81117be3859c64e59b0b6e7 Mon Sep 17 00:00:00 2001 From: Max Moroz Date: Tue, 21 Jan 2020 13:09:07 -0800 Subject: [PATCH] [infra] Prioritize short files when collecting dataflow traces (#1632). (#3254) * [infra] Prioritize short files when collecting dataflow traces (#1632). * remove debug print * rename files and sizes dict --- infra/base-images/base-runner/dataflow_tracer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/infra/base-images/base-runner/dataflow_tracer.py b/infra/base-images/base-runner/dataflow_tracer.py index b157d66c8..7166bf43e 100755 --- a/infra/base-images/base-runner/dataflow_tracer.py +++ b/infra/base-images/base-runner/dataflow_tracer.py @@ -85,6 +85,7 @@ def collect_traces(binary, corpus_dir, dft_dir): 'failed': 0, } + files_and_sizes = {} for f in _list_dir(corpus_dir): stats['total'] += 1 size = os.path.getsize(f) @@ -92,7 +93,9 @@ def collect_traces(binary, corpus_dir, dft_dir): stats['long'] += 1 print('Skipping large file ({size}b): {path}'.format(size=size, path=f)) continue + files_and_sizes[f] = size + for f in sorted(files_and_sizes, key=files_and_sizes.get): output_path = os.path.join(dft_dir, _sha1(f)) try: result = _run([binary, f, output_path], timeout=_timeout(size))