[infra] Prioritize short files when collecting dataflow traces (#1632). (#3254)

* [infra] Prioritize short files when collecting dataflow traces (#1632).

* remove debug print

* rename files and sizes dict
This commit is contained in:
Max Moroz 2020-01-21 13:09:07 -08:00 committed by GitHub
parent 87df2b147a
commit 2c6c6d9785
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 0 deletions

View File

@@ -85,6 +85,7 @@ def collect_traces(binary, corpus_dir, dft_dir):
'failed': 0,
}
files_and_sizes = {}
for f in _list_dir(corpus_dir):
stats['total'] += 1
size = os.path.getsize(f)
@@ -92,7 +93,9 @@ def collect_traces(binary, corpus_dir, dft_dir):
stats['long'] += 1
print('Skipping large file ({size}b): {path}'.format(size=size, path=f))
continue
files_and_sizes[f] = size
for f in sorted(files_and_sizes, key=files_and_sizes.get):
output_path = os.path.join(dft_dir, _sha1(f))
try:
result = _run([binary, f, output_path], timeout=_timeout(size))