[infra] Prioritize short files when collecting dataflow traces (#1632). (#3254)

* [infra] Prioritize short files when collecting dataflow traces (#1632).
* remove debug print
* rename files and sizes dict
2020-01-21 13:09:07 -08:00 · 2020-01-21 13:09:07 -08:00 · 2c6c6d9785
parent 87df2b147a
commit 2c6c6d9785
1 changed file with 3 additions and 0 deletions
--- a/infra/base-images/base-runner/dataflow_tracer.py
+++ b/infra/base-images/base-runner/dataflow_tracer.py
@@ -85,6 +85,7 @@ def collect_traces(binary, corpus_dir, dft_dir):
      'failed': 0,
  }

+  files_and_sizes = {}
  for f in _list_dir(corpus_dir):
    stats['total'] += 1
    size = os.path.getsize(f)
@@ -92,7 +93,9 @@ def collect_traces(binary, corpus_dir, dft_dir):
      stats['long'] += 1
      print('Skipping large file ({size}b): {path}'.format(size=size, path=f))
      continue
+    files_and_sizes[f] = size

+  for f in sorted(files_and_sizes, key=files_and_sizes.get):
    output_path = os.path.join(dft_dir, _sha1(f))
    try:
      result = _run([binary, f, output_path], timeout=_timeout(size))