oss-fuzz/infra/base-images/base-builder/python_coverage_helper.py

121 lines
4.1 KiB
Python

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Extracts file paths to copy files from pyinstaller-generated executables"""
import os
import shutil
import sys
import tempfile
import zipfile
# Finds all *.toc files in the subdirectories of the given pyinstaller
# workpath and reads these files in order to identify Python files associated
# with a pyinstaller packaged executable. Copies all of the Python files to
# the given output directory (e.g. /medio) following the original directory
# structure.
def _unpack_egg_in_place(egg_path):
    """
    Replace the .egg archive at egg_path with a directory of its contents.

    The archive is extracted into a private scratch directory first; only
    after a successful extraction is the archive removed and the extracted
    tree copied to the path the archive used to occupy, so that paths of the
    form <egg_path>/pkg/module.py become regular files.

    Args:
      egg_path: Path to an existing .egg file.

    Returns:
      True if the archive was replaced by its contents, False if egg_path is
      not a valid zip archive (it is then left untouched).
    """
    # A private scratch directory is used instead of a fixed-name directory in
    # the current working directory, so nothing pre-existing is deleted and
    # nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as scratch_dir:
        unpacked_dir = os.path.join(scratch_dir, 'contents')
        # Create the directory up front: extracting an archive without members
        # would otherwise leave nothing for copytree to copy.
        os.makedirs(unpacked_dir)
        try:
            with zipfile.ZipFile(egg_path, 'r') as zip_f:
                zip_f.extractall(unpacked_dir)
        except zipfile.BadZipFile:
            print('Skipping %s: not a valid zip archive' % egg_path)
            return False
        os.remove(egg_path)
        shutil.copytree(unpacked_dir, egg_path)
    return True


def get_all_files_from_toc(toc_file, file_path_set):
    """
    Extract filepaths from a .toc file and add to file_path_set

    Each line of the file is split on single spaces; quotes, commas and line
    terminators are removed from every token, and each token containing '.py'
    that names an existing file is added to file_path_set. If such a token
    points inside an existing .egg file, the egg is replaced on disk by a
    directory holding its unzipped contents and the token is checked again.

    Args:
      toc_file: Path of the .toc file to read.
      file_path_set: Set that discovered file paths are added to (mutated in
        place).
    """
    # Characters removed from each token before it is treated as a path.
    strip_table = str.maketrans('', '', '\',\n\r')

    with open(toc_file, 'rb') as toc_file_fd:
        for raw_line in toc_file_fd:
            try:
                line = raw_line.decode()
            except UnicodeDecodeError:
                # Lines that are not valid text cannot hold a usable path.
                continue
            if '.py' not in line:
                continue

            for word in line.split(' '):
                word = word.translate(strip_table)
                if '.py' not in word:
                    continue
                if os.path.isfile(word):
                    file_path_set.add(word)
                    continue
                if '.egg' not in word:
                    continue

                # The token may point inside an egg archive: split it into the
                # archive path and the path within the archive.
                egg_path_split = word.split('.egg')
                if len(egg_path_split) != 2:
                    continue
                egg_path = egg_path_split[0] + '.egg'
                if not os.path.isfile(egg_path):
                    continue

                print('Unzipping contents of %s' % egg_path)
                if not _unpack_egg_in_place(egg_path):
                    continue

                # Now the path should be accessible, so check again
                if os.path.isfile(word):
                    file_path_set.add(word)
def create_file_structure_from_tocs(work_path, out_path):
    """
    Extract the Python files that are added as paths in the output of
    a pyinstaller operation. The files are determined by reading through
    all of the *.toc files found in the immediate subdirectories of the
    pyinstaller workpath.

    The files will be copied into the out_path using a similar file path
    as they originally are. If any archive (.egg) files are present in the
    .toc files, then the archives are unzipped and substituted by their
    unzipped content, so that the source files inside can be collected.

    Args:
      work_path: Directory whose subdirectories are scanned for .toc files.
      out_path: Directory under which the discovered files are copied.
    """
    print('Extracts files from the pyinstaller workpath')
    file_path_set = set()
    for entry in os.listdir(work_path):
        build_dir = os.path.join(work_path, entry)
        if not os.path.isdir(build_dir):
            continue

        # We have a directory; look for .toc files directly inside it.
        for candidate in os.listdir(build_dir):
            if '.toc' not in candidate:
                continue
            toc_path = os.path.join(build_dir, candidate)
            # A directory whose name contains ".toc" cannot be read as a file.
            if not os.path.isfile(toc_path):
                continue
            get_all_files_from_toc(toc_path, file_path_set)

    for file_path in file_path_set:
        # os.path.join discards out_path if the second component is absolute,
        # so strip every leading slash (a path such as "//usr/x.py" would
        # otherwise be copied onto itself).
        relative_src = file_path.lstrip('/')
        dst_path = os.path.join(out_path, relative_src)
        os.makedirs(os.path.dirname(dst_path), exist_ok=True)
        shutil.copy(file_path, dst_path)
def main():
    """
    Command-line entry point.

    Expects exactly two arguments after the script name: the pyinstaller
    workpath and the destination directory. With any other argument count a
    usage line is printed and the process exits with status 1.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print('Use: python3 python_coverage_helper.py pyinstaller_workpath '
              'destination_for_output')
        sys.exit(1)

    work_path, out_path = cli_args
    create_file_structure_from_tocs(work_path, out_path)


# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()