From 94b400c520a53823a6b8cf53e1b6804a2937644d Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Tue, 24 May 2022 03:06:25 +0100 Subject: [PATCH] dask: initial integration (#7688) * dask: initial integration * dask: add fuse fuzzer * update project yaml to match https://github.com/google/oss-fuzz/pull/7698 --- projects/dask/Dockerfile | 22 ++++++++++ projects/dask/build.sh | 23 ++++++++++ projects/dask/fuzz_fuse.py | 76 +++++++++++++++++++++++++++++++++ projects/dask/fuzz_serialize.py | 45 +++++++++++++++++++ projects/dask/project.yaml | 11 +++++ 5 files changed, 177 insertions(+) create mode 100644 projects/dask/Dockerfile create mode 100755 projects/dask/build.sh create mode 100644 projects/dask/fuzz_fuse.py create mode 100644 projects/dask/fuzz_serialize.py create mode 100644 projects/dask/project.yaml diff --git a/projects/dask/Dockerfile b/projects/dask/Dockerfile new file mode 100644 index 000000000..602e9e226 --- /dev/null +++ b/projects/dask/Dockerfile @@ -0,0 +1,22 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder-python
+RUN apt-get update && apt-get install -y pkg-config
+RUN git clone https://github.com/dask/dask
+# build.sh compiles every fuzz_*.py under $SRC, so copy all harnesses, not one.
+COPY build.sh fuzz_*.py $SRC/
+WORKDIR $SRC/dask
diff --git a/projects/dask/build.sh b/projects/dask/build.sh
new file mode 100755
index 000000000..95f3540d8
--- /dev/null
+++ b/projects/dask/build.sh
@@ -0,0 +1,23 @@
+#!/bin/bash -eu
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+pip3 install .
+
+# Build fuzzers in $OUT.
+for fuzzer in $(find "$SRC" -name 'fuzz_*.py'); do
+  compile_python_fuzzer "$fuzzer"
+done
diff --git a/projects/dask/fuzz_fuse.py b/projects/dask/fuzz_fuse.py
new file mode 100644
index 000000000..787a08d04
--- /dev/null
+++ b/projects/dask/fuzz_fuse.py
@@ -0,0 +1,76 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Fuzz harness for dask.optimization.fuse and fuse_linear."""
+
+import sys
+
+import atheris
+import dask
+from dask.optimization import fuse, fuse_linear
+from dask.utils_test import add, dec, inc
+
+
+def get_fuse_dict(data):
+    """Build the graph dict that is handed to fuse() / fuse_linear().
+
+    Key "a" maps to the literal 1; every further key maps to a task tuple
+    (operation, earlier_key), so entries only reference existing keys.
+
+    Args:
+        data: Raw bytes used to seed a FuzzedDataProvider.
+
+    Returns:
+        A dict holding between 2 and 51 entries.
+    """
+    fdp = atheris.FuzzedDataProvider(data)
+    number_of_entries = fdp.ConsumeIntInRange(1, 50)
+    operations = [dec, inc, add]
+
+    key = "a"
+    fuse_dict = {key: 1}
+    previous_keys = [key]
+    for i in range(number_of_entries):
+        newk = key + str(i)
+        val_op = operations[fdp.ConsumeIntInRange(0, len(operations) - 1)]
+        val_id = previous_keys[fdp.ConsumeIntInRange(0, len(previous_keys) - 1)]
+        fuse_dict[newk] = (val_op, val_id)
+        previous_keys.append(newk)
+    return fuse_dict
+
+
+@atheris.instrument_func
+def TestOneInput(data):
+    """Run fuse() or fuse_linear() on a graph derived from ``data``."""
+    if len(data) < 10:
+        return
+    fdp = atheris.FuzzedDataProvider(data)
+    # Draw the flags first and hand get_fuse_dict only the unconsumed bytes; a
+    # second provider over the same data would replay the bytes used here.
+    use_fuse = fdp.ConsumeBool()
+    rename_keys = fdp.ConsumeBool()
+    fuzzed_dict = get_fuse_dict(fdp.ConsumeBytes(fdp.remaining_bytes()))
+    optimize = fuse if use_fuse else fuse_linear
+    optimize(fuzzed_dict, rename_keys=rename_keys)
+
+
+def main():
+    """Instrument all loaded modules and start the Atheris fuzzing loop."""
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/dask/fuzz_serialize.py b/projects/dask/fuzz_serialize.py
new file mode 100644
index 000000000..548ceebc7
--- /dev/null
+++ b/projects/dask/fuzz_serialize.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Fuzz harness for dask.config.serialize and deserialize."""
+
+import json
+import sys
+
+import atheris
+from dask.config import deserialize, serialize
+
+
+@atheris.instrument_func
+def TestOneInput(data):
+    """Pass a fuzzer-generated JSON object through serialize and deserialize."""
+    fdp = atheris.FuzzedDataProvider(data)
+    try:
+        fuzzed_dict = json.loads(fdp.ConsumeString(sys.maxsize))
+    except (json.JSONDecodeError, RecursionError):
+        # Malformed or too deeply nested: json itself failed, not dask.
+        return
+    if isinstance(fuzzed_dict, dict):
+        deserialize(serialize(fuzzed_dict))
+
+
+def main():
+    """Instrument all loaded modules and start the Atheris fuzzing loop."""
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/dask/project.yaml b/projects/dask/project.yaml
new file mode 100644
index 000000000..98a79262b
--- /dev/null
+++ b/projects/dask/project.yaml
@@ -0,0 +1,11 @@
+homepage: "https://github.com/dask/dask"
+main_repo: "https://github.com/dask/dask"
+language: python
+fuzzing_engines:
+  - libfuzzer
+sanitizers:
+  - address
+  - undefined
+vendor_ccs:
+  - david@adalogics.com
+  - adam@adalogics.com