From c18654e7da795c00a7fc8dcae5f5d231b8198ad0 Mon Sep 17 00:00:00 2001
From: DavidKorczynski
Date: Fri, 9 Sep 2022 21:03:42 +0100
Subject: [PATCH] pandas: initial integration (#8459)

---
 projects/pandas/Dockerfile            | 22 ++++++++++++
 projects/pandas/build.sh              | 22 ++++++++++++
 projects/pandas/fuzz_python_parser.py | 50 +++++++++++++++++++++++++++
 projects/pandas/project.yaml          | 11 ++++++
 4 files changed, 105 insertions(+)
 create mode 100644 projects/pandas/Dockerfile
 create mode 100755 projects/pandas/build.sh
 create mode 100644 projects/pandas/fuzz_python_parser.py
 create mode 100644 projects/pandas/project.yaml

diff --git a/projects/pandas/Dockerfile b/projects/pandas/Dockerfile
new file mode 100644
index 000000000..ef049f8bd
--- /dev/null
+++ b/projects/pandas/Dockerfile
@@ -0,0 +1,22 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder-python
+RUN apt-get update && apt-get install -y make autoconf automake libtool
+RUN pip3 install --upgrade pip Cython numpy
+RUN git clone --depth 1 https://github.com/pandas-dev/pandas pandas
+WORKDIR pandas
+COPY build.sh *.py $SRC/
diff --git a/projects/pandas/build.sh b/projects/pandas/build.sh
new file mode 100755
index 000000000..d45f30a8a
--- /dev/null
+++ b/projects/pandas/build.sh
@@ -0,0 +1,22 @@
+#!/bin/bash -eu
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+###############################################################################
+
+python3 setup.py install  # NOTE(review): installs from the working tree; the pip3 line below installs it again - confirm both are needed
+pip3 install .
+for fuzzer in $(find $SRC -name 'fuzz_*.py'); do  # one pass per fuzz_*.py found anywhere under $SRC
+  LD_PRELOAD=$OUT/sanitizer_with_fuzzer.so ASAN_OPTIONS=detect_leaks=0 compile_python_fuzzer $fuzzer --hidden-import cmath  # presumably --hidden-import is forwarded to the packager so cmath gets bundled - TODO confirm
+done
diff --git a/projects/pandas/fuzz_python_parser.py b/projects/pandas/fuzz_python_parser.py
new file mode 100644
index 000000000..03316820d
--- /dev/null
+++ b/projects/pandas/fuzz_python_parser.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Targets pandas parsers. Both native and python code"""
+
+import os
+import sys
+import atheris
+import io
+
+from pandas.errors import (
+    EmptyDataError,
+    ParserError,
+)
+
+from pandas.io.parsers import read_csv
+
+
+def TestOneInput(data):  # fuzz callback; registered with atheris.Setup() in main()
+    fdp = atheris.FuzzedDataProvider(data)  # wraps `data` so typed values can be drawn from it
+
+    try:
+        read_csv(io.StringIO(fdp.ConsumeUnicodeNoSurrogates(sys.maxsize)))  # requests up to sys.maxsize chars (presumably all of the input) and parses them as in-memory CSV text
+    except (  # exception types deliberately ignored by this harness
+        EmptyDataError,
+        ParserError,
+        ValueError
+    ):
+        pass  # swallowed on purpose; any other exception type escapes this handler
+
+
+def main():  # script entry point: instrument, register TestOneInput, start the fuzz loop
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/pandas/project.yaml b/projects/pandas/project.yaml
new file mode 100644
index 000000000..1825fb556
--- /dev/null
+++ b/projects/pandas/project.yaml
@@ -0,0 +1,11 @@
+homepage: "https://github.com/pandas-dev/pandas"
+language: python
+main_repo: "https://github.com/pandas-dev/pandas"
+fuzzing_engines:
+  - libfuzzer
+sanitizers:
+  - address
+  - undefined
+vendor_ccs:
+  - david@adalogics.com
+  - adam@adalogics.com