diff --git a/projects/smart_open/Dockerfile b/projects/smart_open/Dockerfile
new file mode 100644
index 000000000..d353f8c13
--- /dev/null
+++ b/projects/smart_open/Dockerfile
@@ -0,0 +1,22 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder-python
+
+RUN git clone --depth 1 https://github.com/RaRe-Technologies/smart_open
+WORKDIR smart_open
+
+COPY build.sh fuzz_*.py $SRC/
diff --git a/projects/smart_open/build.sh b/projects/smart_open/build.sh
new file mode 100644
index 000000000..6da18d63c
--- /dev/null
+++ b/projects/smart_open/build.sh
@@ -0,0 +1,24 @@
+#!/bin/bash -eu
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Build and install project (using current CFLAGS, CXXFLAGS).
+pip3 install --upgrade pip
+pip3 install .
+
+for fuzzer in $(find $SRC -name 'fuzz_*.py'); do
+  compile_python_fuzzer $fuzzer
+done
diff --git a/projects/smart_open/fuzz_bytebuffer.py b/projects/smart_open/fuzz_bytebuffer.py
new file mode 100644
index 000000000..926c8ddbf
--- /dev/null
+++ b/projects/smart_open/fuzz_bytebuffer.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Fuzz the fill/peek/read/readline cycle of smart_open's ByteBuffer."""
+
+import atheris
+import sys
+import os
+with atheris.instrument_imports():
+    from smart_open.bytebuffer import ByteBuffer
+
+
+def TestInput(data):
+    """Run one ByteBuffer fill/read cycle driven by fuzzer input.
+
+    Args:
+        data: Raw bytes supplied by the fuzzing engine.
+    """
+    fdp = atheris.FuzzedDataProvider(data)
+
+    chunk_size = fdp.ConsumeIntInRange(1, 100)
+    # range(1, n) with n in [1, 10] yields 0 to 9 chunks, so an empty source
+    # iterator is one of the cases fed to fill().
+    chunks = [
+        fdp.ConsumeBytes(chunk_size)
+        for _ in range(1, fdp.ConsumeIntInRange(1, 10))
+    ]
+
+    # The buffer's chunk size is drawn separately from the slice size above.
+    buffer = ByteBuffer(chunk_size=fdp.ConsumeIntInRange(1, 100))
+
+    buffer.empty()
+    buffer.fill(iter(chunks))
+    buffer.peek()
+    buffer.read(fdp.ConsumeIntInRange(1, chunk_size))
+    buffer.readline(fdp.ConsumeBytes(1))
+    buffer.empty()
+
+
+def main():
+    """Set up atheris with the TestInput harness and start fuzzing."""
+    atheris.Setup(sys.argv, TestInput, enable_python_coverage=True)
+    atheris.instrument_all()
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/smart_open/fuzz_func.py b/projects/smart_open/fuzz_func.py
new file mode 100644
index 000000000..891992021
--- /dev/null
+++ b/projects/smart_open/fuzz_func.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Fuzz smart_open's parse_uri and register_compressor functions."""
+
+import atheris
+import sys
+import os
+with atheris.instrument_imports():
+    from smart_open import parse_uri
+    from smart_open import register_compressor
+
+
+def _handle_file(file_obj, mode):
+    """Compressor callback: open file_obj with the builtin open()."""
+    return open(file_obj, mode)
+
+
+def TestInput(data):
+    """Feed fuzzer-derived strings to parse_uri and register_compressor.
+
+    Args:
+        data: Raw bytes supplied by the fuzzing engine.
+    """
+    fdp = atheris.FuzzedDataProvider(data)
+
+    # NOTE(review): no exception raised by parse_uri is filtered here, so any
+    # error it raises for a malformed or unsupported URI is reported as a
+    # finding -- confirm that this is intended.
+    parse_uri(fdp.ConsumeString(200))
+    # The registered extension is a dot followed by the fuzzer-chosen suffix.
+    register_compressor(".%s" % fdp.ConsumeString(3), _handle_file)
+
+
+def main():
+    """Set up atheris with the TestInput harness and start fuzzing."""
+    atheris.Setup(sys.argv, TestInput, enable_python_coverage=True)
+    atheris.instrument_all()
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/smart_open/fuzz_util.py b/projects/smart_open/fuzz_util.py
new file mode 100644
index 000000000..db5134053
--- /dev/null
+++ b/projects/smart_open/fuzz_util.py
@@ -0,0 +1,112 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Fuzz the helper functions in smart_open.utils."""
+
+import atheris
+import sys
+with atheris.instrument_imports():
+    from smart_open.utils import (
+        check_kwargs,
+        clamp,
+        make_range_string,
+        parse_content_range,
+        safe_urlsplit,
+    )
+
+# ValueError messages that the harness tolerates from parse_content_range;
+# a ValueError with any other message is re-raised as a finding.
+_EXPECTED_CONTENT_RANGE_ERRORS = (
+    "invalid literal for int() with base 10",
+    "not enough values to unpack",
+)
+
+
+def TestInput(data):
+    """Exercise several smart_open.utils helpers with fuzzer-derived values.
+
+    Exceptions whose message matches a known, expected failure are swallowed;
+    every other exception propagates so the fuzzer reports it.
+
+    Args:
+        data: Raw bytes supplied by the fuzzing engine; inputs shorter than
+            10 bytes are skipped.
+    """
+    if len(data) < 10:
+        return
+
+    fdp = atheris.FuzzedDataProvider(data)
+
+    # Fuzz clamp
+    clamp(
+        fdp.ConsumeInt(10),
+        fdp.ConsumeInt(10),
+        None if fdp.ConsumeBool() else fdp.ConsumeInt(10),
+    )
+
+    # Fuzz check_kwargs
+    kwargs = {}
+    for _ in range(1, fdp.ConsumeIntInRange(1, 10)):
+        kwargs[fdp.ConsumeString(5)] = fdp.ConsumeString(10)
+    try:
+        # NOTE(review): the first argument is always a str, which is never
+        # callable, so presumably only the TypeError path below is reached --
+        # consider passing a real callable as well.
+        check_kwargs(fdp.ConsumeString(10), kwargs)
+    except TypeError as e:
+        if "is not a callable object" not in str(e):
+            raise
+
+    # Fuzz make_range_string
+    try:
+        make_range_string(
+            None if fdp.ConsumeBool() else fdp.ConsumeInt(10),
+            None if fdp.ConsumeBool() else fdp.ConsumeInt(10),
+        )
+    except ValueError as e:
+        if "make_range_string requires either a stop or start value" not in str(e):
+            raise
+
+    # Fuzz content_range
+    content_range = "%s %d-%d/%d" % (
+        fdp.ConsumeString(10),
+        fdp.ConsumeInt(10),
+        fdp.ConsumeInt(10),
+        fdp.ConsumeInt(10),
+    )
+    try:
+        parse_content_range(content_range)
+    except ValueError as e:
+        message = str(e)
+        if not any(known in message for known in _EXPECTED_CONTENT_RANGE_ERRORS):
+            raise
+
+    # Fuzz safe_urlsplit
+    try:
+        safe_urlsplit(fdp.ConsumeString(100))
+    except ValueError as e:
+        if "Invalid IPv6 URL" not in str(e):
+            raise
+
+
+def main():
+    """Set up atheris with the TestInput harness and start fuzzing."""
+    atheris.Setup(sys.argv, TestInput, enable_python_coverage=True)
+    atheris.instrument_all()
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/smart_open/fuzz_zip.py b/projects/smart_open/fuzz_zip.py
new file mode 100644
index 000000000..3a7f08c09
--- /dev/null
+++ b/projects/smart_open/fuzz_zip.py
@@ -0,0 +1,78 @@
+#!/usr/bin/python3
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Fuzz zip archive round-trips through smart_open.open."""
+
+import atheris
+import sys
+import os
+with atheris.instrument_imports():
+    from smart_open import open
+    import zipfile
+    import tempfile
+
+
+def TestInput(data):
+    """Write a zip archive via smart_open.open and read it back.
+
+    Two writestr calls store the same fuzzer-derived text; every member read
+    back must decode to that text.
+
+    Args:
+        data: Raw bytes supplied by the fuzzing engine; inputs shorter than
+            10 bytes are skipped.
+    """
+    if len(data) < 10:
+        return
+
+    fdp = atheris.FuzzedDataProvider(data)
+
+    tmp = tempfile.NamedTemporaryFile(
+        prefix=fdp.ConsumeString(10),
+        suffix=fdp.ConsumeString(4),
+        delete=False,
+    )
+    # Only the path is needed from here on; close the handle so it does not
+    # stay open for the rest of the iteration.
+    tmp.close()
+
+    try:
+        filestr = fdp.ConsumeString(100)
+
+        with open(tmp.name, 'wb') as f:
+            with zipfile.ZipFile(f, 'w') as archive:
+                archive.writestr(fdp.ConsumeString(10), filestr)
+                archive.writestr(fdp.ConsumeString(10), filestr)
+
+        with open(tmp.name, 'rb') as f:
+            with zipfile.ZipFile(f) as archive:
+                for info in archive.infolist():
+                    file_bytes = archive.read(info.filename)
+                    assert filestr == file_bytes.decode('utf-8')
+    finally:
+        # delete=False leaves removal to the harness; do it even when the
+        # round-trip raises so failing inputs do not leave files behind.
+        os.unlink(tmp.name)
+
+
+def main():
+    """Set up atheris with the TestInput harness and start fuzzing."""
+    atheris.Setup(sys.argv, TestInput, enable_python_coverage=True)
+    atheris.instrument_all()
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/smart_open/project.yaml b/projects/smart_open/project.yaml
new file mode 100644
index 000000000..952682d98
--- /dev/null
+++ b/projects/smart_open/project.yaml
@@ -0,0 +1,12 @@
+fuzzing_engines:
+- libfuzzer
+homepage: https://github.com/RaRe-Technologies/smart_open
+language: python
+main_repo: https://github.com/RaRe-Technologies/smart_open
+sanitizers:
+- address
+- undefined
+vendor_ccs:
+- david@adalogics.com
+- adam@adalogics.com
+- arthur.chan@adalogics.com