From 3c4a913d3ae6c653e5068cbaefb65813bc1c5073 Mon Sep 17 00:00:00 2001
From: DavidKorczynski
Date: Wed, 6 Apr 2022 10:20:23 +0100
Subject: [PATCH] lxml: initial integration. (#4908)

* lxml: initial integration.

* update to 2022
---
Note: hand-edited after export (apt-get update, quoted "$@" in the wrapper,
fuzz_xml_parse.py cleanup); blob ids on the "index" lines predate these edits.

 projects/lxml/Dockerfile        | 25 +++++++++++++++++++++
 projects/lxml/build.sh          | 34 ++++++++++++++++++++++++++++
 projects/lxml/fuzz_xml_parse.py | 39 +++++++++++++++++++++++++++++++++
 projects/lxml/project.yaml      | 11 ++++++++++
 4 files changed, 109 insertions(+)
 create mode 100644 projects/lxml/Dockerfile
 create mode 100644 projects/lxml/build.sh
 create mode 100644 projects/lxml/fuzz_xml_parse.py
 create mode 100644 projects/lxml/project.yaml

diff --git a/projects/lxml/Dockerfile b/projects/lxml/Dockerfile
new file mode 100644
index 000000000..09620b298
--- /dev/null
+++ b/projects/lxml/Dockerfile
@@ -0,0 +1,25 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder-python
+RUN git clone --depth 1 https://github.com/lxml/lxml
+RUN apt-get update && apt-get install -y libxml2-dev libxslt-dev zlib1g-dev
+RUN pip3 install Cython
+
+COPY build.sh $SRC/
+COPY fuzz_* $SRC/lxml
+
+WORKDIR $SRC/lxml
diff --git a/projects/lxml/build.sh b/projects/lxml/build.sh
new file mode 100644
index 000000000..1dc1d82e4
--- /dev/null
+++ b/projects/lxml/build.sh
@@ -0,0 +1,34 @@
+#!/bin/bash -eu
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+python3 ./setup.py install
+
+# Build fuzzers in $OUT.
+for fuzzer in $(find $SRC -name 'fuzz_*.py'); do
+  fuzzer_basename=$(basename -s .py $fuzzer)
+  fuzzer_package=${fuzzer_basename}.pkg
+  pyinstaller --distpath $OUT --onefile --name $fuzzer_package $fuzzer
+
+  # Create execution wrapper.
+  echo "#!/bin/sh
+# LLVMFuzzerTestOneInput for fuzzer detection.
+this_dir=\$(dirname \"\$0\")
+LD_PRELOAD=\$this_dir/sanitizer_with_fuzzer.so \
+ASAN_OPTIONS=\$ASAN_OPTIONS:symbolize=1:external_symbolizer_path=\$this_dir/llvm-symbolizer:detect_leaks=0 \
+\$this_dir/$fuzzer_package \"\$@\"" > $OUT/$fuzzer_basename
+  chmod u+x $OUT/$fuzzer_basename
+done
diff --git a/projects/lxml/fuzz_xml_parse.py b/projects/lxml/fuzz_xml_parse.py
new file mode 100644
index 000000000..85af19f71
--- /dev/null
+++ b/projects/lxml/fuzz_xml_parse.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python3
+
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import atheris
+import sys
+
+with atheris.instrument_imports():
+    from lxml import etree as et
+
+
+def TestOneInput(data):
+    """Parse one fuzzer-generated input as an XML document with lxml."""
+    fdp = atheris.FuzzedDataProvider(data)
+    try:
+        et.XML(fdp.ConsumeUnicode(sys.maxsize))
+    except et.XMLSyntaxError:
+        pass  # Malformed XML is an expected rejection, not a finding.
+
+
+def main():
+    """Register TestOneInput with Atheris and start the fuzzing loop."""
+    atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True)
+    atheris.Fuzz()
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/lxml/project.yaml b/projects/lxml/project.yaml
new file mode 100644
index 000000000..64eae0a5f
--- /dev/null
+++ b/projects/lxml/project.yaml
@@ -0,0 +1,11 @@
+homepage: "https://lxml.de/"
+main_repo: 'https://github.com/lxml/lxml/'
+language: python
+primary_contact: "david@adalogics.com"
+auto_ccs:
+  - "adam@adalogics.com"
+fuzzing_engines:
+  - libfuzzer
+sanitizers:
+  - address
+  - undefined