From 32db1bc6fe5a56902e5208cbed6adefff499ad42 Mon Sep 17 00:00:00 2001 From: Mike Aizatsky Date: Tue, 4 Oct 2016 12:21:48 -0700 Subject: [PATCH] [icu] icu fuzzers from chrome --- icu/Dockerfile | 22 ++++ icu/Jenkinsfile | 22 ++++ icu/break_iterator_fuzzer.cc | 46 +++++++++ icu/break_iterator_utf32_fuzzer.cc | 47 +++++++++ icu/build.sh | 48 +++++++++ icu/converter_fuzzer.cc | 45 ++++++++ icu/fuzzer_utils.h | 53 ++++++++++ icu/number_format_fuzzer.cc | 30 ++++++ icu/regex.dict | 103 +++++++++++++++++++ icu/ucasemap_fuzzer.cc | 53 ++++++++++ icu/unicode_string_codepage_create_fuzzer.cc | 73 +++++++++++++ icu/uregex_open_fuzzer.cc | 23 +++++ icu/uregex_open_fuzzer.options | 2 + 13 files changed, 567 insertions(+) create mode 100644 icu/Dockerfile create mode 100644 icu/Jenkinsfile create mode 100644 icu/break_iterator_fuzzer.cc create mode 100644 icu/break_iterator_utf32_fuzzer.cc create mode 100755 icu/build.sh create mode 100644 icu/converter_fuzzer.cc create mode 100644 icu/fuzzer_utils.h create mode 100644 icu/number_format_fuzzer.cc create mode 100644 icu/regex.dict create mode 100644 icu/ucasemap_fuzzer.cc create mode 100644 icu/unicode_string_codepage_create_fuzzer.cc create mode 100644 icu/uregex_open_fuzzer.cc create mode 100644 icu/uregex_open_fuzzer.options diff --git a/icu/Dockerfile b/icu/Dockerfile new file mode 100644 index 000000000..88ecfb6fd --- /dev/null +++ b/icu/Dockerfile @@ -0,0 +1,22 @@ +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +FROM ossfuzz/base-libfuzzer +MAINTAINER mike.aizatsky@gmail.com +RUN apt-get install -y make + +RUN svn co http://source.icu-project.org/repos/icu/icu/trunk/ icu +COPY build.sh *.cc *.h *.dict /src/ diff --git a/icu/Jenkinsfile b/icu/Jenkinsfile new file mode 100644 index 000000000..6ed6d713e --- /dev/null +++ b/icu/Jenkinsfile @@ -0,0 +1,22 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//////////////////////////////////////////////////////////////////////////////// + +def libfuzzerBuild = fileLoader.fromGit('infra/libfuzzer-pipeline.groovy', + 'https://github.com/google/oss-fuzz.git') + +libfuzzerBuild { + svn = "http://source.icu-project.org/repos/icu/icu/trunk/" +} diff --git a/icu/break_iterator_fuzzer.cc b/icu/break_iterator_fuzzer.cc new file mode 100644 index 000000000..143a74dae --- /dev/null +++ b/icu/break_iterator_fuzzer.cc @@ -0,0 +1,46 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include +#include +#include "fuzzer_utils.h" +#include "unicode/brkiter.h" + +IcuEnvironment* env = new IcuEnvironment(); + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + UErrorCode status = U_ZERO_ERROR; + icu::UnicodeString str(UnicodeStringFromUtf8(data, size)); + + auto rng = CreateRng(data, size); + const icu::Locale& locale = GetRandomLocale(&rng); + + std::unique_ptr bi; + + switch (rng() % 5) { + case 0: + bi.reset(icu::BreakIterator::createWordInstance(locale, status)); + break; + case 1: + bi.reset(icu::BreakIterator::createLineInstance(locale, status)); + break; + case 2: + bi.reset(icu::BreakIterator::createCharacterInstance(locale, status)); + break; + case 3: + bi.reset(icu::BreakIterator::createSentenceInstance(locale, status)); + break; + case 4: + bi.reset(icu::BreakIterator::createTitleInstance(locale, status)); + break; + } + if (U_FAILURE(status)) return 0; + + for (int32_t p = bi->first(); p != icu::BreakIterator::DONE; p = bi->next()) + if (U_FAILURE(status)) return 0; + + return 0; +} + diff --git a/icu/break_iterator_utf32_fuzzer.cc b/icu/break_iterator_utf32_fuzzer.cc new file mode 100644 index 000000000..544e5f6d5 --- /dev/null +++ b/icu/break_iterator_utf32_fuzzer.cc @@ -0,0 +1,47 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include +#include +#include "fuzzer_utils.h" +#include "unicode/brkiter.h" + +IcuEnvironment* env = new IcuEnvironment(); + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + UErrorCode status = U_ZERO_ERROR; + icu::UnicodeString str(UnicodeStringFromUtf32(data, size)); + + auto rng = CreateRng(data, size); + const icu::Locale& locale = GetRandomLocale(&rng); + + std::unique_ptr bi; + + switch (rng() % 5) { + case 0: + bi.reset(icu::BreakIterator::createWordInstance(locale, status)); + break; + case 1: + bi.reset(icu::BreakIterator::createLineInstance(locale, status)); + break; + case 2: + bi.reset(icu::BreakIterator::createCharacterInstance(locale, status)); + break; + case 3: + bi.reset(icu::BreakIterator::createSentenceInstance(locale, status)); + break; + case 4: + bi.reset(icu::BreakIterator::createTitleInstance(locale, status)); + break; + } + if (U_FAILURE(status)) + return 0; + + for (int32_t p = bi->first(); p != icu::BreakIterator::DONE; p = bi->next()) + if (U_FAILURE(status)) + return 0; + + return 0; +} diff --git a/icu/build.sh b/icu/build.sh new file mode 100755 index 000000000..dfeedae57 --- /dev/null +++ b/icu/build.sh @@ -0,0 +1,48 @@ +#!/bin/bash -eux +# +# Copyright 2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ + +mkdir /work/icu +cd /work/icu + +# TODO: icu build fails without -DU_USE_STRTOD_L=0 +DEFINES="-DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_ENABLE_DYLOAD=0 -DU_USE_STRTOD_L=0" +CFLAGS="$CFLAGS $DEFINES" +CXXFLAGS="$CXXFLAGS $DEFINES" + +CFLAGS=$CFLAGS CXXFLAGS=$CXXFLAGS CC=$CC CXX=$CXX LDFLAGS=$FUZZER_LDFLAGS \ + /bin/bash /src/icu/source/runConfigureICU Linux \ + --with-library-bits=64 --with-data-packaging=static --enable-static --disable-shared + +make -j$(nproc) + +FUZZERS="break_iterator_fuzzer \ + break_iterator_utf32_fuzzer \ + converter_fuzzer \ + number_format_fuzzer \ + ucasemap_fuzzer \ + unicode_string_codepage_create_fuzzer \ + uregex_open_fuzzer + " +for fuzzer in $FUZZERS; do + $CXX $CXXFLAGS -std=c++11 \ + /src/$fuzzer.cc -o /out/icu_$fuzzer \ + -I/src/icu/source/common -I/src/icu/source/i18n -L/work/icu/lib \ + -lfuzzer -licui18n -licuuc -licutu -licudata $FUZZER_LDFLAGS +done + +cp /src/*.dict /src/*.options /out \ No newline at end of file diff --git a/icu/converter_fuzzer.cc b/icu/converter_fuzzer.cc new file mode 100644 index 000000000..cfbdebf6a --- /dev/null +++ b/icu/converter_fuzzer.cc @@ -0,0 +1,45 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include + +#include +#include +#include +#include + +#include "fuzzer_utils.h" +#include "unicode/unistr.h" +#include "unicode/ucnv.h" + +IcuEnvironment* env = new IcuEnvironment(); + +template +using deleted_unique_ptr = std::unique_ptr>; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + UErrorCode status = U_ZERO_ERROR; + auto rng = CreateRng(data, size); + icu::UnicodeString str(UnicodeStringFromUtf8(data, size)); + + const char* converter_name = + ucnv_getAvailableName(rng() % ucnv_countAvailable()); + + deleted_unique_ptr converter(ucnv_open(converter_name, &status), + &ucnv_close); + + if (U_FAILURE(status)) + return 0; + + static const size_t dest_buffer_size = 1024 * 1204; + static const std::unique_ptr dest_buffer(new char[dest_buffer_size]); + + str.extract(dest_buffer.get(), dest_buffer_size, converter.get(), status); + + if (U_FAILURE(status)) + return 0; + + return 0; +} diff --git a/icu/fuzzer_utils.h b/icu/fuzzer_utils.h new file mode 100644 index 000000000..d879bc395 --- /dev/null +++ b/icu/fuzzer_utils.h @@ -0,0 +1,53 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef FUZZER_UTILS_H_ +#define FUZZER_UTILS_H_ + +#include +#include +#include + +#include "unicode/locid.h" +#include "unicode/uchar.h" + +struct IcuEnvironment { + IcuEnvironment() { + // nothing to initialize yet; + } +}; + +// Create RNG and seed it from data. 
+std::mt19937_64 CreateRng(const uint8_t* data, size_t size) { + std::mt19937_64 rng; + std::string str = std::string(reinterpret_cast(data), size); + std::size_t data_hash = std::hash()(str); + rng.seed(data_hash); + return rng; +} + +const icu::Locale& GetRandomLocale(std::mt19937_64* rng) { + int32_t num_locales = 0; + const icu::Locale* locales = icu::Locale::getAvailableLocales(num_locales); + assert(num_locales > 0); + return locales[(*rng)() % num_locales]; +} + +icu::UnicodeString UnicodeStringFromUtf8(const uint8_t* data, size_t size) { + return icu::UnicodeString::fromUTF8( + icu::StringPiece(reinterpret_cast(data), size)); +} + +icu::UnicodeString UnicodeStringFromUtf32(const uint8_t* data, size_t size) { + std::vector uchars; + uchars.resize(size * sizeof(uint8_t) / (sizeof(UChar32))); + memcpy(uchars.data(), data, uchars.size() * sizeof(UChar32)); + for (size_t i = 0; i < uchars.size(); ++i) { + uchars[i] = std::min(uchars[i], UCHAR_MAX_VALUE); + } + + return icu::UnicodeString::fromUTF32(uchars.data(), uchars.size()); +} + +#endif // FUZZER_UTILS_H_ diff --git a/icu/number_format_fuzzer.cc b/icu/number_format_fuzzer.cc new file mode 100644 index 000000000..88df77b77 --- /dev/null +++ b/icu/number_format_fuzzer.cc @@ -0,0 +1,30 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Fuzzer for NumberFormat::parse. 
+ +#include +#include +#include +#include "fuzzer_utils.h" +#include "unicode/numfmt.h" + +IcuEnvironment* env = new IcuEnvironment(); + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + UErrorCode status = U_ZERO_ERROR; + + auto rng = CreateRng(data, size); + const icu::Locale& locale = GetRandomLocale(&rng); + + std::unique_ptr fmt( + icu::NumberFormat::createInstance(locale, status)); + if (U_FAILURE(status)) return 0; + + icu::UnicodeString str(UnicodeStringFromUtf8(data, size)); + icu::Formattable result; + fmt->parse(str, result, status); + + return 0; +} diff --git a/icu/regex.dict b/icu/regex.dict new file mode 100644 index 000000000..b0456e6d3 --- /dev/null +++ b/icu/regex.dict @@ -0,0 +1,103 @@ +# Copyright 2016 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +"\\a" +"\\A" +"\\b" +"\\B" +"\\cX" +"\\cC" +"\\cZ" +"\\d" +"\\D" +"\\e" +"\\u001B" +"\\E" +"\\f" +"\\u000C" +"\\G" +"\\h" +"\\u0009" +"\\H" +"\\k" +"\\n" +"\\N" +"\\p" +"\\P" +"{" +"}" +"\\Q" +"\\r" +"\\u000D" +"\\R" +"\\u000a" +"\\u000b" +"\\u000c" +"\\u000d" +"\\u0085" +"\\u2028" +"\\u2029" +"\\s" +"[\\t\\n\\f\\r\\p{Z}]" +"\\S" +"\\t" +"\\u0009" +"\\u" +"\\uf0ff" +"\\U" +"\\U0010ffff." +"\\v" +"\\V" +"\\w" +"\\W" +"\\x" +"\\xhh" +"\\X" +"\\Z" +"\\z" +"\\n" +"\\0" +"\\0ooo" +"." +"^" +"$" +"\\" +"|" +"*" +"+" +"?" +"," +"*?" +"+?" +"??" +"*+" +"++" +"?+" +"(" +"(?:" +"(?>" +"(?#" +"(?=" +"(?!" +"(?<=" +"(? 
+#include +#include +#include "fuzzer_utils.h" +#include "unicode/ucasemap.h" + +IcuEnvironment* env = new IcuEnvironment(); + +template +using deleted_unique_ptr = std::unique_ptr>; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + UErrorCode status = U_ZERO_ERROR; + + auto rng = CreateRng(data, size); + const icu::Locale& locale = GetRandomLocale(&rng); + uint32_t open_flags = static_cast(rng()); + + deleted_unique_ptr csm( + ucasemap_open(locale.getName(), open_flags, &status), + [](UCaseMap* map) { ucasemap_close(map); }); + + if (U_FAILURE(status)) + return 0; + + int32_t dst_size = size * 2; + std::unique_ptr dst(new char[dst_size]); + auto src = reinterpret_cast(data); + + switch (rng() % 4) { + case 0: ucasemap_utf8ToLower(csm.get(), dst.get(), dst_size, src, size, + &status); + break; + case 1: ucasemap_utf8ToUpper(csm.get(), dst.get(), dst_size, src, size, + &status); + break; + case 2: ucasemap_utf8ToTitle(csm.get(), dst.get(), dst_size, src, size, + &status); + break; + case 3: ucasemap_utf8FoldCase(csm.get(), dst.get(), dst_size, src, size, + &status); + break; + } + + return 0; +} + diff --git a/icu/unicode_string_codepage_create_fuzzer.cc b/icu/unicode_string_codepage_create_fuzzer.cc new file mode 100644 index 000000000..bb0489ca1 --- /dev/null +++ b/icu/unicode_string_codepage_create_fuzzer.cc @@ -0,0 +1,73 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include +#include +#include + +#include "fuzzer_utils.h" +#include "unicode/unistr.h" + +// Taken from third_party/icu/source/data/mappings/convrtrs.txt file. 
+static const std::array kConverters = { + { + "UTF-8", + "utf-16be", + "utf-16le", + "UTF-32", + "UTF-32BE", + "UTF-32LE", + "ibm866-html", + "iso-8859-2-html", + "iso-8859-3-html", + "iso-8859-4-html", + "iso-8859-5-html", + "iso-8859-6-html", + "iso-8859-7-html", + "iso-8859-8-html", + "ISO-8859-8-I", + "iso-8859-10-html", + "iso-8859-13-html", + "iso-8859-14-html", + "iso-8859-15-html", + "iso-8859-16-html", + "koi8-r-html", + "koi8-u-html", + "macintosh-html", + "windows-874-html", + "windows-1250-html", + "windows-1251-html", + "windows-1252-html", + "windows-1253-html", + "windows-1254-html", + "windows-1255-html", + "windows-1256-html", + "windows-1257-html", + "windows-1258-html", + "x-mac-cyrillic-html", + "windows-936-2000", + "gb18030", + "big5-html", + "euc-jp-html", + "ISO_2022,locale=ja,version=0", + "shift_jis-html", + "euc-kr-html", + "ISO-2022-KR", + "ISO-2022-CN", + "ISO-2022-CN-EXT", + "HZ-GB-2312" + } +}; + +IcuEnvironment* env = new IcuEnvironment(); + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + auto rng = CreateRng(data, size); + icu::UnicodeString str(reinterpret_cast(data), size, + kConverters[rng() % kConverters.size()]); + return 0; +} diff --git a/icu/uregex_open_fuzzer.cc b/icu/uregex_open_fuzzer.cc new file mode 100644 index 000000000..7e2744c66 --- /dev/null +++ b/icu/uregex_open_fuzzer.cc @@ -0,0 +1,23 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include + +#include "fuzzer_utils.h" +#include "unicode/regex.h" + +IcuEnvironment* env = new IcuEnvironment(); + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + UParseError pe = { 0 }; + UErrorCode status = U_ZERO_ERROR; + URegularExpression* re = uregex_open(reinterpret_cast(data), + static_cast(size) / sizeof(UChar), + 0, &pe, &status); + if (re) + uregex_close(re); + + return 0; +} diff --git a/icu/uregex_open_fuzzer.options b/icu/uregex_open_fuzzer.options new file mode 100644 index 000000000..0e5d596dc --- /dev/null +++ b/icu/uregex_open_fuzzer.options @@ -0,0 +1,2 @@ +[libfuzzer] +dict = regex.dict