[icu] icu fuzzers from chrome

This commit is contained in:
Mike Aizatsky 2016-10-04 12:21:48 -07:00
parent 8a8a2bd67d
commit 32db1bc6fe
13 changed files with 567 additions and 0 deletions

22
icu/Dockerfile Normal file
View File

@ -0,0 +1,22 @@
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Build image for the ICU fuzz targets, layered on the shared libFuzzer base.
FROM ossfuzz/base-libfuzzer
MAINTAINER mike.aizatsky@gmail.com
RUN apt-get install -y make
# Check out ICU trunk into ./icu; build.sh expects the tree at /src/icu.
RUN svn co http://source.icu-project.org/repos/icu/icu/trunk/ icu
# build.sh finishes with `cp /src/*.dict /src/*.options /out` and runs under
# `bash -e`, so the .options files have to be staged next to the dictionaries;
# without them the glob does not match and the build aborts.
COPY build.sh *.cc *.h *.dict *.options /src/

22
icu/Jenkinsfile vendored Normal file
View File

@ -0,0 +1,22 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////
// Load the shared libFuzzer pipeline script (infra/libfuzzer-pipeline.groovy)
// from the oss-fuzz Git repository.
def libfuzzerBuild = fileLoader.fromGit('infra/libfuzzer-pipeline.groovy',
'https://github.com/google/oss-fuzz.git')
// Invoke the loaded pipeline for ICU, handing it the upstream Subversion URL
// (the same trunk URL the Dockerfile checks out).
libfuzzerBuild {
svn = "http://source.icu-project.org/repos/icu/icu/trunk/"
}

View File

@ -0,0 +1,46 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "fuzzer_utils.h"
#include "unicode/brkiter.h"
IcuEnvironment* env = new IcuEnvironment();
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString str(UnicodeStringFromUtf8(data, size));
auto rng = CreateRng(data, size);
const icu::Locale& locale = GetRandomLocale(&rng);
std::unique_ptr<icu::BreakIterator> bi;
switch (rng() % 5) {
case 0:
bi.reset(icu::BreakIterator::createWordInstance(locale, status));
break;
case 1:
bi.reset(icu::BreakIterator::createLineInstance(locale, status));
break;
case 2:
bi.reset(icu::BreakIterator::createCharacterInstance(locale, status));
break;
case 3:
bi.reset(icu::BreakIterator::createSentenceInstance(locale, status));
break;
case 4:
bi.reset(icu::BreakIterator::createTitleInstance(locale, status));
break;
}
if (U_FAILURE(status)) return 0;
for (int32_t p = bi->first(); p != icu::BreakIterator::DONE; p = bi->next())
if (U_FAILURE(status)) return 0;
return 0;
}

View File

@ -0,0 +1,47 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "fuzzer_utils.h"
#include "unicode/brkiter.h"
IcuEnvironment* env = new IcuEnvironment();
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
UErrorCode status = U_ZERO_ERROR;
icu::UnicodeString str(UnicodeStringFromUtf32(data, size));
auto rng = CreateRng(data, size);
const icu::Locale& locale = GetRandomLocale(&rng);
std::unique_ptr<icu::BreakIterator> bi;
switch (rng() % 5) {
case 0:
bi.reset(icu::BreakIterator::createWordInstance(locale, status));
break;
case 1:
bi.reset(icu::BreakIterator::createLineInstance(locale, status));
break;
case 2:
bi.reset(icu::BreakIterator::createCharacterInstance(locale, status));
break;
case 3:
bi.reset(icu::BreakIterator::createSentenceInstance(locale, status));
break;
case 4:
bi.reset(icu::BreakIterator::createTitleInstance(locale, status));
break;
}
if (U_FAILURE(status))
return 0;
for (int32_t p = bi->first(); p != icu::BreakIterator::DONE; p = bi->next())
if (U_FAILURE(status))
return 0;
return 0;
}

48
icu/build.sh Executable file
View File

@ -0,0 +1,48 @@
#!/bin/bash -eux
#
# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Build ICU out of tree in /work/icu. -p keeps this step idempotent: the
# script runs with `-e`, so a plain mkdir would abort a rebuild that reuses
# an existing work directory.
mkdir -p /work/icu
cd /work/icu

# TODO: icu build fails without -DU_USE_STRTOD_L=0
DEFINES="-DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_ENABLE_DYLOAD=0 -DU_USE_STRTOD_L=0"
CFLAGS="$CFLAGS $DEFINES"
CXXFLAGS="$CXXFLAGS $DEFINES"

# Configure a static, 64-bit ICU with the data linked in statically, using the
# compilers and flags supplied by the environment.
CFLAGS=$CFLAGS CXXFLAGS=$CXXFLAGS CC=$CC CXX=$CXX LDFLAGS=$FUZZER_LDFLAGS \
  /bin/bash /src/icu/source/runConfigureICU Linux \
   --with-library-bits=64 --with-data-packaging=static --enable-static --disable-shared

make -j$(nproc)

# One /src/<name>.cc per entry; each is linked into /out/icu_<name>.
FUZZERS="break_iterator_fuzzer \
  break_iterator_utf32_fuzzer \
  converter_fuzzer \
  number_format_fuzzer \
  ucasemap_fuzzer \
  unicode_string_codepage_create_fuzzer \
  uregex_open_fuzzer
  "
# $FUZZERS and $CXXFLAGS are intentionally unquoted so they word-split.
for fuzzer in $FUZZERS; do
  $CXX $CXXFLAGS -std=c++11 \
    /src/$fuzzer.cc -o /out/icu_$fuzzer \
    -I/src/icu/source/common -I/src/icu/source/i18n -L/work/icu/lib \
    -lfuzzer -licui18n -licuuc -licutu -licudata $FUZZER_LDFLAGS
done

# Ship the dictionaries and libFuzzer option files alongside the binaries.
cp /src/*.dict /src/*.options /out

45
icu/converter_fuzzer.cc Normal file
View File

@ -0,0 +1,45 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <array>
#include <memory>
#include <vector>
#include "fuzzer_utils.h"
#include "unicode/unistr.h"
#include "unicode/ucnv.h"
IcuEnvironment* env = new IcuEnvironment();

// Entry point for LibFuzzer.
//
// Decodes the input as UTF-8, opens one of ICU's available converters chosen
// by an RNG seeded from the input, and converts the decoded string into a
// fixed-size byte buffer with that converter.
//
// Always returns 0.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  UErrorCode status = U_ZERO_ERROR;
  auto rng = CreateRng(data, size);
  icu::UnicodeString str(UnicodeStringFromUtf8(data, size));

  // Guard the modulo below: with no converters available there is nothing to
  // exercise, and `% 0` would be undefined behaviour.
  const int32_t converter_count = ucnv_countAvailable();
  if (converter_count <= 0)
    return 0;
  const char* converter_name =
      ucnv_getAvailableName(static_cast<int32_t>(rng() % converter_count));

  // A plain function-pointer deleter is enough here. The previous
  // std::function-based alias needed <functional>, which this file never
  // included.
  std::unique_ptr<UConverter, decltype(&ucnv_close)> converter(
      ucnv_open(converter_name, &status), &ucnv_close);
  if (U_FAILURE(status))
    return 0;

  // The output buffer is allocated once and reused by every invocation.
  // NOTE(review): 1204 looks like a typo for 1024. It is harmless because the
  // same constant sizes both the allocation and the capacity passed to ICU.
  static const size_t dest_buffer_size = 1024 * 1204;
  static const std::unique_ptr<char[]> dest_buffer(new char[dest_buffer_size]);

  // The conversion result is deliberately ignored; only the side effects of
  // running the converter matter to the fuzzer.
  str.extract(dest_buffer.get(), dest_buffer_size, converter.get(), status);
  return 0;
}

53
icu/fuzzer_utils.h Normal file
View File

@ -0,0 +1,53 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef FUZZER_UTILS_H_
#define FUZZER_UTILS_H_
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <functional>
#include <random>
#include <string>
#include <vector>
#include "unicode/locid.h"
#include "unicode/uchar.h"
// One-time setup hook for the ICU fuzzers; each fuzzer creates an instance at
// namespace scope before any input is processed.
struct IcuEnvironment {
  // Placeholder: ICU currently needs no explicit initialization here.
  IcuEnvironment() {}
};
// Creates a 64-bit Mersenne Twister seeded from the fuzz input.
//
// The seed is std::hash<std::string> over the |size| bytes at |data|, so the
// same input produces the same pseudo-random sequence within a program run.
//
// Declared inline because this header carries the definition; without it, a
// second translation unit including the header would cause a duplicate-symbol
// link error.
inline std::mt19937_64 CreateRng(const uint8_t* data, size_t size) {
  const std::string bytes(reinterpret_cast<const char*>(data), size);
  std::mt19937_64 rng;
  rng.seed(std::hash<std::string>()(bytes));
  return rng;
}
// Picks one of ICU's available locales using the next value drawn from |rng|.
//
// Returns a reference into the array handed back by
// icu::Locale::getAvailableLocales(). Asserts that at least one locale is
// available, since the modulo below needs a non-zero divisor.
//
// Declared inline because this header carries the definition; without it, a
// second translation unit including the header would cause a duplicate-symbol
// link error.
inline const icu::Locale& GetRandomLocale(std::mt19937_64* rng) {
  int32_t num_locales = 0;
  const icu::Locale* locales = icu::Locale::getAvailableLocales(num_locales);
  assert(num_locales > 0);
  return locales[(*rng)() % num_locales];
}
// Builds an icu::UnicodeString by treating the |size| bytes at |data| as
// UTF-8 text.
//
// Declared inline because this header carries the definition; without it, a
// second translation unit including the header would cause a duplicate-symbol
// link error.
inline icu::UnicodeString UnicodeStringFromUtf8(const uint8_t* data,
                                                size_t size) {
  // StringPiece takes a 32-bit length, so the narrowing is spelled out.
  return icu::UnicodeString::fromUTF8(icu::StringPiece(
      reinterpret_cast<const char*>(data), static_cast<int32_t>(size)));
}
// Builds an icu::UnicodeString by treating the input bytes as a sequence of
// UChar32 code units in host byte order.
//
// Trailing bytes that do not fill a whole UChar32 are ignored. Each unit is
// capped at UCHAR_MAX_VALUE; only the upper bound is applied, so negative
// values are passed through to ICU unchanged.
//
// Declared inline because this header carries the definition; without it, a
// second translation unit including the header would cause a duplicate-symbol
// link error.
inline icu::UnicodeString UnicodeStringFromUtf32(const uint8_t* data,
                                                 size_t size) {
  std::vector<UChar32> uchars(size / sizeof(UChar32));

  // An empty vector may report a null data() pointer, which memcpy must not
  // be given even for a zero-length copy.
  if (!uchars.empty()) {
    memcpy(uchars.data(), data, uchars.size() * sizeof(UChar32));
  }

  for (UChar32& unit : uchars) {
    unit = std::min(unit, UCHAR_MAX_VALUE);
  }

  return icu::UnicodeString::fromUTF32(uchars.data(),
                                       static_cast<int32_t>(uchars.size()));
}
#endif // FUZZER_UTILS_H_

View File

@ -0,0 +1,30 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Fuzzer for NumberFormat::parse.
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "fuzzer_utils.h"
#include "unicode/numfmt.h"
IcuEnvironment* env = new IcuEnvironment();
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
UErrorCode status = U_ZERO_ERROR;
auto rng = CreateRng(data, size);
const icu::Locale& locale = GetRandomLocale(&rng);
std::unique_ptr<icu::NumberFormat> fmt(
icu::NumberFormat::createInstance(locale, status));
if (U_FAILURE(status)) return 0;
icu::UnicodeString str(UnicodeStringFromUtf8(data, size));
icu::Formattable result;
fmt->parse(str, result, status);
return 0;
}

103
icu/regex.dict Normal file
View File

@ -0,0 +1,103 @@
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"\\a"
"\\A"
"\\b"
"\\B"
"\\cX"
"\\cC"
"\\cZ"
"\\d"
"\\D"
"\\e"
"\\u001B"
"\\E"
"\\f"
"\\u000C"
"\\G"
"\\h"
"\\u0009"
"\\H"
"\\k"
"\\n"
"\\N"
"\\p"
"\\P"
"{"
"}"
"\\Q"
"\\r"
"\\u000D"
"\\R"
"\\u000a"
"\\u000b"
"\\u000c"
"\\u000d"
"\\u0085"
"\\u2028"
"\\u2029"
"\\s"
"[\\t\\n\\f\\r\\p{Z}]"
"\\S"
"\\t"
"\\u0009"
"\\u"
"\\uf0ff"
"\\U"
"\\U0010ffff."
"\\v"
"\\V"
"\\w"
"\\W"
"\\x"
"\\xhh"
"\\X"
"\\Z"
"\\z"
"\\n"
"\\0"
"\\0ooo"
"."
"^"
"$"
"\\"
"|"
"*"
"+"
"?"
","
"*?"
"+?"
"??"
"*+"
"++"
"?+"
"("
"(?:"
"(?>"
"(?#"
"(?="
"(?!"
"(?<="
"(?<!"
"(?"
"-"
")"
":"
"(?ismwx-ismwx:"
"(?ismwx-ismwx)"
"(?i)"
"["
"]"
"[\\u0000-\\U0010ffff]"
"[:script=Greek:]"
"{script=Greek}"
"gC"
"sc"
"scx"
"WB"
"Nd"
"d"
"MN"

53
icu/ucasemap_fuzzer.cc Normal file
View File

@ -0,0 +1,53 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Fuzzer for ucasemap.
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "fuzzer_utils.h"
#include "unicode/ucasemap.h"
IcuEnvironment* env = new IcuEnvironment();
template<typename T>
using deleted_unique_ptr = std::unique_ptr<T,std::function<void(T*)>>;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
UErrorCode status = U_ZERO_ERROR;
auto rng = CreateRng(data, size);
const icu::Locale& locale = GetRandomLocale(&rng);
uint32_t open_flags = static_cast<uint32_t>(rng());
deleted_unique_ptr<UCaseMap> csm(
ucasemap_open(locale.getName(), open_flags, &status),
[](UCaseMap* map) { ucasemap_close(map); });
if (U_FAILURE(status))
return 0;
int32_t dst_size = size * 2;
std::unique_ptr<char[]> dst(new char[dst_size]);
auto src = reinterpret_cast<const char*>(data);
switch (rng() % 4) {
case 0: ucasemap_utf8ToLower(csm.get(), dst.get(), dst_size, src, size,
&status);
break;
case 1: ucasemap_utf8ToUpper(csm.get(), dst.get(), dst_size, src, size,
&status);
break;
case 2: ucasemap_utf8ToTitle(csm.get(), dst.get(), dst_size, src, size,
&status);
break;
case 3: ucasemap_utf8FoldCase(csm.get(), dst.get(), dst_size, src, size,
&status);
break;
}
return 0;
}

View File

@ -0,0 +1,73 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <array>
#include <vector>
#include "fuzzer_utils.h"
#include "unicode/unistr.h"
// Codepage names the fuzzer below chooses from when constructing a
// UnicodeString from raw bytes; one entry is picked per input.
// Taken from third_party/icu/source/data/mappings/convrtrs.txt file.
// The array length (45) must match the number of entries listed.
static const std::array<const char*, 45> kConverters = {
{
"UTF-8",
"utf-16be",
"utf-16le",
"UTF-32",
"UTF-32BE",
"UTF-32LE",
"ibm866-html",
"iso-8859-2-html",
"iso-8859-3-html",
"iso-8859-4-html",
"iso-8859-5-html",
"iso-8859-6-html",
"iso-8859-7-html",
"iso-8859-8-html",
"ISO-8859-8-I",
"iso-8859-10-html",
"iso-8859-13-html",
"iso-8859-14-html",
"iso-8859-15-html",
"iso-8859-16-html",
"koi8-r-html",
"koi8-u-html",
"macintosh-html",
"windows-874-html",
"windows-1250-html",
"windows-1251-html",
"windows-1252-html",
"windows-1253-html",
"windows-1254-html",
"windows-1255-html",
"windows-1256-html",
"windows-1257-html",
"windows-1258-html",
"x-mac-cyrillic-html",
"windows-936-2000",
"gb18030",
"big5-html",
"euc-jp-html",
"ISO_2022,locale=ja,version=0",
"shift_jis-html",
"euc-kr-html",
"ISO-2022-KR",
"ISO-2022-CN",
"ISO-2022-CN-EXT",
"HZ-GB-2312"
}
};
IcuEnvironment* env = new IcuEnvironment();
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
auto rng = CreateRng(data, size);
icu::UnicodeString str(reinterpret_cast<const char*>(data), size,
kConverters[rng() % kConverters.size()]);
return 0;
}

23
icu/uregex_open_fuzzer.cc Normal file
View File

@ -0,0 +1,23 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <stddef.h>
#include <stdint.h>
#include "fuzzer_utils.h"
#include "unicode/regex.h"
IcuEnvironment* env = new IcuEnvironment();
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
UParseError pe = { 0 };
UErrorCode status = U_ZERO_ERROR;
URegularExpression* re = uregex_open(reinterpret_cast<const UChar*>(data),
static_cast<int>(size) / sizeof(UChar),
0, &pe, &status);
if (re)
uregex_close(re);
return 0;
}

View File

@ -0,0 +1,2 @@
[libfuzzer]
dict = regex.dict