diff --git a/projects/sentencepiece/Dockerfile b/projects/sentencepiece/Dockerfile
new file mode 100644
index 000000000..8fc2ca21d
--- /dev/null
+++ b/projects/sentencepiece/Dockerfile
@@ -0,0 +1,26 @@
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Build environment for the sentencepiece fuzz targets.
+FROM gcr.io/oss-fuzz-base/base-builder
+LABEL maintainer="taku@google.com"
+# Packages installed up front for the CMake-based build driven by build.sh.
+RUN apt-get update && apt-get install -y make autoconf automake libtool cmake build-essential pkg-config libgoogle-perftools-dev
+# Shallow clone (--depth 1) fetches only the latest upstream commit.
+RUN git clone --depth 1 https://github.com/google/sentencepiece.git sentencepiece
+WORKDIR sentencepiece
+# Copy the build script and the project's *.cc files into $SRC.
+COPY build.sh *.cc $SRC/
diff --git a/projects/sentencepiece/build.sh b/projects/sentencepiece/build.sh
new file mode 100755
index 000000000..fee9e88f7
--- /dev/null
+++ b/projects/sentencepiece/build.sh
@@ -0,0 +1,31 @@
+#!/bin/bash -eu
+# Copyright 2020 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Build and install sentencepiece from a separate CMake build directory.
+mkdir build
+cd build
+cmake -DSPM_ENABLE_SHARED=ON ..
+make -j $(nproc)
+make install
+
+# Compile each *_fuzzer.cc under $SRC into its own binary in $OUT.
+for fuzzer in $(find $SRC -name '*_fuzzer.cc'); do
+  fuzz_basename=$(basename -s .cc $fuzzer)
+  $CXX $CXXFLAGS -std=c++11 -I. \
+      $fuzzer $LIB_FUZZING_ENGINE ./src/libsentencepiece.a \
+      -o $OUT/$fuzz_basename
+done
diff --git a/projects/sentencepiece/project.yaml b/projects/sentencepiece/project.yaml
new file mode 100644
index 000000000..e3295f265
--- /dev/null
+++ b/projects/sentencepiece/project.yaml
@@ -0,0 +1,9 @@
+homepage: "https://github.com/google/sentencepiece"
+language: c++
+primary_contact: "taku@google.com"
+sanitizers:
+  - address
+  - memory
+  - undefined
+architectures:
+  - x86_64
diff --git a/projects/sentencepiece/sample_encode_fuzzer.cc b/projects/sentencepiece/sample_encode_fuzzer.cc
new file mode 100644
index 000000000..7beb06fcb
--- /dev/null
+++ b/projects/sentencepiece/sample_encode_fuzzer.cc
@@ -0,0 +1,33 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fuzzer/FuzzedDataProvider.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+
+#include "sentencepiece_processor.h"
+// libFuzzer entry point: derives nbest_size, alpha and the input text from data.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  sentencepiece::SentencePieceProcessor fuzz_sp_processor;
+  FuzzedDataProvider data_provider(data, size);
+  const int nbest_size = data_provider.ConsumeIntegral<int>();
+  const float alpha = data_provider.ConsumeFloatingPoint<float>();
+  const std::string in_string = data_provider.ConsumeRemainingBytesAsString();
+  // NOTE(review): no model is loaded before encoding; confirm this is intended.
+  fuzz_sp_processor.SampleEncodeAsSerializedProto(in_string, nbest_size, alpha);
+  return 0;
+}