Add sentencepiece (#4035)

* add sentencepiece project

* change the architectures

* change indentations
This commit is contained in:
Google AutoFuzz Team 2020-06-29 21:45:50 +02:00 committed by GitHub
parent 7da55877b6
commit 1f9ec264a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 95 additions and 0 deletions

View File

@ -0,0 +1,22 @@
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Build image for the sentencepiece fuzzers, derived from the OSS-Fuzz base
# builder image.
FROM gcr.io/oss-fuzz-base/base-builder

# LABEL replaces the deprecated MAINTAINER instruction.
LABEL maintainer="taku@google.com"

# Build tooling and libraries (one package per line, sorted for easy diffing).
# The apt package lists are removed in the same layer so they do not persist
# in the image.
RUN apt-get update && apt-get install -y \
      autoconf \
      automake \
      build-essential \
      cmake \
      libgoogle-perftools-dev \
      libtool \
      make \
      pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Shallow clone (--depth 1): only the most recent upstream commit is fetched.
# NOTE(review): the explicit $SRC prefix assumes the base image's working
# directory is $SRC, which is where the previous relative path resolved.
RUN git clone --depth 1 https://github.com/google/sentencepiece.git $SRC/sentencepiece

# Absolute path so the working directory does not depend on the WORKDIR
# inherited from the base image.
WORKDIR $SRC/sentencepiece

# The build script and the fuzzer sources go to $SRC, where build.sh searches
# for *_fuzzer.cc files.
COPY build.sh *.cc $SRC/

31
projects/sentencepiece/build.sh Executable file
View File

@ -0,0 +1,31 @@
#!/bin/bash -eu
# Copyright 2020 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Build the project: configure with CMake in a separate build directory,
# compile using all available cores, then install.
mkdir build
cd build
cmake -DSPM_ENABLE_SHARED=ON ..
make -j "$(nproc)"
make install

# Build the fuzzers: every *_fuzzer.cc found under $SRC is compiled and linked
# against the fuzzing engine and the sentencepiece static archive produced
# in ./src of the build directory. The output binary is named after the source
# file without its .cc suffix.
for fuzzer in $(find "$SRC" -name '*_fuzzer.cc'); do
  fuzz_basename=$(basename -s .cc "$fuzzer")
  # The source file must be referenced through the loop variable "$fuzzer".
  # The script runs with `-u`, so expanding an undefined name (previously
  # "$fuzzers") aborts the build with an "unbound variable" error.
  # $CXXFLAGS and $LIB_FUZZING_ENGINE are intentionally left unquoted so that
  # multiple flags split into separate arguments.
  $CXX $CXXFLAGS -std=c++11 -I. \
    "$fuzzer" $LIB_FUZZING_ENGINE ./src/libsentencepiece.a \
    -o "$OUT/$fuzz_basename"
done

View File

@ -0,0 +1,9 @@
# Project metadata for the sentencepiece fuzzing integration.
homepage: "https://github.com/google/sentencepiece"
language: c++
primary_contact: "taku@google.com"

# Sanitizer configurations listed for this project.
sanitizers:
  - address
  - memory
  - undefined

# CPU architectures listed for this project.
architectures:
  - x86_64

View File

@ -0,0 +1,33 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fuzzer/FuzzedDataProvider.h>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include "sentencepiece_processor.h"
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
sentencepiece::SentencePieceProcessor fuzz_sp_processor;
FuzzedDataProvider data_provider(data, size);
const int nbest_size = data_provider.ConsumeIntegral<int>();
const float alpha = data_provider.ConsumeFloatingPoint<float>();
const std::string in_string = data_provider.ConsumeRemainingBytesAsString();
fuzz_sp_processor.SampleEncodeAsSerializedProto(in_string, nbest_size, alpha);
return 0;
}