mirror of https://github.com/google/oss-fuzz.git
248 lines
9.0 KiB
Java
Executable File
248 lines
9.0 KiB
Java
Executable File
// Copyright 2023 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
|
|
import com.code_intelligence.jazzer.api.FuzzedDataProvider;
|
|
|
|
import java.util.List;
|
|
|
|
import javax.imageio.IIOException;
|
|
|
|
import java.io.IOException;
|
|
import java.io.InputStreamReader;
|
|
import java.io.ByteArrayInputStream;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Files;
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
import org.apache.lucene.analysis.custom.CustomAnalyzer;
|
|
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
|
|
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
|
|
import org.apache.lucene.search.IndexSearcher;
|
|
import org.apache.lucene.geo.SimpleWKTShapeParser;
|
|
import org.apache.lucene.queryparser.xml.CoreParser;
|
|
import org.apache.lucene.queryparser.xml.ParserException;
|
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
|
import org.apache.lucene.queryparser.classic.ParseException;
|
|
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
|
|
import org.apache.lucene.queryparser.xml.CorePlusQueriesParser;
|
|
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
|
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
|
|
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
|
|
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
|
|
import org.apache.lucene.queryparser.flexible.precedence.PrecedenceQueryParser;
|
|
|
|
public class QueryParserFuzzer {
|
|
static String [] tokenizerArray = {
|
|
"standard",
|
|
"simplePattern",
|
|
"classic",
|
|
"whitespace",
|
|
"uax29UrlEmail",
|
|
"pathHierarchy",
|
|
"wikipedia",
|
|
"nGram",
|
|
"edgeNGram",
|
|
"thai",
|
|
"pattern",
|
|
"simplePatternSplit",
|
|
"letter",
|
|
"keyword"
|
|
};
|
|
|
|
static String [] charFilterArray = {
|
|
"htmlStrip",
|
|
"cjkWidth",
|
|
"mapping",
|
|
"patternReplace",
|
|
"persian"
|
|
};
|
|
|
|
static String [] tokenFilterArray = {
|
|
"apostrophe",
|
|
"arabicNormalization",
|
|
"arabicStem",
|
|
"asciiFolding",
|
|
"bengaliNormalization",
|
|
"bengaliStem",
|
|
"brazilianStem",
|
|
"bulgarianStem",
|
|
"capitalization",
|
|
"cjkBigram",
|
|
"cjkWidth",
|
|
"classic",
|
|
"codepointCount",
|
|
"commonGrams",
|
|
"commonGramsQuery",
|
|
"concatenateGraph",
|
|
"czechStem",
|
|
"dateRecognizer",
|
|
"decimalDigit",
|
|
"delimitedPayload",
|
|
"delimitedTermFrequency",
|
|
"dictionaryCompoundWord",
|
|
"edgeNGram",
|
|
"elision",
|
|
"englishMinimalStem",
|
|
"englishPossessive",
|
|
"fingerprint",
|
|
"finnishLightStem",
|
|
"fixBrokenOffsets",
|
|
"fixedShingle",
|
|
"flattenGraph",
|
|
"frenchLightStem",
|
|
"frenchMinimalStem",
|
|
"galicianMinimalStem",
|
|
"galicianStem",
|
|
"germanLightStem",
|
|
"germanMinimalStem",
|
|
"germanNormalization",
|
|
"germanStem",
|
|
"greekLowercase",
|
|
"greekStem",
|
|
"hindiNormalization",
|
|
"hindiStem",
|
|
"hungarianLightStem",
|
|
"hunspellStem",
|
|
"hyphenatedWords",
|
|
"hyphenationCompoundWord",
|
|
"indicNormalization",
|
|
"indonesianStem",
|
|
"irishLowercase",
|
|
"italianLightStem",
|
|
"kStem",
|
|
"keepWord",
|
|
"keywordMarker",
|
|
"keywordRepeat",
|
|
"latvianStem",
|
|
"length",
|
|
"limitTokenCount",
|
|
"limitTokenOffset",
|
|
"limitTokenPosition",
|
|
"lowercase",
|
|
"minHash",
|
|
"nGram",
|
|
"norwegianLightStem",
|
|
"norwegianMinimalStem",
|
|
"numericPayload",
|
|
"patternCaptureGroup",
|
|
"patternReplace",
|
|
"persianNormalization",
|
|
"porterStem",
|
|
"portugueseLightStem",
|
|
"portugueseMinimalStem",
|
|
"portugueseStem",
|
|
"protectedTerm",
|
|
"removeDuplicates",
|
|
"reverseString",
|
|
"russianLightStem",
|
|
"scandinavianFolding",
|
|
"scandinavianNormalization",
|
|
"serbianNormalization",
|
|
"shingle",
|
|
"snowballPorter",
|
|
"soraniNormalization",
|
|
"soraniStem",
|
|
"spanishLightStem",
|
|
"spanishMinimalStem",
|
|
"stemmerOverride",
|
|
"stop",
|
|
"swedishLightStem",
|
|
"synonym",
|
|
"synonymGraph",
|
|
"tokenOffsetPayload",
|
|
"trim",
|
|
"truncate",
|
|
"turkishLowercase",
|
|
"type",
|
|
"typeAsPayload",
|
|
"typeAsSynonym",
|
|
"uppercase",
|
|
"wordDelimiter",
|
|
"wordDelimiterGraph"
|
|
};
|
|
|
|
public static void fuzzerTestOneInput(FuzzedDataProvider data) {
|
|
String field = data.consumeString(500);
|
|
String field2 = data.consumeString(500);
|
|
String field3 = data.consumeString(500);
|
|
String [] fields = {field, field2, field3};
|
|
String query = data.consumeString(500);
|
|
String query2 = data.consumeString(500);
|
|
String query3 = data.consumeString(500);
|
|
String [] queries = {query, query2, query3};
|
|
Boolean dedup = data.consumeBoolean();
|
|
Boolean expand = data.consumeBoolean();
|
|
List<String> selectedTokenizers = data.pickValues(tokenizerArray, data.consumeInt(0, tokenizerArray.length));
|
|
List<String> selectedCharFilters = data.pickValues(charFilterArray, data.consumeInt(0, charFilterArray.length));
|
|
List<String> selectedTokenFilters = data.pickValues(tokenFilterArray, data.consumeInt(0, tokenFilterArray.length));
|
|
CustomAnalyzer.Builder cb = CustomAnalyzer.builder();
|
|
|
|
try {
|
|
cb.withTokenizer(data.pickValue(tokenizerArray));
|
|
|
|
for (String cf : selectedCharFilters) {
|
|
cb.addCharFilter(cf);
|
|
}
|
|
|
|
for (String tf : selectedTokenFilters) {
|
|
cb.addTokenFilter(tf);
|
|
}
|
|
|
|
Analyzer analyzer = cb.build();
|
|
|
|
QueryParser queryParser = new QueryParser(field, analyzer);
|
|
queryParser.parse(query);
|
|
|
|
StandardQueryParser standardQueryParser = new StandardQueryParser(analyzer);
|
|
standardQueryParser.parse(query, field);
|
|
|
|
SimpleQueryParser simpleQueryParser = new SimpleQueryParser(analyzer, field);
|
|
simpleQueryParser.parse(query);
|
|
|
|
ComplexPhraseQueryParser complexPhraseQueryParser = new ComplexPhraseQueryParser(field, analyzer);
|
|
complexPhraseQueryParser.parse(query);
|
|
|
|
MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(fields, analyzer);
|
|
multiFieldQueryParser.parse(queries, fields, analyzer);
|
|
|
|
PrecedenceQueryParser precedenceQueryParser = new PrecedenceQueryParser(analyzer);
|
|
precedenceQueryParser.parse(query, field);
|
|
|
|
CoreParser coreParser = new CoreParser(field, analyzer);
|
|
coreParser.parse(new ByteArrayInputStream(query.getBytes()));
|
|
|
|
CorePlusQueriesParser corePlusQueriesParser = new CorePlusQueriesParser(field, analyzer);
|
|
corePlusQueriesParser.parse(new ByteArrayInputStream(query.getBytes()));
|
|
|
|
SolrSynonymParser solrqSynonymParser = new SolrSynonymParser(dedup, expand, analyzer);
|
|
solrqSynonymParser.parse(new InputStreamReader(new ByteArrayInputStream(query.getBytes())));
|
|
|
|
WordnetSynonymParser wordnetSynonymParser = new WordnetSynonymParser(dedup, expand, analyzer);
|
|
wordnetSynonymParser.parse(new InputStreamReader(new ByteArrayInputStream(query.getBytes())));
|
|
|
|
SimpleWKTShapeParser.parse(query);
|
|
} catch (IOException | QueryNodeException | ParseException | ParserException | java.text.ParseException e) {
|
|
// IOException must be caught or declared to be thrown according to docs.
|
|
// QueryNodeException must be caught or declared to be thrown according to docs.
|
|
// ParseException must be caught or declared to be thrown according to docs.
|
|
// ParserException must be caught or declared to be thrown according to docs.
|
|
// java.text.ParseException must be caught or declared to be thrown according to docs.
|
|
} catch (RuntimeException e) {
|
|
// Undocumented RuntimeException is thrown at https://github.com/apache/lucene/blob/main/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java#L147
|
|
}
|
|
|
|
}
|
|
}
|