mirror of https://github.com/perkeep/perkeep.git
314 lines
5.3 KiB
Go
314 lines
5.3 KiB
Go
/*
|
|
Copyright 2014 The Perkeep Authors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
// This is the lexer for search expressions (see expr.go).
|
|
|
|
package search
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
type tokenType int
|
|
|
|
const (
|
|
tokenAnd tokenType = iota
|
|
tokenArg
|
|
tokenClose
|
|
tokenColon
|
|
tokenEOF
|
|
tokenError
|
|
tokenLiteral
|
|
tokenNot
|
|
tokenOpen
|
|
tokenOr
|
|
tokenPredicate
|
|
tokenQuotedArg
|
|
tokenQuotedLiteral
|
|
)
|
|
|
|
const (
|
|
eof = -1 // -1 is unused in utf8
|
|
whitespace = "\t\n\f\v\r "
|
|
opBound = whitespace + "("
|
|
)
|
|
|
|
// IsSearchWordRune defines the runes that can be used in unquoted predicate arguments
|
|
// or unquoted literals. These are all non-space unicode characters except ':' which is
|
|
// used for predicate marking, and '(', ')', which are used for predicate grouping.
|
|
func isSearchWordRune(r rune) bool {
|
|
switch r {
|
|
case ':', ')', '(', eof:
|
|
return false
|
|
}
|
|
return !unicode.IsSpace(r)
|
|
}
|
|
|
|
type token struct {
|
|
typ tokenType
|
|
val string
|
|
start int
|
|
}
|
|
|
|
func (t token) String() string {
|
|
switch t.typ {
|
|
case tokenEOF:
|
|
return "EOF"
|
|
case tokenError:
|
|
return fmt.Sprintf("{err:%q at pos: %d}", t.val, t.start)
|
|
}
|
|
return fmt.Sprintf("{t:%v,%q (col: %d)}", t.typ, t.val, t.start)
|
|
}
|
|
|
|
type lexer struct {
|
|
input string
|
|
start int
|
|
pos int
|
|
width int
|
|
tokens chan token
|
|
state stateFn
|
|
}
|
|
|
|
func (l *lexer) emit(typ tokenType) {
|
|
l.tokens <- token{typ, l.input[l.start:l.pos], l.start}
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *lexer) next() (r rune) {
|
|
if l.pos >= len(l.input) {
|
|
l.width = 0
|
|
return eof
|
|
}
|
|
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
|
|
l.pos += l.width
|
|
return
|
|
}
|
|
|
|
func (l *lexer) ignore() {
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *lexer) backup() {
|
|
l.pos -= l.width
|
|
}
|
|
|
|
func (l *lexer) peek() rune {
|
|
r := l.next()
|
|
l.backup()
|
|
return r
|
|
}
|
|
|
|
func (l *lexer) accept(valid string) bool {
|
|
if strings.ContainsRune(valid, l.next()) {
|
|
return true
|
|
}
|
|
l.backup()
|
|
return false
|
|
}
|
|
|
|
func (l *lexer) acceptString(s string) bool {
|
|
for _, r := range s {
|
|
if l.next() != r {
|
|
l.backup()
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (l *lexer) acceptRun(valid string) {
|
|
for strings.ContainsRune(valid, l.next()) {
|
|
}
|
|
l.backup()
|
|
}
|
|
|
|
func (l *lexer) acceptRunFn(valid func(rune) bool) {
|
|
for valid(l.next()) {
|
|
}
|
|
l.backup()
|
|
}
|
|
|
|
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
|
|
l.tokens <- token{
|
|
typ: tokenError,
|
|
val: fmt.Sprintf(format, args...),
|
|
start: l.start,
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func lex(input string) (*lexer, chan token) {
|
|
l := &lexer{
|
|
input: input,
|
|
tokens: make(chan token),
|
|
state: readExp,
|
|
}
|
|
go l.run()
|
|
return l, l.tokens
|
|
}
|
|
|
|
func (l *lexer) run() {
|
|
for {
|
|
if l.state == nil {
|
|
close(l.tokens)
|
|
return
|
|
}
|
|
l.state = l.state(l)
|
|
}
|
|
}
|
|
|
|
//
|
|
// State functions
|
|
//
|
|
type stateFn func(*lexer) stateFn
|
|
|
|
func readNeg(l *lexer) stateFn {
|
|
l.accept("-")
|
|
l.emit(tokenNot)
|
|
return readExp
|
|
}
|
|
|
|
func readClose(l *lexer) stateFn {
|
|
l.accept(")")
|
|
l.emit(tokenClose)
|
|
return readOperator
|
|
}
|
|
|
|
func readOpen(l *lexer) stateFn {
|
|
l.accept("(")
|
|
l.emit(tokenOpen)
|
|
return readExp
|
|
}
|
|
|
|
func readColon(l *lexer) stateFn {
|
|
l.accept(":")
|
|
l.emit(tokenColon)
|
|
return readArg
|
|
}
|
|
|
|
func readPredicate(l *lexer) stateFn {
|
|
l.acceptRunFn(unicode.IsLetter)
|
|
switch l.peek() {
|
|
case ':':
|
|
l.emit(tokenPredicate)
|
|
return readColon
|
|
}
|
|
return readLiteral
|
|
}
|
|
|
|
func readLiteral(l *lexer) stateFn {
|
|
l.acceptRunFn(isSearchWordRune)
|
|
l.emit(tokenLiteral)
|
|
return readOperator
|
|
}
|
|
|
|
func readArg(l *lexer) stateFn {
|
|
if l.peek() == '"' {
|
|
return readQuotedArg
|
|
}
|
|
l.acceptRunFn(isSearchWordRune)
|
|
l.emit(tokenArg)
|
|
if l.peek() == ':' {
|
|
return readColon
|
|
}
|
|
return readOperator
|
|
}
|
|
|
|
func readAND(l *lexer) stateFn {
|
|
if l.acceptString("and") && l.accept(opBound) {
|
|
l.backup()
|
|
l.emit(tokenAnd)
|
|
return readExp
|
|
}
|
|
return readPredicate
|
|
}
|
|
|
|
func readOR(l *lexer) stateFn {
|
|
if l.acceptString("or") && l.accept(opBound) {
|
|
l.backup()
|
|
l.emit(tokenOr)
|
|
return readExp
|
|
}
|
|
return readPredicate
|
|
}
|
|
|
|
func runQuoted(l *lexer) bool {
|
|
l.accept("\"")
|
|
for {
|
|
r := l.next()
|
|
switch r {
|
|
case eof:
|
|
return false
|
|
case '\\':
|
|
l.next()
|
|
case '"':
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
|
|
func readQuotedLiteral(l *lexer) stateFn {
|
|
if !runQuoted(l) {
|
|
return l.errorf("Unclosed quote")
|
|
}
|
|
l.emit(tokenQuotedLiteral)
|
|
return readOperator
|
|
}
|
|
|
|
func readQuotedArg(l *lexer) stateFn {
|
|
if !runQuoted(l) {
|
|
return l.errorf("Unclosed quote")
|
|
}
|
|
l.emit(tokenQuotedArg)
|
|
if l.peek() == ':' {
|
|
return readColon
|
|
}
|
|
return readOperator
|
|
}
|
|
|
|
func readExp(l *lexer) stateFn {
|
|
l.acceptRun(whitespace)
|
|
l.ignore()
|
|
switch l.peek() {
|
|
case eof:
|
|
return nil
|
|
case '(':
|
|
return readOpen
|
|
case ')':
|
|
return readClose
|
|
case '-':
|
|
return readNeg
|
|
case '"':
|
|
return readQuotedLiteral
|
|
}
|
|
return readPredicate
|
|
}
|
|
|
|
func readOperator(l *lexer) stateFn {
|
|
l.acceptRun(whitespace)
|
|
l.ignore()
|
|
switch l.peek() {
|
|
case 'a':
|
|
return readAND
|
|
case 'o':
|
|
return readOR
|
|
}
|
|
return readExp
|
|
}
|