From 01630598148cc5feb9a8f01000a2c6fef1b57015 Mon Sep 17 00:00:00 2001
From: Brad Fitzpatrick
Date: Sat, 7 Dec 2013 06:46:02 -0800
Subject: [PATCH] sqlindex: don't iterate in small 50-row batches on big prefixes

This speeds Camlistore start-up index slurping considerably. Instead of
17,000 queries in 30 seconds, it now does 6. A future change will improve
this further, giving sorted.KeyValue impls a hint about the end range too.

Change-Id: Icb5e2ac6edd580469e907fde4e916c00aba0ffe8
---
 pkg/index/sqlindex/sqlindex.go | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/pkg/index/sqlindex/sqlindex.go b/pkg/index/sqlindex/sqlindex.go
index a4701e50e..8b130c919 100644
--- a/pkg/index/sqlindex/sqlindex.go
+++ b/pkg/index/sqlindex/sqlindex.go
@@ -22,6 +22,7 @@ import (
 	"errors"
 	"fmt"
 	"log"
+	"regexp"
 	"strconv"
 	"sync"
 
@@ -164,10 +165,24 @@ func (s *Storage) Find(key string) sorted.Iterator {
 		low: []byte(key),
 		op: ">=",
 		closeCheck: leak.NewChecker(),
+		batchSize: batchSize(key),
 	}
 	return it
 }
 
+var wordThenPunct = regexp.MustCompile(`^\w+\W$`)
+
+// batchSize returns the size of the LIMIT query we'll use for the provided key.
+// A return value of 0 means no LIMIT clause (which is only used when we expect
+// a complete scan)
+func batchSize(key string) int {
+	const defaultBatchSize = 50
+	if wordThenPunct.MatchString(key) {
+		return 0
+	}
+	return defaultBatchSize
+}
+
 // iter is a iterator over sorted key/value pairs in rows.
 type iter struct {
 	s *Storage
@@ -179,7 +194,7 @@ type iter struct {
 
 	rows *sql.Rows // if non-nil, the rows we're reading from
 
-	batchSize int // how big our LIMIT query was
+	batchSize int // how big our LIMIT query was; 0 for none
 	seen int // how many rows we've seen this query
 
 	key sql.RawBytes
@@ -225,13 +240,15 @@ func (t *iter) Next() bool {
 	}
 	t.skey, t.sval = nil, nil
 	if t.rows == nil {
-		const batchSize = 50
-		t.batchSize = batchSize
+		limit := ""
+		if t.batchSize > 0 {
+			limit = "LIMIT " + strconv.Itoa(t.batchSize)
+		}
 		if t.s.Serial {
 			t.s.mu.Lock()
 		}
 		t.rows, t.err = t.s.DB.Query(t.s.sql(
-			"SELECT k, v FROM rows WHERE k "+t.op+" ? ORDER BY k LIMIT "+strconv.Itoa(batchSize)),
+			"SELECT k, v FROM rows WHERE k "+t.op+" ? ORDER BY k "+limit),
 			string(t.low))
 		if t.s.Serial {
 			t.s.mu.Unlock()
@@ -244,7 +261,7 @@ func (t *iter) Next() bool {
 		t.op = ">"
 	}
 	if !t.rows.Next() {
-		if t.seen == t.batchSize {
+		if t.batchSize > 0 && t.seen == t.batchSize {
 			t.rows.Close() // required for <= Go 1.1, but not Go 1.2, iirc.
 			t.rows = nil
 			return t.Next()