Add KeyBytes and ValueBytes accessor to sorted.KeyValue.Iterator.

Goal is to iterate faster (notably: for slurping the index to the in-memory
corpus on start-up), by doing fewer copies and generating less garbage.

Change-Id: I054b0de2b994eb1f2356aa8587a466bafeb6cf82
This commit is contained in:
Brad Fitzpatrick 2013-12-04 14:18:52 +01:00
parent 09dddbc63a
commit 11a1745034
6 changed files with 135 additions and 28 deletions

View File

@ -156,8 +156,9 @@ type iter struct {
enum *kv.Enumerator
valid bool
key, val string
valid bool
key, val []byte
skey, sval *string // non-nil if valid
err error
closed bool
@ -168,24 +169,49 @@ func (it *iter) Close() error {
return it.err
}
func (it *iter) Key() string {
func (it *iter) KeyBytes() []byte {
if !it.valid {
panic("not valid")
}
return it.key
}
func (it *iter) Value() string {
func (it *iter) Key() string {
if !it.valid {
panic("not valid")
}
if it.skey != nil {
return *it.skey
}
str := string(it.key)
it.skey = &str
return str
}
func (it *iter) ValueBytes() []byte {
if !it.valid {
panic("not valid")
}
return it.val
}
func (it *iter) Value() string {
if !it.valid {
panic("not valid")
}
if it.sval != nil {
return *it.sval
}
str := string(it.val)
it.sval = &str
return str
}
func (it *iter) Next() (ret bool) {
if it.closed {
panic("Next called after Next returned value")
}
it.skey, it.sval = nil, nil
defer func() {
it.valid = ret
if !ret {
@ -198,13 +224,12 @@ func (it *iter) Next() (ret bool) {
return false
}
}
key, val, err := it.enum.Next()
var err error
it.key, it.val, err = it.enum.Next()
if err == io.EOF {
it.err = nil
return false
}
it.key = string(key)
it.val = string(val)
return true
}

View File

@ -159,11 +159,11 @@ type mongoStrIterator struct {
*mgo.Iter
}
func (s mongoStrIterator) Next() bool {
func (s *mongoStrIterator) Next() bool {
return s.Iter.Next(&s.res)
}
func (s mongoStrIterator) Key() (key string) {
func (s *mongoStrIterator) Key() string {
key, ok := (s.res[mgoKey]).(string)
if !ok {
return ""
@ -171,7 +171,14 @@ func (s mongoStrIterator) Key() (key string) {
return key
}
func (s mongoStrIterator) Value() (value string) {
func (s *mongoStrIterator) KeyBytes() []byte {
// TODO(bradfitz,mpl): this is less efficient than the string way. we should
// do better here, somehow, like all the other sorted.KeyValue iterators.
// For now:
return []byte(s.Key())
}
func (s *mongoStrIterator) Value() string {
value, ok := (s.res[mgoValue]).(string)
if !ok {
return ""
@ -179,7 +186,14 @@ func (s mongoStrIterator) Value() (value string) {
return value
}
func (s mongoStrIterator) Close() error {
func (s *mongoStrIterator) ValueBytes() []byte {
// TODO(bradfitz,mpl): this is less efficient than the string way. we should
// do better here, somehow, like all the other sorted.KeyValue iterators.
// For now:
return []byte(s.Value())
}
func (s *mongoStrIterator) Close() error {
// TODO(mpl): think about anything more to be done here.
return nil
}
@ -213,7 +227,7 @@ func (mk *mongoKeys) Find(key string) sorted.Iterator {
// more suited if possible.
cleanedKey := strings.Replace(key, "|", `\|`, -1)
iter := mk.db.Find(&bson.M{mgoKey: &bson.M{"$regex": "^" + cleanedKey}}).Sort(mgoKey).Iter()
return mongoStrIterator{res: bson.M{}, Iter: iter}
return &mongoStrIterator{res: bson.M{}, Iter: iter}
}
func (mk *mongoKeys) Set(key, value string) error {

View File

@ -161,7 +161,7 @@ func (s *Storage) Close() error { return s.DB.Close() }
func (s *Storage) Find(key string) sorted.Iterator {
it := &iter{
s: s,
low: key,
low: []byte(key),
op: ">=",
closeCheck: leak.NewChecker(),
}
@ -171,7 +171,7 @@ func (s *Storage) Find(key string) sorted.Iterator {
// iter is a iterator over sorted key/value pairs in rows.
type iter struct {
s *Storage
low string
low []byte
op string // ">=" initially, then ">"
err error // accumulated error, returned at Close
@ -182,14 +182,32 @@ type iter struct {
batchSize int // how big our LIMIT query was
seen int // how many rows we've seen this query
key string
value string
key sql.RawBytes
val sql.RawBytes
skey, sval *string // if non-nil, it's been stringified
}
var errClosed = errors.New("mysqlindexer: Iterator already closed")
func (t *iter) Key() string { return t.key }
func (t *iter) Value() string { return t.value }
func (t *iter) KeyBytes() []byte { return t.key }
func (t *iter) Key() string {
if t.skey != nil {
return *t.skey
}
str := string(t.key)
t.skey = &str
return str
}
func (t *iter) ValueBytes() []byte { return t.val }
func (t *iter) Value() string {
if t.sval != nil {
return *t.sval
}
str := string(t.val)
t.sval = &str
return str
}
func (t *iter) Close() error {
t.closeCheck.Close()
@ -205,6 +223,7 @@ func (t *iter) Next() bool {
if t.err != nil {
return false
}
t.skey, t.sval = nil, nil
if t.rows == nil {
const batchSize = 50
t.batchSize = batchSize
@ -213,7 +232,7 @@ func (t *iter) Next() bool {
}
t.rows, t.err = t.s.DB.Query(t.s.sql(
"SELECT k, v FROM rows WHERE k "+t.op+" ? ORDER BY k LIMIT "+strconv.Itoa(batchSize)),
t.low)
string(t.low))
if t.s.Serial {
t.s.mu.Unlock()
}
@ -232,12 +251,12 @@ func (t *iter) Next() bool {
}
return false
}
t.err = t.rows.Scan(&t.key, &t.value)
t.err = t.rows.Scan(&t.key, &t.val)
if t.err != nil {
log.Printf("unexpected Scan error: %v", t.err)
return false
}
t.low = t.key
t.low = append(t.low[:0], t.key...)
t.seen++
return true
}

View File

@ -70,10 +70,24 @@ type Iterator interface {
// Only valid after a call to Next returns true.
Key() string
// KeyBytes returns the key as bytes. The returned bytes
// should not be written and are invalid after the next call
// to Next or Close.
// TODO(bradfitz): rename this and change it to return a
// mem.RO instead?
KeyBytes() []byte
// Value returns the value of the current key/value pair.
// Only valid after a call to Next returns true.
Value() string
// ValueBytes returns the value as bytes. The returned bytes
// should not be written and are invalid after the next call
// to Next or Close.
// TODO(bradfitz): rename this and change it to return a
// mem.RO instead?
ValueBytes() []byte
// Close closes the iterator and returns any accumulated error. Exhausting
// all the key/value pairs in a table is not considered to be an error.
// It is valid to call Close multiple times. Other methods should not be

View File

@ -43,15 +43,45 @@ type memKeys struct {
// operates on []byte, to Camlistore's index.Iterator, which operates
// on string.
type stringIterator struct {
db.Iterator
lit db.Iterator // underlying leveldb iterator
k, v *string // if nil, not stringified yet
}
func (s stringIterator) Key() string {
return string(s.Iterator.Key())
func (s *stringIterator) Next() bool {
s.k, s.v = nil, nil
return s.lit.Next()
}
func (s stringIterator) Value() string {
return string(s.Iterator.Value())
func (s *stringIterator) Close() error {
err := s.lit.Close()
*s = stringIterator{} // to cause crashes on future access
return err
}
func (s *stringIterator) KeyBytes() []byte {
return s.lit.Key()
}
func (s *stringIterator) ValueBytes() []byte {
return s.lit.Value()
}
func (s *stringIterator) Key() string {
if s.k != nil {
return *s.k
}
str := string(s.KeyBytes())
s.k = &str
return str
}
func (s *stringIterator) Value() string {
if s.v != nil {
return *s.v
}
str := string(s.ValueBytes())
s.v = &str
return str
}
func (mk *memKeys) Get(key string) (string, error) {
@ -67,8 +97,8 @@ func (mk *memKeys) Get(key string) (string, error) {
func (mk *memKeys) Find(key string) Iterator {
mk.mu.Lock()
defer mk.mu.Unlock()
dit := mk.db.Find([]byte(key), nil)
return stringIterator{dit}
lit := mk.db.Find([]byte(key), nil)
return &stringIterator{lit: lit}
}
func (mk *memKeys) Set(key, value string) error {

View File

@ -189,6 +189,11 @@ func (it *iter) Next() bool {
func (it *iter) Key() string { return it.key }
func (it *iter) Value() string { return it.value }
// TODO(bradfit): optimize the string<->[]byte copies in this iterator, as done in the other
// sorted.KeyValue iterators.
func (it *iter) KeyBytes() []byte { return []byte(it.key) }
func (it *iter) ValueBytes() []byte { return []byte(it.value) }
func indexFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
is := &indexStorage{}
var (