mirror of https://github.com/perkeep/perkeep.git
Add KeyBytes and ValueBytes accessor to sorted.KeyValue.Iterator.
Goal is to iterate faster (notably: for slurping the index to the in-memory corpus on start-up), by doing fewer copies and generating less garbage. Change-Id: I054b0de2b994eb1f2356aa8587a466bafeb6cf82
This commit is contained in:
parent
09dddbc63a
commit
11a1745034
|
@ -157,7 +157,8 @@ type iter struct {
|
|||
enum *kv.Enumerator
|
||||
|
||||
valid bool
|
||||
key, val string
|
||||
key, val []byte
|
||||
skey, sval *string // non-nil if valid
|
||||
|
||||
err error
|
||||
closed bool
|
||||
|
@ -168,24 +169,49 @@ func (it *iter) Close() error {
|
|||
return it.err
|
||||
}
|
||||
|
||||
func (it *iter) Key() string {
|
||||
func (it *iter) KeyBytes() []byte {
|
||||
if !it.valid {
|
||||
panic("not valid")
|
||||
}
|
||||
return it.key
|
||||
}
|
||||
|
||||
func (it *iter) Value() string {
|
||||
func (it *iter) Key() string {
|
||||
if !it.valid {
|
||||
panic("not valid")
|
||||
}
|
||||
if it.skey != nil {
|
||||
return *it.skey
|
||||
}
|
||||
str := string(it.key)
|
||||
it.skey = &str
|
||||
return str
|
||||
}
|
||||
|
||||
func (it *iter) ValueBytes() []byte {
|
||||
if !it.valid {
|
||||
panic("not valid")
|
||||
}
|
||||
return it.val
|
||||
}
|
||||
|
||||
func (it *iter) Value() string {
|
||||
if !it.valid {
|
||||
panic("not valid")
|
||||
}
|
||||
if it.sval != nil {
|
||||
return *it.sval
|
||||
}
|
||||
str := string(it.val)
|
||||
it.sval = &str
|
||||
return str
|
||||
}
|
||||
|
||||
func (it *iter) Next() (ret bool) {
|
||||
if it.closed {
|
||||
panic("Next called after Next returned value")
|
||||
}
|
||||
it.skey, it.sval = nil, nil
|
||||
defer func() {
|
||||
it.valid = ret
|
||||
if !ret {
|
||||
|
@ -198,13 +224,12 @@ func (it *iter) Next() (ret bool) {
|
|||
return false
|
||||
}
|
||||
}
|
||||
key, val, err := it.enum.Next()
|
||||
var err error
|
||||
it.key, it.val, err = it.enum.Next()
|
||||
if err == io.EOF {
|
||||
it.err = nil
|
||||
return false
|
||||
}
|
||||
it.key = string(key)
|
||||
it.val = string(val)
|
||||
return true
|
||||
}
|
||||
|
||||
|
|
|
@ -159,11 +159,11 @@ type mongoStrIterator struct {
|
|||
*mgo.Iter
|
||||
}
|
||||
|
||||
func (s mongoStrIterator) Next() bool {
|
||||
func (s *mongoStrIterator) Next() bool {
|
||||
return s.Iter.Next(&s.res)
|
||||
}
|
||||
|
||||
func (s mongoStrIterator) Key() (key string) {
|
||||
func (s *mongoStrIterator) Key() string {
|
||||
key, ok := (s.res[mgoKey]).(string)
|
||||
if !ok {
|
||||
return ""
|
||||
|
@ -171,7 +171,14 @@ func (s mongoStrIterator) Key() (key string) {
|
|||
return key
|
||||
}
|
||||
|
||||
func (s mongoStrIterator) Value() (value string) {
|
||||
func (s *mongoStrIterator) KeyBytes() []byte {
|
||||
// TODO(bradfitz,mpl): this is less efficient than the string way. we should
|
||||
// do better here, somehow, like all the other sorted.KeyValue iterators.
|
||||
// For now:
|
||||
return []byte(s.Key())
|
||||
}
|
||||
|
||||
func (s *mongoStrIterator) Value() string {
|
||||
value, ok := (s.res[mgoValue]).(string)
|
||||
if !ok {
|
||||
return ""
|
||||
|
@ -179,7 +186,14 @@ func (s mongoStrIterator) Value() (value string) {
|
|||
return value
|
||||
}
|
||||
|
||||
func (s mongoStrIterator) Close() error {
|
||||
func (s *mongoStrIterator) ValueBytes() []byte {
|
||||
// TODO(bradfitz,mpl): this is less efficient than the string way. we should
|
||||
// do better here, somehow, like all the other sorted.KeyValue iterators.
|
||||
// For now:
|
||||
return []byte(s.Value())
|
||||
}
|
||||
|
||||
func (s *mongoStrIterator) Close() error {
|
||||
// TODO(mpl): think about anything more to be done here.
|
||||
return nil
|
||||
}
|
||||
|
@ -213,7 +227,7 @@ func (mk *mongoKeys) Find(key string) sorted.Iterator {
|
|||
// more suited if possible.
|
||||
cleanedKey := strings.Replace(key, "|", `\|`, -1)
|
||||
iter := mk.db.Find(&bson.M{mgoKey: &bson.M{"$regex": "^" + cleanedKey}}).Sort(mgoKey).Iter()
|
||||
return mongoStrIterator{res: bson.M{}, Iter: iter}
|
||||
return &mongoStrIterator{res: bson.M{}, Iter: iter}
|
||||
}
|
||||
|
||||
func (mk *mongoKeys) Set(key, value string) error {
|
||||
|
|
|
@ -161,7 +161,7 @@ func (s *Storage) Close() error { return s.DB.Close() }
|
|||
func (s *Storage) Find(key string) sorted.Iterator {
|
||||
it := &iter{
|
||||
s: s,
|
||||
low: key,
|
||||
low: []byte(key),
|
||||
op: ">=",
|
||||
closeCheck: leak.NewChecker(),
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ func (s *Storage) Find(key string) sorted.Iterator {
|
|||
// iter is a iterator over sorted key/value pairs in rows.
|
||||
type iter struct {
|
||||
s *Storage
|
||||
low string
|
||||
low []byte
|
||||
op string // ">=" initially, then ">"
|
||||
err error // accumulated error, returned at Close
|
||||
|
||||
|
@ -182,14 +182,32 @@ type iter struct {
|
|||
batchSize int // how big our LIMIT query was
|
||||
seen int // how many rows we've seen this query
|
||||
|
||||
key string
|
||||
value string
|
||||
key sql.RawBytes
|
||||
val sql.RawBytes
|
||||
skey, sval *string // if non-nil, it's been stringified
|
||||
}
|
||||
|
||||
var errClosed = errors.New("mysqlindexer: Iterator already closed")
|
||||
|
||||
func (t *iter) Key() string { return t.key }
|
||||
func (t *iter) Value() string { return t.value }
|
||||
func (t *iter) KeyBytes() []byte { return t.key }
|
||||
func (t *iter) Key() string {
|
||||
if t.skey != nil {
|
||||
return *t.skey
|
||||
}
|
||||
str := string(t.key)
|
||||
t.skey = &str
|
||||
return str
|
||||
}
|
||||
|
||||
func (t *iter) ValueBytes() []byte { return t.val }
|
||||
func (t *iter) Value() string {
|
||||
if t.sval != nil {
|
||||
return *t.sval
|
||||
}
|
||||
str := string(t.val)
|
||||
t.sval = &str
|
||||
return str
|
||||
}
|
||||
|
||||
func (t *iter) Close() error {
|
||||
t.closeCheck.Close()
|
||||
|
@ -205,6 +223,7 @@ func (t *iter) Next() bool {
|
|||
if t.err != nil {
|
||||
return false
|
||||
}
|
||||
t.skey, t.sval = nil, nil
|
||||
if t.rows == nil {
|
||||
const batchSize = 50
|
||||
t.batchSize = batchSize
|
||||
|
@ -213,7 +232,7 @@ func (t *iter) Next() bool {
|
|||
}
|
||||
t.rows, t.err = t.s.DB.Query(t.s.sql(
|
||||
"SELECT k, v FROM rows WHERE k "+t.op+" ? ORDER BY k LIMIT "+strconv.Itoa(batchSize)),
|
||||
t.low)
|
||||
string(t.low))
|
||||
if t.s.Serial {
|
||||
t.s.mu.Unlock()
|
||||
}
|
||||
|
@ -232,12 +251,12 @@ func (t *iter) Next() bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
t.err = t.rows.Scan(&t.key, &t.value)
|
||||
t.err = t.rows.Scan(&t.key, &t.val)
|
||||
if t.err != nil {
|
||||
log.Printf("unexpected Scan error: %v", t.err)
|
||||
return false
|
||||
}
|
||||
t.low = t.key
|
||||
t.low = append(t.low[:0], t.key...)
|
||||
t.seen++
|
||||
return true
|
||||
}
|
||||
|
|
|
@ -70,10 +70,24 @@ type Iterator interface {
|
|||
// Only valid after a call to Next returns true.
|
||||
Key() string
|
||||
|
||||
// KeyBytes returns the key as bytes. The returned bytes
|
||||
// should not be written and are invalid after the next call
|
||||
// to Next or Close.
|
||||
// TODO(bradfitz): rename this and change it to return a
|
||||
// mem.RO instead?
|
||||
KeyBytes() []byte
|
||||
|
||||
// Value returns the value of the current key/value pair.
|
||||
// Only valid after a call to Next returns true.
|
||||
Value() string
|
||||
|
||||
// ValueBytes returns the value as bytes. The returned bytes
|
||||
// should not be written and are invalid after the next call
|
||||
// to Next or Close.
|
||||
// TODO(bradfitz): rename this and change it to return a
|
||||
// mem.RO instead?
|
||||
ValueBytes() []byte
|
||||
|
||||
// Close closes the iterator and returns any accumulated error. Exhausting
|
||||
// all the key/value pairs in a table is not considered to be an error.
|
||||
// It is valid to call Close multiple times. Other methods should not be
|
||||
|
|
|
@ -43,15 +43,45 @@ type memKeys struct {
|
|||
// operates on []byte, to Camlistore's index.Iterator, which operates
|
||||
// on string.
|
||||
type stringIterator struct {
|
||||
db.Iterator
|
||||
lit db.Iterator // underlying leveldb iterator
|
||||
k, v *string // if nil, not stringified yet
|
||||
}
|
||||
|
||||
func (s stringIterator) Key() string {
|
||||
return string(s.Iterator.Key())
|
||||
func (s *stringIterator) Next() bool {
|
||||
s.k, s.v = nil, nil
|
||||
return s.lit.Next()
|
||||
}
|
||||
|
||||
func (s stringIterator) Value() string {
|
||||
return string(s.Iterator.Value())
|
||||
func (s *stringIterator) Close() error {
|
||||
err := s.lit.Close()
|
||||
*s = stringIterator{} // to cause crashes on future access
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *stringIterator) KeyBytes() []byte {
|
||||
return s.lit.Key()
|
||||
}
|
||||
|
||||
func (s *stringIterator) ValueBytes() []byte {
|
||||
return s.lit.Value()
|
||||
}
|
||||
|
||||
func (s *stringIterator) Key() string {
|
||||
if s.k != nil {
|
||||
return *s.k
|
||||
}
|
||||
str := string(s.KeyBytes())
|
||||
s.k = &str
|
||||
return str
|
||||
}
|
||||
|
||||
func (s *stringIterator) Value() string {
|
||||
if s.v != nil {
|
||||
return *s.v
|
||||
}
|
||||
str := string(s.ValueBytes())
|
||||
s.v = &str
|
||||
return str
|
||||
}
|
||||
|
||||
func (mk *memKeys) Get(key string) (string, error) {
|
||||
|
@ -67,8 +97,8 @@ func (mk *memKeys) Get(key string) (string, error) {
|
|||
func (mk *memKeys) Find(key string) Iterator {
|
||||
mk.mu.Lock()
|
||||
defer mk.mu.Unlock()
|
||||
dit := mk.db.Find([]byte(key), nil)
|
||||
return stringIterator{dit}
|
||||
lit := mk.db.Find([]byte(key), nil)
|
||||
return &stringIterator{lit: lit}
|
||||
}
|
||||
|
||||
func (mk *memKeys) Set(key, value string) error {
|
||||
|
|
|
@ -189,6 +189,11 @@ func (it *iter) Next() bool {
|
|||
func (it *iter) Key() string { return it.key }
|
||||
func (it *iter) Value() string { return it.value }
|
||||
|
||||
// TODO(bradfit): optimize the string<->[]byte copies in this iterator, as done in the other
|
||||
// sorted.KeyValue iterators.
|
||||
func (it *iter) KeyBytes() []byte { return []byte(it.key) }
|
||||
func (it *iter) ValueBytes() []byte { return []byte(it.value) }
|
||||
|
||||
func indexFromConfig(ld blobserver.Loader, config jsonconfig.Obj) (storage blobserver.Storage, err error) {
|
||||
is := &indexStorage{}
|
||||
var (
|
||||
|
|
Loading…
Reference in New Issue