2011-11-10 17:47:32 +00:00
|
|
|
/*
|
|
|
|
Copyright 2011 Google Inc.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package index
|
|
|
|
|
|
|
|
import (
|
2011-11-11 02:58:04 +00:00
|
|
|
"fmt"
|
2011-11-10 18:28:44 +00:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
2011-11-10 17:47:32 +00:00
|
|
|
|
2013-08-04 02:54:30 +00:00
|
|
|
"camlistore.org/pkg/blob"
|
2013-12-02 21:20:51 +00:00
|
|
|
"camlistore.org/pkg/context"
|
2013-11-23 07:24:54 +00:00
|
|
|
"camlistore.org/pkg/sorted"
|
2011-11-10 17:47:32 +00:00
|
|
|
)
|
|
|
|
|
2013-12-02 21:20:51 +00:00
|
|
|
func (ix *Index) EnumerateBlobs(ctx *context.Context, dest chan<- blob.SizedRef, after string, limit int) (err error) {
|
2011-11-10 17:47:32 +00:00
|
|
|
defer close(dest)
|
2013-12-07 16:43:18 +00:00
|
|
|
it := ix.s.Find("have:"+after, "have~")
|
2013-12-02 21:20:51 +00:00
|
|
|
defer func() {
|
|
|
|
closeErr := it.Close()
|
|
|
|
if err == nil {
|
|
|
|
err = closeErr
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
index: actually reindex when out of order
problem: the out-of-order mechanism based on the outOfOrderIndexerLoop
was not working for some claims.
Let C be a delete claim on permanode P. If C was received before P was,
C was marked as being received with the "have" index row. However, for
the deletion to be marked in the index, some information about P is
needed (its meta row), so C could not be fully indexed upon reception.
Then, when P was finally received, the outOfOrderIndexerLoop would kick
in and retry indexing C. Which would fail, because a test based on the
"have" row would (wrongly) detect that C is already indexed and return
early.
In this patch:
-we introduce the "|indexed" suffix to the "have" - value part - row
(receive.go). If a blob is received but some of its dependencies are
missing, the have row value is written without the suffix. Upon
reception of a blob, we now test for the presence of the suffix in the
have row. If missing, the reception continues instead of returning
early. The existing mechanism that was detecting missing dependencies
for file blobs has been adapted to work with this suffix too.
-the index enumeration (enumstat.go), which relies on "have" rows, has
been adapted to work with the new "have" row format, while staying
compatible with the old format. And related tests have been added.
http://camlistore.org/issue/454
Change-Id: I2559d08a12b2a4e0f0691fc7e31f1ed1f874625e
2014-07-03 16:07:08 +00:00
|
|
|
afterKey := "have:" + after
|
2012-02-20 12:32:46 +00:00
|
|
|
n := int(0)
|
2011-11-10 18:28:44 +00:00
|
|
|
for n < limit && it.Next() {
|
|
|
|
k := it.Key()
|
index: actually reindex when out of order
problem: the out-of-order mechanism based on the outOfOrderIndexerLoop
was not working for some claims.
Let C be a delete claim on permanode P. If C was received before P was,
C was marked as being received with the "have" index row. However, for
the deletion to be marked in the index, some information about P is
needed (its meta row), so C could not be fully indexed upon reception.
Then, when P was finally received, the outOfOrderIndexerLoop would kick
in and retry indexing C. Which would fail, because a test based on the
"have" row would (wrongly) detect that C is already indexed and return
early.
In this patch:
-we introduce the "|indexed" suffix to the "have" - value part - row
(receive.go). If a blob is received but some of its dependencies are
missing, the have row value is written without the suffix. Upon
reception of a blob, we now test for the presence of the suffix in the
have row. If missing, the reception continues instead of returning
early. The existing mechanism that was detecting missing dependencies
for file blobs has been adapted to work with this suffix too.
-the index enumeration (enumstat.go), which relies on "have" rows, has
been adapted to work with the new "have" row format, while staying
compatible with the old format. And related tests have been added.
http://camlistore.org/issue/454
Change-Id: I2559d08a12b2a4e0f0691fc7e31f1ed1f874625e
2014-07-03 16:07:08 +00:00
|
|
|
if k <= afterKey {
|
2013-12-02 21:20:51 +00:00
|
|
|
continue
|
|
|
|
}
|
2011-11-10 18:28:44 +00:00
|
|
|
if !strings.HasPrefix(k, "have:") {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
n++
|
2013-08-04 02:54:30 +00:00
|
|
|
br, ok := blob.Parse(k[len("have:"):])
|
index: actually reindex when out of order
problem: the out-of-order mechanism based on the outOfOrderIndexerLoop
was not working for some claims.
Let C be a delete claim on permanode P. If C was received before P was,
C was marked as being received with the "have" index row. However, for
the deletion to be marked in the index, some information about P is
needed (its meta row), so C could not be fully indexed upon reception.
Then, when P was finally received, the outOfOrderIndexerLoop would kick
in and retry indexing C. Which would fail, because a test based on the
"have" row would (wrongly) detect that C is already indexed and return
early.
In this patch:
-we introduce the "|indexed" suffix to the "have" - value part - row
(receive.go). If a blob is received but some of its dependencies are
missing, the have row value is written without the suffix. Upon
reception of a blob, we now test for the presence of the suffix in the
have row. If missing, the reception continues instead of returning
early. The existing mechanism that was detecting missing dependencies
for file blobs has been adapted to work with this suffix too.
-the index enumeration (enumstat.go), which relies on "have" rows, has
been adapted to work with the new "have" row format, while staying
compatible with the old format. And related tests have been added.
http://camlistore.org/issue/454
Change-Id: I2559d08a12b2a4e0f0691fc7e31f1ed1f874625e
2014-07-03 16:07:08 +00:00
|
|
|
if !ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
size, err := parseHaveVal(it.Value())
|
|
|
|
if err == nil {
|
2013-12-02 21:20:51 +00:00
|
|
|
select {
|
2014-01-28 20:46:52 +00:00
|
|
|
case dest <- blob.SizedRef{br, uint32(size)}:
|
2013-12-02 21:20:51 +00:00
|
|
|
case <-ctx.Done():
|
|
|
|
return context.ErrCanceled
|
|
|
|
}
|
2011-11-10 18:28:44 +00:00
|
|
|
}
|
|
|
|
}
|
2013-12-02 21:20:51 +00:00
|
|
|
return nil
|
2011-11-10 17:47:32 +00:00
|
|
|
}
|
|
|
|
|
2013-08-21 20:57:28 +00:00
|
|
|
func (ix *Index) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
|
2011-11-11 02:58:04 +00:00
|
|
|
for _, br := range blobs {
|
|
|
|
key := "have:" + br.String()
|
|
|
|
v, err := ix.s.Get(key)
|
2013-11-23 07:24:54 +00:00
|
|
|
if err == sorted.ErrNotFound {
|
2011-11-11 02:58:04 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("error looking up key %q: %v", key, err)
|
|
|
|
}
|
index: actually reindex when out of order
problem: the out-of-order mechanism based on the outOfOrderIndexerLoop
was not working for some claims.
Let C be a delete claim on permanode P. If C was received before P was,
C was marked as being received with the "have" index row. However, for
the deletion to be marked in the index, some information about P is
needed (its meta row), so C could not be fully indexed upon reception.
Then, when P was finally received, the outOfOrderIndexerLoop would kick
in and retry indexing C. Which would fail, because a test based on the
"have" row would (wrongly) detect that C is already indexed and return
early.
In this patch:
-we introduce the "|indexed" suffix to the "have" - value part - row
(receive.go). If a blob is received but some of its dependencies are
missing, the have row value is written without the suffix. Upon
reception of a blob, we now test for the presence of the suffix in the
have row. If missing, the reception continues instead of returning
early. The existing mechanism that was detecting missing dependencies
for file blobs has been adapted to work with this suffix too.
-the index enumeration (enumstat.go), which relies on "have" rows, has
been adapted to work with the new "have" row format, while staying
compatible with the old format. And related tests have been added.
http://camlistore.org/issue/454
Change-Id: I2559d08a12b2a4e0f0691fc7e31f1ed1f874625e
2014-07-03 16:07:08 +00:00
|
|
|
size, err := parseHaveVal(v)
|
2011-11-11 02:58:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("invalid size for key %q = %q", key, v)
|
|
|
|
}
|
2014-01-28 20:46:52 +00:00
|
|
|
dest <- blob.SizedRef{br, uint32(size)}
|
2011-11-11 02:58:04 +00:00
|
|
|
}
|
2011-11-10 17:47:32 +00:00
|
|
|
return nil
|
|
|
|
}
|
index: actually reindex when out of order
problem: the out-of-order mechanism based on the outOfOrderIndexerLoop
was not working for some claims.
Let C be a delete claim on permanode P. If C was received before P was,
C was marked as being received with the "have" index row. However, for
the deletion to be marked in the index, some information about P is
needed (its meta row), so C could not be fully indexed upon reception.
Then, when P was finally received, the outOfOrderIndexerLoop would kick
in and retry indexing C. Which would fail, because a test based on the
"have" row would (wrongly) detect that C is already indexed and return
early.
In this patch:
-we introduce the "|indexed" suffix to the "have" - value part - row
(receive.go). If a blob is received but some of its dependencies are
missing, the have row value is written without the suffix. Upon
reception of a blob, we now test for the presence of the suffix in the
have row. If missing, the reception continues instead of returning
early. The existing mechanism that was detecting missing dependencies
for file blobs has been adapted to work with this suffix too.
-the index enumeration (enumstat.go), which relies on "have" rows, has
been adapted to work with the new "have" row format, while staying
compatible with the old format. And related tests have been added.
http://camlistore.org/issue/454
Change-Id: I2559d08a12b2a4e0f0691fc7e31f1ed1f874625e
2014-07-03 16:07:08 +00:00
|
|
|
|
|
|
|
// parseHaveVal takes the value part of a "have" index row and returns
// the blob size found in that value. Everything from the first '|'
// onward (such as the "|indexed" suffix) is ignored. Examples:
//
//	parseHaveVal("324|indexed") == 324
//	parseHaveVal("654") == 654
//
// The size must be an unsigned base-10 integer that fits in 32 bits.
// If it is not, a non-nil error is returned and size is 0.
func parseHaveVal(val string) (size uint64, err error) {
	if pipei := strings.Index(val, "|"); pipei >= 0 {
		// filter out the "indexed" suffix
		val = val[:pipei]
	}
	size, err = strconv.ParseUint(val, 10, 32)
	if err != nil {
		// On a range error ParseUint returns the clamped maximum
		// together with the error; never hand that value to callers.
		return 0, err
	}
	return size, nil
}
|