2011-05-29 17:50:17 +00:00
|
|
|
/*
|
|
|
|
Copyright 2011 Google Inc.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package schema
|
|
|
|
|
|
|
|
import (
|
2011-06-06 15:54:31 +00:00
|
|
|
"bufio"
|
2011-05-29 17:50:17 +00:00
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
2013-02-03 05:50:03 +00:00
|
|
|
"os"
|
2011-05-29 17:50:17 +00:00
|
|
|
"strings"
|
2015-01-15 21:05:54 +00:00
|
|
|
"time"
|
2011-05-29 17:50:17 +00:00
|
|
|
|
2013-08-04 02:54:30 +00:00
|
|
|
"camlistore.org/pkg/blob"
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
"camlistore.org/pkg/blobserver"
|
|
|
|
"camlistore.org/pkg/rollsum"
|
2013-09-12 15:04:10 +00:00
|
|
|
"camlistore.org/pkg/syncutil"
|
2011-05-29 17:50:17 +00:00
|
|
|
)
|
|
|
|
|
2012-10-29 01:03:05 +00:00
|
|
|
// Tuning knobs for how file contents are cut into blobs.
const (
	// maxBlobSize caps the size of any single blob produced while
	// cutting up a file (1 MB).
	maxBlobSize = 1 << 20

	// firstChunkSize is the target size of a file's first chunk
	// (256 KB). It is deliberately small so that tools which only
	// look at the start of a file are served by one chunk: file(1)
	// reads 96 kB on Linux and 256 kB on OS X, and EXIF/ID3
	// extractors, Nautilus, the OS X Finder, etc. behave similarly.
	// The first chunk may end up larger than this when cutting here
	// would leave a tiny following chunk (e.g. a file one byte
	// larger than firstChunkSize).
	firstChunkSize = 256 << 10

	// bufioReaderSize is the explicit buffer size handed to our
	// bufio.Reader instead of relying on NewReader's default. The
	// size matters because it determines how far ahead of the
	// current position an EOF can be noticed on an io.Reader of
	// unknown length; seeing EOF bufioReaderSize bytes early lets
	// us plan the final chunk.
	bufioReaderSize = 32 << 10

	// tooSmallThreshold is the minimum size (64 KB) the chunk being
	// built must exceed before a rolling checksum boundary is
	// honored; boundaries hit in smaller chunks are ignored.
	tooSmallThreshold = 64 << 10
)
|
2011-10-31 00:17:34 +00:00
|
|
|
|
2015-01-15 21:05:54 +00:00
|
|
|
// WriteFileFromReaderWithModTime creates and uploads a "file" JSON schema
|
2011-05-29 17:50:17 +00:00
|
|
|
// composed of chunks of r, also uploading the chunks. The returned
|
|
|
|
// BlobRef is of the JSON file schema blob.
|
2015-01-15 21:05:54 +00:00
|
|
|
// Both filename and modTime are optional.
|
|
|
|
func WriteFileFromReaderWithModTime(bs blobserver.StatReceiver, filename string, modTime time.Time, r io.Reader) (blob.Ref, error) {
|
2014-04-13 02:29:02 +00:00
|
|
|
if strings.Contains(filename, "/") {
|
|
|
|
return blob.Ref{}, fmt.Errorf("schema.WriteFileFromReader: filename %q shouldn't contain a slash", filename)
|
|
|
|
}
|
2015-01-15 21:05:54 +00:00
|
|
|
|
2011-09-28 18:07:07 +00:00
|
|
|
m := NewFileMap(filename)
|
2015-01-15 21:05:54 +00:00
|
|
|
if !modTime.IsZero() {
|
|
|
|
m.SetModTime(modTime)
|
|
|
|
}
|
2011-09-28 18:07:07 +00:00
|
|
|
return WriteFileMap(bs, m, r)
|
|
|
|
}
|
2011-05-29 17:50:17 +00:00
|
|
|
|
2015-01-15 21:05:54 +00:00
|
|
|
// WriteFileFromReader creates and uploads a "file" JSON schema
|
|
|
|
// composed of chunks of r, also uploading the chunks. The returned
|
|
|
|
// BlobRef is of the JSON file schema blob.
|
|
|
|
// The filename is optional.
|
|
|
|
func WriteFileFromReader(bs blobserver.StatReceiver, filename string, r io.Reader) (blob.Ref, error) {
|
|
|
|
return WriteFileFromReaderWithModTime(bs, filename, time.Time{}, r)
|
|
|
|
}
|
|
|
|
|
2013-01-22 04:56:12 +00:00
|
|
|
// WriteFileMap uploads chunks of r to bs while populating file and
|
|
|
|
// finally uploading file's Blob. The returned blobref is of file's
|
2012-08-21 17:31:44 +00:00
|
|
|
// JSON blob.
|
2013-08-04 02:54:30 +00:00
|
|
|
func WriteFileMap(bs blobserver.StatReceiver, file *Builder, r io.Reader) (blob.Ref, error) {
|
2013-01-22 04:56:12 +00:00
|
|
|
return writeFileMapRolling(bs, file, r)
|
2012-10-29 01:03:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// This is the simple 1MB chunk version. The rolling checksum version is below.
|
2013-08-04 02:54:30 +00:00
|
|
|
func writeFileMapOld(bs blobserver.StatReceiver, file *Builder, r io.Reader) (blob.Ref, error) {
|
2011-09-08 00:51:29 +00:00
|
|
|
parts, size := []BytesPart{}, int64(0)
|
2011-05-29 17:50:17 +00:00
|
|
|
|
2012-08-21 17:31:44 +00:00
|
|
|
var buf bytes.Buffer
|
2011-05-29 17:50:17 +00:00
|
|
|
for {
|
|
|
|
buf.Reset()
|
2012-08-21 18:14:47 +00:00
|
|
|
n, err := io.Copy(&buf, io.LimitReader(r, maxBlobSize))
|
2011-05-29 17:50:17 +00:00
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
|
|
|
if n == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2013-08-04 02:54:30 +00:00
|
|
|
hash := blob.NewHash()
|
2013-01-20 21:36:27 +00:00
|
|
|
io.Copy(hash, bytes.NewReader(buf.Bytes()))
|
2013-08-04 02:54:30 +00:00
|
|
|
br := blob.RefFromHash(hash)
|
2011-05-29 17:50:17 +00:00
|
|
|
hasBlob, err := serverHasBlob(bs, br)
|
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
2011-05-30 01:22:56 +00:00
|
|
|
if !hasBlob {
|
2012-08-21 17:31:44 +00:00
|
|
|
sb, err := bs.ReceiveBlob(br, &buf)
|
2011-05-30 01:22:56 +00:00
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2011-05-30 01:22:56 +00:00
|
|
|
}
|
2014-01-28 20:46:52 +00:00
|
|
|
if want := (blob.SizedRef{br, uint32(n)}); sb != want {
|
2014-07-01 09:46:45 +00:00
|
|
|
return blob.Ref{}, fmt.Errorf("schema/filewriter: wrote %s, expect %s", sb, want)
|
2011-05-30 01:22:56 +00:00
|
|
|
}
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
size += n
|
2011-09-08 00:51:29 +00:00
|
|
|
parts = append(parts, BytesPart{
|
2011-07-02 16:09:50 +00:00
|
|
|
BlobRef: br,
|
|
|
|
Size: uint64(n),
|
|
|
|
Offset: 0, // into BlobRef to read from (not of dest)
|
2011-05-29 17:50:17 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2013-01-22 04:56:12 +00:00
|
|
|
err := file.PopulateParts(size, parts)
|
2011-05-29 17:50:17 +00:00
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
|
|
|
|
2013-01-22 04:56:12 +00:00
|
|
|
json := file.Blob().JSON()
|
2011-05-29 17:50:17 +00:00
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
2013-08-04 02:54:30 +00:00
|
|
|
br := blob.SHA1FromString(json)
|
2011-05-29 17:50:17 +00:00
|
|
|
sb, err := bs.ReceiveBlob(br, strings.NewReader(json))
|
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
2014-01-28 20:46:52 +00:00
|
|
|
if expect := (blob.SizedRef{br, uint32(len(json))}); expect != sb {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, fmt.Errorf("schema/filewriter: wrote %s bytes, got %s ack'd", expect, sb)
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return br, nil
|
|
|
|
}
|
|
|
|
|
2013-08-04 02:54:30 +00:00
|
|
|
func serverHasBlob(bs blobserver.BlobStatter, br blob.Ref) (have bool, err error) {
|
2013-02-03 05:50:03 +00:00
|
|
|
_, err = blobserver.StatBlob(bs, br)
|
|
|
|
if err == nil {
|
2011-05-29 17:50:17 +00:00
|
|
|
have = true
|
2013-02-03 05:50:03 +00:00
|
|
|
} else if err == os.ErrNotExist {
|
|
|
|
err = nil
|
2011-05-29 17:50:17 +00:00
|
|
|
}
|
|
|
|
return
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type span struct {
|
|
|
|
from, to int64
|
|
|
|
bits int
|
2013-08-04 02:54:30 +00:00
|
|
|
br blob.Ref
|
2011-06-06 15:54:31 +00:00
|
|
|
children []span
|
|
|
|
}
|
|
|
|
|
2012-10-29 01:03:05 +00:00
|
|
|
func (s *span) isSingleBlob() bool {
|
|
|
|
return len(s.children) == 0
|
|
|
|
}
|
|
|
|
|
2011-06-06 15:54:31 +00:00
|
|
|
func (s *span) size() int64 {
|
|
|
|
size := s.to - s.from
|
|
|
|
for _, cs := range s.children {
|
|
|
|
size += cs.size()
|
|
|
|
}
|
|
|
|
return size
|
|
|
|
}
|
|
|
|
|
2012-10-29 01:03:05 +00:00
|
|
|
// noteEOFReader is a pass-through io.Reader wrapper that remembers
// whether the underlying reader has ever returned io.EOF.
type noteEOFReader struct {
	r      io.Reader // the wrapped reader; every Read is forwarded to it
	sawEOF bool      // set once r has returned io.EOF; never cleared
}

// Read forwards to the wrapped reader and returns its results
// unchanged, recording in sawEOF if the error was io.EOF.
func (r *noteEOFReader) Read(p []byte) (n int, err error) {
	n, err = r.r.Read(p)
	r.sawEOF = r.sawEOF || err == io.EOF
	return n, err
}
|
|
|
|
|
2013-08-04 02:54:30 +00:00
|
|
|
func uploadString(bs blobserver.StatReceiver, br blob.Ref, s string) (blob.Ref, error) {
|
|
|
|
if !br.Valid() {
|
|
|
|
panic("invalid blobref")
|
2013-02-03 04:17:14 +00:00
|
|
|
}
|
2012-12-20 17:34:14 +00:00
|
|
|
hasIt, err := serverHasBlob(bs, br)
|
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
|
|
|
if hasIt {
|
|
|
|
return br, nil
|
|
|
|
}
|
2014-01-27 05:56:39 +00:00
|
|
|
_, err = blobserver.ReceiveNoHash(bs, br, strings.NewReader(s))
|
2012-12-20 17:34:14 +00:00
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
|
|
|
return br, nil
|
|
|
|
}
|
2011-06-06 15:54:31 +00:00
|
|
|
|
2013-02-02 18:58:47 +00:00
|
|
|
// uploadBytes populates bb (a builder of either type "bytes" or
|
2012-12-20 17:34:14 +00:00
|
|
|
// "file", which is a superset of "bytes"), sets it to the provided
|
|
|
|
// size, and populates with provided spans. The bytes or file schema
|
|
|
|
// blob is uploaded and its blobref is returned.
|
2013-02-03 06:35:08 +00:00
|
|
|
func uploadBytes(bs blobserver.StatReceiver, bb *Builder, size int64, s []span) *uploadBytesFuture {
|
|
|
|
future := newUploadBytesFuture()
|
2012-12-20 17:34:14 +00:00
|
|
|
parts := []BytesPart{}
|
2013-02-03 06:35:08 +00:00
|
|
|
addBytesParts(bs, &parts, s, future)
|
|
|
|
|
|
|
|
if err := bb.PopulateParts(size, parts); err != nil {
|
|
|
|
future.errc <- err
|
2013-02-08 01:12:08 +00:00
|
|
|
return future
|
|
|
|
}
|
|
|
|
|
|
|
|
// Hack until camlistore.org/issue/102 is fixed. If we happen to upload
|
|
|
|
// the "file" schema before any of its parts arrive, then the indexer
|
|
|
|
// can get confused. So wait on the parts before, and then upload
|
|
|
|
// the "file" blob afterwards.
|
|
|
|
if bb.Type() == "file" {
|
|
|
|
future.errc <- nil
|
|
|
|
_, err := future.Get() // may not be nil, if children parts failed
|
|
|
|
future = newUploadBytesFuture()
|
|
|
|
if err != nil {
|
2013-02-03 06:35:08 +00:00
|
|
|
future.errc <- err
|
2013-02-08 01:12:08 +00:00
|
|
|
return future
|
|
|
|
}
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
2013-02-08 01:12:08 +00:00
|
|
|
|
|
|
|
json := bb.Blob().JSON()
|
2013-08-04 02:54:30 +00:00
|
|
|
br := blob.SHA1FromString(json)
|
2013-02-08 01:12:08 +00:00
|
|
|
future.br = br
|
|
|
|
go func() {
|
|
|
|
_, err := uploadString(bs, br, json)
|
|
|
|
future.errc <- err
|
|
|
|
}()
|
2013-02-03 06:35:08 +00:00
|
|
|
return future
|
|
|
|
}
|
|
|
|
|
|
|
|
func newUploadBytesFuture() *uploadBytesFuture {
|
|
|
|
return &uploadBytesFuture{
|
|
|
|
errc: make(chan error, 1),
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
2013-02-03 06:35:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// An uploadBytesFuture is an eager result of a still-in-progress uploadBytes call.
|
|
|
|
// Call Get to wait and get its final result.
|
|
|
|
type uploadBytesFuture struct {
|
2013-08-04 02:54:30 +00:00
|
|
|
br blob.Ref
|
2013-02-03 06:35:08 +00:00
|
|
|
errc chan error
|
|
|
|
children []*uploadBytesFuture
|
|
|
|
}
|
|
|
|
|
|
|
|
// BlobRef returns the optimistic blobref of this uploadBytes call without blocking.
|
2013-08-04 02:54:30 +00:00
|
|
|
func (f *uploadBytesFuture) BlobRef() blob.Ref {
|
2013-02-03 06:35:08 +00:00
|
|
|
return f.br
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get blocks for all children and returns any final error.
|
2013-08-04 02:54:30 +00:00
|
|
|
func (f *uploadBytesFuture) Get() (blob.Ref, error) {
|
2013-02-03 06:35:08 +00:00
|
|
|
for _, f := range f.children {
|
|
|
|
if _, err := f.Get(); err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2013-02-03 06:35:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return f.br, <-f.errc
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
2012-10-29 01:03:05 +00:00
|
|
|
|
2012-12-20 17:34:14 +00:00
|
|
|
// addBytesParts uploads the provided spans to bs, appending elements to *dst.
|
2013-02-03 06:35:08 +00:00
|
|
|
func addBytesParts(bs blobserver.StatReceiver, dst *[]BytesPart, spans []span, parent *uploadBytesFuture) {
|
2012-12-20 17:34:14 +00:00
|
|
|
for _, sp := range spans {
|
|
|
|
if len(sp.children) == 1 && sp.children[0].isSingleBlob() {
|
|
|
|
// Remove an occasional useless indirection of
|
|
|
|
// what would become a bytes schema blob
|
|
|
|
// pointing to a single blobref. Just promote
|
|
|
|
// the blobref child instead.
|
|
|
|
child := sp.children[0]
|
|
|
|
*dst = append(*dst, BytesPart{
|
|
|
|
BlobRef: child.br,
|
|
|
|
Size: uint64(child.size()),
|
|
|
|
})
|
|
|
|
sp.children = nil
|
|
|
|
}
|
|
|
|
if len(sp.children) > 0 {
|
|
|
|
childrenSize := int64(0)
|
|
|
|
for _, cs := range sp.children {
|
|
|
|
childrenSize += cs.size()
|
|
|
|
}
|
2013-02-03 06:35:08 +00:00
|
|
|
future := uploadBytes(bs, newBytes(), childrenSize, sp.children)
|
|
|
|
parent.children = append(parent.children, future)
|
2012-12-20 17:34:14 +00:00
|
|
|
*dst = append(*dst, BytesPart{
|
2013-02-03 06:35:08 +00:00
|
|
|
BytesRef: future.BlobRef(),
|
2012-12-20 17:34:14 +00:00
|
|
|
Size: uint64(childrenSize),
|
|
|
|
})
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
2012-12-20 17:34:14 +00:00
|
|
|
if sp.from == sp.to {
|
|
|
|
panic("Shouldn't happen. " + fmt.Sprintf("weird span with same from & to: %#v", sp))
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
2012-12-20 17:34:14 +00:00
|
|
|
*dst = append(*dst, BytesPart{
|
|
|
|
BlobRef: sp.br,
|
|
|
|
Size: uint64(sp.to - sp.from),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// writeFileMap uploads chunks of r to bs while populating fileMap and
|
|
|
|
// finally uploading fileMap. The returned blobref is of fileMap's
|
|
|
|
// JSON blob. It uses rolling checksum for the chunks sizes.
|
2013-08-04 02:54:30 +00:00
|
|
|
func writeFileMapRolling(bs blobserver.StatReceiver, file *Builder, r io.Reader) (blob.Ref, error) {
|
2013-01-22 04:56:12 +00:00
|
|
|
n, spans, err := writeFileChunks(bs, file, r)
|
2012-12-20 17:34:14 +00:00
|
|
|
if err != nil {
|
2013-08-04 02:54:30 +00:00
|
|
|
return blob.Ref{}, err
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
|
|
|
// The top-level content parts
|
2013-02-03 06:35:08 +00:00
|
|
|
return uploadBytes(bs, file, n, spans).Get()
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
|
|
|
|
2013-02-03 03:34:23 +00:00
|
|
|
// WriteFileChunks uploads chunks of r to bs while populating file.
|
|
|
|
// It does not upload file.
|
2013-01-22 04:56:12 +00:00
|
|
|
func WriteFileChunks(bs blobserver.StatReceiver, file *Builder, r io.Reader) error {
|
2013-02-03 03:34:23 +00:00
|
|
|
size, spans, err := writeFileChunks(bs, file, r)
|
2012-12-20 17:34:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2013-02-03 03:34:23 +00:00
|
|
|
parts := []BytesPart{}
|
2013-02-03 06:35:08 +00:00
|
|
|
future := newUploadBytesFuture()
|
|
|
|
addBytesParts(bs, &parts, spans, future)
|
2013-02-04 06:28:11 +00:00
|
|
|
future.errc <- nil // Get will still block on addBytesParts' children
|
2013-02-03 06:35:08 +00:00
|
|
|
if _, err := future.Get(); err != nil {
|
2013-02-03 03:34:23 +00:00
|
|
|
return err
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
2013-02-03 03:34:23 +00:00
|
|
|
return file.PopulateParts(size, parts)
|
2012-12-20 17:34:14 +00:00
|
|
|
}
|
|
|
|
|
2013-01-22 04:56:12 +00:00
|
|
|
func writeFileChunks(bs blobserver.StatReceiver, file *Builder, r io.Reader) (n int64, spans []span, outerr error) {
|
2012-12-20 17:34:14 +00:00
|
|
|
src := ¬eEOFReader{r: r}
|
|
|
|
bufr := bufio.NewReaderSize(src, bufioReaderSize)
|
|
|
|
spans = []span{} // the tree of spans, cut on interesting rollsum boundaries
|
|
|
|
rs := rollsum.New()
|
2013-02-03 04:17:14 +00:00
|
|
|
var last int64
|
|
|
|
var buf bytes.Buffer
|
|
|
|
blobSize := 0 // of the next blob being built, should be same as buf.Len()
|
|
|
|
|
|
|
|
const chunksInFlight = 32 // at ~64 KB chunks, this is ~2MB memory per file
|
2013-09-12 15:04:10 +00:00
|
|
|
gatec := syncutil.NewGate(chunksInFlight)
|
2013-02-03 04:17:14 +00:00
|
|
|
firsterrc := make(chan error, 1)
|
2012-12-20 17:34:14 +00:00
|
|
|
|
2013-02-03 04:17:14 +00:00
|
|
|
// uploadLastSpan runs in the same goroutine as the loop below and is responsible for
|
|
|
|
// starting uploading the contents of the buf. It returns false if there's been
|
|
|
|
// an error and the loop below should be stopped.
|
2011-06-06 15:54:31 +00:00
|
|
|
uploadLastSpan := func() bool {
|
2013-02-03 04:17:14 +00:00
|
|
|
chunk := buf.String()
|
|
|
|
buf.Reset()
|
2013-08-04 02:54:30 +00:00
|
|
|
br := blob.SHA1FromString(chunk)
|
2013-02-03 04:17:14 +00:00
|
|
|
spans[len(spans)-1].br = br
|
|
|
|
select {
|
|
|
|
case outerr = <-firsterrc:
|
2011-06-06 15:54:31 +00:00
|
|
|
return false
|
2013-02-03 04:17:14 +00:00
|
|
|
default:
|
|
|
|
// No error seen so far, continue.
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
2013-08-22 00:05:52 +00:00
|
|
|
gatec.Start()
|
2013-02-03 04:17:14 +00:00
|
|
|
go func() {
|
2013-08-22 00:05:52 +00:00
|
|
|
defer gatec.Done()
|
2013-02-03 04:17:14 +00:00
|
|
|
if _, err := uploadString(bs, br, chunk); err != nil {
|
|
|
|
select {
|
|
|
|
case firsterrc <- err:
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
2011-06-06 15:54:31 +00:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
c, err := bufr.ReadByte()
|
Update from r60 to [almost] Go 1.
A lot is still broken, but most stuff at least compiles now.
The directory tree has been rearranged now too. Go libraries are now
under "pkg". Fully qualified, they are e.g. "camlistore.org/pkg/jsonsign".
The go tool cannot yet fetch from arbitrary domains, but discussion is
happening now on which mechanism to use to allow that.
For now, put the camlistore root under $GOPATH/src. Typically $GOPATH
is $HOME, so Camlistore should be at $HOME/src/camlistore.org.
Then you can:
$ go build ./server/camlistored
... etc
The build.pl script is currently disabled. It'll be resurrected at
some point, but with a very different role (helping create a fake
GOPATH and running the go build command, if things are installed at
the wrong place, and/or running fileembed generators).
Many things are certainly broken.
Many things are disabled. (MySQL, all indexing, etc).
Many things need to be moved into
camlistore.org/third_party/{code.google.com,github.com} and updated
from their r60 to Go 1 versions, where applicable.
The GoMySQL stuff should be updated to use database/sql and the ziutek
library implementing database/sql/driver.
Help wanted.
Change-Id: If71217dc5c8f0e70dbe46e9504ca5131c6eeacde
2012-02-19 05:53:06 +00:00
|
|
|
if err == io.EOF {
|
2011-06-06 15:54:31 +00:00
|
|
|
if n != last {
|
|
|
|
spans = append(spans, span{from: last, to: n})
|
|
|
|
if !uploadLastSpan() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if err != nil {
|
2012-12-20 17:34:14 +00:00
|
|
|
return 0, nil, err
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
|
|
|
|
2012-10-29 01:03:05 +00:00
|
|
|
buf.WriteByte(c)
|
2011-06-06 15:54:31 +00:00
|
|
|
n++
|
2011-10-31 00:17:34 +00:00
|
|
|
blobSize++
|
2011-06-06 15:54:31 +00:00
|
|
|
rs.Roll(c)
|
2012-10-29 01:03:05 +00:00
|
|
|
|
|
|
|
var bits int
|
|
|
|
onRollSplit := rs.OnSplit()
|
|
|
|
switch {
|
|
|
|
case blobSize == maxBlobSize:
|
|
|
|
bits = 20 // arbitrary node weight; 1<<20 == 1MB
|
|
|
|
case src.sawEOF:
|
|
|
|
// Don't split. End is coming soon enough.
|
|
|
|
continue
|
|
|
|
case onRollSplit && n > firstChunkSize && blobSize > tooSmallThreshold:
|
|
|
|
bits = rs.Bits()
|
|
|
|
case n == firstChunkSize:
|
|
|
|
bits = 18 // 1 << 18 == 256KB
|
|
|
|
default:
|
|
|
|
// Don't split.
|
|
|
|
continue
|
2011-06-06 15:54:31 +00:00
|
|
|
}
|
2011-10-31 00:17:34 +00:00
|
|
|
blobSize = 0
|
2011-06-06 15:54:31 +00:00
|
|
|
|
|
|
|
// Take any spans from the end of the spans slice that
|
|
|
|
// have a smaller 'bits' score and make them children
|
|
|
|
// of this node.
|
|
|
|
var children []span
|
|
|
|
childrenFrom := len(spans)
|
|
|
|
for childrenFrom > 0 && spans[childrenFrom-1].bits < bits {
|
|
|
|
childrenFrom--
|
|
|
|
}
|
|
|
|
if nCopy := len(spans) - childrenFrom; nCopy > 0 {
|
|
|
|
children = make([]span, nCopy)
|
|
|
|
copy(children, spans[childrenFrom:])
|
|
|
|
spans = spans[:childrenFrom]
|
|
|
|
}
|
|
|
|
|
|
|
|
spans = append(spans, span{from: last, to: n, bits: bits, children: children})
|
|
|
|
last = n
|
|
|
|
if !uploadLastSpan() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-02-03 04:17:14 +00:00
|
|
|
// Loop was already hit earlier.
|
|
|
|
if outerr != nil {
|
|
|
|
return 0, nil, outerr
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for all uploads to finish, one way or another, and then
|
|
|
|
// see if any generated errors.
|
|
|
|
// Once this loop is done, we own all the tokens in gatec, so nobody
|
|
|
|
// else can have one outstanding.
|
|
|
|
for i := 0; i < chunksInFlight; i++ {
|
2013-08-22 00:05:52 +00:00
|
|
|
gatec.Start()
|
2013-02-03 04:17:14 +00:00
|
|
|
}
|
|
|
|
select {
|
|
|
|
case err := <-firsterrc:
|
|
|
|
return 0, nil, err
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2012-12-20 17:34:14 +00:00
|
|
|
return n, spans, nil
|
2011-05-29 17:50:17 +00:00
|
|
|
|
|
|
|
}
|