diff --git a/pkg/schema/filereader.go b/pkg/schema/filereader.go index 438dfad8f..c54bfe6d7 100644 --- a/pkg/schema/filereader.go +++ b/pkg/schema/filereader.go @@ -147,6 +147,7 @@ type FileReader struct { ci int // index into contentparts, or -1 on closed ccon uint64 // bytes into current chunk already consumed remain int64 // bytes remaining + size int64 // total number of bytes cr blobref.ReadSeekCloser // cached reader (for blobref chunks) crbr *blobref.BlobRef // the blobref that cr is for @@ -184,7 +185,18 @@ func (ss *Superset) NewFileReader(fetcher blobref.SeekFetcher) (*FileReader, err if ss.Type != "file" && ss.Type != "bytes" { return nil, fmt.Errorf("schema/filereader: Superset not of type \"file\" or \"bytes\"") } - return &FileReader{fetcher: fetcher, ss: ss, remain: int64(ss.SumPartsSize())}, nil + size := int64(ss.SumPartsSize()) + return &FileReader{ + fetcher: fetcher, + ss: ss, + size: size, + remain: size, + }, nil +} + +// Size returns the size of the file in bytes. +func (fr *FileReader) Size() int64 { + return fr.size } // FileSchema returns the reader's schema superset. Don't mutate it. 
@@ -250,7 +262,7 @@ func (fr *FileReader) readerFor(br *blobref.BlobRef, seekTo int64) (r io.Reader, } } else { - rsc = &zeroReader{} + rsc = zeroReader{} } fr.crbr = br fr.cr = rsc @@ -291,6 +303,44 @@ func (fr *FileReader) currentPart() (*BytesPart, error) { panic("unreachable") } +var _ interface { + io.ReaderAt + io.Reader + io.Closer +} = (*FileReader)(nil) + +func (fr *FileReader) ReadAt(p []byte, offset int64) (n int, err error) { + if offset < 0 { + return 0, errors.New("schema/filereader: negative offset") + } + if offset >= fr.Size() { + return 0, io.EOF + } + want := len(p) + for len(p) > 0 && err == nil { + panic("TODO: finish implementing") + r := fr.readerForOffset(offset) + var n1 int + n1, err = r.Read(p) + p = p[n1:] + if err == io.EOF { + err = nil + } + } + if n == want && err == io.EOF { + // io.ReaderAt permits returning either nil or io.EOF when p is filled exactly at the end of the input; we return nil. + err = nil + } + if n < want && err == nil { + err = io.ErrUnexpectedEOF + } + return n, err +} + +func (fr *FileReader) readerForOffset(off int64) io.Reader { + panic("TODO(bradfitz): implement") +} + func (fr *FileReader) Read(p []byte) (n int, err error) { if fr.ci == closedIndex { return 0, errClosed @@ -347,20 +397,21 @@ func minu64(a, b uint64) uint64 { return b } +// zeroReader is a ReadSeekCloser whose Read always fills p with zero-valued bytes. type zeroReader struct{} -func (*zeroReader) Read(p []byte) (int, error) { +func (zeroReader) Read(p []byte) (int, error) { for i := range p { p[i] = 0 } return len(p), nil } -func (*zeroReader) Close() error { +func (zeroReader) Close() error { return nil } -func (*zeroReader) Seek(offset int64, whence int) (newFilePos int64, err error) { // Caller is ignoring our newFilePos return value. 
return 0, nil } diff --git a/pkg/schema/schema.go b/pkg/schema/schema.go index 471b0cf08..ce35e27bb 100644 --- a/pkg/schema/schema.go +++ b/pkg/schema/schema.go @@ -75,9 +75,18 @@ type StatHasher interface { Hash(fileName string) (*blobref.BlobRef, error) } +// File is the interface returned when opening a DirectoryEntry that +// is a regular file. type File interface { + // TODO(bradfitz): this should instead be a ReaderAt with a Size() int64 method. + // Then a Reader could be built with a SectionReader. + Close() error - Skip(skipBytes uint64) uint64 + Size() int64 + + // Skip is an efficient way to skip n bytes into the file. + Skip(n uint64) uint64 + Read(p []byte) (int, error) } @@ -243,6 +252,8 @@ type Superset struct { UnixCtime string `json:"unixCtime"` UnixAtime string `json:"unixAtime"` + // Parts are references to the data chunks of a regular file (or a "bytes" schema blob). + // See doc/schema/bytes.txt and doc/schema/files/file.txt. Parts []*BytesPart `json:"parts"` Entries string `json:"entries"` // for directories, a blobref to a static-set @@ -250,20 +261,32 @@ type Superset struct { // blobrefs to child dirs/files) } +// BytesPart is the type representing one of the "parts" in a "file" +// or "bytes" JSON schema. +// +// See doc/schema/bytes.txt and doc/schema/files/file.txt. type BytesPart struct { - // Required. + // Size is the number of bytes that this part contributes to the overall segment. Size uint64 `json:"size"` - // At most one of: + // At most one of BlobRef or BytesRef may be set; it's illegal for both to be set. + // If neither is set, this BytesPart represents Size bytes of zeros. + // BlobRef refers to raw bytes. BytesRef references a "bytes" schema blob. BlobRef *blobref.BlobRef `json:"blobRef,omitempty"` BytesRef *blobref.BlobRef `json:"bytesRef,omitempty"` - // Optional (default value is zero if unset anyway): + // Offset optionally specifies the offset into BlobRef to skip + // when reading Size bytes. 
Offset uint64 `json:"offset,omitempty"` } +// stringFromMixedArray joins a slice of either strings or float64 +// values (as retrieved from JSON decoding) into a string. These are +// used for non-UTF8 filenames in "fileNameBytes" fields. The strings +// are UTF-8 segments and the float64s (actually uint8 values) are +// byte values. func stringFromMixedArray(parts []interface{}) string { - buf := new(bytes.Buffer) + var buf bytes.Buffer for _, part := range parts { if s, ok := part.(string); ok { buf.WriteString(s) @@ -587,7 +610,10 @@ func NewDelAttributeClaim(permaNode *blobref.BlobRef, attr string) Map { return m } -// Types of ShareRefs +// ShareHaveRef is a share type specifying that if you "have the +// reference" (know the blobref to the haveref share blob), then you +// have access to the referenced object from that share blob. +// This is the "send a link to a friend" access model. const ShareHaveRef = "haveref" // RFC3339FromTime returns an RFC3339-formatted time in UTC.