schema: more docs and some FileReader work

Change-Id: Id9e754e3b6358a651a826a4cdeb00efb330e3c3a
This commit is contained in:
Brad Fitzpatrick 2012-08-23 18:44:03 -07:00
parent 01195665dc
commit faf3c90d6f
2 changed files with 88 additions and 11 deletions

View File

@ -147,6 +147,7 @@ type FileReader struct {
ci int // index into contentparts, or -1 on closed
ccon uint64 // bytes into current chunk already consumed
remain int64 // bytes remaining
size int64 // total number of bytes
cr blobref.ReadSeekCloser // cached reader (for blobref chunks)
crbr *blobref.BlobRef // the blobref that cr is for
@ -184,7 +185,18 @@ func (ss *Superset) NewFileReader(fetcher blobref.SeekFetcher) (*FileReader, err
if ss.Type != "file" && ss.Type != "bytes" {
return nil, fmt.Errorf("schema/filereader: Superset not of type \"file\" or \"bytes\"")
}
return &FileReader{fetcher: fetcher, ss: ss, remain: int64(ss.SumPartsSize())}, nil
size := int64(ss.SumPartsSize())
return &FileReader{
fetcher: fetcher,
ss: ss,
size: size,
remain: size,
}, nil
}
// Size returns the size of the file in bytes, as computed from the
// sum of the superset's parts at construction time (see NewFileReader).
func (fr *FileReader) Size() int64 {
	return fr.size
}
// FileSchema returns the reader's schema superset. Don't mutate it.
@ -250,7 +262,7 @@ func (fr *FileReader) readerFor(br *blobref.BlobRef, seekTo int64) (r io.Reader,
}
} else {
rsc = &zeroReader{}
rsc = zeroReader{}
}
fr.crbr = br
fr.cr = rsc
@ -291,6 +303,44 @@ func (fr *FileReader) currentPart() (*BytesPart, error) {
panic("unreachable")
}
// Compile-time assertion that *FileReader provides the reader
// interfaces callers rely on.
var _ interface {
	io.Reader
	io.ReaderAt
	io.Closer
} = (*FileReader)(nil)
// ReadAt implements io.ReaderAt, reading len(p) bytes starting at the
// absolute file offset. Per the io.ReaderAt contract, it returns a
// non-nil error if it reads fewer than len(p) bytes.
func (fr *FileReader) ReadAt(p []byte, offset int64) (n int, err error) {
	if offset < 0 {
		return 0, errors.New("schema/filereader: negative offset")
	}
	if offset >= fr.Size() {
		return 0, io.EOF
	}
	want := len(p)
	for len(p) > 0 && err == nil {
		r := fr.readerForOffset(offset)
		var n1 int
		n1, err = r.Read(p)
		p = p[n1:]
		// Bug fix: the original never added n1 into n or advanced
		// offset, so n stayed 0 and each iteration re-read the same
		// position. (It also opened the loop with a panic("TODO"),
		// making everything after it unreachable.)
		n += n1
		offset += int64(n1)
		if err == io.EOF {
			err = nil
			if n1 == 0 {
				// EOF with no progress: stop instead of spinning
				// forever re-reading an exhausted reader.
				break
			}
		}
	}
	// The original's `if n == want && err == io.EOF` check was dead code:
	// io.EOF is always cleared inside the loop before it can reach here.
	if n < want && err == nil {
		err = io.ErrUnexpectedEOF
	}
	return n, err
}
// readerForOffset returns a reader positioned at the given absolute
// file offset.
//
// NOTE(review): unimplemented stub as of this change — it always
// panics, so ReadAt (its only visible caller) cannot succeed yet.
func (fr *FileReader) readerForOffset(off int64) io.Reader {
	panic("TODO(bradfitz): implement")
}
func (fr *FileReader) Read(p []byte) (n int, err error) {
if fr.ci == closedIndex {
return 0, errClosed
@ -347,20 +397,21 @@ func minu64(a, b uint64) uint64 {
return b
}
// zeroReader is a ReadSeekCloser that always reads zero bytes.
// It backs BytesParts that reference no blob (pure zero ranges).
type zeroReader struct{}

// Defect fixed: the diff left both the old pointer-receiver and the new
// value-receiver declarations of Read/Close/Seek in place, which is a
// method redeclaration error in Go. Only the value-receiver post-image
// versions are kept.

// Read fills p with zeros and reports the full length read.
func (zeroReader) Read(p []byte) (int, error) {
	for i := range p {
		p[i] = 0
	}
	return len(p), nil
}

// Close is a no-op; there is no underlying resource.
func (zeroReader) Close() error {
	return nil
}

// Seek is a no-op that always reports position 0.
func (zeroReader) Seek(offset int64, whence int) (newFilePos int64, err error) {
	// Caller is ignoring our newFilePos return value.
	return 0, nil
}

View File

@ -75,9 +75,18 @@ type StatHasher interface {
Hash(fileName string) (*blobref.BlobRef, error)
}
// File is the interface returned when opening a DirectoryEntry that
// is a regular file.
type File interface {
	// TODO(bradfitz): this should instead be a ReaderAt with a Size() int64 method.
	// Then a Reader could be built with a SectionReader.

	Close() error

	// Size returns the file's size in bytes.
	Size() int64

	// Skip is an efficient way to skip n bytes into the file.
	// NOTE(review): the original comment was truncated mid-sentence;
	// the uint64 result presumably reports bytes actually skipped —
	// confirm against implementations.
	// Defect fixed: the stale pre-image declaration
	// `Skip(skipBytes uint64) uint64` was left alongside this one,
	// which is a duplicate method in the interface and does not compile.
	Skip(n uint64) uint64

	Read(p []byte) (int, error)
}
@ -243,6 +252,8 @@ type Superset struct {
UnixCtime string `json:"unixCtime"`
UnixAtime string `json:"unixAtime"`
// Parts are references to the data chunks of a regular file (or a "bytes" schema blob).
// See doc/schema/bytes.txt and doc/schema/files/file.txt.
Parts []*BytesPart `json:"parts"`
Entries string `json:"entries"` // for directories, a blobref to a static-set
@ -250,20 +261,32 @@ type Superset struct {
// blobrefs to child dirs/files)
}
// BytesPart is the type representing one of the "parts" in a "file"
// or "bytes" JSON schema.
//
// See doc/schema/bytes.txt and doc/schema/files/file.txt.
type BytesPart struct {
	// Size is the number of bytes that this part contributes to the overall segment.
	// Required.
	Size uint64 `json:"size"`

	// At most one of BlobRef or BytesRef must be set, but it's illegal for both to be set.
	// If neither are set, this BytesPart represents Size zero bytes.
	// BlobRef refers to raw bytes. BytesRef references a "bytes" schema blob.
	BlobRef *blobref.BlobRef `json:"blobRef,omitempty"`
	BytesRef *blobref.BlobRef `json:"bytesRef,omitempty"`

	// Offset optionally specifies the offset into BlobRef to skip
	// when reading Size bytes. Optional; defaults to zero.
	Offset uint64 `json:"offset,omitempty"`
}
// stringFromMixedArray joins a slice of either strings or float64
// values (as retrieved from JSON decoding) into a string. These are
// used for non-UTF8 filenames in "fileNameBytes" fields. The strings
// are UTF-8 segments and the float64s (actually uint8 values) are
// byte values.
func stringFromMixedArray(parts []interface{}) string {
buf := new(bytes.Buffer)
var buf bytes.Buffer
for _, part := range parts {
if s, ok := part.(string); ok {
buf.WriteString(s)
@ -587,7 +610,10 @@ func NewDelAttributeClaim(permaNode *blobref.BlobRef, attr string) Map {
return m
}
// ShareHaveRef is the share type specifying that if you "have the
// reference" (know the blobref to the haveref share blob), then you
// have access to the referenced object from that share blob.
// This is the "send a link to a friend" access model.
const ShareHaveRef = "haveref"
// RFC3339FromTime returns an RFC3339-formatted time in UTC.