diff --git a/pkg/schema/blob.go b/pkg/schema/blob.go
index ebc8c04dc..f28400062 100644
--- a/pkg/schema/blob.go
+++ b/pkg/schema/blob.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"path/filepath"
 	"strings"
 	"time"
@@ -431,7 +432,7 @@ func (bb *Builder) Blob() *Blob {
 		panic(err)
 	}
 	h := blob.NewHash()
-	h.Write([]byte(json))
+	io.WriteString(h, json)
 	return &Blob{
 		str: json,
 		ss:  ss,
diff --git a/pkg/schema/schema.go b/pkg/schema/schema.go
index 1f0c09fac..a1377cfe4 100644
--- a/pkg/schema/schema.go
+++ b/pkg/schema/schema.go
@@ -40,6 +40,7 @@ import (
 	"unicode/utf8"
 
 	"github.com/bradfitz/latlong"
+	"perkeep.org/internal/pools"
 	"perkeep.org/pkg/blob"
 
 	"github.com/rwcarlsen/goexif/exif"
@@ -300,12 +301,25 @@ type superset struct {
 	Expires types.Time3339 `json:"expires"` // or zero for no expiration
 }
 
+var errSchemaBlobTooLarge = errors.New("schema blob too large")
+
 func parseSuperset(r io.Reader) (*superset, error) {
-	var ss superset
-	if err := json.NewDecoder(io.LimitReader(r, MaxSchemaBlobSize)).Decode(&ss); err != nil {
+	buf := pools.BytesBuffer()
+	defer pools.PutBuffer(buf)
+
+	n, err := io.CopyN(buf, r, MaxSchemaBlobSize+1)
+	if err != nil && err != io.EOF {
 		return nil, err
 	}
-	return &ss, nil
+	if n > MaxSchemaBlobSize {
+		return nil, errSchemaBlobTooLarge
+	}
+
+	ss := new(superset)
+	if err := json.Unmarshal(buf.Bytes(), ss); err != nil {
+		return nil, err
+	}
+	return ss, nil
 }
 
 // BlobFromReader returns a new Blob from the provided Reader r,
@@ -319,41 +333,9 @@ func BlobFromReader(ref blob.Ref, r io.Reader) (*Blob, error) {
 	tee := io.TeeReader(r, &buf)
 	ss, err := parseSuperset(tee)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("error parsing Blob %v: %w", ref, err)
 	}
-	var wb [16]byte
-	afterObj := 0
-	for {
-		n, err := tee.Read(wb[:])
-		afterObj += n
-		for i := 0; i < n; i++ {
-			if !isASCIIWhite(wb[i]) {
-				return nil, fmt.Errorf("invalid bytes after JSON schema blob in %v", ref)
-			}
-		}
-		if afterObj > MaxSchemaBlobSize {
-			break
-		}
-		if err == io.EOF {
-			break
-		}
-		if err != nil {
-			return nil, err
-		}
-	}
-	json := buf.String()
-	if len(json) > MaxSchemaBlobSize {
-		return nil, fmt.Errorf("schema: metadata blob %v is over expected limit; size=%d", ref, len(json))
-	}
-	return &Blob{ref, json, ss}, nil
-}
-
-func isASCIIWhite(b byte) bool {
-	switch b {
-	case ' ', '\t', '\r', '\n':
-		return true
-	}
-	return false
+	return &Blob{ref, buf.String(), ss}, nil
 }
 
 // BytesPart is the type representing one of the "parts" in a "file"
diff --git a/pkg/schema/schema_test.go b/pkg/schema/schema_test.go
index f12b293b6..f2b8a0e3b 100644
--- a/pkg/schema/schema_test.go
+++ b/pkg/schema/schema_test.go
@@ -236,11 +236,7 @@ func TestBlobFromReader(t *testing.T) {
 	}
 
 	blob, err = BlobFromReader(br, strings.NewReader(`{"camliVersion": 1, "camliType": "foo"} X `))
 	if err == nil {
-		// TODO(bradfitz): fix this somehow. Currently encoding/json's
-		// decoder over-reads.
-		// See: https://code.google.com/p/go/issues/detail?id=1955 ,
-		// which was "fixed", but not really.
-		t.Logf("TODO(bradfitz): make sure bogus non-whitespace after the JSON object causes an error.")
+		t.Errorf("bogus non-whitespace after the JSON object should cause an error")
 	}
 }
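
Note on the new dependency: parseSuperset now calls pools.BytesBuffer and pools.PutBuffer from perkeep.org/internal/pools, which this patch does not show. Assuming that package is a thin sync.Pool wrapper around *bytes.Buffer, a minimal sketch of the expected helpers (hypothetical, not the actual source) would be:

package pools

import (
	"bytes"
	"sync"
)

// bufPool recycles buffers across parseSuperset calls.
var bufPool = sync.Pool{
	New: func() interface{} { return new(bytes.Buffer) },
}

// BytesBuffer returns an empty *bytes.Buffer, reusing one from the pool
// when available.
func BytesBuffer() *bytes.Buffer {
	buf := bufPool.Get().(*bytes.Buffer)
	buf.Reset()
	return buf
}

// PutBuffer returns buf to the pool so a later caller can reuse its
// backing array instead of allocating a new one.
func PutBuffer(buf *bytes.Buffer) {
	bufPool.Put(buf)
}

Under this assumption, pooling keeps parseSuperset from allocating a fresh buffer of up to MaxSchemaBlobSize+1 bytes on every call, and resetting the buffer on BytesBuffer ensures a recycled buffer never carries a previous blob's bytes into the next parse.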