schema: simplify parseSuperset

Signed-off-by: Brad Fitzpatrick <brad@danga.com>
This commit is contained in:
Brad Fitzpatrick 2023-01-13 07:07:14 -08:00
parent ad7454c304
commit 6614d23428
3 changed files with 22 additions and 43 deletions

View File

@@ -20,6 +20,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io"
"path/filepath" "path/filepath"
"strings" "strings"
"time" "time"
@@ -431,7 +432,7 @@ func (bb *Builder) Blob() *Blob {
panic(err) panic(err)
} }
h := blob.NewHash() h := blob.NewHash()
h.Write([]byte(json)) io.WriteString(h, json)
return &Blob{ return &Blob{
str: json, str: json,
ss: ss, ss: ss,

View File

@@ -40,6 +40,7 @@ import (
"unicode/utf8" "unicode/utf8"
"github.com/bradfitz/latlong" "github.com/bradfitz/latlong"
"perkeep.org/internal/pools"
"perkeep.org/pkg/blob" "perkeep.org/pkg/blob"
"github.com/rwcarlsen/goexif/exif" "github.com/rwcarlsen/goexif/exif"
@@ -300,12 +301,25 @@ type superset struct {
Expires types.Time3339 `json:"expires"` // or zero for no expiration Expires types.Time3339 `json:"expires"` // or zero for no expiration
} }
var errSchemaBlobTooLarge = errors.New("schema blob too large")
func parseSuperset(r io.Reader) (*superset, error) { func parseSuperset(r io.Reader) (*superset, error) {
var ss superset buf := pools.BytesBuffer()
if err := json.NewDecoder(io.LimitReader(r, MaxSchemaBlobSize)).Decode(&ss); err != nil { defer pools.PutBuffer(buf)
n, err := io.CopyN(buf, r, MaxSchemaBlobSize+1)
if err != nil && err != io.EOF {
return nil, err return nil, err
} }
return &ss, nil if n > MaxSchemaBlobSize {
return nil, errSchemaBlobTooLarge
}
ss := new(superset)
if err := json.Unmarshal(buf.Bytes(), ss); err != nil {
return nil, err
}
return ss, nil
} }
// BlobFromReader returns a new Blob from the provided Reader r, // BlobFromReader returns a new Blob from the provided Reader r,
@@ -319,41 +333,9 @@ func BlobFromReader(ref blob.Ref, r io.Reader) (*Blob, error) {
tee := io.TeeReader(r, &buf) tee := io.TeeReader(r, &buf)
ss, err := parseSuperset(tee) ss, err := parseSuperset(tee)
if err != nil { if err != nil {
return nil, err return nil, fmt.Errorf("error parsing Blob %v: %w", ref, err)
} }
var wb [16]byte return &Blob{ref, buf.String(), ss}, nil
afterObj := 0
for {
n, err := tee.Read(wb[:])
afterObj += n
for i := 0; i < n; i++ {
if !isASCIIWhite(wb[i]) {
return nil, fmt.Errorf("invalid bytes after JSON schema blob in %v", ref)
}
}
if afterObj > MaxSchemaBlobSize {
break
}
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
}
json := buf.String()
if len(json) > MaxSchemaBlobSize {
return nil, fmt.Errorf("schema: metadata blob %v is over expected limit; size=%d", ref, len(json))
}
return &Blob{ref, json, ss}, nil
}
func isASCIIWhite(b byte) bool {
switch b {
case ' ', '\t', '\r', '\n':
return true
}
return false
} }
// BytesPart is the type representing one of the "parts" in a "file" // BytesPart is the type representing one of the "parts" in a "file"

View File

@@ -236,11 +236,7 @@ func TestBlobFromReader(t *testing.T) {
blob, err = BlobFromReader(br, strings.NewReader(`{"camliVersion": 1, "camliType": "foo"} X `)) blob, err = BlobFromReader(br, strings.NewReader(`{"camliVersion": 1, "camliType": "foo"} X `))
if err == nil { if err == nil {
// TODO(bradfitz): fix this somehow. Currently encoding/json's t.Errorf("bogus non-whitespace after the JSON object should cause an error")
// decoder over-reads.
// See: https://code.google.com/p/go/issues/detail?id=1955 ,
// which was "fixed", but not really.
t.Logf("TODO(bradfitz): make sure bogus non-whitespace after the JSON object causes an error.")
} }
} }