// mixedArrayFromString splits s into the "mixed array" form used for
// names that are not valid UTF-8: every maximal run of validly encoded
// UTF-8 becomes one string element, and every byte that is not part of
// a valid encoding becomes its own uint8 element. Concatenating the
// elements in order (strings as their bytes) reproduces s exactly.
// An empty s yields a nil slice.
func mixedArrayFromString(s string) []interface{} {
	buf := []byte(s)
	var parts []interface{}
	for len(buf) > 0 {
		// nextStringOrByte always consumes at least one byte of a
		// non-empty buffer, so this loop terminates.
		part, n := nextStringOrByte(buf)
		parts = append(parts, part)
		buf = buf[n:]
	}
	return parts
}

// nextStringOrByte returns the first mixed-array element of b and the
// number of bytes of b that element covers. The element is either the
// longest valid UTF-8 prefix of b as a string or, when b starts with a
// byte that does not begin a valid encoding, that single byte (uint8)
// with a length of 1. For an empty b it returns ("", 0).
func nextStringOrByte(b []byte) (interface{}, int) {
	n := 0
	for n < len(b) {
		r, size := utf8.DecodeRune(b[n:])
		// Only (RuneError, 1) signals an invalid encoding. A validly
		// encoded U+FFFD also decodes to RuneError, but with size 3,
		// and must stay in the string segment like any other rune.
		if r == utf8.RuneError && size == 1 {
			break
		}
		n += size
	}
	if n == 0 && len(b) > 0 {
		// b starts with an invalid byte: emit it on its own.
		return b[0], 1
	}
	// Valid encodings are canonical, so the consumed prefix is already
	// the exact string; no need to re-encode rune by rune.
	return string(b[:n]), n
}
// nonUTF8 is the hex encoding of the byte string
// "Aliz\xe9e - Moi Lolita.mp3". The lone 0xe9 byte makes the decoded
// name invalid UTF-8.
var nonUTF8 = "416c697ae965202d204d6f69204c6f6c6974612e6d7033" // hex-encoding

// tempDir creates a fresh directory with the "camtest-" prefix in the
// default temporary location and returns its path together with a
// function that removes the directory and everything below it. The
// test is failed immediately if the directory cannot be created.
func tempDir(t *testing.T) (path string, cleanup func()) {
	dir, err := ioutil.TempDir("", "camtest-")
	if err != nil {
		t.Fatalf("ioutil.TempDir(): %v", err)
	}
	return dir, func() { os.RemoveAll(dir) }
}

// Test that we can camput and camget a file whose name is not utf8,
// that we don't panic in the process and that the results are
// correct.
+func TestNonUTF8FileName(t *testing.T) { + srcDir, cleanup := tempDir(t) + defer cleanup() + + base, err := hex.DecodeString(nonUTF8) + if err != nil { + t.Fatalf("hex.DecodeString(): %v", err) + } + + fd, err := os.Create(filepath.Join(srcDir, string(base))) + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + fd.Close() + + w := test.GetWorld(t) + out := test.MustRunCmd(t, w.Cmd("camput", "file", fd.Name())) + br := strings.Split(out, "\n")[0] + + // camput was a success. Can we get the file back in another directory? + dstDir, cleanup := tempDir(t) + defer cleanup() + + _ = test.MustRunCmd(t, w.Cmd("camget", "-o", dstDir, br)) + _, err = os.Lstat(filepath.Join(dstDir, string(base))) + if err != nil { + t.Fatalf("Failed to stat file %s in directory %s", + fd.Name(), dstDir) + } +} + +// Test that we can camput and camget a symbolic link whose target is +// not utf8, that we do no panic in the process and that the results +// are correct. +func TestNonUTF8SymlinkTarget(t *testing.T) { + srcDir, cleanup := tempDir(t) + defer cleanup() + + base, err := hex.DecodeString(nonUTF8) + if err != nil { + t.Fatalf("hex.DecodeString(): %v", err) + } + + fd, err := os.Create(filepath.Join(srcDir, string(base))) + if err != nil { + t.Fatalf("os.Create(): %v", err) + } + defer fd.Close() + + err = os.Symlink(string(base), filepath.Join(srcDir, "link")) + if err != nil { + t.Fatalf("os.Symlink(): %v", err) + } + + w := test.GetWorld(t) + out := test.MustRunCmd(t, w.Cmd("camput", "file", filepath.Join(srcDir, "link"))) + br := strings.Split(out, "\n")[0] + + // See if we can camget it back correctly + dstDir, cleanup := tempDir(t) + defer cleanup() + + _ = test.MustRunCmd(t, w.Cmd("camget", "-o", dstDir, br)) + target, err := os.Readlink(filepath.Join(dstDir, "link")) + if err != nil { + t.Fatalf("os.Readlink(): %v", err) + } + + if !bytes.Equal([]byte(target), base) { + t.Fatalf("Retrieved symlink contains points to unexpected target") + } + +}