diff --git a/pkg/file/walk.go b/pkg/file/walk.go
index 8c7fdc5c9..a73781d45 100644
--- a/pkg/file/walk.go
+++ b/pkg/file/walk.go
@@ -125,7 +125,12 @@ func walkDir(f FS, path string, d fs.DirEntry, walkDirFn fs.WalkDirFunc) error {
 	}
 
 	for _, d1 := range dirs {
-		path1 := filepath.Join(path, d1.Name())
+		name := d1.Name()
+		// Prevent infinite loops; this can happen with certain FS implementations (e.g. ZipFS).
+		if name == "" || name == "." {
+			continue
+		}
+		path1 := filepath.Join(path, name)
 		if err := walkDir(f, path1, d1, walkDirFn); err != nil {
 			if errors.Is(err, fs.SkipDir) {
 				break
diff --git a/pkg/hash/oshash/oshash.go b/pkg/hash/oshash/oshash.go
index 2c42afd2a..a5271dd31 100644
--- a/pkg/hash/oshash/oshash.go
+++ b/pkg/hash/oshash/oshash.go
@@ -46,15 +46,16 @@ func oshash(size int64, head []byte, tail []byte) (string, error) {
 	return fmt.Sprintf("%016x", result), nil
 }
 
-// FromFilePath calculates the hash reading from src.
+// FromReader calculates the hash reading from src.
 func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
-	if fileSize <= 0 {
-		return "", fmt.Errorf("cannot calculate oshash for empty file (size %d)", fileSize)
+	if fileSize < 8 {
+		return "", fmt.Errorf("cannot calculate oshash where size < 8 (%d)", fileSize)
 	}
 
 	fileChunkSize := chunkSize
 	if fileSize < fileChunkSize {
-		fileChunkSize = fileSize
+		// Must be a multiple of 8.
+		fileChunkSize = (fileSize / 8) * 8
 	}
 
 	head := make([]byte, fileChunkSize)
@@ -67,7 +68,7 @@ func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
 	}
 
 	// seek to the end of the file - the chunk size
-	_, err = src.Seek(-fileChunkSize, 2)
+	_, err = src.Seek(-fileChunkSize, io.SeekEnd)
 	if err != nil {
 		return "", err
 	}
diff --git a/pkg/hash/oshash/oshash_internal_test.go b/pkg/hash/oshash/oshash_internal_test.go
deleted file mode 100644
index 20cdf989e..000000000
--- a/pkg/hash/oshash/oshash_internal_test.go
+++ /dev/null
@@ -1,75 +0,0 @@
-package oshash
-
-import (
-	"math/rand"
-	"testing"
-)
-
-// Note that the public API returns "" instead.
-func TestOshashEmpty(t *testing.T) {
-	var size int64
-	head := make([]byte, chunkSize)
-	tail := make([]byte, chunkSize)
-	want := "0000000000000000"
-	got, err := oshash(size, head, tail)
-	if err != nil {
-		t.Errorf("TestOshashEmpty: Error from oshash: %v", err)
-	}
-	if got != want {
-		t.Errorf("TestOshashEmpty: oshash(0, 0, 0) = %q; want %q", got, want)
-	}
-}
-
-// As oshash sums byte values, causing collisions is trivial.
-func TestOshashCollisions(t *testing.T) {
-	buf1 := []byte("this is dumb")
-	buf2 := []byte("dumb is this")
-	size := int64(len(buf1))
-	head := make([]byte, chunkSize)
-
-	tail1 := make([]byte, chunkSize)
-	copy(tail1[len(tail1)-len(buf1):], buf1)
-	hash1, err := oshash(size, head, tail1)
-	if err != nil {
-		t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
-	}
-
-	tail2 := make([]byte, chunkSize)
-	copy(tail2[len(tail2)-len(buf2):], buf2)
-	hash2, err := oshash(size, head, tail2)
-	if err != nil {
-		t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
-	}
-
-	if hash1 != hash2 {
-		t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) =! oshash(n, k, ... %v)", buf1, buf2)
-	}
-}
-
-func BenchmarkOsHash(b *testing.B) {
-	src := rand.NewSource(9999)
-	r := rand.New(src)
-
-	size := int64(1234567890)
-
-	head := make([]byte, 1024*64)
-	_, err := r.Read(head)
-	if err != nil {
-		b.Errorf("unable to generate head array: %v", err)
-	}
-
-	tail := make([]byte, 1024*64)
-	_, err = r.Read(tail)
-	if err != nil {
-		b.Errorf("unable to generate tail array: %v", err)
-	}
-
-	b.ResetTimer()
-
-	for n := 0; n < b.N; n++ {
-		_, err := oshash(size, head, tail)
-		if err != nil {
-			b.Errorf("unexpected error: %v", err)
-		}
-	}
-}
diff --git a/pkg/hash/oshash/oshash_test.go b/pkg/hash/oshash/oshash_test.go
new file mode 100644
index 000000000..9ef59b46d
--- /dev/null
+++ b/pkg/hash/oshash/oshash_test.go
@@ -0,0 +1,117 @@
+package oshash
+
+import (
+	"bytes"
+	"math/rand"
+	"testing"
+)
+
+func BenchmarkOsHash(b *testing.B) {
+	src := rand.NewSource(9999)
+	r := rand.New(src)
+
+	size := int64(1234567890)
+
+	head := make([]byte, 1024*64)
+	_, err := r.Read(head)
+	if err != nil {
+		b.Errorf("unable to generate head array: %v", err)
+	}
+
+	tail := make([]byte, 1024*64)
+	_, err = r.Read(tail)
+	if err != nil {
+		b.Errorf("unable to generate tail array: %v", err)
+	}
+
+	b.ResetTimer()
+
+	for n := 0; n < b.N; n++ {
+		_, err := oshash(size, head, tail)
+		if err != nil {
+			b.Errorf("unexpected error: %v", err)
+		}
+	}
+}
+
+func TestFromReader(t *testing.T) {
+	makeByteArray := func(base []byte, mag int) []byte {
+		ret := base
+		for i := 0; i < mag; i++ {
+			ret = append(ret, ret...)
+		}
+		return ret
+	}
+
+	makeTailArray := func(base []byte, tail []byte) []byte {
+		ret := base
+		t := make([]byte, chunkSize)
+		copy(t[len(t)-len(tail):], tail)
+		ret = append(ret, t...)
+		return ret
+	}
+
+	tests := []struct {
+		name    string
+		data    []byte
+		want    string
+		wantErr bool
+	}{
+		{
+			"empty",
+			[]byte{},
+			"",
+			true,
+		},
+		{
+			"regular",
+			makeByteArray([]byte("this is a test"), 15),
+			"6a0eba04654d0b9b",
+			false,
+		},
+		{
+			"< chunk size",
+			[]byte("hello world"),
+			"d3e392dee38cd4df",
+			false,
+		},
+		{
+			"< 8",
+			[]byte("hello"),
+			"",
+			true,
+		},
+		{
+			"exactly 8",
+			[]byte("12345678"),
+			"706e6c6a6866646a",
+			false,
+		},
+		{
+			"identical #1",
+			makeTailArray(make([]byte, chunkSize), []byte("this is dumb")),
+			"d5d6ddd820756920",
+			false,
+		},
+		{
+			"identical #2",
+			makeTailArray(make([]byte, chunkSize), []byte("dumb is this")),
+			"d5d6ddd820756920",
+			false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			r := bytes.NewReader(tt.data)
+
+			got, err := FromReader(r, int64(len(tt.data)))
+			if (err != nil) != tt.wantErr {
+				t.Errorf("FromReader() error = %v, wantErr %v", err, tt.wantErr)
+				return
+			}
+			if got != tt.want {
+				t.Errorf("FromReader() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/pkg/match/path.go b/pkg/match/path.go
index 8482b1a12..b4f202a5f 100644
--- a/pkg/match/path.go
+++ b/pkg/match/path.go
@@ -7,6 +7,7 @@ import (
 	"regexp"
 	"strings"
 	"unicode"
+	"unicode/utf8"
 
 	"github.com/stashapp/stash/pkg/gallery"
 	"github.com/stashapp/stash/pkg/image"
@@ -77,7 +78,7 @@ func getPathWords(path string, trimExt bool) []string {
 	// remove any single letter words
 	var ret []string
 	for _, w := range words {
-		if len(w) > 1 {
+		if utf8.RuneCountInString(w) > 1 {
 			// #1450 - we need to open up the criteria for matching so that we
 			// can match where path has no space between subject names -
 			// ie name = "foo bar" - path = "foobar"