Fix a bunch of scanning / tagging bugs (#3154)

* Fix possible infinite loop/stack overflow with weird/broken zip files
* Fix path length calculation using bytes instead of characters (runes)
* Fix bug where oshash gets buffers whose size is not actually a multiple of 8
* Add oshash tests

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
alexandra-3 2022-12-01 15:48:04 +10:00 committed by GitHub
parent e614ca8d26
commit 87cea80e7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 125 additions and 82 deletions

View File

@ -125,7 +125,12 @@ func walkDir(f FS, path string, d fs.DirEntry, walkDirFn fs.WalkDirFunc) error {
}
for _, d1 := range dirs {
path1 := filepath.Join(path, d1.Name())
name := d1.Name()
// Prevent infinite loops; this can happen with certain FS implementations (e.g. ZipFS).
if name == "" || name == "." {
continue
}
path1 := filepath.Join(path, name)
if err := walkDir(f, path1, d1, walkDirFn); err != nil {
if errors.Is(err, fs.SkipDir) {
break

View File

@ -46,15 +46,16 @@ func oshash(size int64, head []byte, tail []byte) (string, error) {
return fmt.Sprintf("%016x", result), nil
}
// FromFilePath calculates the hash reading from src.
// FromReader calculates the hash reading from src.
func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
if fileSize <= 0 {
return "", fmt.Errorf("cannot calculate oshash for empty file (size %d)", fileSize)
if fileSize <= 8 {
return "", fmt.Errorf("cannot calculate oshash where size < 8 (%d)", fileSize)
}
fileChunkSize := chunkSize
if fileSize < fileChunkSize {
fileChunkSize = fileSize
// Must be a multiple of 8.
fileChunkSize = (fileSize / 8) * 8
}
head := make([]byte, fileChunkSize)
@ -67,7 +68,7 @@ func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
}
// seek to the end of the file - the chunk size
_, err = src.Seek(-fileChunkSize, 2)
_, err = src.Seek(-fileChunkSize, io.SeekEnd)
if err != nil {
return "", err
}

View File

@ -1,75 +0,0 @@
package oshash
import (
"math/rand"
"testing"
)
// TestOshashEmpty hashes a zero size together with zero-filled head and tail
// buffers of chunkSize bytes and expects the all-zero digest.
// Note that the public API returns "" instead.
func TestOshashEmpty(t *testing.T) {
	const expected = "0000000000000000"

	var emptySize int64
	zeroHead := make([]byte, chunkSize)
	zeroTail := make([]byte, chunkSize)

	actual, err := oshash(emptySize, zeroHead, zeroTail)
	if err != nil {
		t.Errorf("TestOshashEmpty: Error from oshash: %v", err)
	}

	if actual != expected {
		t.Errorf("TestOshashEmpty: oshash(0, 0, 0) = %q; want %q", actual, expected)
	}
}
// TestOshashCollisions checks that two tails holding the same words in a
// different order produce the same hash.
// As oshash sums byte values, causing collisions is trivial.
func TestOshashCollisions(t *testing.T) {
	buf1 := []byte("this is dumb")
	buf2 := []byte("dumb is this")
	size := int64(len(buf1))
	head := make([]byte, chunkSize)

	// Place each payload at the very end of an otherwise zeroed tail chunk.
	tail1 := make([]byte, chunkSize)
	copy(tail1[len(tail1)-len(buf1):], buf1)
	hash1, err := oshash(size, head, tail1)
	if err != nil {
		// Without a valid hash the equality check below is meaningless.
		t.Fatalf("TestOshashCollisions: Error from oshash: %v", err)
	}

	tail2 := make([]byte, chunkSize)
	copy(tail2[len(tail2)-len(buf2):], buf2)
	hash2, err := oshash(size, head, tail2)
	if err != nil {
		t.Fatalf("TestOshashCollisions: Error from oshash: %v", err)
	}

	if hash1 != hash2 {
		// %q renders the payloads as readable text rather than a list of
		// byte values.
		t.Errorf("TestOshashCollisions: oshash(n, k, ... %q) != oshash(n, k, ... %q)", buf1, buf2)
	}
}
// BenchmarkOsHash times oshash over two 64 KiB buffers of pseudo-random data
// produced from a fixed seed, so every run hashes the same bytes.
func BenchmarkOsHash(b *testing.B) {
	const (
		reportedSize = int64(1234567890)
		bufLen       = 1024 * 64
	)

	rng := rand.New(rand.NewSource(9999))

	// The head is filled before the tail so the generated bytes stay stable.
	head := make([]byte, bufLen)
	if _, err := rng.Read(head); err != nil {
		b.Errorf("unable to generate head array: %v", err)
	}

	tail := make([]byte, bufLen)
	if _, err := rng.Read(tail); err != nil {
		b.Errorf("unable to generate tail array: %v", err)
	}

	// Exclude the buffer setup from the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, err := oshash(reportedSize, head, tail); err != nil {
			b.Errorf("unexpected error: %v", err)
		}
	}
}

View File

@ -0,0 +1,111 @@
package oshash
import (
"bytes"
"math/rand"
"testing"
)
// BenchmarkOsHash measures oshash on a pair of 64 KiB buffers filled from a
// fixed-seed random source.
func BenchmarkOsHash(b *testing.B) {
	const bufLen = 1024 * 64

	generator := rand.New(rand.NewSource(9999))
	var fileSize int64 = 1234567890

	// Fill the head first, then the tail, drawing from the same generator.
	headBuf := make([]byte, bufLen)
	if _, readErr := generator.Read(headBuf); readErr != nil {
		b.Errorf("unable to generate head array: %v", readErr)
	}

	tailBuf := make([]byte, bufLen)
	if _, readErr := generator.Read(tailBuf); readErr != nil {
		b.Errorf("unable to generate tail array: %v", readErr)
	}

	// Only the hashing loop should be timed.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_, hashErr := oshash(fileSize, headBuf, tailBuf)
		if hashErr != nil {
			b.Errorf("unexpected error: %v", hashErr)
		}
	}
}
// TestFromReader is a table-driven test of FromReader over in-memory readers.
// Each case supplies the input bytes, the expected hash string and whether an
// error is expected.
func TestFromReader(t *testing.T) {
	// doubled returns base repeated 2^doublings times.
	doubled := func(base []byte, doublings int) []byte {
		return bytes.Repeat(base, 1<<doublings)
	}

	// withTail appends a chunkSize-long zeroed block to base, with tail
	// copied into the last len(tail) bytes of that block.
	withTail := func(base []byte, tail []byte) []byte {
		padded := make([]byte, chunkSize)
		offset := len(padded) - len(tail)
		copy(padded[offset:], tail)
		return append(base, padded...)
	}

	cases := []struct {
		name    string
		data    []byte
		want    string
		wantErr bool
	}{
		{
			name:    "empty",
			data:    []byte{},
			want:    "",
			wantErr: true,
		},
		{
			name:    "regular",
			data:    doubled([]byte("this is a test"), 15),
			want:    "6a0eba04654d0b9b",
			wantErr: false,
		},
		{
			name:    "< chunk size",
			data:    []byte("hello world"),
			want:    "d3e392dee38cd4df",
			wantErr: false,
		},
		{
			name:    "< 8",
			data:    []byte("hello"),
			want:    "",
			wantErr: true,
		},
		// The next two inputs differ only in the word order of their tails
		// and are expected to produce the same hash.
		{
			name:    "identical #1",
			data:    withTail(make([]byte, chunkSize), []byte("this is dumb")),
			want:    "d5d6ddd820756920",
			wantErr: false,
		},
		{
			name:    "identical #2",
			data:    withTail(make([]byte, chunkSize), []byte("dumb is this")),
			want:    "d5d6ddd820756920",
			wantErr: false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			reader := bytes.NewReader(tc.data)
			got, err := FromReader(reader, int64(len(tc.data)))

			failed := err != nil
			if failed != tc.wantErr {
				t.Errorf("FromReader() error = %v, wantErr %v", err, tc.wantErr)
				return
			}

			if got != tc.want {
				t.Errorf("FromReader() = %v, want %v", got, tc.want)
			}
		})
	}
}

View File

@ -7,6 +7,7 @@ import (
"regexp"
"strings"
"unicode"
"unicode/utf8"
"github.com/stashapp/stash/pkg/gallery"
"github.com/stashapp/stash/pkg/image"
@ -77,7 +78,7 @@ func getPathWords(path string, trimExt bool) []string {
// remove any single letter words
var ret []string
for _, w := range words {
if len(w) > 1 {
if utf8.RuneCountInString(w) > 1 {
// #1450 - we need to open up the criteria for matching so that we
// can match where path has no space between subject names -
// ie name = "foo bar" - path = "foobar"