mirror of https://github.com/stashapp/stash.git
Fix a bunch of scanning / tagging bugs (#3154)
* Fix possible infinite loop/stack overflow with weird/broken zip files
* Fix path length calculation using bytes instead of characters (runes)
* Fix bug where oshash gets buffers with a size that is not actually a multiple of 8
* Add oshash tests

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
parent
e614ca8d26
commit
87cea80e7b
|
@ -125,7 +125,12 @@ func walkDir(f FS, path string, d fs.DirEntry, walkDirFn fs.WalkDirFunc) error {
|
|||
}
|
||||
|
||||
for _, d1 := range dirs {
|
||||
path1 := filepath.Join(path, d1.Name())
|
||||
name := d1.Name()
|
||||
// Prevent infinite loops; this can happen with certain FS implementations (e.g. ZipFS).
|
||||
if name == "" || name == "." {
|
||||
continue
|
||||
}
|
||||
path1 := filepath.Join(path, name)
|
||||
if err := walkDir(f, path1, d1, walkDirFn); err != nil {
|
||||
if errors.Is(err, fs.SkipDir) {
|
||||
break
|
||||
|
|
|
@ -46,15 +46,16 @@ func oshash(size int64, head []byte, tail []byte) (string, error) {
|
|||
return fmt.Sprintf("%016x", result), nil
|
||||
}
|
||||
|
||||
// FromFilePath calculates the hash reading from src.
|
||||
// FromReader calculates the hash reading from src.
|
||||
func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
|
||||
if fileSize <= 0 {
|
||||
return "", fmt.Errorf("cannot calculate oshash for empty file (size %d)", fileSize)
|
||||
if fileSize <= 8 {
|
||||
return "", fmt.Errorf("cannot calculate oshash where size < 8 (%d)", fileSize)
|
||||
}
|
||||
|
||||
fileChunkSize := chunkSize
|
||||
if fileSize < fileChunkSize {
|
||||
fileChunkSize = fileSize
|
||||
// Must be a multiple of 8.
|
||||
fileChunkSize = (fileSize / 8) * 8
|
||||
}
|
||||
|
||||
head := make([]byte, fileChunkSize)
|
||||
|
@ -67,7 +68,7 @@ func FromReader(src io.ReadSeeker, fileSize int64) (string, error) {
|
|||
}
|
||||
|
||||
// seek to the end of the file - the chunk size
|
||||
_, err = src.Seek(-fileChunkSize, 2)
|
||||
_, err = src.Seek(-fileChunkSize, io.SeekEnd)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -1,75 +0,0 @@
|
|||
package oshash
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Note that the public API returns "" instead.
|
||||
func TestOshashEmpty(t *testing.T) {
|
||||
var size int64
|
||||
head := make([]byte, chunkSize)
|
||||
tail := make([]byte, chunkSize)
|
||||
want := "0000000000000000"
|
||||
got, err := oshash(size, head, tail)
|
||||
if err != nil {
|
||||
t.Errorf("TestOshashEmpty: Error from oshash: %v", err)
|
||||
}
|
||||
if got != want {
|
||||
t.Errorf("TestOshashEmpty: oshash(0, 0, 0) = %q; want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// As oshash sums byte values, causing collisions is trivial.
|
||||
func TestOshashCollisions(t *testing.T) {
|
||||
buf1 := []byte("this is dumb")
|
||||
buf2 := []byte("dumb is this")
|
||||
size := int64(len(buf1))
|
||||
head := make([]byte, chunkSize)
|
||||
|
||||
tail1 := make([]byte, chunkSize)
|
||||
copy(tail1[len(tail1)-len(buf1):], buf1)
|
||||
hash1, err := oshash(size, head, tail1)
|
||||
if err != nil {
|
||||
t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
|
||||
}
|
||||
|
||||
tail2 := make([]byte, chunkSize)
|
||||
copy(tail2[len(tail2)-len(buf2):], buf2)
|
||||
hash2, err := oshash(size, head, tail2)
|
||||
if err != nil {
|
||||
t.Errorf("TestOshashCollisions: Error from oshash: %v", err)
|
||||
}
|
||||
|
||||
if hash1 != hash2 {
|
||||
t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) =! oshash(n, k, ... %v)", buf1, buf2)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkOsHash(b *testing.B) {
|
||||
src := rand.NewSource(9999)
|
||||
r := rand.New(src)
|
||||
|
||||
size := int64(1234567890)
|
||||
|
||||
head := make([]byte, 1024*64)
|
||||
_, err := r.Read(head)
|
||||
if err != nil {
|
||||
b.Errorf("unable to generate head array: %v", err)
|
||||
}
|
||||
|
||||
tail := make([]byte, 1024*64)
|
||||
_, err = r.Read(tail)
|
||||
if err != nil {
|
||||
b.Errorf("unable to generate tail array: %v", err)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := oshash(size, head, tail)
|
||||
if err != nil {
|
||||
b.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
package oshash
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkOsHash(b *testing.B) {
|
||||
src := rand.NewSource(9999)
|
||||
r := rand.New(src)
|
||||
|
||||
size := int64(1234567890)
|
||||
|
||||
head := make([]byte, 1024*64)
|
||||
_, err := r.Read(head)
|
||||
if err != nil {
|
||||
b.Errorf("unable to generate head array: %v", err)
|
||||
}
|
||||
|
||||
tail := make([]byte, 1024*64)
|
||||
_, err = r.Read(tail)
|
||||
if err != nil {
|
||||
b.Errorf("unable to generate tail array: %v", err)
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
|
||||
for n := 0; n < b.N; n++ {
|
||||
_, err := oshash(size, head, tail)
|
||||
if err != nil {
|
||||
b.Errorf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFromReader(t *testing.T) {
|
||||
makeByteArray := func(base []byte, mag int) []byte {
|
||||
ret := base
|
||||
for i := 0; i < mag; i++ {
|
||||
ret = append(ret, ret...)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
makeTailArray := func(base []byte, tail []byte) []byte {
|
||||
ret := base
|
||||
t := make([]byte, chunkSize)
|
||||
copy(t[len(t)-len(tail):], tail)
|
||||
ret = append(ret, t...)
|
||||
return ret
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
want string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
"empty",
|
||||
[]byte{},
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"regular",
|
||||
makeByteArray([]byte("this is a test"), 15),
|
||||
"6a0eba04654d0b9b",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"< chunk size",
|
||||
[]byte("hello world"),
|
||||
"d3e392dee38cd4df",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"< 8",
|
||||
[]byte("hello"),
|
||||
"",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"identical #1",
|
||||
makeTailArray(make([]byte, chunkSize), []byte("this is dumb")),
|
||||
"d5d6ddd820756920",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"identical #2",
|
||||
makeTailArray(make([]byte, chunkSize), []byte("dumb is this")),
|
||||
"d5d6ddd820756920",
|
||||
false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
r := bytes.NewReader(tt.data)
|
||||
|
||||
got, err := FromReader(r, int64(len(tt.data)))
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("FromReader() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
if got != tt.want {
|
||||
t.Errorf("FromReader() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
|
@ -7,6 +7,7 @@ import (
|
|||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/stashapp/stash/pkg/gallery"
|
||||
"github.com/stashapp/stash/pkg/image"
|
||||
|
@ -77,7 +78,7 @@ func getPathWords(path string, trimExt bool) []string {
|
|||
// remove any single letter words
|
||||
var ret []string
|
||||
for _, w := range words {
|
||||
if len(w) > 1 {
|
||||
if utf8.RuneCountInString(w) > 1 {
|
||||
// #1450 - we need to open up the criteria for matching so that we
|
||||
// can match where path has no space between subject names -
|
||||
// ie name = "foo bar" - path = "foobar"
|
||||
|
|
Loading…
Reference in New Issue