utils: oshash: add tests (#1285)

Co-authored-by: WithoutPants <53250216+WithoutPants@users.noreply.github.com>
This commit is contained in:
stashist 2021-05-25 03:32:59 +02:00 committed by GitHub
parent fc9d70f702
commit 872e0b531c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 76 additions and 25 deletions

View File

@ -7,6 +7,33 @@ import (
"os"
)
// chunkSize is a chunk length in bytes (64 KiB). OSHashFromFilePath uses it as
// the upper bound for its per-chunk size, and the tests use it to size the
// head and tail buffers they pass to oshash.
const chunkSize int64 = 64 * 1024
// oshash computes the hash value from data that has already been read.
//
// head and tail are concatenated (head first) and interpreted as a sequence of
// little-endian unsigned 64-bit integers; trailing bytes that do not fill a
// whole 8-byte word are ignored. The integers are summed with wrap-around
// uint64 arithmetic, size is added to the sum, and the result is returned as a
// 16-digit, zero-padded, lowercase hexadecimal string.
//
// Neither head nor tail is modified. A non-nil error is returned only if
// decoding the buffer with binary.Read fails.
func oshash(size int64, head []byte, tail []byte) (string, error) {
	// Put the head and tail together in a freshly allocated buffer. Appending
	// directly onto head would write into the caller's backing array whenever
	// head has spare capacity.
	buf := make([]byte, 0, len(head)+len(tail))
	buf = append(buf, head...)
	buf = append(buf, tail...)

	// Convert bytes into uint64. Passing the slice itself (rather than a
	// pointer to it) is accepted by binary.Read and avoids its reflection
	// fallback.
	ints := make([]uint64, len(buf)/8)
	reader := bytes.NewReader(buf)
	if err := binary.Read(reader, binary.LittleEndian, ints); err != nil {
		return "", err
	}

	// Sum the integers; overflow wraps around, which is intended.
	var sum uint64
	for _, v := range ints {
		sum += v
	}

	// Add the filesize.
	sum += uint64(size)

	// Output as hex.
	return fmt.Sprintf("%016x", sum), nil
}
// OSHashFromFilePath calculates the hash using the same algorithm that
// OpenSubtitles.org uses.
//
@ -24,14 +51,13 @@ func OSHashFromFilePath(filePath string) (string, error) {
return "", err
}
fileSize := int64(fi.Size())
fileSize := fi.Size()
if fileSize == 0 {
return "", nil
}
const chunkSize = 64 * 1024
fileChunkSize := int64(chunkSize)
fileChunkSize := chunkSize
if fileSize < fileChunkSize {
fileChunkSize = fileSize
}
@ -57,26 +83,5 @@ func OSHashFromFilePath(filePath string) (string, error) {
return "", err
}
// put the head and tail together
buf := append(head, tail...)
// convert bytes into uint64
ints := make([]uint64, len(buf)/8)
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, &ints)
if err != nil {
return "", err
}
// sum the integers
var sum uint64
for _, v := range ints {
sum += v
}
// add the filesize
sum += uint64(fileSize)
// output as hex
return fmt.Sprintf("%016x", sum), nil
return oshash(fileSize, head, tail)
}

View File

@ -0,0 +1,46 @@
package utils
import (
"testing"
)
// TestOshashEmpty checks that oshash returns the all-zero hash for a size of
// zero with all-zero head and tail chunks.
//
// Note that the public API (OSHashFromFilePath) returns "" for an empty file
// instead.
func TestOshashEmpty(t *testing.T) {
	var size int64
	head := make([]byte, chunkSize)
	tail := make([]byte, chunkSize)
	want := "0000000000000000"

	got, err := oshash(size, head, tail)
	if err != nil {
		// Use %v: the %w wrapping verb is only understood by fmt.Errorf.
		// Stop here, since got is meaningless when an error is returned.
		t.Fatalf("TestOshashEmpty: Error from oshash: %v", err)
	}
	if got != want {
		t.Errorf("TestOshashEmpty: oshash(0, 0, 0) = %q; want %q", got, want)
	}
}
// TestOshashCollisions demonstrates that different inputs can hash to the same
// value.
//
// As oshash merely sums the data as 64-bit words, causing collisions is
// trivial: the two phrases below differ only by swapping "this" and "dumb",
// which land in the same byte positions of their respective 8-byte words, so
// the overall sum is unchanged.
func TestOshashCollisions(t *testing.T) {
	buf1 := []byte("this is dumb")
	buf2 := []byte("dumb is this")
	size := int64(len(buf1))
	head := make([]byte, chunkSize)

	// Place the first phrase at the very end of an otherwise zeroed tail.
	tail1 := make([]byte, chunkSize)
	copy(tail1[len(tail1)-len(buf1):], buf1)
	hash1, err := oshash(size, head, tail1)
	if err != nil {
		t.Fatalf("TestOshashCollisions: Error from oshash: %v", err)
	}

	// Same layout for the second phrase.
	tail2 := make([]byte, chunkSize)
	copy(tail2[len(tail2)-len(buf2):], buf2)
	hash2, err := oshash(size, head, tail2)
	if err != nil {
		t.Fatalf("TestOshashCollisions: Error from oshash: %v", err)
	}

	if hash1 != hash2 {
		// %q prints the phrases as readable text rather than byte lists.
		t.Errorf("TestOshashCollisions: oshash(n, k, ... %q) != oshash(n, k, ... %q)", buf1, buf2)
	}
}