2020-08-06 01:21:14 +00:00
|
|
|
package utils
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/binary"
|
2021-10-12 00:59:51 +00:00
|
|
|
"errors"
|
2020-08-06 01:21:14 +00:00
|
|
|
"fmt"
|
2021-10-14 23:39:48 +00:00
|
|
|
"io"
|
2020-08-06 01:21:14 +00:00
|
|
|
"os"
|
|
|
|
)
|
|
|
|
|
2021-05-25 01:32:59 +00:00
|
|
|
// chunkSize is the maximum number of bytes (64 KiB) read from each of the
// head and the tail of the input when computing the hash.
const chunkSize int64 = 64 * 1024
|
|
|
|
|
2021-10-12 00:59:51 +00:00
|
|
|
// ErrOsHashLen is returned when a buffer to be checksummed has a length that
// is not a multiple of 8 bytes, i.e. cannot be split into whole 64-bit words.
var ErrOsHashLen = errors.New("buffer is not a multiple of 8")
|
2021-05-25 01:32:59 +00:00
|
|
|
|
2021-10-12 00:59:51 +00:00
|
|
|
func sumBytes(buf []byte) (uint64, error) {
|
|
|
|
if len(buf)%8 != 0 {
|
|
|
|
return 0, ErrOsHashLen
|
2021-05-25 01:32:59 +00:00
|
|
|
}
|
|
|
|
|
2021-10-12 00:59:51 +00:00
|
|
|
sz := len(buf) / 8
|
2021-05-25 01:32:59 +00:00
|
|
|
var sum uint64
|
2021-10-12 00:59:51 +00:00
|
|
|
for j := 0; j < sz; j++ {
|
|
|
|
sum += binary.LittleEndian.Uint64(buf[8*j : 8*(j+1)])
|
2021-05-25 01:32:59 +00:00
|
|
|
}
|
|
|
|
|
2021-10-12 00:59:51 +00:00
|
|
|
return sum, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func oshash(size int64, head []byte, tail []byte) (string, error) {
|
|
|
|
headSum, err := sumBytes(head)
|
|
|
|
if err != nil {
|
|
|
|
return "", fmt.Errorf("oshash head: %w", err)
|
|
|
|
}
|
|
|
|
tailSum, err := sumBytes(tail)
|
|
|
|
if err != nil {
|
|
|
|
return "", fmt.Errorf("oshash tail: %w", err)
|
|
|
|
}
|
2021-05-25 01:32:59 +00:00
|
|
|
|
2021-10-12 00:59:51 +00:00
|
|
|
// Compute the sum of the head, tail and file size
|
|
|
|
result := headSum + tailSum + uint64(size)
|
2021-05-25 01:32:59 +00:00
|
|
|
// output as hex
|
2021-10-12 00:59:51 +00:00
|
|
|
return fmt.Sprintf("%016x", result), nil
|
2021-05-25 01:32:59 +00:00
|
|
|
}
|
|
|
|
|
2021-10-14 23:39:48 +00:00
|
|
|
func OSHashFromReader(src io.ReadSeeker, fileSize int64) (string, error) {
|
2020-08-06 01:21:14 +00:00
|
|
|
if fileSize == 0 {
|
|
|
|
return "", nil
|
|
|
|
}
|
|
|
|
|
2021-05-25 01:32:59 +00:00
|
|
|
fileChunkSize := chunkSize
|
2020-08-06 01:21:14 +00:00
|
|
|
if fileSize < fileChunkSize {
|
|
|
|
fileChunkSize = fileSize
|
|
|
|
}
|
|
|
|
|
|
|
|
head := make([]byte, fileChunkSize)
|
|
|
|
tail := make([]byte, fileChunkSize)
|
|
|
|
|
|
|
|
// read the head of the file into the start of the buffer
|
2021-10-14 23:39:48 +00:00
|
|
|
_, err := src.Read(head)
|
2020-08-06 01:21:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
// seek to the end of the file - the chunk size
|
2021-10-14 23:39:48 +00:00
|
|
|
_, err = src.Seek(-fileChunkSize, 2)
|
2020-08-06 01:21:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
// read the tail of the file
|
2021-10-14 23:39:48 +00:00
|
|
|
_, err = src.Read(tail)
|
2020-08-06 01:21:14 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
2021-05-25 01:32:59 +00:00
|
|
|
return oshash(fileSize, head, tail)
|
2020-08-06 01:21:14 +00:00
|
|
|
}
|
2021-10-14 23:39:48 +00:00
|
|
|
|
|
|
|
// OSHashFromFilePath calculates the hash using the same algorithm that
|
|
|
|
// OpenSubtitles.org uses.
|
|
|
|
//
|
|
|
|
// Calculation is as follows:
|
|
|
|
// size + 64 bit checksum of the first and last 64k bytes of the file.
|
|
|
|
func OSHashFromFilePath(filePath string) (string, error) {
|
|
|
|
f, err := os.Open(filePath)
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
fi, err := f.Stat()
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
|
|
|
|
fileSize := fi.Size()
|
|
|
|
|
|
|
|
return OSHashFromReader(f, fileSize)
|
|
|
|
}
|