Improve oshash code (#1829)

Reduce allocations. Don't create intermediary arrays which we then
consume right after. Manually fuse the arrays and decode straight into
the sum instead.

Furthermore, don't invoke a Reader, but carve out the locations via a
loop, directly.

These two changes taken together speed up oshash computations by a
factor of 10 according to the benchmark tests. The main reason for
the speedup is a much lower memory allocation rate, which in turn
reduces GC pressure.

While here, add a benchmark for oshash computations and use it for
testing the performance.
This commit is contained in:
SmallCoccinelle 2021-10-12 02:59:51 +02:00 committed by GitHub
parent e9d48683f8
commit 38384f2c60
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 16 deletions

View File

@ -1,37 +1,44 @@
package utils
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"os"
)
// chunkSize is 64 KiB (64 * 1024 bytes).
const chunkSize int64 = 64 * 1024
// NOTE(review): from here to the closing brace of the second oshash, this
// span appears to be a rendered diff with pre-change and post-change lines
// interleaved and no +/- markers, so it is not compilable as written. The
// lines using append(head, tail...), ints, reader, binary.Read, and the
// return that formats `sum` inside oshash look like the removed
// implementation; ErrOsHashLen, sumBytes, and the headSum/tailSum version of
// oshash look like the added one — confirm against the repository.
func oshash(size int64, head []byte, tail []byte) (string, error) {
// put the head and tail together
buf := append(head, tail...)
// ErrOsHashLen is the error sumBytes returns when the length of its input
// is not a multiple of 8.
var ErrOsHashLen = errors.New("buffer is not a multiple of 8")
// convert bytes into uint64
ints := make([]uint64, len(buf)/8)
reader := bytes.NewReader(buf)
err := binary.Read(reader, binary.LittleEndian, &ints)
if err != nil {
return "", err
// As the added lines read: sumBytes treats buf as consecutive 8-byte
// little-endian unsigned integers and returns their sum, or ErrOsHashLen
// when len(buf) is not a multiple of 8.
func sumBytes(buf []byte) (uint64, error) {
if len(buf)%8 != 0 {
return 0, ErrOsHashLen
}
// sum the integers
sz := len(buf) / 8
var sum uint64
for _, v := range ints {
sum += v
// Decode each 8-byte window in place instead of materialising a []uint64.
for j := 0; j < sz; j++ {
sum += binary.LittleEndian.Uint64(buf[8*j : 8*(j+1)])
}
// add the filesize
sum += uint64(size)
return sum, nil
}
// As the added lines read: oshash sums head and tail with sumBytes, adds
// size, and returns the total formatted as 16 zero-padded lowercase hex
// digits. An error from either sumBytes call is wrapped with an
// "oshash head" / "oshash tail" prefix.
func oshash(size int64, head []byte, tail []byte) (string, error) {
headSum, err := sumBytes(head)
if err != nil {
return "", fmt.Errorf("oshash head: %w", err)
}
tailSum, err := sumBytes(tail)
if err != nil {
return "", fmt.Errorf("oshash tail: %w", err)
}
// Compute the sum of the head, tail and file size
result := headSum + tailSum + uint64(size)
// output as hex
// NOTE(review): the next line names `sum`, which this version of oshash does
// not declare — presumably the removed return statement; confirm.
return fmt.Sprintf("%016x", sum), nil
return fmt.Sprintf("%016x", result), nil
}
// OSHashFromFilePath calculates the hash using the same algorithm that

View File

@ -1,6 +1,7 @@
package utils
import (
"math/rand"
"testing"
)
@ -44,3 +45,31 @@ func TestOshashCollisions(t *testing.T) {
t.Errorf("TestOshashCollisions: oshash(n, k, ... %v) =! oshash(n, k, ... %v)", buf1, buf2)
}
}
// BenchmarkOsHash measures oshash over a 64 KiB head buffer and a 64 KiB tail
// buffer filled with pseudo-random bytes.
//
// The random source uses a fixed seed so every run hashes the same data, and
// the buffers are built before the timer is reset so only the oshash calls
// are timed. Allocation statistics are always reported. Any failure aborts
// the benchmark immediately rather than timing a broken setup or logging the
// same error once per iteration.
func BenchmarkOsHash(b *testing.B) {
	src := rand.NewSource(9999)
	r := rand.New(src)

	size := int64(1234567890)

	head := make([]byte, 1024*64)
	if _, err := r.Read(head); err != nil {
		b.Fatalf("unable to generate head array: %v", err)
	}

	tail := make([]byte, 1024*64)
	if _, err := r.Read(tail); err != nil {
		b.Fatalf("unable to generate tail array: %v", err)
	}

	// Report allocs/op without requiring -benchmem on the command line.
	b.ReportAllocs()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		if _, err := oshash(size, head, tail); err != nil {
			b.Fatalf("unexpected error: %v", err)
		}
	}
}