From a7f636868569651ff030031ba3b301d4dc998614 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Sun, 5 Jun 2011 12:33:11 -0700 Subject: [PATCH] rollsum: make a tree out of the splits --- clients/go/camput/camput.go | 37 -------------- clients/go/camput/splits.go | 96 +++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 37 deletions(-) create mode 100644 clients/go/camput/splits.go diff --git a/clients/go/camput/camput.go b/clients/go/camput/camput.go index f04b84dc6..a22cf07bc 100644 --- a/clients/go/camput/camput.go +++ b/clients/go/camput/camput.go @@ -17,7 +17,6 @@ limitations under the License. package main import ( - "bufio" "crypto/sha1" "flag" "fmt" @@ -29,7 +28,6 @@ import ( "camli/blobref" "camli/client" - "camli/rollsum" "camli/schema" "camli/jsonsign" ) @@ -357,38 +355,3 @@ func main() { os.Exit(2) } } - -func showSplits() { - file := flag.Arg(0) - f, err := os.Open(file) - if err != nil { - panic(err.String()) - } - bufr := bufio.NewReader(f) - - rs := rollsum.New() - n := 0 - lastSplit := map[int]int{} - last := 0 - for { - c, err := bufr.ReadByte() - if err != nil { - if err == os.EOF { - break - } - panic(err.String()) - } - n++ - rs.Roll(c) - if rs.OnSplit() { - bits := rs.Bits() - log.Printf("split at %d (after %d), bits=%d", n, n - last, bits) - last = n - for bits, last := range lastSplit { - log.Printf(" since %d = %d", bits, n - last) - } - lastSplit[bits] = n - } - } - -} diff --git a/clients/go/camput/splits.go b/clients/go/camput/splits.go new file mode 100644 index 000000000..772fd4a9e --- /dev/null +++ b/clients/go/camput/splits.go @@ -0,0 +1,96 @@ +/* +Copyright 2011 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+	"log"
+	"strings"
+
+	"camli/rollsum"
+)
+
+// span is the byte range [from, to) closed by a split of the given bits (0 for
+// the EOF tail); children are the preceding lower-bits spans nested under it.
+type span struct {
+	from, to int64
+	bits     int
+	children []span
+}
+
+// showSplits rolls the file named by flag.Arg(0) through rollsum, logs
+// each split point, and prints the resulting tree of spans to stdout.
+// It panics if the file cannot be opened or read.
+func showSplits() {
+	file := flag.Arg(0)
+	f, err := os.Open(file)
+	if err != nil {
+		panic(err.String())
+	}
+	defer f.Close() // previously leaked: the file was never closed
+	bufr := bufio.NewReader(f)
+
+	spans := []span{}
+	rs := rollsum.New()
+	n := int64(0)
+	last := n
+
+	for {
+		c, err := bufr.ReadByte()
+		if err != nil {
+			if err == os.EOF {
+				if n != last {
+					spans = append(spans, span{from: last, to: n})
+				}
+				break
+			}
+			panic(err.String())
+		}
+		n++
+		rs.Roll(c)
+		if rs.OnSplit() {
+			bits := rs.Bits()
+			sliceFrom := len(spans)
+			for sliceFrom > 0 && spans[sliceFrom-1].bits < bits {
+				sliceFrom--
+			}
+			// Copy the absorbed spans out: the append below reuses their slots.
+			var children []span
+			if sliceFrom < len(spans) {
+				children = append(children, spans[sliceFrom:]...)
+				spans = spans[:sliceFrom]
+			}
+			spans = append(spans, span{from: last, to: n, bits: bits, children: children})
+
+			log.Printf("split at %d (after %d), bits=%d", n, n-last, bits)
+			last = n
+		}
+	}
+
+	var dumpSpans func(s []span, indent int)
+	dumpSpans = func(s []span, indent int) {
+		in := strings.Repeat(" ", indent)
+		for _, sp := range s {
+			fmt.Printf("%sfrom=%d, to=%d (len %d) bits=%d\n", in, sp.from, sp.to, sp.to-sp.from, sp.bits)
+			dumpSpans(sp.children, indent+4)
+		}
+	}
+	dumpSpans(spans, 0)
+}