schema: improved mixedArrayFromString

Change-Id: Iff9d68a498e13e5a90cd68c07ee9986c3407a8cc
This commit is contained in:
Brad Fitzpatrick 2014-05-14 16:07:35 -07:00
parent 5141595766
commit bf9909028f
2 changed files with 67 additions and 27 deletions

View File

@ -349,39 +349,37 @@ func stringFromMixedArray(parts []interface{}) string {
return buf.String()
}
func mixedArrayFromString(s string) []interface{} {
buf := []byte(s)
var name []interface{}
n := 0
for n < len(buf) {
part, offset := nextStringOrByte(buf[n:])
name = append(name, part)
n += offset
// mixedArrayFromString is the inverse of stringFromMixedArray. It
// splits a string to a series of either UTF-8 strings and non-UTF-8
// bytes.
func mixedArrayFromString(s string) (parts []interface{}) {
for len(s) > 0 {
if n := utf8StrLen(s); n > 0 {
parts = append(parts, s[:n])
s = s[n:]
} else {
parts = append(parts, s[0])
s = s[1:]
}
}
return name
return parts
}
// utf8StrLen returns the length in bytes of the longest prefix of s
// that is valid UTF-8. It returns len(s) when all of s is valid and 0
// when s is empty or begins with an invalid byte.
func utf8StrLen(s string) int {
	for i, r := range s {
		if r != utf8.RuneError {
			continue
		}
		// Ranging over a string yields utf8.RuneError both for an
		// invalid byte (decoded width 1) and for a correctly encoded
		// U+FFFD (width 3). Only the former ends the valid prefix.
		// This has to be a single check rather than a loop on r: r
		// does not change within an iteration, so looping on it
		// would never terminate for a well-formed U+FFFD.
		if _, size := utf8.DecodeRuneInString(s[i:]); size == 1 {
			return i
		}
	}
	return len(s)
}
func (ss *superset) SumPartsSize() (size uint64) {

View File

@ -21,6 +21,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
@ -83,6 +84,47 @@ func TestSymlink(t *testing.T) {
t.Logf("Got json for symlink file: [%s]\n", json)
}
// TestUtf8StrLen verifies that utf8StrLen reports the byte length of
// the valid UTF-8 prefix for empty, ASCII, multi-byte, and partially
// invalid inputs.
func TestUtf8StrLen(t *testing.T) {
	type testCase struct {
		in   string
		want int
	}
	cases := []testCase{
		{in: "", want: 0},
		{in: "a", want: 1},
		{in: "foo", want: 3},
		// Twelve two-byte Cyrillic letters plus one ASCII byte.
		{in: "Здравствуйте!", want: 25},
		{in: "foo\x80", want: 3},
		{in: "\x80foo", want: 0},
	}
	for _, tc := range cases {
		if got := utf8StrLen(tc.in); got != tc.want {
			t.Errorf("utf8StrLen(%q) = %v; want %v", tc.in, got, tc.want)
		}
	}
}
// TestMixedArrayFromString verifies that mixedArrayFromString splits
// its input into string parts and lone bytes, with the 0x80 byte in
// leading, middle, trailing, and repeated positions.
func TestMixedArrayFromString(t *testing.T) {
	// bad is a byte that is not valid UTF-8 on its own.
	bad := byte('\x80')

	type testCase struct {
		in   string
		want []interface{}
	}
	cases := []testCase{
		{in: "foo", want: []interface{}{"foo"}},
		{in: "\x80foo", want: []interface{}{bad, "foo"}},
		{in: "foo\x80foo", want: []interface{}{"foo", bad, "foo"}},
		{in: "foo\x80", want: []interface{}{"foo", bad}},
		{in: "\x80", want: []interface{}{bad}},
		{in: "\x80\x80", want: []interface{}{bad, bad}},
	}
	for _, tc := range cases {
		got := mixedArrayFromString(tc.in)
		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		t.Errorf("mixedArrayFromString(%q) = %#v; want %#v", tc.in, got, tc.want)
	}
}
// mixPartsTest holds a pair of strings for one test case.
// NOTE(review): the field meanings below are inferred from their
// names; the code that uses this type is outside this view.
type mixPartsTest struct {
	json     string // presumably the JSON input under test
	expected string // presumably the string the test expects to obtain
}