mirror of https://github.com/perkeep/perkeep.git
schema: improved mixedArrayFromString
Change-Id: Iff9d68a498e13e5a90cd68c07ee9986c3407a8cc
This commit is contained in:
parent
5141595766
commit
bf9909028f
|
@ -349,39 +349,37 @@ func stringFromMixedArray(parts []interface{}) string {
|
|||
return buf.String()
|
||||
}
|
||||
|
||||
// mixedArrayFromString is the inverse of stringFromMixedArray. It
// splits a string into a series of either UTF-8 strings or individual
// non-UTF-8 bytes.
//
// Each element of the returned slice is either a string (a maximal run
// of valid UTF-8) or a single byte (one byte that is not part of a
// valid UTF-8 sequence). An empty input yields a nil slice.
func mixedArrayFromString(s string) (parts []interface{}) {
	for len(s) > 0 {
		if n := utf8StrLen(s); n > 0 {
			// Leading run of valid UTF-8: emit it as one string.
			parts = append(parts, s[:n])
			s = s[n:]
		} else {
			// The first byte is not valid UTF-8: emit it alone.
			parts = append(parts, s[0])
			s = s[1:]
		}
	}
	return parts
}

// utf8StrLen returns how many prefix bytes of s are valid UTF-8.
func utf8StrLen(s string) int {
	for i, r := range s {
		if r != utf8.RuneError {
			continue
		}
		// The RuneError value can be an error sentinel (decoded
		// size 1) or the same value encoded properly (U+FFFD, three
		// bytes). Decode it again to tell the two apart. This must be
		// a single check, not a loop: a properly encoded U+FFFD never
		// changes r, so looping on r == utf8.RuneError would spin
		// forever.
		if _, size := utf8.DecodeRuneInString(s[i:]); size == 1 {
			return i
		}
	}
	return len(s)
}
|
||||
|
||||
func (ss *superset) SumPartsSize() (size uint64) {
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -83,6 +84,47 @@ func TestSymlink(t *testing.T) {
|
|||
t.Logf("Got json for symlink file: [%s]\n", json)
|
||||
}
|
||||
|
||||
func TestUtf8StrLen(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want int
|
||||
}{
|
||||
{"", 0},
|
||||
{"a", 1},
|
||||
{"foo", 3},
|
||||
{"Здравствуйте!", 25},
|
||||
{"foo\x80", 3},
|
||||
{"\x80foo", 0},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := utf8StrLen(tt.in)
|
||||
if got != tt.want {
|
||||
t.Errorf("utf8StrLen(%q) = %v; want %v", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMixedArrayFromString(t *testing.T) {
|
||||
b80 := byte('\x80')
|
||||
tests := []struct {
|
||||
in string
|
||||
want []interface{}
|
||||
}{
|
||||
{"foo", []interface{}{"foo"}},
|
||||
{"\x80foo", []interface{}{b80, "foo"}},
|
||||
{"foo\x80foo", []interface{}{"foo", b80, "foo"}},
|
||||
{"foo\x80", []interface{}{"foo", b80}},
|
||||
{"\x80", []interface{}{b80}},
|
||||
{"\x80\x80", []interface{}{b80, b80}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := mixedArrayFromString(tt.in)
|
||||
if !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("mixedArrayFromString(%q) = %#v; want %#v", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type mixPartsTest struct {
|
||||
json, expected string
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue