// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
package runes
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
"unicode/utf8"
|
|
|
|
"golang.org/x/text/internal/testtext"
|
|
"golang.org/x/text/transform"
|
|
)
|
|
|
|
type transformTest struct {
|
|
desc string
|
|
szDst int
|
|
atEOF bool
|
|
repl string
|
|
in string
|
|
out string // result string of first call to Transform
|
|
outFull string // transform of entire input string
|
|
err error
|
|
errSpan error
|
|
nSpan int
|
|
|
|
t transform.SpanningTransformer
|
|
}
|
|
|
|
// large is the buffer size used when the destination must never run short.
const large = 10 * 1024
|
|
|
|
func (tt *transformTest) check(t *testing.T, i int) {
|
|
if tt.t == nil {
|
|
return
|
|
}
|
|
dst := make([]byte, tt.szDst)
|
|
src := []byte(tt.in)
|
|
nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF)
|
|
if err != tt.err {
|
|
t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err)
|
|
}
|
|
if got := string(dst[:nDst]); got != tt.out {
|
|
t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out)
|
|
}
|
|
|
|
// Calls tt.t.Transform for the remainder of the input. We use this to test
|
|
// the nSrc return value.
|
|
out := make([]byte, large)
|
|
n := copy(out, dst[:nDst])
|
|
nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true)
|
|
if got, want := string(out[:n+nDst]), tt.outFull; got != want {
|
|
t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want)
|
|
}
|
|
|
|
tt.t.Reset()
|
|
p := 0
|
|
for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ {
|
|
}
|
|
if tt.nSpan != 0 {
|
|
p = tt.nSpan
|
|
}
|
|
if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan {
|
|
t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan)
|
|
}
|
|
}
|
|
|
|
// idem is the identity mapping on runes.
func idem(r rune) rune {
	return r
}
|
|
|
|
func TestMap(t *testing.T) {
|
|
runes := []rune{'a', 'ç', '中', '\U00012345', 'a'}
|
|
// Default mapper used for this test.
|
|
rotate := Map(func(r rune) rune {
|
|
for i, m := range runes {
|
|
if m == r {
|
|
return runes[i+1]
|
|
}
|
|
}
|
|
return r
|
|
})
|
|
|
|
for i, tt := range []transformTest{{
|
|
desc: "empty",
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "",
|
|
out: "",
|
|
outFull: "",
|
|
t: rotate,
|
|
}, {
|
|
desc: "no change",
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "b",
|
|
out: "b",
|
|
outFull: "b",
|
|
t: rotate,
|
|
}, {
|
|
desc: "short dst",
|
|
szDst: 2,
|
|
atEOF: true,
|
|
in: "aaaa",
|
|
out: "ç",
|
|
outFull: "çççç",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "short dst ascii, no change",
|
|
szDst: 2,
|
|
atEOF: true,
|
|
in: "bbb",
|
|
out: "bb",
|
|
outFull: "bbb",
|
|
err: transform.ErrShortDst,
|
|
t: rotate,
|
|
}, {
|
|
desc: "short dst writing error",
|
|
szDst: 2,
|
|
atEOF: false,
|
|
in: "a\x80",
|
|
out: "ç",
|
|
outFull: "ç\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "short dst writing incomplete rune",
|
|
szDst: 2,
|
|
atEOF: true,
|
|
in: "a\xc0",
|
|
out: "ç",
|
|
outFull: "ç\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "short dst, longer",
|
|
szDst: 5,
|
|
atEOF: true,
|
|
in: "Hellø",
|
|
out: "Hell",
|
|
outFull: "Hellø",
|
|
err: transform.ErrShortDst,
|
|
t: rotate,
|
|
}, {
|
|
desc: "short dst, single",
|
|
szDst: 1,
|
|
atEOF: false,
|
|
in: "ø",
|
|
out: "",
|
|
outFull: "ø",
|
|
err: transform.ErrShortDst,
|
|
t: Map(idem),
|
|
}, {
|
|
desc: "short dst, longer, writing error",
|
|
szDst: 8,
|
|
atEOF: false,
|
|
in: "\x80Hello\x80",
|
|
out: "\ufffdHello",
|
|
outFull: "\ufffdHello\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "short src",
|
|
szDst: 2,
|
|
atEOF: false,
|
|
in: "a\xc2",
|
|
out: "ç",
|
|
outFull: "ç\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "invalid input, atEOF",
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "\x80",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "invalid input, !atEOF",
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "\x80",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "incomplete rune !atEOF",
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "\xc2",
|
|
out: "",
|
|
outFull: "\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrShortSrc,
|
|
t: rotate,
|
|
}, {
|
|
desc: "invalid input, incomplete rune atEOF",
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "\xc2",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "misc correct",
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "a\U00012345 ç!",
|
|
out: "ça 中!",
|
|
outFull: "ça 中!",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "misc correct and invalid",
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "Hello\x80 w\x80orl\xc0d!\xc0",
|
|
out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
|
|
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "misc correct and invalid, short src",
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "Hello\x80 w\x80orl\xc0d!\xc2",
|
|
out: "Hello\ufffd w\ufffdorl\ufffdd!",
|
|
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: rotate,
|
|
}, {
|
|
desc: "misc correct and invalid, short src, replacing RuneError",
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
|
|
out: "Hel?lo? w?orl?d!",
|
|
outFull: "Hel?lo? w?orl?d!?",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
err: transform.ErrShortSrc,
|
|
t: Map(func(r rune) rune {
|
|
if r == utf8.RuneError {
|
|
return '?'
|
|
}
|
|
return r
|
|
}),
|
|
}} {
|
|
tt.check(t, i)
|
|
}
|
|
}
|
|
|
|
func TestRemove(t *testing.T) {
|
|
remove := Remove(Predicate(func(r rune) bool {
|
|
return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r)
|
|
}))
|
|
|
|
for i, tt := range []transformTest{
|
|
0: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "",
|
|
out: "",
|
|
outFull: "",
|
|
t: remove,
|
|
},
|
|
1: {
|
|
szDst: 0,
|
|
atEOF: true,
|
|
in: "aaaa",
|
|
out: "",
|
|
outFull: "",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
2: {
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "aaaa",
|
|
out: "",
|
|
outFull: "",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
3: {
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "baaaa",
|
|
out: "b",
|
|
outFull: "b",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
4: {
|
|
szDst: 2,
|
|
atEOF: true,
|
|
in: "açaaa",
|
|
out: "ç",
|
|
outFull: "ç",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
5: {
|
|
szDst: 2,
|
|
atEOF: true,
|
|
in: "aaaç",
|
|
out: "ç",
|
|
outFull: "ç",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
6: {
|
|
szDst: 2,
|
|
atEOF: false,
|
|
in: "a\x80",
|
|
out: "",
|
|
outFull: "\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
7: {
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "a\xc0",
|
|
out: "",
|
|
outFull: "\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
8: {
|
|
szDst: 1,
|
|
atEOF: false,
|
|
in: "a\xc2",
|
|
out: "",
|
|
outFull: "\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
9: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "\x80",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
10: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "\x80",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
11: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "\xc2",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
12: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "\xc2",
|
|
out: "",
|
|
outFull: "\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrShortSrc,
|
|
t: remove,
|
|
},
|
|
13: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "Hello \U00012345world!",
|
|
out: "Hll wrld!",
|
|
outFull: "Hll wrld!",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
14: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "Hello\x80 w\x80orl\xc0d!\xc0",
|
|
out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
|
|
outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
15: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "Hello\x80 w\x80orl\xc0d!\xc2",
|
|
out: "Hll\ufffd w\ufffdrl\ufffdd!",
|
|
outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
16: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
|
|
out: "Hello world!",
|
|
outFull: "Hello world!",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })),
|
|
},
|
|
17: {
|
|
szDst: 4,
|
|
atEOF: true,
|
|
in: "Hellø",
|
|
out: "Hll",
|
|
outFull: "Hllø",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
18: {
|
|
szDst: 4,
|
|
atEOF: false,
|
|
in: "Hellø",
|
|
out: "Hll",
|
|
outFull: "Hllø",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
19: {
|
|
szDst: 8,
|
|
atEOF: false,
|
|
in: "\x80Hello\uFF24\x80",
|
|
out: "\ufffdHll",
|
|
outFull: "\ufffdHll\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: remove,
|
|
},
|
|
20: {
|
|
szDst: 8,
|
|
atEOF: false,
|
|
in: "Hllll",
|
|
out: "Hllll",
|
|
outFull: "Hllll",
|
|
t: remove,
|
|
}} {
|
|
tt.check(t, i)
|
|
}
|
|
}
|
|
|
|
func TestReplaceIllFormed(t *testing.T) {
|
|
replace := ReplaceIllFormed()
|
|
|
|
for i, tt := range []transformTest{
|
|
0: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "",
|
|
out: "",
|
|
outFull: "",
|
|
t: replace,
|
|
},
|
|
1: {
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "aa",
|
|
out: "a",
|
|
outFull: "aa",
|
|
err: transform.ErrShortDst,
|
|
t: replace,
|
|
},
|
|
2: {
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "a\x80",
|
|
out: "a",
|
|
outFull: "a\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
3: {
|
|
szDst: 1,
|
|
atEOF: true,
|
|
in: "a\xc2",
|
|
out: "a",
|
|
outFull: "a\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
4: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "\x80",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
5: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "\x80",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
6: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "\xc2",
|
|
out: "\ufffd",
|
|
outFull: "\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
7: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "\xc2",
|
|
out: "",
|
|
outFull: "\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrShortSrc,
|
|
t: replace,
|
|
},
|
|
8: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "Hello world!",
|
|
out: "Hello world!",
|
|
outFull: "Hello world!",
|
|
t: replace,
|
|
},
|
|
9: {
|
|
szDst: large,
|
|
atEOF: true,
|
|
in: "Hello\x80 w\x80orl\xc2d!\xc2",
|
|
out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
|
|
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
10: {
|
|
szDst: large,
|
|
atEOF: false,
|
|
in: "Hello\x80 w\x80orl\xc2d!\xc2",
|
|
out: "Hello\ufffd w\ufffdorl\ufffdd!",
|
|
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
|
|
err: transform.ErrShortSrc,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
16: {
|
|
szDst: 10,
|
|
atEOF: false,
|
|
in: "\x80Hello\x80",
|
|
out: "\ufffdHello",
|
|
outFull: "\ufffdHello\ufffd",
|
|
err: transform.ErrShortDst,
|
|
errSpan: transform.ErrEndOfSpan,
|
|
t: replace,
|
|
},
|
|
17: {
|
|
szDst: 10,
|
|
atEOF: false,
|
|
in: "\ufffdHello\ufffd",
|
|
out: "\ufffdHello",
|
|
outFull: "\ufffdHello\ufffd",
|
|
err: transform.ErrShortDst,
|
|
t: replace,
|
|
},
|
|
} {
|
|
tt.check(t, i)
|
|
}
|
|
}
|
|
|
|
func TestMapAlloc(t *testing.T) {
|
|
if n := testtext.AllocsPerRun(3, func() {
|
|
Map(idem).Transform(nil, nil, false)
|
|
}); n > 0 {
|
|
t.Errorf("got %f; want 0", n)
|
|
}
|
|
}
|
|
|
|
// rmNop is a predicate that rejects every rune, so nothing is removed.
func rmNop(r rune) bool {
	return false
}
|
|
|
|
func TestRemoveAlloc(t *testing.T) {
|
|
if n := testtext.AllocsPerRun(3, func() {
|
|
Remove(Predicate(rmNop)).Transform(nil, nil, false)
|
|
}); n > 0 {
|
|
t.Errorf("got %f; want 0", n)
|
|
}
|
|
}
|
|
|
|
func TestReplaceIllFormedAlloc(t *testing.T) {
|
|
if n := testtext.AllocsPerRun(3, func() {
|
|
ReplaceIllFormed().Transform(nil, nil, false)
|
|
}); n > 0 {
|
|
t.Errorf("got %f; want 0", n)
|
|
}
|
|
}
|
|
|
|
func doBench(b *testing.B, t Transformer) {
|
|
for _, bc := range []struct{ name, data string }{
|
|
{"ascii", testtext.ASCII},
|
|
{"3byte", testtext.ThreeByteUTF8},
|
|
} {
|
|
dst := make([]byte, 2*len(bc.data))
|
|
src := []byte(bc.data)
|
|
|
|
testtext.Bench(b, bc.name+"/transform", func(b *testing.B) {
|
|
b.SetBytes(int64(len(src)))
|
|
for i := 0; i < b.N; i++ {
|
|
t.Transform(dst, src, true)
|
|
}
|
|
})
|
|
src = t.Bytes(src)
|
|
t.Reset()
|
|
testtext.Bench(b, bc.name+"/span", func(b *testing.B) {
|
|
b.SetBytes(int64(len(src)))
|
|
for i := 0; i < b.N; i++ {
|
|
t.Span(src, true)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func BenchmarkRemove(b *testing.B) {
|
|
doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' })))
|
|
}
|
|
|
|
func BenchmarkMapAll(b *testing.B) {
|
|
doBench(b, Map(func(r rune) rune { return 'a' }))
|
|
}
|
|
|
|
func BenchmarkMapNone(b *testing.B) {
|
|
doBench(b, Map(func(r rune) rune { return r }))
|
|
}
|
|
|
|
func BenchmarkReplaceIllFormed(b *testing.B) {
|
|
doBench(b, ReplaceIllFormed())
|
|
}
|
|
|
|
// input is a sample text made of 100 repetitions of a sentence that mixes
// ASCII with two-byte UTF-8 letters.
var input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100)
|