260 lines
6.6 KiB
Go
260 lines
6.6 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package rangetable
|
|
|
|
import (
|
|
"unicode"
|
|
)
|
|
|
|
// atEnd is used to mark a completed iteration.
|
|
const atEnd = unicode.MaxRune + 1
|
|
|
|
// Merge returns a new RangeTable that is the union of the given tables.
|
|
// It can also be used to compact user-created RangeTables. The entries in
|
|
// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
|
|
//
|
|
// A lookup in the resulting table can be several times faster than using In
|
|
// directly on the ranges. Merge is an expensive operation, however, and only
|
|
// makes sense if one intends to use the result for more than a couple of
|
|
// hundred lookups.
|
|
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
|
|
rt := &unicode.RangeTable{}
|
|
if len(ranges) == 0 {
|
|
return rt
|
|
}
|
|
|
|
iter := tablesIter(make([]tableIndex, len(ranges)))
|
|
|
|
for i, t := range ranges {
|
|
iter[i] = tableIndex{t, 0, atEnd}
|
|
if len(t.R16) > 0 {
|
|
iter[i].next = rune(t.R16[0].Lo)
|
|
}
|
|
}
|
|
|
|
if r0 := iter.next16(); r0.Stride != 0 {
|
|
for {
|
|
r1 := iter.next16()
|
|
if r1.Stride == 0 {
|
|
rt.R16 = append(rt.R16, r0)
|
|
break
|
|
}
|
|
stride := r1.Lo - r0.Hi
|
|
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
|
|
// Fully merge the next range into the previous one.
|
|
r0.Hi, r0.Stride = r1.Hi, stride
|
|
continue
|
|
} else if stride == r0.Stride {
|
|
// Move the first element of r1 to r0. This may eliminate an
|
|
// entry.
|
|
r0.Hi = r1.Lo
|
|
r0.Stride = stride
|
|
r1.Lo = r1.Lo + r1.Stride
|
|
if r1.Lo > r1.Hi {
|
|
continue
|
|
}
|
|
}
|
|
rt.R16 = append(rt.R16, r0)
|
|
r0 = r1
|
|
}
|
|
}
|
|
|
|
for i, t := range ranges {
|
|
iter[i] = tableIndex{t, 0, atEnd}
|
|
if len(t.R32) > 0 {
|
|
iter[i].next = rune(t.R32[0].Lo)
|
|
}
|
|
}
|
|
|
|
if r0 := iter.next32(); r0.Stride != 0 {
|
|
for {
|
|
r1 := iter.next32()
|
|
if r1.Stride == 0 {
|
|
rt.R32 = append(rt.R32, r0)
|
|
break
|
|
}
|
|
stride := r1.Lo - r0.Hi
|
|
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
|
|
// Fully merge the next range into the previous one.
|
|
r0.Hi, r0.Stride = r1.Hi, stride
|
|
continue
|
|
} else if stride == r0.Stride {
|
|
// Move the first element of r1 to r0. This may eliminate an
|
|
// entry.
|
|
r0.Hi = r1.Lo
|
|
r1.Lo = r1.Lo + r1.Stride
|
|
if r1.Lo > r1.Hi {
|
|
continue
|
|
}
|
|
}
|
|
rt.R32 = append(rt.R32, r0)
|
|
r0 = r1
|
|
}
|
|
}
|
|
|
|
for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
|
|
rt.LatinOffset = i + 1
|
|
}
|
|
|
|
return rt
|
|
}
|
|
|
|
type tableIndex struct {
|
|
t *unicode.RangeTable
|
|
p uint32
|
|
next rune
|
|
}
|
|
|
|
type tablesIter []tableIndex
|
|
|
|
// sortIter does an insertion sort using the next field of tableIndex. Insertion
|
|
// sort is a good sorting algorithm for this case.
|
|
func sortIter(t []tableIndex) {
|
|
for i := range t {
|
|
for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
|
|
t[j], t[j-1] = t[j-1], t[j]
|
|
}
|
|
}
|
|
}
|
|
|
|
// next16 finds the ranged to be added to the table. If ranges overlap between
|
|
// multiple tables it clips the result to a non-overlapping range if the
|
|
// elements are not fully subsumed. It returns a zero range if there are no more
|
|
// ranges.
|
|
func (ti tablesIter) next16() unicode.Range16 {
|
|
sortIter(ti)
|
|
|
|
t0 := ti[0]
|
|
if t0.next == atEnd {
|
|
return unicode.Range16{}
|
|
}
|
|
r0 := t0.t.R16[t0.p]
|
|
r0.Lo = uint16(t0.next)
|
|
|
|
// We restrict the Hi of the current range if it overlaps with another range.
|
|
for i := range ti {
|
|
tn := ti[i]
|
|
// Since our tableIndices are sorted by next, we can break if the there
|
|
// is no overlap. The first value of a next range can always be merged
|
|
// into the current one, so we can break in case of equality as well.
|
|
if rune(r0.Hi) <= tn.next {
|
|
break
|
|
}
|
|
rn := tn.t.R16[tn.p]
|
|
rn.Lo = uint16(tn.next)
|
|
|
|
// Limit r0.Hi based on next ranges in list, but allow it to overlap
|
|
// with ranges as long as it subsumes it.
|
|
m := (rn.Lo - r0.Lo) % r0.Stride
|
|
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
|
|
// Overlap, take the min of the two Hi values: for simplicity's sake
|
|
// we only process one range at a time.
|
|
if r0.Hi > rn.Hi {
|
|
r0.Hi = rn.Hi
|
|
}
|
|
} else {
|
|
// Not a compatible stride. Set to the last possible value before
|
|
// rn.Lo, but ensure there is at least one value.
|
|
if x := rn.Lo - m; r0.Lo <= x {
|
|
r0.Hi = x
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
// Update the next values for each table.
|
|
for i := range ti {
|
|
tn := &ti[i]
|
|
if rune(r0.Hi) < tn.next {
|
|
break
|
|
}
|
|
rn := tn.t.R16[tn.p]
|
|
stride := rune(rn.Stride)
|
|
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
|
|
if rune(rn.Hi) < tn.next {
|
|
if tn.p++; int(tn.p) == len(tn.t.R16) {
|
|
tn.next = atEnd
|
|
} else {
|
|
tn.next = rune(tn.t.R16[tn.p].Lo)
|
|
}
|
|
}
|
|
}
|
|
|
|
if r0.Lo == r0.Hi {
|
|
r0.Stride = 1
|
|
}
|
|
|
|
return r0
|
|
}
|
|
|
|
// next32 finds the ranged to be added to the table. If ranges overlap between
|
|
// multiple tables it clips the result to a non-overlapping range if the
|
|
// elements are not fully subsumed. It returns a zero range if there are no more
|
|
// ranges.
|
|
func (ti tablesIter) next32() unicode.Range32 {
|
|
sortIter(ti)
|
|
|
|
t0 := ti[0]
|
|
if t0.next == atEnd {
|
|
return unicode.Range32{}
|
|
}
|
|
r0 := t0.t.R32[t0.p]
|
|
r0.Lo = uint32(t0.next)
|
|
|
|
// We restrict the Hi of the current range if it overlaps with another range.
|
|
for i := range ti {
|
|
tn := ti[i]
|
|
// Since our tableIndices are sorted by next, we can break if the there
|
|
// is no overlap. The first value of a next range can always be merged
|
|
// into the current one, so we can break in case of equality as well.
|
|
if rune(r0.Hi) <= tn.next {
|
|
break
|
|
}
|
|
rn := tn.t.R32[tn.p]
|
|
rn.Lo = uint32(tn.next)
|
|
|
|
// Limit r0.Hi based on next ranges in list, but allow it to overlap
|
|
// with ranges as long as it subsumes it.
|
|
m := (rn.Lo - r0.Lo) % r0.Stride
|
|
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
|
|
// Overlap, take the min of the two Hi values: for simplicity's sake
|
|
// we only process one range at a time.
|
|
if r0.Hi > rn.Hi {
|
|
r0.Hi = rn.Hi
|
|
}
|
|
} else {
|
|
// Not a compatible stride. Set to the last possible value before
|
|
// rn.Lo, but ensure there is at least one value.
|
|
if x := rn.Lo - m; r0.Lo <= x {
|
|
r0.Hi = x
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
// Update the next values for each table.
|
|
for i := range ti {
|
|
tn := &ti[i]
|
|
if rune(r0.Hi) < tn.next {
|
|
break
|
|
}
|
|
rn := tn.t.R32[tn.p]
|
|
stride := rune(rn.Stride)
|
|
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
|
|
if rune(rn.Hi) < tn.next {
|
|
if tn.p++; int(tn.p) == len(tn.t.R32) {
|
|
tn.next = atEnd
|
|
} else {
|
|
tn.next = rune(tn.t.R32[tn.p].Lo)
|
|
}
|
|
}
|
|
}
|
|
|
|
if r0.Lo == r0.Hi {
|
|
r0.Stride = 1
|
|
}
|
|
|
|
return r0
|
|
}
|