// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
|
|
|
|
package match
|
|
|
|
import "regexp/syntax"
|
|
|
|
// common predefined patterns
|
|
var (
|
|
patDotStar = mustParse(`.*`)
|
|
patNullBeginDotStar = mustParse(`^.*`)
|
|
patNullEndDotStar = mustParse(`.*$`)
|
|
|
|
patEmptyText = mustParse(`^$`)
|
|
patEmptyWhiteText = mustParse(`^\s*$`)
|
|
|
|
// patterns matching any content
|
|
patAny1 = patDotStar
|
|
patAny2 = mustParse(`^.*`)
|
|
patAny3 = mustParse(`^.*$`)
|
|
patAny4 = mustParse(`.*$`)
|
|
|
|
patBeginText = mustParse(`^`)
|
|
patEndText = mustParse(`$`)
|
|
|
|
patDigits = mustParse(`\d`)
|
|
)
|
|
|
|
// isPrefixLiteral checks regular expression being literal checking string
|
|
// starting with literal pattern (like '^PATTERN')
|
|
func isPrefixLiteral(r *syntax.Regexp) bool {
|
|
return r.Op == syntax.OpConcat &&
|
|
len(r.Sub) == 2 &&
|
|
r.Sub[0].Op == syntax.OpBeginText &&
|
|
r.Sub[1].Op == syntax.OpLiteral
|
|
}
|
|
|
|
func isAltLiterals(r *syntax.Regexp) bool {
|
|
if r.Op != syntax.OpAlternate {
|
|
return false
|
|
}
|
|
|
|
for _, sub := range r.Sub {
|
|
if sub.Op != syntax.OpLiteral {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func isExactLiteral(r *syntax.Regexp) bool {
|
|
return r.Op == syntax.OpConcat &&
|
|
len(r.Sub) == 3 &&
|
|
r.Sub[0].Op == syntax.OpBeginText &&
|
|
r.Sub[1].Op == syntax.OpLiteral &&
|
|
r.Sub[2].Op == syntax.OpEndText
|
|
}
|
|
|
|
func isOneOfLiterals(r *syntax.Regexp) bool {
|
|
return r.Op == syntax.OpConcat &&
|
|
len(r.Sub) == 3 &&
|
|
r.Sub[0].Op == syntax.OpBeginText &&
|
|
isAltLiterals(r.Sub[1]) &&
|
|
r.Sub[2].Op == syntax.OpEndText
|
|
}
|
|
|
|
// isPrefixAltLiterals checks regular expression being alternative literals
|
|
// starting with literal pattern (like '^PATTERN')
|
|
func isPrefixAltLiterals(r *syntax.Regexp) bool {
|
|
isPrefixAlt := r.Op == syntax.OpConcat &&
|
|
len(r.Sub) == 2 &&
|
|
r.Sub[0].Op == syntax.OpBeginText &&
|
|
r.Sub[1].Op == syntax.OpAlternate
|
|
if !isPrefixAlt {
|
|
return false
|
|
}
|
|
|
|
for _, sub := range r.Sub[1].Sub {
|
|
if sub.Op != syntax.OpLiteral {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func isPrefixNumDate(r *syntax.Regexp) bool {
|
|
if r.Op != syntax.OpConcat || r.Sub[0].Op != syntax.OpBeginText {
|
|
return false
|
|
}
|
|
|
|
i := 1
|
|
if r.Sub[i].Op == syntax.OpLiteral {
|
|
i++
|
|
}
|
|
|
|
// check starts with digits `\d{n}` or `[0-9]{n}`
|
|
if !isMultiDigits(r.Sub[i]) {
|
|
return false
|
|
}
|
|
i++
|
|
|
|
for i < len(r.Sub) {
|
|
// check separator
|
|
if r.Sub[i].Op != syntax.OpLiteral {
|
|
return false
|
|
}
|
|
i++
|
|
|
|
// regex has 'OpLiteral' suffix, without any more digits/patterns following
|
|
if i == len(r.Sub) {
|
|
return true
|
|
}
|
|
|
|
// check digits
|
|
if !isMultiDigits(r.Sub[i]) {
|
|
return false
|
|
}
|
|
i++
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// isdotStar checks the term being `.*`.
|
|
func isdotStar(r *syntax.Regexp) bool {
|
|
return eqRegex(r, patDotStar)
|
|
}
|
|
|
|
func isEmptyText(r *syntax.Regexp) bool {
|
|
return eqRegex(r, patEmptyText)
|
|
}
|
|
|
|
func isEmptyTextWithWhitespace(r *syntax.Regexp) bool {
|
|
return eqRegex(r, patEmptyWhiteText)
|
|
}
|
|
|
|
func isAnyMatch(r *syntax.Regexp) bool {
|
|
return eqRegex(r, patAny1) ||
|
|
eqRegex(r, patAny2) ||
|
|
eqRegex(r, patAny3) ||
|
|
eqRegex(r, patAny4)
|
|
}
|
|
|
|
func isDigitMatch(r *syntax.Regexp) bool {
|
|
return eqRegex(r, patDigits)
|
|
}
|
|
|
|
func isMultiDigits(r *syntax.Regexp) bool {
|
|
return isConcatRepetition(r) && isDigitMatch(r.Sub[0])
|
|
}
|
|
|
|
func isConcatRepetition(r *syntax.Regexp) bool {
|
|
if r.Op != syntax.OpConcat {
|
|
return false
|
|
}
|
|
|
|
first := r.Sub[0]
|
|
for _, other := range r.Sub {
|
|
if other != first { // concat repetitions reuse references => compare pointers
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func eqRegex(r, proto *syntax.Regexp) bool {
|
|
unmatchable := r.Op != proto.Op || r.Flags != proto.Flags ||
|
|
(r.Min != proto.Min) || (r.Max != proto.Max) ||
|
|
(len(r.Sub) != len(proto.Sub)) ||
|
|
(len(r.Rune) != len(proto.Rune))
|
|
|
|
if unmatchable {
|
|
return false
|
|
}
|
|
|
|
for i := range r.Sub {
|
|
if !eqRegex(r.Sub[i], proto.Sub[i]) {
|
|
return false
|
|
}
|
|
}
|
|
|
|
for i := range r.Rune {
|
|
if r.Rune[i] != proto.Rune[i] {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func eqPrefixAnyRegex(r *syntax.Regexp, protos ...*syntax.Regexp) bool {
|
|
for _, proto := range protos {
|
|
if eqPrefixRegex(r, proto) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func eqPrefixRegex(r, proto *syntax.Regexp) bool {
|
|
if r.Op != syntax.OpConcat {
|
|
return false
|
|
}
|
|
|
|
if proto.Op != syntax.OpConcat {
|
|
if len(r.Sub) == 0 {
|
|
return false
|
|
}
|
|
return eqRegex(r.Sub[0], proto)
|
|
}
|
|
|
|
if len(r.Sub) < len(proto.Sub) {
|
|
return false
|
|
}
|
|
|
|
for i := range proto.Sub {
|
|
if !eqRegex(r.Sub[i], proto.Sub[i]) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func eqSuffixAnyRegex(r *syntax.Regexp, protos ...*syntax.Regexp) bool {
|
|
for _, proto := range protos {
|
|
if eqSuffixRegex(r, proto) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func eqSuffixRegex(r, proto *syntax.Regexp) bool {
|
|
if r.Op != syntax.OpConcat {
|
|
return false
|
|
}
|
|
|
|
if proto.Op != syntax.OpConcat {
|
|
i := len(r.Sub) - 1
|
|
if i < 0 {
|
|
return false
|
|
}
|
|
return eqRegex(r.Sub[i], proto)
|
|
}
|
|
|
|
if len(r.Sub) < len(proto.Sub) {
|
|
return false
|
|
}
|
|
|
|
d := len(r.Sub) - len(proto.Sub)
|
|
for i := range proto.Sub {
|
|
if !eqRegex(r.Sub[d+i], proto.Sub[i]) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
func mustParse(pattern string) *syntax.Regexp {
|
|
r, err := syntax.Parse(pattern, syntax.Perl)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return r
|
|
}
|