youtubebeat/vendor/github.com/elastic/beats/libbeat/common/match/cmp.go

279 lines
5.8 KiB
Go

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package match
import "regexp/syntax"
// Reference patterns, parsed once at package initialization. The predicate
// helpers in this file compare parsed expressions against these trees.
var (
	// Patterns consisting of `.*`, optionally anchored at one or both ends.
	patDotStar          = mustParse(`.*`)
	patNullBeginDotStar = mustParse(`^.*`)
	patNullEndDotStar   = mustParse(`.*$`)

	// Patterns for empty or whitespace-only input.
	patEmptyText      = mustParse(`^$`)
	patEmptyWhiteText = mustParse(`^\s*$`)

	// Text anchors and the single digit class.
	patBeginText = mustParse(`^`)
	patEndText   = mustParse(`$`)
	patDigits    = mustParse(`\d`)

	// Every spelling of "match any content" that isAnyMatch accepts.
	patAny1 = patDotStar
	patAny2 = mustParse(`^.*`)
	patAny3 = mustParse(`^.*$`)
	patAny4 = mustParse(`.*$`)
)
// isPrefixLiteral reports whether r is exactly a begin-of-text anchor followed
// by one literal, i.e. a pattern of the form `^PATTERN`.
func isPrefixLiteral(r *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat || len(r.Sub) != 2 {
		return false
	}
	anchor, body := r.Sub[0], r.Sub[1]
	return anchor.Op == syntax.OpBeginText && body.Op == syntax.OpLiteral
}
// isAltLiterals reports whether r is an alternation whose branches are all
// plain literals.
func isAltLiterals(r *syntax.Regexp) bool {
	allLiteral := r.Op == syntax.OpAlternate
	// The loop stops at the first branch that is not a literal.
	for i := 0; allLiteral && i < len(r.Sub); i++ {
		allLiteral = r.Sub[i].Op == syntax.OpLiteral
	}
	return allLiteral
}
// isExactLiteral reports whether r is a single literal anchored on both
// sides, i.e. a pattern of the form `^PATTERN$`.
func isExactLiteral(r *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat || len(r.Sub) != 3 {
		return false
	}
	begin, body, end := r.Sub[0], r.Sub[1], r.Sub[2]
	return begin.Op == syntax.OpBeginText &&
		body.Op == syntax.OpLiteral &&
		end.Op == syntax.OpEndText
}
// isOneOfLiterals reports whether r is an alternation of literals anchored on
// both sides: begin-of-text, the alternation, then end-of-text.
func isOneOfLiterals(r *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat || len(r.Sub) != 3 {
		return false
	}
	if r.Sub[0].Op != syntax.OpBeginText {
		return false
	}
	if !isAltLiterals(r.Sub[1]) {
		return false
	}
	return r.Sub[2].Op == syntax.OpEndText
}
// isPrefixAltLiterals reports whether r is a begin-of-text anchor followed by
// an alternation whose branches are all literals. The expression must consist
// of exactly those two terms.
func isPrefixAltLiterals(r *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat || len(r.Sub) != 2 {
		return false
	}
	if r.Sub[0].Op != syntax.OpBeginText {
		return false
	}

	alternation := r.Sub[1]
	if alternation.Op != syntax.OpAlternate {
		return false
	}
	for _, branch := range alternation.Sub {
		if branch.Op != syntax.OpLiteral {
			return false
		}
	}
	return true
}
// isPrefixNumDate reports whether r has the shape of an anchored numeric
// date/time prefix: a begin-of-text anchor, an optional leading literal, a
// multi-digit group, and then any number of literal-separator/multi-digit
// pairs. A trailing literal separator without digits after it is accepted.
//
// Expressions too short to contain a digit group (for example `^abc`) yield
// false instead of indexing past the end of r.Sub.
func isPrefixNumDate(r *syntax.Regexp) bool {
	// At least the anchor plus one more term is needed before indexing.
	if r.Op != syntax.OpConcat || len(r.Sub) < 2 || r.Sub[0].Op != syntax.OpBeginText {
		return false
	}

	// Skip an optional leading literal.
	i := 1
	if r.Sub[i].Op == syntax.OpLiteral {
		i++
	}

	// The expression may end right after the leading literal; otherwise it
	// must continue with digits `\d{n}` or `[0-9]{n}`.
	if i >= len(r.Sub) || !isMultiDigits(r.Sub[i]) {
		return false
	}
	i++

	for i < len(r.Sub) {
		// check separator
		if r.Sub[i].Op != syntax.OpLiteral {
			return false
		}
		i++

		// regex has 'OpLiteral' suffix, without any more digits/patterns following
		if i == len(r.Sub) {
			return true
		}

		// check digits
		if !isMultiDigits(r.Sub[i]) {
			return false
		}
		i++
	}

	return true
}
// isdotStar checks the term being `.*`.
func isdotStar(r *syntax.Regexp) bool {
return eqRegex(r, patDotStar)
}
func isEmptyText(r *syntax.Regexp) bool {
return eqRegex(r, patEmptyText)
}
func isEmptyTextWithWhitespace(r *syntax.Regexp) bool {
return eqRegex(r, patEmptyWhiteText)
}
func isAnyMatch(r *syntax.Regexp) bool {
return eqRegex(r, patAny1) ||
eqRegex(r, patAny2) ||
eqRegex(r, patAny3) ||
eqRegex(r, patAny4)
}
func isDigitMatch(r *syntax.Regexp) bool {
return eqRegex(r, patDigits)
}
// isMultiDigits reports whether r is a concatenation repeating one term
// (see isConcatRepetition) and that term is the digit pattern.
func isMultiDigits(r *syntax.Regexp) bool {
	if !isConcatRepetition(r) {
		return false
	}
	// Every element is the same node, so inspecting the first one suffices.
	return isDigitMatch(r.Sub[0])
}
// isConcatRepetition reports whether r is a concatenation in which every
// element is the very same node, repeated.
//
// Elements are compared by pointer, not by structure: per the original
// author's note, concat repetitions reuse references, so identical pointers
// identify a repetition. A concatenation without any elements is not a
// repetition and yields false.
func isConcatRepetition(r *syntax.Regexp) bool {
	// Guard the r.Sub[0] access below against an empty concatenation.
	if r.Op != syntax.OpConcat || len(r.Sub) == 0 {
		return false
	}

	first := r.Sub[0]
	for _, other := range r.Sub[1:] {
		if other != first { // concat repetitions reuse references => compare pointers
			return false
		}
	}
	return true
}
// eqRegex reports whether r and proto are structurally equal. Two nodes are
// equal when Op, Flags, Min and Max agree, their sub-expressions are pairwise
// equal (checked recursively), and their rune slices hold the same values.
// No other fields take part in the comparison.
func eqRegex(r, proto *syntax.Regexp) bool {
	// Cheap scalar and length checks first; they also make the indexed
	// accesses below safe.
	switch {
	case r.Op != proto.Op, r.Flags != proto.Flags:
		return false
	case r.Min != proto.Min, r.Max != proto.Max:
		return false
	case len(r.Sub) != len(proto.Sub), len(r.Rune) != len(proto.Rune):
		return false
	}

	for i, sub := range r.Sub {
		if !eqRegex(sub, proto.Sub[i]) {
			return false
		}
	}

	for i, c := range r.Rune {
		if c != proto.Rune[i] {
			return false
		}
	}

	return true
}
// eqPrefixAnyRegex reports whether r starts with at least one of the given
// prototypes, as decided by eqPrefixRegex. With no prototypes it returns
// false.
func eqPrefixAnyRegex(r *syntax.Regexp, protos ...*syntax.Regexp) bool {
	for i := range protos {
		if eqPrefixRegex(r, protos[i]) {
			return true
		}
	}
	return false
}
// eqPrefixRegex reports whether the concatenation r begins with proto.
//
// r must be an OpConcat node. If proto is a concatenation as well, its
// elements must equal the leading elements of r one by one; otherwise proto
// is compared against the first element of r alone.
func eqPrefixRegex(r, proto *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat {
		return false
	}

	// Single-term prototype: only the head of r matters.
	if proto.Op != syntax.OpConcat {
		return len(r.Sub) > 0 && eqRegex(r.Sub[0], proto)
	}

	n := len(proto.Sub)
	if n > len(r.Sub) {
		return false
	}
	for i := 0; i < n; i++ {
		if !eqRegex(r.Sub[i], proto.Sub[i]) {
			return false
		}
	}
	return true
}
// eqSuffixAnyRegex reports whether r ends with at least one of the given
// prototypes, as decided by eqSuffixRegex. With no prototypes it returns
// false.
func eqSuffixAnyRegex(r *syntax.Regexp, protos ...*syntax.Regexp) bool {
	for i := range protos {
		if eqSuffixRegex(r, protos[i]) {
			return true
		}
	}
	return false
}
// eqSuffixRegex reports whether the concatenation r ends with proto.
//
// r must be an OpConcat node. If proto is a concatenation as well, its
// elements must equal the trailing elements of r one by one; otherwise proto
// is compared against the last element of r alone.
func eqSuffixRegex(r, proto *syntax.Regexp) bool {
	if r.Op != syntax.OpConcat {
		return false
	}

	// Single-term prototype: only the tail of r matters.
	if proto.Op != syntax.OpConcat {
		last := len(r.Sub) - 1
		return last >= 0 && eqRegex(r.Sub[last], proto)
	}

	// offset is the index in r.Sub where the candidate suffix begins.
	offset := len(r.Sub) - len(proto.Sub)
	if offset < 0 {
		return false
	}
	for i, want := range proto.Sub {
		if !eqRegex(r.Sub[offset+i], want) {
			return false
		}
	}
	return true
}
// mustParse parses pattern with the syntax.Perl flags and returns the
// resulting syntax tree. It panics with the parse error if pattern is invalid.
func mustParse(pattern string) *syntax.Regexp {
	parsed, err := syntax.Parse(pattern, syntax.Perl)
	if err == nil {
		return parsed
	}
	panic(err)
}