youtubebeat/vendor/github.com/elastic/beats/filebeat/input/log/input.go

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package log
import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/elastic/beats/filebeat/channel"
"github.com/elastic/beats/filebeat/harvester"
"github.com/elastic/beats/filebeat/input"
"github.com/elastic/beats/filebeat/input/file"
"github.com/elastic/beats/filebeat/util"
"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/common/atomic"
"github.com/elastic/beats/libbeat/logp"
"github.com/elastic/beats/libbeat/monitoring"
)
const (
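// recursiveGlobDepth limits how many directory levels a recursive glob pattern ("**" in a path)
// is expanded to when the configured paths are resolved.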
recursiveGlobDepth = 8
harvesterErrMsg = "Harvester could not be started on new file: %s, Err: %s"
)
var (
filesRenamed = monitoring.NewInt(nil, "filebeat.input.log.files.renamed")
filesTruncated = monitoring.NewInt(nil, "filebeat.input.log.files.truncated")
harvesterSkipped = monitoring.NewInt(nil, "filebeat.harvester.skipped")
errHarvesterLimit = errors.New("harvester limit reached")
)
func init() {
err := input.Register("log", NewInput)
if err != nil {
panic(err)
}
}
// Input contains the input and its config
type Input struct {
cfg *common.Config
config config
states *file.States
harvesters *harvester.Registry
outlet channel.Outleter
stateOutlet channel.Outleter
done chan struct{}
numHarvesters atomic.Uint32
meta map[string]string
}
// NewInput instantiates a new Log
func NewInput(
cfg *common.Config,
outlet channel.Connector,
context input.Context,
) (input.Input, error) {
// Note: underlying output.
// The input and harvester have different requirements
// for when the outlets must be closed/unblocked.
// The outlet generated here is the underlying outlet, only closed
// once all workers have been shut down.
// For state updates and events, separate sub-outlets will be used.
out, err := outlet(cfg, context.DynamicFields)
if err != nil {
return nil, err
}
// stateOut will only be unblocked once the beat is shut down.
// Otherwise it can block on a full publisher pipeline, so state updates
// can still be forwarded correctly to the registrar.
stateOut := channel.CloseOnSignal(channel.SubOutlet(out), context.BeatDone)
meta := context.Meta
if len(meta) == 0 {
meta = nil
}
p := &Input{
config: defaultConfig,
cfg: cfg,
harvesters: harvester.NewRegistry(),
outlet: out,
stateOutlet: stateOut,
states: file.NewStates(),
done: context.Done,
meta: meta,
}
if err := cfg.Unpack(&p.config); err != nil {
return nil, err
}
if err := p.config.resolveRecursiveGlobs(); err != nil {
return nil, fmt.Errorf("Failed to resolve recursive globs in config: %v", err)
}
if err := p.config.normalizeGlobPatterns(); err != nil {
return nil, fmt.Errorf("Failed to normalize globs patterns: %v", err)
}
// Create empty harvester to check if configs are fine
// TODO: Do config validation instead
_, err = p.createHarvester(file.State{}, nil)
if err != nil {
return nil, err
}
if len(p.config.Paths) == 0 {
return nil, fmt.Errorf("each input must have at least one path defined")
}
err = p.loadStates(context.States)
if err != nil {
return nil, err
}
logp.Info("Configured paths: %v", p.config.Paths)
return p, nil
}
// loadStates loads states into the input
// It goes through all states coming from the registry. Only the states which match the glob patterns of
// the input will be loaded and updated. All other states will not be touched.
func (p *Input) loadStates(states []file.State) error {
logp.Debug("input", "exclude_files: %s. Number of stats: %d", p.config.ExcludeFiles, len(states))
for _, state := range states {
// Check if state source belongs to this input. If yes, update the state.
if p.matchesFile(state.Source) && p.matchesMeta(state.Meta) {
state.TTL = -1
// In case an input is started with an unfinished state matching the glob pattern
if !state.Finished {
return fmt.Errorf("Can only start an input when all related states are finished: %+v", state)
}
// Update input states and send new states to registry
err := p.updateState(state)
if err != nil {
logp.Err("Problem putting initial state: %+v", err)
return err
}
}
}
logp.Debug("input", "input with previous states loaded: %v", p.states.Count())
return nil
}
// Run runs the input
func (p *Input) Run() {
logp.Debug("input", "Start next scan")
// TailFiles is like ignore_older = 1ns and only on startup
if p.config.TailFiles {
ignoreOlder := p.config.IgnoreOlder
// Overwrite ignore_older for the first scan
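// 1ns means every existing file falls under ignore_older on this first scan, so its
// state offset is set to the end of the file (see handleIgnoreOlder).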
p.config.IgnoreOlder = 1
defer func() {
// Reset ignore_older after first run
p.config.IgnoreOlder = ignoreOlder
// Disable tail_files after the first run
p.config.TailFiles = false
}()
}
p.scan()
// It is important that a first scan is run before cleanup to make sure all new states are read first
if p.config.CleanInactive > 0 || p.config.CleanRemoved {
beforeCount := p.states.Count()
cleanedStates, pendingClean := p.states.Cleanup()
logp.Debug("input", "input states cleaned up. Before: %d, After: %d, Pending: %d",
beforeCount, beforeCount-cleanedStates, pendingClean)
}
// Marking removed files to be cleaned up. Cleanup happens after next scan to make sure all states are updated first
if p.config.CleanRemoved {
for _, state := range p.states.GetStates() {
// os.Stat will return an error in case the file does not exist
stat, err := os.Stat(state.Source)
if err != nil {
if os.IsNotExist(err) {
p.removeState(state)
logp.Debug("input", "Remove state for file as file removed: %s", state.Source)
} else {
logp.Err("input state for %s was not removed: %s", state.Source, err)
}
} else {
// Check if existing source on disk and state are the same. Remove if not the case.
newState := file.NewState(stat, state.Source, p.config.Type, p.meta)
if !newState.FileStateOS.IsSame(state.FileStateOS) {
p.removeState(state)
logp.Debug("input", "Remove state for file as file removed or renamed: %s", state.Source)
}
}
}
}
}
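// removeState marks a state for removal by setting its TTL to 0 and forwarding the update
// to the registrar. States of files still being harvested are left untouched.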
func (p *Input) removeState(state file.State) {
// Only clean up files where state is Finished
if !state.Finished {
logp.Debug("input", "State for file not removed because harvester not finished: %s", state.Source)
return
}
state.TTL = 0
err := p.updateState(state)
if err != nil {
logp.Err("File cleanup state update error: %s", err)
}
}
// getFiles returns all files which have to be harvested
// All globs are expanded and then directories and excluded files are removed
func (p *Input) getFiles() map[string]os.FileInfo {
paths := map[string]os.FileInfo{}
for _, path := range p.config.Paths {
matches, err := filepath.Glob(path)
if err != nil {
logp.Err("glob(%s) failed: %v", path, err)
continue
}
OUTER:
// Check any matched files to see if we need to start a harvester
for _, file := range matches {
// check if the file is in the exclude_files list
if p.isFileExcluded(file) {
logp.Debug("input", "Exclude file: %s", file)
continue
}
// Fetch Lstat file info to also detect symlinks
fileInfo, err := os.Lstat(file)
if err != nil {
logp.Debug("input", "lstat(%s) failed: %s", file, err)
continue
}
if fileInfo.IsDir() {
logp.Debug("input", "Skipping directory: %s", file)
continue
}
isSymlink := fileInfo.Mode()&os.ModeSymlink > 0
if isSymlink && !p.config.Symlinks {
logp.Debug("input", "File %s skipped as it is a symlink.", file)
continue
}
// Fetch Stat file info which fetches the inode. In case of a symlink, the original inode is fetched
fileInfo, err = os.Stat(file)
if err != nil {
logp.Debug("input", "stat(%s) failed: %s", file, err)
continue
}
// If symlinks are enabled, check that the original file is not part of the same input.
// If the original is harvested by another input, the states would potentially overwrite each other.
if p.config.Symlinks {
for _, finfo := range paths {
if os.SameFile(finfo, fileInfo) {
logp.Info("Same file found as symlink and originap. Skipping file: %s", file)
continue OUTER
}
}
}
paths[file] = fileInfo
}
}
return paths
}
// matchesFile returns true if the given filePath is part of this input, i.e. it matches one of its glob patterns
func (p *Input) matchesFile(filePath string) bool {
// Path is cleaned to ensure we always compare clean paths
filePath = filepath.Clean(filePath)
for _, glob := range p.config.Paths {
// Glob is cleaned to ensure we always compare clean paths
glob = filepath.Clean(glob)
// Evaluate if glob matches filePath
match, err := filepath.Match(glob, filePath)
if err != nil {
logp.Debug("input", "Error matching glob: %s", err)
continue
}
// Check if file is not excluded
if match && !p.isFileExcluded(filePath) {
return true
}
}
return false
}
// matchesMeta returns true if the given meta is equal to this input's meta, false otherwise
func (p *Input) matchesMeta(meta map[string]string) bool {
if len(meta) != len(p.meta) {
return false
}
for k, v := range p.meta {
if meta[k] != v {
return false
}
}
return true
}
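// FileSortInfo pairs a path with its os.FileInfo so scan candidates can be sorted.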
type FileSortInfo struct {
info os.FileInfo
path string
}
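// getSortInfos converts the path-to-FileInfo map into a slice of FileSortInfo for sorting.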
func getSortInfos(paths map[string]os.FileInfo) []FileSortInfo {
sortInfos := make([]FileSortInfo, 0, len(paths))
for path, info := range paths {
sortInfo := FileSortInfo{info: info, path: path}
sortInfos = append(sortInfos, sortInfo)
}
return sortInfos
}
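// getSortedFiles sorts the scan candidates by modification time or file name, ascending or
// descending, according to the scan.sort and scan.order settings.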
func getSortedFiles(scanOrder string, scanSort string, sortInfos []FileSortInfo) ([]FileSortInfo, error) {
var sortFunc func(i, j int) bool
switch scanSort {
case "modtime":
switch scanOrder {
case "asc":
sortFunc = func(i, j int) bool {
return sortInfos[i].info.ModTime().Before(sortInfos[j].info.ModTime())
}
case "desc":
sortFunc = func(i, j int) bool {
return sortInfos[i].info.ModTime().After(sortInfos[j].info.ModTime())
}
default:
return nil, fmt.Errorf("Unexpected value for scan.order: %v", scanOrder)
}
case "filename":
switch scanOrder {
case "asc":
sortFunc = func(i, j int) bool {
return strings.Compare(sortInfos[i].info.Name(), sortInfos[j].info.Name()) < 0
}
case "desc":
sortFunc = func(i, j int) bool {
return strings.Compare(sortInfos[i].info.Name(), sortInfos[j].info.Name()) > 0
}
default:
return nil, fmt.Errorf("Unexpected value for scan.order: %v", scanOrder)
}
default:
return nil, fmt.Errorf("Unexpected value for scan.sort: %v", scanSort)
}
if sortFunc != nil {
sort.Slice(sortInfos, sortFunc)
}
return sortInfos, nil
}
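// getFileState builds a new file.State for the given path, resolving it to an absolute path first.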
func getFileState(path string, info os.FileInfo, p *Input) (file.State, error) {
var err error
var absolutePath string
absolutePath, err = filepath.Abs(path)
if err != nil {
return file.State{}, fmt.Errorf("could not fetch abs path for file %s: %s", absolutePath, err)
}
logp.Debug("input", "Check file for harvesting: %s", absolutePath)
// Create new state for comparison
newState := file.NewState(info, absolutePath, p.config.Type, p.meta)
return newState, nil
}
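// getKeys returns the paths of the scan candidates in map iteration order; used when no scan.sort is configured.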
func getKeys(paths map[string]os.FileInfo) []string {
files := make([]string, 0)
for file := range paths {
files = append(files, file)
}
return files
}
// scan scans all configured paths/globs and starts harvesters for files which need one
func (p *Input) scan() {
var sortInfos []FileSortInfo
var files []string
paths := p.getFiles()
var err error
if p.config.ScanSort != "" {
sortInfos, err = getSortedFiles(p.config.ScanOrder, p.config.ScanSort, getSortInfos(paths))
if err != nil {
logp.Err("Failed to sort files during scan due to error %s", err)
}
}
if sortInfos == nil {
files = getKeys(paths)
}
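// Walk the candidates either in sorted order or, if no sorting was requested, in map order.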
for i := 0; i < len(paths); i++ {
var path string
var info os.FileInfo
if sortInfos == nil {
path = files[i]
info = paths[path]
} else {
path = sortInfos[i].path
info = sortInfos[i].info
}
select {
case <-p.done:
logp.Info("Scan aborted because input stopped.")
return
default:
}
newState, err := getFileState(path, info, p)
if err != nil {
logp.Err("Skipping file %s due to error %s", path, err)
continue
}
// Load last state
lastState := p.states.FindPrevious(newState)
// Ignores all files which fall under ignore_older
if p.isIgnoreOlder(newState) {
err := p.handleIgnoreOlder(lastState, newState)
if err != nil {
logp.Err("Updating ignore_older state error: %s", err)
}
continue
}
// Decides if previous state exists
if lastState.IsEmpty() {
logp.Debug("input", "Start harvester for new file: %s", newState.Source)
err := p.startHarvester(newState, 0)
if err == errHarvesterLimit {
logp.Debug("input", harvesterErrMsg, newState.Source, err)
continue
}
if err != nil {
logp.Err(harvesterErrMsg, newState.Source, err)
}
} else {
p.harvestExistingFile(newState, lastState)
}
}
}
// harvestExistingFile continues harvesting a file with a known state if needed
func (p *Input) harvestExistingFile(newState file.State, oldState file.State) {
logp.Debug("input", "Update existing file for harvesting: %s, offset: %v", newState.Source, oldState.Offset)
// No harvester is running for the file, start a new harvester
// It is important here that only the size is checked and not modification time, as modification time could be incorrect on windows
// https://blogs.technet.microsoft.com/asiasupp/2010/12/14/file-date-modified-property-are-not-updating-while-modifying-a-file-without-closing-it/
if oldState.Finished && newState.Fileinfo.Size() > oldState.Offset {
// Resume harvesting of an old file we've stopped harvesting from
// With force_close_older this could also cause a new harvester to be started after each scan even when it is not needed
// One problem with comparing modTime is that it is in seconds, and scans can happen more than once a second
logp.Debug("input", "Resuming harvesting of file: %s, offset: %d, new size: %d", newState.Source, oldState.Offset, newState.Fileinfo.Size())
err := p.startHarvester(newState, oldState.Offset)
if err != nil {
logp.Err("Harvester could not be started on existing file: %s, Err: %s", newState.Source, err)
}
return
}
// File size was reduced -> truncated file
if oldState.Finished && newState.Fileinfo.Size() < oldState.Offset {
logp.Debug("input", "Old file was truncated. Starting from the beginning: %s, offset: %d, new size: %d ", newState.Source, newState.Fileinfo.Size())
err := p.startHarvester(newState, 0)
if err != nil {
logp.Err("Harvester could not be started on truncated file: %s, Err: %s", newState.Source, err)
}
filesTruncated.Add(1)
return
}
// Check if file was renamed
if oldState.Source != "" && oldState.Source != newState.Source {
// This does not start a new harvester as it is assumed that the old harvester is still running
// or no new lines were detected. It only sends an event status update to make sure the new name is persisted.
logp.Debug("input", "File rename was detected: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)
if oldState.Finished {
logp.Debug("input", "Updating state for renamed file: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)
// Update state because of file rotation
oldState.Source = newState.Source
err := p.updateState(oldState)
if err != nil {
logp.Err("File rotation state update error: %s", err)
}
filesRenamed.Add(1)
} else {
logp.Debug("input", "File rename detected but harvester not finished yet.")
}
}
if !oldState.Finished {
// Nothing to do. Harvester is still running and file was not renamed
logp.Debug("input", "Harvester for file is still running: %s", newState.Source)
} else {
logp.Debug("input", "File didn't change: %s", newState.Source)
}
}
// handleIgnoreOlder handles states which fall under ignore_older
// Based on the state information it decides whether the state has to be updated
func (p *Input) handleIgnoreOlder(lastState, newState file.State) error {
logp.Debug("input", "Ignore file because ignore_older reached: %s", newState.Source)
if !lastState.IsEmpty() {
if !lastState.Finished {
logp.Info("File is falling under ignore_older before harvesting is finished. Adjust your close_* settings: %s", newState.Source)
}
// Old state exists, no need to update it
return nil
}
// Make sure file is not falling under clean_inactive yet
if p.isCleanInactive(newState) {
logp.Debug("input", "Do not write state for ignore_older because clean_inactive reached")
return nil
}
// Set offset to end of file to be consistent with files which were harvested before
// See https://github.com/elastic/beats/pull/2907
newState.Offset = newState.Fileinfo.Size()
// Write state for ignore_older file as none exists yet
newState.Finished = true
err := p.updateState(newState)
if err != nil {
return err
}
return nil
}
// isFileExcluded checks if the given path should be excluded
func (p *Input) isFileExcluded(file string) bool {
patterns := p.config.ExcludeFiles
return len(patterns) > 0 && harvester.MatchAny(patterns, file)
}
// isIgnoreOlder checks if the given state reached ignore_older
func (p *Input) isIgnoreOlder(state file.State) bool {
// ignore_older is disabled
if p.config.IgnoreOlder == 0 {
return false
}
modTime := state.Fileinfo.ModTime()
if time.Since(modTime) > p.config.IgnoreOlder {
return true
}
return false
}
// isCleanInactive checks if the given state falls under clean_inactive
func (p *Input) isCleanInactive(state file.State) bool {
// clean_inactive is disabled
if p.config.CleanInactive <= 0 {
return false
}
modTime := state.Fileinfo.ModTime()
if time.Since(modTime) > p.config.CleanInactive {
return true
}
return false
}
// subOutletWrap returns a factory method that will wrap the passed outlet
// in a SubOutlet and memoize the result so the wrapping is done only once.
func subOutletWrap(outlet channel.Outleter) func() channel.Outleter {
var subOutlet channel.Outleter
return func() channel.Outleter {
if subOutlet == nil {
subOutlet = channel.SubOutlet(outlet)
}
return subOutlet
}
}
// createHarvester creates a new harvester instance from the given state
func (p *Input) createHarvester(state file.State, onTerminate func()) (*Harvester, error) {
// Each harvester gets its own wrapped outlet so it can be closed individually
h, err := NewHarvester(
p.cfg,
state,
p.states,
func(d *util.Data) bool {
return p.stateOutlet.OnEvent(d)
},
subOutletWrap(p.outlet),
)
if err == nil {
h.onTerminate = onTerminate
}
return h, err
}
// startHarvester starts a new harvester with the given offset
// In case the HarvesterLimit is reached, an error is returned
func (p *Input) startHarvester(state file.State, offset int64) error {
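// Reserve a harvester slot up front; if the limit is exceeded or setup fails, the reservation is released again via Dec().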
if p.numHarvesters.Inc() > p.config.HarvesterLimit && p.config.HarvesterLimit > 0 {
p.numHarvesters.Dec()
harvesterSkipped.Add(1)
return errHarvesterLimit
}
// Set state to "not" finished to indicate that a harvester is running
state.Finished = false
state.Offset = offset
// Create harvester with state
h, err := p.createHarvester(state, func() { p.numHarvesters.Dec() })
if err != nil {
p.numHarvesters.Dec()
return err
}
err = h.Setup()
if err != nil {
p.numHarvesters.Dec()
return fmt.Errorf("error setting up harvester: %s", err)
}
// Update state before starting harvester
// This makes sure the states is set to Finished: false
// This is synchronous state update as part of the scan
h.SendStateUpdate()
if err = p.harvesters.Start(h); err != nil {
p.numHarvesters.Dec()
}
return err
}
// updateState updates the input state and forwards the event to the spooler
// All state updates done by the input itself are synchronous to make sure no states are overwritten
func (p *Input) updateState(state file.State) error {
// Add TTL if clean_inactive is enabled and TTL is not already 0
if p.config.CleanInactive > 0 && state.TTL != 0 {
state.TTL = p.config.CleanInactive
}
if len(state.Meta) == 0 {
state.Meta = nil
}
// Update the internal state first
p.states.Update(state)
data := util.NewData()
data.SetState(state)
ok := p.outlet.OnEvent(data)
if !ok {
logp.Info("input outlet closed")
return errors.New("input outlet closed")
}
return nil
}
// Wait waits for all harvesters to complete and only then calls Stop
func (p *Input) Wait() {
p.harvesters.WaitForCompletion()
p.Stop()
}
// Stop stops all harvesters and then stops the input
func (p *Input) Stop() {
// Stop all harvesters
// In case the beatDone channel is closed, this will not wait for completion
// Otherwise Stop will wait until output is complete
p.harvesters.Stop()
// close state updater
p.stateOutlet.Close()
// stop all communication between harvesters and publisher pipeline
p.outlet.Close()
}