// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package log
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/elastic/beats/filebeat/channel"
|
|
"github.com/elastic/beats/filebeat/harvester"
|
|
"github.com/elastic/beats/filebeat/input"
|
|
"github.com/elastic/beats/filebeat/input/file"
|
|
"github.com/elastic/beats/filebeat/util"
|
|
"github.com/elastic/beats/libbeat/common"
|
|
"github.com/elastic/beats/libbeat/common/atomic"
|
|
"github.com/elastic/beats/libbeat/logp"
|
|
"github.com/elastic/beats/libbeat/monitoring"
|
|
)
|
|
|
|
const (
	// recursiveGlobDepth is the maximum depth recursive glob patterns are
	// expanded to when the configured paths are resolved.
	recursiveGlobDepth = 8
	// harvesterErrMsg is the log format used when a harvester cannot be
	// started on a newly discovered file.
	harvesterErrMsg = "Harvester could not be started on new file: %s, Err: %s"
)
var (
	// filesRenamed counts files detected as renamed during a scan.
	filesRenamed = monitoring.NewInt(nil, "filebeat.input.log.files.renamed")
	// filesTruncated counts files detected as truncated during a scan.
	filesTruncated = monitoring.NewInt(nil, "filebeat.input.log.files.truncated")
	// harvesterSkipped counts harvesters that were not started, e.g. because
	// the harvester limit was reached.
	harvesterSkipped = monitoring.NewInt(nil, "filebeat.harvester.skipped")

	// errHarvesterLimit is returned by startHarvester when the configured
	// harvester limit has been reached.
	errHarvesterLimit = errors.New("harvester limit reached")
)
func init() {
|
|
err := input.Register("log", NewInput)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// Input contains the input and its config
type Input struct {
	cfg           *common.Config      // raw input configuration, passed to each harvester
	config        config              // unpacked configuration
	states        *file.States        // file states tracked by this input
	harvesters    *harvester.Registry // registry of running harvesters
	outlet        channel.Outleter    // underlying outlet, closed only after all workers stop
	stateOutlet   channel.Outleter    // outlet for state updates; unblocked only on beat shutdown
	done          chan struct{}       // closed when the input is stopped (aborts scans)
	numHarvesters atomic.Uint32       // running harvester count, checked against harvester limit
	meta          map[string]string   // optional meta data attached to every state; nil when empty
}
// NewInput instantiates a new Log input.
//
// It connects the underlying outlet, derives a shutdown-bound sub-outlet for
// state updates, unpacks and normalizes the configuration, validates it by
// creating a throw-away harvester, and finally loads any matching states
// handed over via the input context.
func NewInput(
	cfg *common.Config,
	outlet channel.Connector,
	context input.Context,
) (input.Input, error) {

	// Note: underlying output.
	// The input and harvester do have different requirements
	// on the timings the outlets must be closed/unblocked.
	// The outlet generated here is the underlying outlet, only closed
	// once all workers have been shut down.
	// For state updates and events, separate sub-outlets will be used.
	out, err := outlet(cfg, context.DynamicFields)
	if err != nil {
		return nil, err
	}

	// stateOut will only be unblocked if the beat is shut down.
	// otherwise it can block on a full publisher pipeline, so state updates
	// can be forwarded correctly to the registrar.
	stateOut := channel.CloseOnSignal(channel.SubOutlet(out), context.BeatDone)

	// Normalize empty meta to nil so later comparisons treat "no meta" and
	// "empty meta" identically.
	meta := context.Meta
	if len(meta) == 0 {
		meta = nil
	}

	p := &Input{
		config:      defaultConfig,
		cfg:         cfg,
		harvesters:  harvester.NewRegistry(),
		outlet:      out,
		stateOutlet: stateOut,
		states:      file.NewStates(),
		done:        context.Done,
		meta:        meta,
	}

	if err := cfg.Unpack(&p.config); err != nil {
		return nil, err
	}
	if err := p.config.resolveRecursiveGlobs(); err != nil {
		return nil, fmt.Errorf("Failed to resolve recursive globs in config: %v", err)
	}
	if err := p.config.normalizeGlobPatterns(); err != nil {
		return nil, fmt.Errorf("Failed to normalize globs patterns: %v", err)
	}

	// Create empty harvester to check if configs are fine
	// TODO: Do config validation instead
	_, err = p.createHarvester(file.State{}, nil)
	if err != nil {
		return nil, err
	}

	if len(p.config.Paths) == 0 {
		return nil, fmt.Errorf("each input must have at least one path defined")
	}

	err = p.loadStates(context.States)
	if err != nil {
		return nil, err
	}

	logp.Info("Configured paths: %v", p.config.Paths)

	return p, nil
}
// LoadStates loads states into input
|
|
// It goes through all states coming from the registry. Only the states which match the glob patterns of
|
|
// the input will be loaded and updated. All other states will not be touched.
|
|
func (p *Input) loadStates(states []file.State) error {
|
|
logp.Debug("input", "exclude_files: %s. Number of stats: %d", p.config.ExcludeFiles, len(states))
|
|
|
|
for _, state := range states {
|
|
// Check if state source belongs to this input. If yes, update the state.
|
|
if p.matchesFile(state.Source) && p.matchesMeta(state.Meta) {
|
|
state.TTL = -1
|
|
|
|
// In case a input is tried to be started with an unfinished state matching the glob pattern
|
|
if !state.Finished {
|
|
return fmt.Errorf("Can only start an input when all related states are finished: %+v", state)
|
|
}
|
|
|
|
// Update input states and send new states to registry
|
|
err := p.updateState(state)
|
|
if err != nil {
|
|
logp.Err("Problem putting initial state: %+v", err)
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
logp.Debug("input", "input with previous states loaded: %v", p.states.Count())
|
|
return nil
|
|
}
|
|
|
|
// Run runs the input: it performs one scan for new and updated files, then
// applies the clean_inactive/clean_removed settings to the state registry.
func (p *Input) Run() {
	logp.Debug("input", "Start next scan")

	// TailFiles is like ignore_older = 1ns and only on startup
	if p.config.TailFiles {
		ignoreOlder := p.config.IgnoreOlder

		// Overwrite ignore_older for the first scan
		p.config.IgnoreOlder = 1
		defer func() {
			// Reset ignore_older after first run
			p.config.IgnoreOlder = ignoreOlder
			// Disable tail_files after the first run
			p.config.TailFiles = false
		}()
	}
	p.scan()

	// It is important that a first scan is run before cleanup to make sure all new states are read first
	if p.config.CleanInactive > 0 || p.config.CleanRemoved {
		beforeCount := p.states.Count()
		cleanedStates, pendingClean := p.states.Cleanup()
		logp.Debug("input", "input states cleaned up. Before: %d, After: %d, Pending: %d",
			beforeCount, beforeCount-cleanedStates, pendingClean)
	}

	// Marking removed files to be cleaned up. Cleanup happens after next scan to make sure all states are updated first
	if p.config.CleanRemoved {
		for _, state := range p.states.GetStates() {
			// os.Stat will return an error in case the file does not exist
			stat, err := os.Stat(state.Source)
			if err != nil {
				if os.IsNotExist(err) {
					p.removeState(state)
					logp.Debug("input", "Remove state for file as file removed: %s", state.Source)
				} else {
					logp.Err("input state for %s was not removed: %s", state.Source, err)
				}
			} else {
				// Check if existing source on disk and state are the same. Remove if not the case.
				newState := file.NewState(stat, state.Source, p.config.Type, p.meta)
				if !newState.FileStateOS.IsSame(state.FileStateOS) {
					p.removeState(state)
					logp.Debug("input", "Remove state for file as file removed or renamed: %s", state.Source)
				}
			}
		}
	}
}
func (p *Input) removeState(state file.State) {
|
|
// Only clean up files where state is Finished
|
|
if !state.Finished {
|
|
logp.Debug("input", "State for file not removed because harvester not finished: %s", state.Source)
|
|
return
|
|
}
|
|
|
|
state.TTL = 0
|
|
err := p.updateState(state)
|
|
if err != nil {
|
|
logp.Err("File cleanup state update error: %s", err)
|
|
}
|
|
}
|
|
|
|
// getFiles returns all files which have to be harvested
|
|
// All globs are expanded and then directory and excluded files are removed
|
|
func (p *Input) getFiles() map[string]os.FileInfo {
|
|
paths := map[string]os.FileInfo{}
|
|
|
|
for _, path := range p.config.Paths {
|
|
matches, err := filepath.Glob(path)
|
|
if err != nil {
|
|
logp.Err("glob(%s) failed: %v", path, err)
|
|
continue
|
|
}
|
|
|
|
OUTER:
|
|
// Check any matched files to see if we need to start a harvester
|
|
for _, file := range matches {
|
|
|
|
// check if the file is in the exclude_files list
|
|
if p.isFileExcluded(file) {
|
|
logp.Debug("input", "Exclude file: %s", file)
|
|
continue
|
|
}
|
|
|
|
// Fetch Lstat File info to detected also symlinks
|
|
fileInfo, err := os.Lstat(file)
|
|
if err != nil {
|
|
logp.Debug("input", "lstat(%s) failed: %s", file, err)
|
|
continue
|
|
}
|
|
|
|
if fileInfo.IsDir() {
|
|
logp.Debug("input", "Skipping directory: %s", file)
|
|
continue
|
|
}
|
|
|
|
isSymlink := fileInfo.Mode()&os.ModeSymlink > 0
|
|
if isSymlink && !p.config.Symlinks {
|
|
logp.Debug("input", "File %s skipped as it is a symlink.", file)
|
|
continue
|
|
}
|
|
|
|
// Fetch Stat file info which fetches the inode. In case of a symlink, the original inode is fetched
|
|
fileInfo, err = os.Stat(file)
|
|
if err != nil {
|
|
logp.Debug("input", "stat(%s) failed: %s", file, err)
|
|
continue
|
|
}
|
|
|
|
// If symlink is enabled, it is checked that original is not part of same input
|
|
// It original is harvested by other input, states will potentially overwrite each other
|
|
if p.config.Symlinks {
|
|
for _, finfo := range paths {
|
|
if os.SameFile(finfo, fileInfo) {
|
|
logp.Info("Same file found as symlink and originap. Skipping file: %s", file)
|
|
continue OUTER
|
|
}
|
|
}
|
|
}
|
|
|
|
paths[file] = fileInfo
|
|
}
|
|
}
|
|
|
|
return paths
|
|
}
|
|
|
|
// matchesFile returns true in case the given filePath is part of this input, means matches its glob patterns
|
|
func (p *Input) matchesFile(filePath string) bool {
|
|
// Path is cleaned to ensure we always compare clean paths
|
|
filePath = filepath.Clean(filePath)
|
|
|
|
for _, glob := range p.config.Paths {
|
|
|
|
// Glob is cleaned to ensure we always compare clean paths
|
|
glob = filepath.Clean(glob)
|
|
|
|
// Evaluate if glob matches filePath
|
|
match, err := filepath.Match(glob, filePath)
|
|
if err != nil {
|
|
logp.Debug("input", "Error matching glob: %s", err)
|
|
continue
|
|
}
|
|
|
|
// Check if file is not excluded
|
|
if match && !p.isFileExcluded(filePath) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// matchesMeta returns true in case the given meta is equal to the one of this input, false if not
|
|
func (p *Input) matchesMeta(meta map[string]string) bool {
|
|
if len(meta) != len(p.meta) {
|
|
return false
|
|
}
|
|
|
|
for k, v := range p.meta {
|
|
if meta[k] != v {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// FileSortInfo pairs a file path with its os.FileInfo so scanned files can be
// ordered before harvesting.
type FileSortInfo struct {
	info os.FileInfo
	path string
}

// getSortInfos converts a path->FileInfo map into a slice of FileSortInfo.
func getSortInfos(paths map[string]os.FileInfo) []FileSortInfo {
	infos := make([]FileSortInfo, 0, len(paths))
	for name, fi := range paths {
		infos = append(infos, FileSortInfo{info: fi, path: name})
	}

	return infos
}

// getSortedFiles sorts sortInfos in place according to scanSort ("modtime" or
// "filename") and scanOrder ("asc" or "desc") and returns the slice. Any
// other configuration value yields an error.
func getSortedFiles(scanOrder string, scanSort string, sortInfos []FileSortInfo) ([]FileSortInfo, error) {
	var less func(i, j int) bool

	switch scanSort {
	case "modtime":
		switch scanOrder {
		case "asc":
			less = func(i, j int) bool {
				return sortInfos[i].info.ModTime().Before(sortInfos[j].info.ModTime())
			}
		case "desc":
			less = func(i, j int) bool {
				return sortInfos[i].info.ModTime().After(sortInfos[j].info.ModTime())
			}
		default:
			return nil, fmt.Errorf("Unexpected value for scan.order: %v", scanOrder)
		}
	case "filename":
		switch scanOrder {
		case "asc":
			less = func(i, j int) bool {
				return strings.Compare(sortInfos[i].info.Name(), sortInfos[j].info.Name()) < 0
			}
		case "desc":
			less = func(i, j int) bool {
				return strings.Compare(sortInfos[i].info.Name(), sortInfos[j].info.Name()) > 0
			}
		default:
			return nil, fmt.Errorf("Unexpected value for scan.order: %v", scanOrder)
		}
	default:
		return nil, fmt.Errorf("Unexpected value for scan.sort: %v", scanSort)
	}

	// All invalid configurations returned above, so a comparator is always set.
	sort.Slice(sortInfos, less)

	return sortInfos, nil
}
func getFileState(path string, info os.FileInfo, p *Input) (file.State, error) {
|
|
var err error
|
|
var absolutePath string
|
|
absolutePath, err = filepath.Abs(path)
|
|
if err != nil {
|
|
return file.State{}, fmt.Errorf("could not fetch abs path for file %s: %s", absolutePath, err)
|
|
}
|
|
logp.Debug("input", "Check file for harvesting: %s", absolutePath)
|
|
// Create new state for comparison
|
|
newState := file.NewState(info, absolutePath, p.config.Type, p.meta)
|
|
return newState, nil
|
|
}
|
|
|
|
// getKeys returns the file paths (keys) of the given map.
func getKeys(paths map[string]os.FileInfo) []string {
	files := make([]string, 0, len(paths))
	for path := range paths {
		files = append(files, path)
	}
	return files
}
// Scan starts a scanGlob for each provided path/glob
|
|
func (p *Input) scan() {
|
|
var sortInfos []FileSortInfo
|
|
var files []string
|
|
|
|
paths := p.getFiles()
|
|
|
|
var err error
|
|
|
|
if p.config.ScanSort != "" {
|
|
sortInfos, err = getSortedFiles(p.config.ScanOrder, p.config.ScanSort, getSortInfos(paths))
|
|
if err != nil {
|
|
logp.Err("Failed to sort files during scan due to error %s", err)
|
|
}
|
|
}
|
|
|
|
if sortInfos == nil {
|
|
files = getKeys(paths)
|
|
}
|
|
|
|
for i := 0; i < len(paths); i++ {
|
|
|
|
var path string
|
|
var info os.FileInfo
|
|
|
|
if sortInfos == nil {
|
|
path = files[i]
|
|
info = paths[path]
|
|
} else {
|
|
path = sortInfos[i].path
|
|
info = sortInfos[i].info
|
|
}
|
|
|
|
select {
|
|
case <-p.done:
|
|
logp.Info("Scan aborted because input stopped.")
|
|
return
|
|
default:
|
|
}
|
|
|
|
newState, err := getFileState(path, info, p)
|
|
if err != nil {
|
|
logp.Err("Skipping file %s due to error %s", path, err)
|
|
}
|
|
|
|
// Load last state
|
|
lastState := p.states.FindPrevious(newState)
|
|
|
|
// Ignores all files which fall under ignore_older
|
|
if p.isIgnoreOlder(newState) {
|
|
err := p.handleIgnoreOlder(lastState, newState)
|
|
if err != nil {
|
|
logp.Err("Updating ignore_older state error: %s", err)
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Decides if previous state exists
|
|
if lastState.IsEmpty() {
|
|
logp.Debug("input", "Start harvester for new file: %s", newState.Source)
|
|
err := p.startHarvester(newState, 0)
|
|
if err == errHarvesterLimit {
|
|
logp.Debug("input", harvesterErrMsg, newState.Source, err)
|
|
continue
|
|
}
|
|
if err != nil {
|
|
logp.Err(harvesterErrMsg, newState.Source, err)
|
|
}
|
|
} else {
|
|
p.harvestExistingFile(newState, lastState)
|
|
}
|
|
}
|
|
}
|
|
|
|
// harvestExistingFile continues harvesting a file with a known state if needed
|
|
func (p *Input) harvestExistingFile(newState file.State, oldState file.State) {
|
|
logp.Debug("input", "Update existing file for harvesting: %s, offset: %v", newState.Source, oldState.Offset)
|
|
|
|
// No harvester is running for the file, start a new harvester
|
|
// It is important here that only the size is checked and not modification time, as modification time could be incorrect on windows
|
|
// https://blogs.technet.microsoft.com/asiasupp/2010/12/14/file-date-modified-property-are-not-updating-while-modifying-a-file-without-closing-it/
|
|
if oldState.Finished && newState.Fileinfo.Size() > oldState.Offset {
|
|
// Resume harvesting of an old file we've stopped harvesting from
|
|
// This could also be an issue with force_close_older that a new harvester is started after each scan but not needed?
|
|
// One problem with comparing modTime is that it is in seconds, and scans can happen more then once a second
|
|
logp.Debug("input", "Resuming harvesting of file: %s, offset: %d, new size: %d", newState.Source, oldState.Offset, newState.Fileinfo.Size())
|
|
err := p.startHarvester(newState, oldState.Offset)
|
|
if err != nil {
|
|
logp.Err("Harvester could not be started on existing file: %s, Err: %s", newState.Source, err)
|
|
}
|
|
return
|
|
}
|
|
|
|
// File size was reduced -> truncated file
|
|
if oldState.Finished && newState.Fileinfo.Size() < oldState.Offset {
|
|
logp.Debug("input", "Old file was truncated. Starting from the beginning: %s, offset: %d, new size: %d ", newState.Source, newState.Fileinfo.Size())
|
|
err := p.startHarvester(newState, 0)
|
|
if err != nil {
|
|
logp.Err("Harvester could not be started on truncated file: %s, Err: %s", newState.Source, err)
|
|
}
|
|
|
|
filesTruncated.Add(1)
|
|
return
|
|
}
|
|
|
|
// Check if file was renamed
|
|
if oldState.Source != "" && oldState.Source != newState.Source {
|
|
// This does not start a new harvester as it is assume that the older harvester is still running
|
|
// or no new lines were detected. It sends only an event status update to make sure the new name is persisted.
|
|
logp.Debug("input", "File rename was detected: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)
|
|
|
|
if oldState.Finished {
|
|
logp.Debug("input", "Updating state for renamed file: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)
|
|
// Update state because of file rotation
|
|
oldState.Source = newState.Source
|
|
err := p.updateState(oldState)
|
|
if err != nil {
|
|
logp.Err("File rotation state update error: %s", err)
|
|
}
|
|
|
|
filesRenamed.Add(1)
|
|
} else {
|
|
logp.Debug("input", "File rename detected but harvester not finished yet.")
|
|
}
|
|
}
|
|
|
|
if !oldState.Finished {
|
|
// Nothing to do. Harvester is still running and file was not renamed
|
|
logp.Debug("input", "Harvester for file is still running: %s", newState.Source)
|
|
} else {
|
|
logp.Debug("input", "File didn't change: %s", newState.Source)
|
|
}
|
|
}
|
|
|
|
// handleIgnoreOlder handles states which fall under ignore older
|
|
// Based on the state information it is decided if the state information has to be updated or not
|
|
func (p *Input) handleIgnoreOlder(lastState, newState file.State) error {
|
|
logp.Debug("input", "Ignore file because ignore_older reached: %s", newState.Source)
|
|
|
|
if !lastState.IsEmpty() {
|
|
if !lastState.Finished {
|
|
logp.Info("File is falling under ignore_older before harvesting is finished. Adjust your close_* settings: %s", newState.Source)
|
|
}
|
|
// Old state exist, no need to update it
|
|
return nil
|
|
}
|
|
|
|
// Make sure file is not falling under clean_inactive yet
|
|
if p.isCleanInactive(newState) {
|
|
logp.Debug("input", "Do not write state for ignore_older because clean_inactive reached")
|
|
return nil
|
|
}
|
|
|
|
// Set offset to end of file to be consistent with files which were harvested before
|
|
// See https://github.com/elastic/beats/pull/2907
|
|
newState.Offset = newState.Fileinfo.Size()
|
|
|
|
// Write state for ignore_older file as none exists yet
|
|
newState.Finished = true
|
|
err := p.updateState(newState)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// isFileExcluded checks if the given path should be excluded
|
|
func (p *Input) isFileExcluded(file string) bool {
|
|
patterns := p.config.ExcludeFiles
|
|
return len(patterns) > 0 && harvester.MatchAny(patterns, file)
|
|
}
|
|
|
|
// isIgnoreOlder checks if the given state reached ignore_older
|
|
func (p *Input) isIgnoreOlder(state file.State) bool {
|
|
// ignore_older is disable
|
|
if p.config.IgnoreOlder == 0 {
|
|
return false
|
|
}
|
|
|
|
modTime := state.Fileinfo.ModTime()
|
|
if time.Since(modTime) > p.config.IgnoreOlder {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// isCleanInactive checks if the given state false under clean_inactive
|
|
func (p *Input) isCleanInactive(state file.State) bool {
|
|
// clean_inactive is disable
|
|
if p.config.CleanInactive <= 0 {
|
|
return false
|
|
}
|
|
|
|
modTime := state.Fileinfo.ModTime()
|
|
if time.Since(modTime) > p.config.CleanInactive {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// subOutletWrap returns a factory method that will wrap the passed outlet
|
|
// in a SubOutlet and memoize the result so the wrapping is done only once.
|
|
func subOutletWrap(outlet channel.Outleter) func() channel.Outleter {
|
|
var subOutlet channel.Outleter
|
|
return func() channel.Outleter {
|
|
if subOutlet == nil {
|
|
subOutlet = channel.SubOutlet(outlet)
|
|
}
|
|
return subOutlet
|
|
}
|
|
}
|
|
|
|
// createHarvester creates a new harvester instance from the given state
|
|
func (p *Input) createHarvester(state file.State, onTerminate func()) (*Harvester, error) {
|
|
// Each wraps the outlet, for closing the outlet individually
|
|
h, err := NewHarvester(
|
|
p.cfg,
|
|
state,
|
|
p.states,
|
|
func(d *util.Data) bool {
|
|
return p.stateOutlet.OnEvent(d)
|
|
},
|
|
subOutletWrap(p.outlet),
|
|
)
|
|
if err == nil {
|
|
h.onTerminate = onTerminate
|
|
}
|
|
return h, err
|
|
}
|
|
|
|
// startHarvester starts a new harvester with the given offset.
// In case the harvester limit is reached, errHarvesterLimit is returned.
func (p *Input) startHarvester(state file.State, offset int64) error {
	// Reserve a harvester slot first; the counter is rolled back on every
	// failure path below and by the termination callback on success.
	if p.numHarvesters.Inc() > p.config.HarvesterLimit && p.config.HarvesterLimit > 0 {
		p.numHarvesters.Dec()
		harvesterSkipped.Add(1)
		return errHarvesterLimit
	}
	// Set state to "not" finished to indicate that a harvester is running
	state.Finished = false
	state.Offset = offset

	// Create harvester with state; the termination callback releases the
	// reserved harvester slot.
	h, err := p.createHarvester(state, func() { p.numHarvesters.Dec() })
	if err != nil {
		p.numHarvesters.Dec()
		return err
	}

	err = h.Setup()
	if err != nil {
		p.numHarvesters.Dec()
		return fmt.Errorf("error setting up harvester: %s", err)
	}

	// Update state before starting the harvester.
	// This makes sure the state is set to Finished: false.
	// This is a synchronous state update as part of the scan.
	h.SendStateUpdate()

	if err = p.harvesters.Start(h); err != nil {
		p.numHarvesters.Dec()
	}
	return err
}
// updateState updates the input state and forwards the event to the spooler
|
|
// All state updates done by the input itself are synchronous to make sure not states are overwritten
|
|
func (p *Input) updateState(state file.State) error {
|
|
// Add ttl if cleanOlder is enabled and TTL is not already 0
|
|
if p.config.CleanInactive > 0 && state.TTL != 0 {
|
|
state.TTL = p.config.CleanInactive
|
|
}
|
|
|
|
if len(state.Meta) == 0 {
|
|
state.Meta = nil
|
|
}
|
|
|
|
// Update first internal state
|
|
p.states.Update(state)
|
|
|
|
data := util.NewData()
|
|
data.SetState(state)
|
|
ok := p.outlet.OnEvent(data)
|
|
if !ok {
|
|
logp.Info("input outlet closed")
|
|
return errors.New("input outlet closed")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Wait blocks until all harvesters have completed and only then calls Stop,
// so no in-flight events are dropped.
func (p *Input) Wait() {
	p.harvesters.WaitForCompletion()
	p.Stop()
}
// Stop stops all harvesters and then stops the input by closing its outlets.
func (p *Input) Stop() {
	// Stop all harvesters.
	// In case the beatDone channel is closed, this will not wait for completion.
	// Otherwise Stop will wait until the output is complete.
	p.harvesters.Stop()

	// Close the state updater outlet.
	p.stateOutlet.Close()

	// Stop all communication between harvesters and the publisher pipeline.
	p.outlet.Close()
}