// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Package log harvests different inputs for new information. Currently
// two harvester types exist:
//
//   * log
//   * stdin
//
// The log harvester reads a file line by line. In case the end of a file is found
// with an incomplete line, the line pointer stays at the beginning of the incomplete
// line. As soon as the line is completed, it is read and returned.
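// For example (illustrative, not from the original comment): if a file
// currently ends with "foo\nba", only "foo" is returned and the offset stays
// at the start of "ba"; once "r\n" is appended, the completed line "bar" is
// returned on the next read.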
//
// The stdin harvester reads data from stdin.
package log

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"sync"
	"time"

	"github.com/gofrs/uuid"
	"golang.org/x/text/transform"

	"github.com/elastic/beats/libbeat/beat"
	"github.com/elastic/beats/libbeat/common"
	file_helper "github.com/elastic/beats/libbeat/common/file"
	"github.com/elastic/beats/libbeat/logp"
	"github.com/elastic/beats/libbeat/monitoring"

	"github.com/elastic/beats/filebeat/channel"
	"github.com/elastic/beats/filebeat/harvester"
	"github.com/elastic/beats/filebeat/input/file"
	"github.com/elastic/beats/filebeat/reader"
	"github.com/elastic/beats/filebeat/reader/docker_json"
	"github.com/elastic/beats/filebeat/reader/json"
	"github.com/elastic/beats/filebeat/reader/multiline"
	"github.com/elastic/beats/filebeat/reader/readfile"
	"github.com/elastic/beats/filebeat/reader/readfile/encoding"
	"github.com/elastic/beats/filebeat/util"
)

var (
	harvesterMetrics = monitoring.Default.NewRegistry("filebeat.harvester")

	harvesterStarted   = monitoring.NewInt(harvesterMetrics, "started")
	harvesterClosed    = monitoring.NewInt(harvesterMetrics, "closed")
	harvesterRunning   = monitoring.NewInt(harvesterMetrics, "running")
	harvesterOpenFiles = monitoring.NewInt(harvesterMetrics, "open_files")

	ErrFileTruncate = errors.New("detected file being truncated")
	ErrRenamed      = errors.New("file was renamed")
	ErrRemoved      = errors.New("file was removed")
	ErrInactive     = errors.New("file inactive")
	ErrClosed       = errors.New("reader closed")
)

// OutletFactory provides an outlet for the harvester
type OutletFactory func() channel.Outleter

// Harvester contains all harvester related data
type Harvester struct {
	id     uuid.UUID
	config config
	source harvester.Source // the source being watched

	// shutdown handling
	done     chan struct{}
	stopOnce sync.Once
	stopWg   *sync.WaitGroup
	stopLock sync.Mutex

	// internal harvester state
	state  file.State
	states *file.States
	log    *Log

	// file reader pipeline
	reader          reader.Reader
	encodingFactory encoding.EncodingFactory
	encoding        encoding.Encoding

	// event/state publishing
	outletFactory OutletFactory
	publishState  func(*util.Data) bool

	onTerminate func()
}

// NewHarvester creates a new harvester
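//
// A minimal usage sketch (illustrative, not part of this file): callers in
// the input layer typically construct, set up, and run a harvester roughly
// like this, where cfg, st, states, publish, and outletFactory stand for
// values supplied by the caller:
//
//	h, err := NewHarvester(cfg, st, states, publish, outletFactory)
//	if err != nil {
//		return err
//	}
//	if err := h.Setup(); err != nil {
//		return err
//	}
//	go h.Run() // reads lines and forwards events until stopped
//	// ... later, on shutdown:
//	h.Stop() // closes the done channel and waits for Run to finish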
func NewHarvester(
	config *common.Config,
	state file.State,
	states *file.States,
	publishState func(*util.Data) bool,
	outletFactory OutletFactory,
) (*Harvester, error) {
	id, err := uuid.NewV4()
	if err != nil {
		return nil, err
	}

	h := &Harvester{
		config:        defaultConfig,
		state:         state,
		states:        states,
		publishState:  publishState,
		done:          make(chan struct{}),
		stopWg:        &sync.WaitGroup{},
		id:            id,
		outletFactory: outletFactory,
	}

	if err := config.Unpack(&h.config); err != nil {
		return nil, err
	}

	encodingFactory, ok := encoding.FindEncoding(h.config.Encoding)
	if !ok || encodingFactory == nil {
		return nil, fmt.Errorf("unknown encoding('%v')", h.config.Encoding)
	}
	h.encodingFactory = encodingFactory

	// Add ttl if clean_inactive is set
	if h.config.CleanInactive > 0 {
		h.state.TTL = h.config.CleanInactive
	}

	// Add outlet signal so harvester can also stop itself
	return h, nil
}

// open opens the file given in h.state.Source and assigns the file handle to h.source
func (h *Harvester) open() error {
	switch h.config.Type {
	case harvester.StdinType:
		return h.openStdin()
	case harvester.LogType:
		return h.openFile()
	case harvester.DockerType:
		return h.openFile()
	default:
		return fmt.Errorf("Invalid harvester type: %+v", h.config)
	}
}

// ID returns the unique harvester identifier
func (h *Harvester) ID() uuid.UUID {
	return h.id
}

// Setup opens the file handler and creates the reader for the harvester
func (h *Harvester) Setup() error {
	err := h.open()
	if err != nil {
		return fmt.Errorf("Harvester setup failed. Unexpected file opening error: %s", err)
	}

	h.reader, err = h.newLogFileReader()
	if err != nil {
		if h.source != nil {
			h.source.Close()
		}
		return fmt.Errorf("Harvester setup failed. Unexpected encoding line reader error: %s", err)
	}

	return nil
}

// Run starts the harvester, reads the file line by line, and sends events to the defined output
func (h *Harvester) Run() error {
	// Allow for some cleanup on termination
	if h.onTerminate != nil {
		defer h.onTerminate()
	}

	outlet := channel.CloseOnSignal(h.outletFactory(), h.done)
	forwarder := harvester.NewForwarder(outlet)

	// This is to make sure a harvester is not started anymore if stop was already
	// called before the harvester was started. The waitgroup is not incremented afterwards
	// as otherwise it could happen that between checking for the close channel and incrementing
	// the waitgroup, the harvester could be stopped.
	// Here stopLock is used to prevent a data race where stopWg.Add(1) below is called
	// while stopWg.Wait() is executing in a different goroutine, which is forbidden
	// according to sync.WaitGroup docs.
	h.stopLock.Lock()
	h.stopWg.Add(1)
	h.stopLock.Unlock()
	select {
	case <-h.done:
		h.stopWg.Done()
		return nil
	default:
	}

	defer func() {
		// Channel to stop internal harvester routines
		h.stop()

		// Makes sure file is properly closed when the harvester is stopped
		h.cleanup()

		harvesterRunning.Add(-1)

		// Marks harvester stopping completed
		h.stopWg.Done()
	}()

	harvesterStarted.Add(1)
	harvesterRunning.Add(1)

	// Closes reader after timeout or when done channel is closed
	// This routine is also responsible for properly stopping the reader
	go func(source string) {
		closeTimeout := make(<-chan time.Time)
		// starts close_timeout timer
		if h.config.CloseTimeout > 0 {
			closeTimeout = time.After(h.config.CloseTimeout)
		}

		select {
		// Applies when timeout is reached
		case <-closeTimeout:
			logp.Info("Closing harvester because close_timeout was reached: %s", source)
		// Required when reader loop returns and reader finished
		case <-h.done:
		}

		h.stop()
		h.log.Close()
	}(h.state.Source)

	logp.Info("Harvester started for file: %s", h.state.Source)

	for {
		select {
		case <-h.done:
			return nil
		default:
		}

		message, err := h.reader.Next()
		if err != nil {
			switch err {
			case ErrFileTruncate:
				logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source)
				h.state.Offset = 0
				filesTruncated.Add(1)
			case ErrRemoved:
				logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source)
			case ErrRenamed:
				logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source)
			case ErrClosed:
				logp.Info("Reader was closed: %s. Closing.", h.state.Source)
			case io.EOF:
				logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source)
			case ErrInactive:
				logp.Info("File is inactive: %s. Closing because close_inactive of %v reached.", h.state.Source, h.config.CloseInactive)
			default:
				logp.Err("Read line error: %v; File: %v", err, h.state.Source)
			}
			return nil
		}

		// Strip UTF-8 BOM if beginning of file
		// As all BOMs are converted to UTF-8 it is enough to only remove this one
		if h.state.Offset == 0 {
			message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf")
		}

		// Get a copy of the state to work on
		// This is important in case sending is not successful so on shutdown
		// the old offset is reported
		state := h.getState()
		startingOffset := state.Offset
		state.Offset += int64(message.Bytes)

		// Create state event
		data := util.NewData()
		if h.source.HasState() {
			data.SetState(state)
		}

		text := string(message.Content)

		// Check if data should be added to event. Only export non-empty events.
		if !message.IsEmpty() && h.shouldExportLine(text) {
			fields := common.MapStr{
				"source": state.Source,
				"offset": startingOffset, // Offset here is the offset before the starting char.
			}
			fields.DeepUpdate(message.Fields)

			// Check if json fields exist
			var jsonFields common.MapStr
			if f, ok := fields["json"]; ok {
				jsonFields = f.(common.MapStr)
			}

			data.Event = beat.Event{
				Timestamp: message.Ts,
			}

			if h.config.JSON != nil && len(jsonFields) > 0 {
				ts := json.MergeJSONFields(fields, jsonFields, &text, *h.config.JSON)
				if !ts.IsZero() {
					// there was a `@timestamp` key in the event, so overwrite
					// the resulting timestamp
					data.Event.Timestamp = ts
				}
			} else {
				if fields == nil {
					fields = common.MapStr{}
				}
				fields["message"] = text
			}

			data.Event.Fields = fields
		}

		// Always send event to update state, even if the line was skipped
		// Stop harvester in case of an error
		if !h.sendEvent(data, forwarder) {
			return nil
		}

		// Update state of harvester as successfully sent
		h.state = state
	}
}

// stop is intended for internal use and closes the done channel to stop execution
func (h *Harvester) stop() {
	h.stopOnce.Do(func() {
		close(h.done)
	})
}

// Stop stops the harvester and waits for completion
func (h *Harvester) Stop() {
	h.stop()
	// Prevent stopWg.Wait() from being called at the same time as stopWg.Add(1)
	h.stopLock.Lock()
	h.stopWg.Wait()
	h.stopLock.Unlock()
}

// sendEvent sends the event to the spooler channel.
// It returns false if the event was not sent.
func (h *Harvester) sendEvent(data *util.Data, forwarder *harvester.Forwarder) bool {
	if h.source.HasState() {
		h.states.Update(data.GetState())
	}

	err := forwarder.Send(data)
	return err == nil
}

// SendStateUpdate sends an empty event with the current state to update the registry.
// close_timeout does not apply here to make sure a harvester is closed properly. In
// case the output is blocked the harvester will stay open to make sure no new harvester
// is started. As soon as the output becomes available again, the finished state is written
// and processing can continue.
func (h *Harvester) SendStateUpdate() {
	if !h.source.HasState() {
		return
	}

	logp.Debug("harvester", "Update state: %s, offset: %v", h.state.Source, h.state.Offset)
	h.states.Update(h.state)

	d := util.NewData()
	d.SetState(h.state)
	h.publishState(d)
}

// shouldExportLine decides if the line is exported or not based on
// the include_lines and exclude_lines options.
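//
// For example (illustrative): with include_lines: ['^ERR'] and
// exclude_lines: ['timeout'], the line "ERR timeout on request" matches an
// include pattern but is then dropped by the exclude pattern, while
// "ERR disk full" is exported. As implemented below, include_lines is
// evaluated before exclude_lines.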
func (h *Harvester) shouldExportLine(line string) bool {
	if len(h.config.IncludeLines) > 0 {
		if !harvester.MatchAny(h.config.IncludeLines, line) {
			// drop line
			logp.Debug("harvester", "Drop line as it does not match any of the include patterns: %s", line)
			return false
		}
	}
	if len(h.config.ExcludeLines) > 0 {
		if harvester.MatchAny(h.config.ExcludeLines, line) {
			// drop line
			logp.Debug("harvester", "Drop line as it matches one of the exclude patterns: %s", line)
			return false
		}
	}

	return true
}

// openFile opens a file and checks for the encoding. In case the encoding cannot be detected
// or the file cannot be opened because, for example, of missing read permissions, an error
// is returned and the harvester is closed. The file will be picked up again the next time
// the file system is scanned.
func (h *Harvester) openFile() error {
	f, err := file_helper.ReadOpen(h.state.Source)
	if err != nil {
		return fmt.Errorf("Failed opening %s: %s", h.state.Source, err)
	}

	harvesterOpenFiles.Add(1)

	// Makes sure file handler is also closed on errors
	err = h.validateFile(f)
	if err != nil {
		f.Close()
		harvesterOpenFiles.Add(-1)
		return err
	}

	h.source = File{File: f}
	return nil
}

func (h *Harvester) validateFile(f *os.File) error {
	info, err := f.Stat()
	if err != nil {
		return fmt.Errorf("Failed getting stats for file %s: %s", h.state.Source, err)
	}

	if !info.Mode().IsRegular() {
		return fmt.Errorf("Tried to open non-regular file: %q %s", info.Mode(), info.Name())
	}

	// Compares the stat of the opened file to the state given by the input. Abort if they do not match.
	if !os.SameFile(h.state.Fileinfo, info) {
		return errors.New("file info is not identical with opened file. Aborting harvesting and retrying file later again")
	}

	h.encoding, err = h.encodingFactory(f)
	if err != nil {
		if err == transform.ErrShortSrc {
			logp.Info("Initialising encoding for '%v' failed due to file being too short", f)
		} else {
			logp.Err("Initialising encoding for '%v' failed: %v", f, err)
		}
		return err
	}

	// Get the file offset. Only update the offset if there was no error.
	offset, err := h.initFileOffset(f)
	if err != nil {
		return err
	}

	logp.Debug("harvester", "Setting offset for file: %s. Offset: %d ", h.state.Source, offset)
	h.state.Offset = offset

	return nil
}
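// initFileOffset seeks the file to the position reading should continue from.
// Illustrative behavior (derived from the code below, not an original
// comment): with a registry offset of, say, 1234 the file is seeked to byte
// 1234 before reading resumes; with offset 0 the current position is kept,
// since encoding detection may already have consumed a few bytes.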
func (h *Harvester) initFileOffset(file *os.File) (int64, error) {
	// continue from last known offset
	if h.state.Offset > 0 {
		logp.Debug("harvester", "Set previous offset for file: %s. Offset: %d ", h.state.Source, h.state.Offset)
		return file.Seek(h.state.Offset, io.SeekStart)
	}

	// get offset from file in case the encoding factory was required to read some data
	logp.Debug("harvester", "Setting offset for file based on seek: %s", h.state.Source)
	return file.Seek(0, io.SeekCurrent)
}

// getState returns an updated copy of the harvester state
func (h *Harvester) getState() file.State {
	if !h.source.HasState() {
		return file.State{}
	}

	state := h.state

	// refreshes the values in State with the values from the harvester itself
	state.FileStateOS = file_helper.GetOSState(h.state.Fileinfo)
	return state
}

func (h *Harvester) cleanup() {
	// Mark harvester as finished
	h.state.Finished = true

	logp.Debug("harvester", "Stopping harvester for file: %s", h.state.Source)
	defer logp.Debug("harvester", "harvester cleanup finished for file: %s", h.state.Source)

	// Make sure file is closed as soon as harvester exits
	// If file was never opened, it can't be closed
	if h.source != nil {
		// close file handler
		h.source.Close()

		logp.Debug("harvester", "Closing file: %s", h.state.Source)
		harvesterOpenFiles.Add(-1)

		// On completion, push offset so we can continue where we left off if we relaunch on the same file
		// Only send offset if file object was created successfully
		h.SendStateUpdate()
	} else {
		logp.Warn("Stopping harvester, NOT closing file as file info not available: %s", h.state.Source)
	}

	harvesterClosed.Add(1)
}

// newLogFileReader creates a new reader to read log files
//
// It creates a chain of readers which looks as follows:
//
//   limit -> (multiline -> timeout) -> strip_newline -> json -> encode -> line -> log_file
//
// Each reader on the left contains the reader on the right and calls `Next()` to fetch more data.
// At the base of all readers is the log_file reader. That means the data flows in the opposite direction:
//
//   log_file -> line -> encode -> json -> strip_newline -> (timeout -> multiline) -> limit
//
// log_file implements the io.Reader interface and the encode reader is an adapter from io.Reader to
// reader.Reader that also handles file encodings. All other readers implement reader.Reader.
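//
// As an illustration (a sketch, not from this file): which readers end up in
// the chain is driven by the input configuration, e.g. a filebeat input such
// as
//
//	- type: log
//	  paths: ["/var/log/app/*.log"]
//	  encoding: utf-8
//	  multiline.pattern: '^\['
//	  multiline.negate: true
//	  multiline.match: after
//
// would configure the encode reader for UTF-8 and add the multiline reader,
// while json.* settings would add the json reader instead.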
func (h *Harvester) newLogFileReader() (reader.Reader, error) {
	var r reader.Reader
	var err error

	// TODO: NewLineReader uses additional buffering to deal with encoding and testing
	// for new lines in input stream. Simple 8-bit based encodings, or plain
	// don't require 'complicated' logic.
	h.log, err = NewLog(h.source, h.config.LogConfig)
	if err != nil {
		return nil, err
	}

	r, err = readfile.NewEncodeReader(h.log, h.encoding, h.config.BufferSize)
	if err != nil {
		return nil, err
	}

	if h.config.DockerJSON != nil {
		// Docker json-file format, add custom parsing to the pipeline
		r = docker_json.New(r, h.config.DockerJSON.Stream, h.config.DockerJSON.Partial, h.config.DockerJSON.CRIFlags)
	}

	if h.config.JSON != nil {
		r = json.New(r, h.config.JSON)
	}

	r = readfile.NewStripNewline(r)

	if h.config.Multiline != nil {
		r, err = multiline.New(r, "\n", h.config.MaxBytes, h.config.Multiline)
		if err != nil {
			return nil, err
		}
	}

	return readfile.NewLimitReader(r, h.config.MaxBytes), nil
}