// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Package log harvests different inputs for new information. Currently
// two harvester types exist:
//
//   * log
//   * stdin
//
// The log harvester reads a file line by line. In case the end of a file is found
// with an incomplete line, the line pointer stays at the beginning of the incomplete
// line. As soon as the line is completed, it is read and returned.
//
// The stdin harvester reads data from stdin.
package log

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"sync"
	"time"

	"github.com/gofrs/uuid"
	"golang.org/x/text/transform"

	"github.com/elastic/beats/libbeat/beat"
	"github.com/elastic/beats/libbeat/common"
	file_helper "github.com/elastic/beats/libbeat/common/file"
	"github.com/elastic/beats/libbeat/logp"
	"github.com/elastic/beats/libbeat/monitoring"

	"github.com/elastic/beats/filebeat/channel"
	"github.com/elastic/beats/filebeat/harvester"
	"github.com/elastic/beats/filebeat/input/file"
	"github.com/elastic/beats/filebeat/reader"
	"github.com/elastic/beats/filebeat/reader/docker_json"
	"github.com/elastic/beats/filebeat/reader/json"
	"github.com/elastic/beats/filebeat/reader/multiline"
	"github.com/elastic/beats/filebeat/reader/readfile"
	"github.com/elastic/beats/filebeat/reader/readfile/encoding"
	"github.com/elastic/beats/filebeat/util"
)

var (
	harvesterMetrics = monitoring.Default.NewRegistry("filebeat.harvester")

	harvesterStarted   = monitoring.NewInt(harvesterMetrics, "started")
	harvesterClosed    = monitoring.NewInt(harvesterMetrics, "closed")
	harvesterRunning   = monitoring.NewInt(harvesterMetrics, "running")
	harvesterOpenFiles = monitoring.NewInt(harvesterMetrics, "open_files")

	ErrFileTruncate = errors.New("detected file being truncated")
	ErrRenamed      = errors.New("file was renamed")
	ErrRemoved      = errors.New("file was removed")
	ErrInactive     = errors.New("file inactive")
	ErrClosed       = errors.New("reader closed")
)

// OutletFactory provides an outlet for the harvester
type OutletFactory func() channel.Outleter

// Harvester contains all harvester related data
type Harvester struct {
	id     uuid.UUID
	config config
	source harvester.Source // the source being watched

	// shutdown handling
	done     chan struct{}
	stopOnce sync.Once
	stopWg   *sync.WaitGroup
	stopLock sync.Mutex

	// internal harvester state
	state  file.State
	states *file.States
	log    *Log

	// file reader pipeline
	reader          reader.Reader
	encodingFactory encoding.EncodingFactory
	encoding        encoding.Encoding

	// event/state publishing
	outletFactory OutletFactory
	publishState  func(*util.Data) bool

	onTerminate func()
}
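
// A Harvester is typically created, set up, and then run until it is stopped.
// The sketch below is illustrative only and not part of the package API;
// cfg, state, states, publishState, and outletFactory are hypothetical values
// a caller would already have:
//
//	h, err := NewHarvester(cfg, state, states, publishState, outletFactory)
//	if err != nil {
//		return err
//	}
//	if err := h.Setup(); err != nil {
//		return err
//	}
//	go func() {
//		_ = h.Run() // reads the source and publishes events until stopped
//	}()
//	// ... later, on shutdown:
//	h.Stop()
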
// NewHarvester creates a new harvester
func NewHarvester(
	config *common.Config,
	state file.State,
	states *file.States,
	publishState func(*util.Data) bool,
	outletFactory OutletFactory,
) (*Harvester, error) {
	id, err := uuid.NewV4()
	if err != nil {
		return nil, err
	}

	h := &Harvester{
		config:        defaultConfig,
		state:         state,
		states:        states,
		publishState:  publishState,
		done:          make(chan struct{}),
		stopWg:        &sync.WaitGroup{},
		id:            id,
		outletFactory: outletFactory,
	}

	if err := config.Unpack(&h.config); err != nil {
		return nil, err
	}

	encodingFactory, ok := encoding.FindEncoding(h.config.Encoding)
	if !ok || encodingFactory == nil {
		return nil, fmt.Errorf("unknown encoding('%v')", h.config.Encoding)
	}
	h.encodingFactory = encodingFactory

	// Add ttl if clean_inactive is set
	if h.config.CleanInactive > 0 {
		h.state.TTL = h.config.CleanInactive
	}

	// Add outlet signal so harvester can also stop itself
	return h, nil
}

// open opens the file pointed to by h.state.Source and assigns the file
// handler to the harvester's source.
func (h *Harvester) open() error {
	switch h.config.Type {
	case harvester.StdinType:
		return h.openStdin()
	case harvester.LogType:
		return h.openFile()
	case harvester.DockerType:
		return h.openFile()
	default:
		return fmt.Errorf("Invalid harvester type: %+v", h.config)
	}
}

// ID returns the unique harvester identifier
func (h *Harvester) ID() uuid.UUID {
	return h.id
}

// Setup opens the file handler and creates the reader for the harvester
func (h *Harvester) Setup() error {
	err := h.open()
	if err != nil {
		return fmt.Errorf("Harvester setup failed. Unexpected file opening error: %s", err)
	}

	h.reader, err = h.newLogFileReader()
	if err != nil {
		if h.source != nil {
			h.source.Close()
		}
		return fmt.Errorf("Harvester setup failed. Unexpected encoding line reader error: %s", err)
	}

	return nil
}
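
// Run below guards its startup against a concurrent Stop with a small
// pattern: a done channel closed exactly once, a WaitGroup tracking the
// running harvester, and a mutex that serializes stopWg.Add(1) against
// stopWg.Wait(). A stripped-down, illustrative sketch of that pattern
// (hypothetical names, not part of this package; done is assumed to be
// created with make(chan struct{}) when the worker is constructed):
//
//	type worker struct {
//		done     chan struct{}
//		stopOnce sync.Once
//		wg       sync.WaitGroup
//		mu       sync.Mutex
//	}
//
//	func (w *worker) run() {
//		w.mu.Lock()
//		w.wg.Add(1) // never overlaps with wg.Wait() thanks to mu
//		w.mu.Unlock()
//
//		select {
//		case <-w.done: // stop() was called before run() started
//			w.wg.Done()
//			return
//		default:
//		}
//		defer w.wg.Done()
//		// ... do work until <-w.done fires ...
//	}
//
//	func (w *worker) stop() {
//		w.stopOnce.Do(func() { close(w.done) })
//		w.mu.Lock()
//		w.wg.Wait()
//		w.mu.Unlock()
//	}
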
// Run starts the harvester, reads the file line by line, and sends events to the defined output
func (h *Harvester) Run() error {
	// Allow for some cleanup on termination
	if h.onTerminate != nil {
		defer h.onTerminate()
	}

	outlet := channel.CloseOnSignal(h.outletFactory(), h.done)
	forwarder := harvester.NewForwarder(outlet)

	// This is to make sure a harvester is not started anymore if stop was already
	// called before the harvester was started. The waitgroup is not incremented afterwards
	// as otherwise it could happen that between checking for the close channel and incrementing
	// the waitgroup, the harvester could be stopped.
	// Here stopLock is used to prevent a data race where stopWg.Add(1) below is called
	// while stopWg.Wait() is executing in a different goroutine, which is forbidden
	// according to sync.WaitGroup docs (see the illustrative sketch above).
	h.stopLock.Lock()
	h.stopWg.Add(1)
	h.stopLock.Unlock()

	select {
	case <-h.done:
		h.stopWg.Done()
		return nil
	default:
	}

	defer func() {
		// Channel to stop internal harvester routines
		h.stop()

		// Makes sure file is properly closed when the harvester is stopped
		h.cleanup()

		harvesterRunning.Add(-1)

		// Marks harvester stopping completed
		h.stopWg.Done()
	}()

	harvesterStarted.Add(1)
	harvesterRunning.Add(1)

	// Closes reader after timeout or when done channel is closed
	// This routine is also responsible for properly stopping the reader
	go func(source string) {
		closeTimeout := make(<-chan time.Time)
		// starts close_timeout timer
		if h.config.CloseTimeout > 0 {
			closeTimeout = time.After(h.config.CloseTimeout)
		}

		select {
		// Applies when timeout is reached
		case <-closeTimeout:
			logp.Info("Closing harvester because close_timeout was reached: %s", source)
		// Required when reader loop returns and reader finished
		case <-h.done:
		}

		h.stop()
		h.log.Close()
	}(h.state.Source)

	logp.Info("Harvester started for file: %s", h.state.Source)

	for {
		select {
		case <-h.done:
			return nil
		default:
		}

		message, err := h.reader.Next()
		if err != nil {
			switch err {
			case ErrFileTruncate:
				logp.Info("File was truncated. Begin reading file from offset 0: %s", h.state.Source)
				h.state.Offset = 0
				filesTruncated.Add(1)
			case ErrRemoved:
				logp.Info("File was removed: %s. Closing because close_removed is enabled.", h.state.Source)
			case ErrRenamed:
				logp.Info("File was renamed: %s. Closing because close_renamed is enabled.", h.state.Source)
			case ErrClosed:
				logp.Info("Reader was closed: %s. Closing.", h.state.Source)
			case io.EOF:
				logp.Info("End of file reached: %s. Closing because close_eof is enabled.", h.state.Source)
			case ErrInactive:
				logp.Info("File is inactive: %s. Closing because close_inactive of %v reached.", h.state.Source, h.config.CloseInactive)
			default:
				logp.Err("Read line error: %v; File: %v", err, h.state.Source)
			}
			return nil
		}

		// Strip UTF-8 BOM if beginning of file
		// As all BOMs are converted to UTF-8 it is enough to only remove this one
		if h.state.Offset == 0 {
			message.Content = bytes.Trim(message.Content, "\xef\xbb\xbf")
		}

		// Get copy of state to work on
		// This is important in case sending is not successful so on shutdown
		// the old offset is reported
		state := h.getState()
		startingOffset := state.Offset
		state.Offset += int64(message.Bytes)

		// Create state event
		data := util.NewData()
		if h.source.HasState() {
			data.SetState(state)
		}

		text := string(message.Content)

		// Check if data should be added to event. Only export non empty events.
		if !message.IsEmpty() && h.shouldExportLine(text) {
			fields := common.MapStr{
				"source": state.Source,
				"offset": startingOffset, // Offset here is the offset before the starting char.
			}
			fields.DeepUpdate(message.Fields)

			// Check if json fields exist
			var jsonFields common.MapStr
			if f, ok := fields["json"]; ok {
				jsonFields = f.(common.MapStr)
			}

			data.Event = beat.Event{
				Timestamp: message.Ts,
			}

			if h.config.JSON != nil && len(jsonFields) > 0 {
				ts := json.MergeJSONFields(fields, jsonFields, &text, *h.config.JSON)
				if !ts.IsZero() {
					// there was a `@timestamp` key in the event, so overwrite
					// the resulting timestamp
					data.Event.Timestamp = ts
				}
			} else {
				if fields == nil {
					fields = common.MapStr{}
				}
				fields["message"] = text
			}

			data.Event.Fields = fields
		}

		// Always send event to update state, even if the line was skipped
		// Stop harvester in case of an error
		if !h.sendEvent(data, forwarder) {
			return nil
		}

		// Update state of harvester as successfully sent
		h.state = state
	}
}

// stop is intended for internal use and closes the done channel to stop execution
func (h *Harvester) stop() {
	h.stopOnce.Do(func() {
		close(h.done)
	})
}

// Stop stops the harvester and waits for completion
func (h *Harvester) Stop() {
	h.stop()
	// Prevent stopWg.Wait() from being called at the same time as stopWg.Add(1)
	h.stopLock.Lock()
	h.stopWg.Wait()
	h.stopLock.Unlock()
}

// sendEvent sends the event to the spooler channel.
// Returns false if the event was not sent.
func (h *Harvester) sendEvent(data *util.Data, forwarder *harvester.Forwarder) bool {
	if h.source.HasState() {
		h.states.Update(data.GetState())
	}

	err := forwarder.Send(data)
	return err == nil
}

// SendStateUpdate sends an empty event with the current state to update the registry.
// close_timeout does not apply here to make sure a harvester is closed properly. In
// case the output is blocked the harvester will stay open to make sure no new harvester
// is started. As soon as the output becomes available again, the finished state is written
// and processing can continue.
func (h *Harvester) SendStateUpdate() {
	if !h.source.HasState() {
		return
	}

	logp.Debug("harvester", "Update state: %s, offset: %v", h.state.Source, h.state.Offset)
	h.states.Update(h.state)

	d := util.NewData()
	d.SetState(h.state)
	h.publishState(d)
}

// shouldExportLine decides if the line is exported or not based on
// the include_lines and exclude_lines options.
func (h *Harvester) shouldExportLine(line string) bool {
	if len(h.config.IncludeLines) > 0 {
		if !harvester.MatchAny(h.config.IncludeLines, line) {
			// drop line
			logp.Debug("harvester", "Drop line as it does not match any of the include patterns %s", line)
			return false
		}
	}
	if len(h.config.ExcludeLines) > 0 {
		if harvester.MatchAny(h.config.ExcludeLines, line) {
			// drop line
			logp.Debug("harvester", "Drop line as it does match one of the exclude patterns %s", line)
			return false
		}
	}

	return true
}

// openFile opens a file and checks for the encoding. In case the encoding cannot be detected
// or the file cannot be opened because of, for example, failing read permissions, an error
// is returned and the harvester is closed. The file will be picked up again the next time
// the file system is scanned.
func (h *Harvester) openFile() error {
	f, err := file_helper.ReadOpen(h.state.Source)
	if err != nil {
		return fmt.Errorf("Failed opening %s: %s", h.state.Source, err)
	}

	harvesterOpenFiles.Add(1)

	// Makes sure file handler is also closed on errors
	err = h.validateFile(f)
	if err != nil {
		f.Close()
		harvesterOpenFiles.Add(-1)
		return err
	}

	h.source = File{File: f}
	return nil
}

// validateFile checks that the opened file is a regular file, still matches the
// state given by the input, and that its encoding can be initialised.
func (h *Harvester) validateFile(f *os.File) error {
	info, err := f.Stat()
	if err != nil {
		return fmt.Errorf("Failed getting stats for file %s: %s", h.state.Source, err)
	}

	if !info.Mode().IsRegular() {
		return fmt.Errorf("Tried to open non regular file: %q %s", info.Mode(), info.Name())
	}

	// Compares the stat of the opened file to the state given by the input. Abort if they don't match.
	if !os.SameFile(h.state.Fileinfo, info) {
		return errors.New("file info is not identical with opened file. Aborting harvesting and retrying file later again")
	}

	h.encoding, err = h.encodingFactory(f)
	if err != nil {
		if err == transform.ErrShortSrc {
			logp.Info("Initialising encoding for '%v' failed due to file being too short", f)
		} else {
			logp.Err("Initialising encoding for '%v' failed: %v", f, err)
		}
		return err
	}

	// get file offset. Only update offset if no error
	offset, err := h.initFileOffset(f)
	if err != nil {
		return err
	}

	logp.Debug("harvester", "Setting offset for file: %s. Offset: %d ", h.state.Source, offset)
	h.state.Offset = offset

	return nil
}

func (h *Harvester) initFileOffset(file *os.File) (int64, error) {
	// continue from last known offset
	if h.state.Offset > 0 {
		logp.Debug("harvester", "Set previous offset for file: %s. Offset: %d ", h.state.Source, h.state.Offset)
		return file.Seek(h.state.Offset, os.SEEK_SET)
	}

	// get offset from file in case the encoding factory was required to read some data
	logp.Debug("harvester", "Setting offset for file based on seek: %s", h.state.Source)
	return file.Seek(0, os.SEEK_CUR)
}

// getState returns an updated copy of the harvester state
func (h *Harvester) getState() file.State {
	if !h.source.HasState() {
		return file.State{}
	}

	state := h.state

	// refreshes the values in State with the values from the harvester itself
	state.FileStateOS = file_helper.GetOSState(h.state.Fileinfo)
	return state
}

func (h *Harvester) cleanup() {
	// Mark harvester as finished
	h.state.Finished = true

	logp.Debug("harvester", "Stopping harvester for file: %s", h.state.Source)
	defer logp.Debug("harvester", "harvester cleanup finished for file: %s", h.state.Source)

	// Make sure file is closed as soon as harvester exits
	// If file was never opened, it can't be closed
	if h.source != nil {
		// close file handler
		h.source.Close()

		logp.Debug("harvester", "Closing file: %s", h.state.Source)
		harvesterOpenFiles.Add(-1)

		// On completion, push offset so we can continue where we left off if we relaunch on the same file
		// Only send offset if file object was created successfully
		h.SendStateUpdate()
	} else {
		logp.Warn("Stopping harvester, NOT closing file as file info not available: %s", h.state.Source)
	}

	harvesterClosed.Add(1)
}

// newLogFileReader creates a new reader to read log files
//
// It creates a chain of readers which looks as follows:
//
//   limit -> (multiline -> timeout) -> strip_newline -> json -> encode -> line -> log_file
//
// Each reader on the left contains the reader on the right and calls `Next()` to fetch more data.
// At the base of all readers is the log_file reader. That means the data is flowing in the
// opposite direction:
//
//   log_file -> line -> encode -> json -> strip_newline -> (timeout -> multiline) -> limit
//
// log_file implements the io.Reader interface and the encode reader is an adapter from io.Reader
// to reader.Reader that also handles file encodings. All other readers implement reader.Reader.
func (h *Harvester) newLogFileReader() (reader.Reader, error) {
	var r reader.Reader
	var err error

	// TODO: NewLineReader uses additional buffering to deal with encoding and testing
	// for new lines in input stream. Simple 8-bit based encodings, or plain
	// don't require 'complicated' logic.
	h.log, err = NewLog(h.source, h.config.LogConfig)
	if err != nil {
		return nil, err
	}

	r, err = readfile.NewEncodeReader(h.log, h.encoding, h.config.BufferSize)
	if err != nil {
		return nil, err
	}

	if h.config.DockerJSON != nil {
		// Docker json-file format, add custom parsing to the pipeline
		r = docker_json.New(r, h.config.DockerJSON.Stream, h.config.DockerJSON.Partial, h.config.DockerJSON.CRIFlags)
	}

	if h.config.JSON != nil {
		r = json.New(r, h.config.JSON)
	}

	r = readfile.NewStripNewline(r)

	if h.config.Multiline != nil {
		r, err = multiline.New(r, "\n", h.config.MaxBytes, h.config.Multiline)
		if err != nil {
			return nil, err
		}
	}

	return readfile.NewLimitReader(r, h.config.MaxBytes), nil
}
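
// The sketch below illustrates the decorator-style composition built by
// newLogFileReader above: each reader wraps the one created before it, and the
// outermost reader is the one Run() polls via Next(). It is illustrative only:
// logFile, enc, and cfg are hypothetical stand-ins for h.log, h.encoding, and
// the harvester config; error handling and the optional docker_json/json
// readers are omitted.
//
//	// innermost reader: raw bytes from the log file, decoded per the encoding
//	r, _ := readfile.NewEncodeReader(logFile, enc, cfg.BufferSize)
//	r = readfile.NewStripNewline(r)                            // drop trailing newlines
//	r, _ = multiline.New(r, "\n", cfg.MaxBytes, cfg.Multiline) // join multi-line events
//	r = readfile.NewLimitReader(r, cfg.MaxBytes)               // enforce max_bytes
//
//	// Every Next() call propagates down the chain until the log_file
//	// reader at the base returns data.
//	msg, err := r.Next()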