youtubebeat/vendor/github.com/elastic/beats/filebeat/reader/readfile/line.go

194 lines
5.2 KiB
Go

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package readfile
import (
"io"
"golang.org/x/text/encoding"
"golang.org/x/text/transform"
"github.com/elastic/beats/libbeat/common/streambuf"
"github.com/elastic/beats/libbeat/logp"
)
// LineReader reads lines from an underlying reader, decoding the input stream
// using the configured codec. The reader keeps track of the bytes consumed
// from the raw input stream for every decoded line.
type LineReader struct {
// reader is the source of raw, still encoded input bytes.
reader io.Reader
// codec is the encoding that supplies the encoder used for the newline
// sequence and the decoder used for the input.
codec encoding.Encoding
// bufferSize is the size of the slice handed to each Read call on reader.
bufferSize int
// nl is the '\n' character encoded with codec; it is the byte sequence
// searched for in inBuffer.
nl []byte
// inBuffer holds raw bytes read from reader that have not been consumed yet.
inBuffer *streambuf.Buffer
// outBuffer accumulates decoded bytes until Next hands them out as a line.
outBuffer *streambuf.Buffer
inOffset int // input buffer read offset
byteCount int // number of bytes decoded from input buffer into output buffer
// decoder is the codec's decoder, applied to bytes taken from inBuffer.
decoder transform.Transformer
}
// NewLineReader returns a LineReader that pulls raw bytes from input and
// decodes them with codec. bufferSize is the size of the slice used for each
// read from input.
//
// An error is returned if the newline character cannot be encoded with codec.
func NewLineReader(input io.Reader, codec encoding.Encoding, bufferSize int) (*LineReader, error) {
	// Line endings are searched for in the raw, still encoded stream, so the
	// newline has to be expressed in the same encoding as the input.
	encodedNL, _, err := transform.Bytes(codec.NewEncoder(), []byte{'\n'})
	if err != nil {
		return nil, err
	}

	lr := &LineReader{
		reader:     input,
		codec:      codec,
		bufferSize: bufferSize,
		nl:         encodedNL,
		decoder:    codec.NewDecoder(),
		inBuffer:   streambuf.New(nil),
		outBuffer:  streambuf.New(nil),
	}
	return lr, nil
}
// Next reads from the input until a complete line, terminated by '\n' after
// decoding, is available.
//
// It returns the decoded line (including the trailing '\n'), the number of
// raw input bytes that were decoded to produce it, and any error reported
// while advancing through the input. On error the line is nil and the count
// is 0.
func (r *LineReader) Next() ([]byte, int, error) {
	// This loop is needed in case advance detects a line ending which turns
	// out not to be one once decoded. If that is the case, reading continues.
	for {
		// Read the next 'potential' line from the input buffer/reader.
		if err := r.advance(); err != nil {
			return nil, 0, err
		}

		buf := r.outBuffer.Bytes()

		// This can happen if something goes wrong during decoding.
		if len(buf) == 0 {
			logp.Err("Empty buffer returned by advance")
			continue
		}

		// Check that the last decoded byte really is '\n'; if not, keep
		// reading.
		last := buf[len(buf)-1]
		if last == '\n' {
			break
		}
		// %q prints the single byte as a quoted, escaped character. The
		// previous %s verb is not valid for a byte and produced
		// "%!s(uint8=...)" in the log.
		logp.Debug("line", "Line ending char found which wasn't one: %q", last)
	}

	// The output buffer contains a complete line ending with '\n'. Extract
	// the byte slice from the buffer and reset the output buffer.
	bytes, err := r.outBuffer.Collect(r.outBuffer.Len())
	r.outBuffer.Reset()
	if err != nil {
		// This should never happen as otherwise we have a broken state.
		panic(err)
	}

	// Return and reset the consumed bytes count.
	sz := r.byteCount
	r.byteCount = 0
	return bytes, sz, nil
}
// advance fills the input buffer from the underlying reader until it contains
// the encoded newline sequence, then decodes everything up to and including
// that sequence into the output buffer and consumes it from the input buffer.
//
// It returns the reader's error if a read fails, streambuf.ErrNoMoreBytes if
// a read yields no bytes, and otherwise the result of advancing the input
// buffer.
func (r *LineReader) advance() error {
	nlLen := len(r.nl)

	var pos int
	for {
		// Look for the encoded newline in what has been buffered so far.
		pos = r.inBuffer.IndexFrom(r.inOffset, r.nl)
		if pos != -1 {
			break
		}

		// Move the search offset towards the end of the buffer so the next
		// scan does not revisit bytes that were already searched.
		if tail := r.inBuffer.Len() - nlLen; tail > r.inOffset {
			r.inOffset = tail
		}

		// NOTE(review): a new slice is allocated for every read; Append may
		// keep a reference to the slice it is given, so confirm before
		// hoisting this allocation out of the loop.
		chunk := make([]byte, r.bufferSize)
		read, readErr := r.reader.Read(chunk)

		// Whatever was read is appended even when the read reported an error.
		r.inBuffer.Append(chunk[:read])

		switch {
		case readErr != nil:
			return readErr
		case read == 0:
			// Empty read: report that more bytes are required.
			return streambuf.ErrNoMoreBytes
		}
	}

	// The encoded '\n' sequence was found: decode the input up to and
	// including it into the output buffer.
	lineEnd := pos + nlLen
	consumed, decodeErr := r.decode(lineEnd)
	if decodeErr != nil {
		logp.Err("Error decoding line: %s", decodeErr)
		// On error, count the whole undecoded range as consumed.
		consumed = lineEnd
	}

	// Consume the transformed bytes from the input buffer.
	advanceErr := r.inBuffer.Advance(consumed)
	r.inBuffer.Reset()

	// Continue scanning from the position after the match, relative to the
	// bytes just consumed. The value is negative when the encoded newline is
	// wider than one byte and the line has been decoded, so clamp it to zero.
	nextOffset := pos + 1 - consumed
	if nextOffset < 0 {
		nextOffset = 0
	}
	r.inOffset = nextOffset

	return advanceErr
}
// decode runs the first end bytes of the input buffer through the decoder and
// appends the result to the output buffer.
//
// The decoder writes into a 1024-byte scratch buffer, so longer input is
// decoded in several passes; transform.ErrShortDst only signals that another
// pass is needed. On any other decoder error the remaining, not yet decoded
// bytes are appended to the output buffer as they are and the whole range is
// treated as consumed.
//
// It returns the number of input bytes consumed, which is also added to
// r.byteCount, together with the decoder error, if any.
func (r *LineReader) decode(end int) (int, error) {
	var err error
	buffer := make([]byte, 1024)
	inBytes := r.inBuffer.Bytes()
	start := 0

	for start < end {
		var nDst, nSrc int
		nDst, nSrc, err = r.decoder.Transform(buffer, inBytes[start:end], false)
		if err != nil {
			// Check if the error is different from destination buffer too short.
			if err != transform.ErrShortDst {
				// Fall back to the raw bytes, but only for the part that has
				// not been decoded yet: inBytes[:start] was already written
				// to the output buffer by earlier passes, and writing it
				// again would duplicate the beginning of the line.
				r.outBuffer.Write(inBytes[start:end])
				start = end
				break
			}
			// Reset the error as decoding continues with the next pass.
			err = nil
		}

		start += nSrc
		r.outBuffer.Write(buffer[:nDst])
	}

	r.byteCount += start
	return start, err
}