youtubebeat/vendor/github.com/elastic/beats/libbeat/outputs/elasticsearch/client.go

// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package elasticsearch
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"time"
"github.com/elastic/beats/libbeat/beat"
"github.com/elastic/beats/libbeat/logp"
"github.com/elastic/beats/libbeat/outputs"
"github.com/elastic/beats/libbeat/outputs/outil"
"github.com/elastic/beats/libbeat/outputs/transport"
"github.com/elastic/beats/libbeat/publisher"
"github.com/elastic/beats/libbeat/testing"
)
// Client is an elasticsearch client.
type Client struct {
Connection
tlsConfig *transport.TLSConfig
index outil.Selector
pipeline *outil.Selector
params map[string]string
timeout time.Duration
// buffered bulk requests
bulkRequ *bulkRequest
// buffered json response reader
json jsonReader
// additional configs
compressionLevel int
proxyURL *url.URL
observer outputs.Observer
}
// ClientSettings contains the settings for a client.
type ClientSettings struct {
URL string
Proxy *url.URL
TLS *transport.TLSConfig
Username, Password string
EscapeHTML bool
Parameters map[string]string
Headers map[string]string
Index outil.Selector
Pipeline *outil.Selector
Timeout time.Duration
CompressionLevel int
Observer outputs.Observer
}
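// connectCallback is the signature of callbacks run once a connection to
// Elasticsearch has been established.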
type connectCallback func(client *Client) error
// Connection manages the connection for a given client.
type Connection struct {
URL string
Username string
Password string
Headers map[string]string
http *http.Client
onConnectCallback func() error
encoder bodyEncoder
version string
}
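// bulkIndexAction is the action line of a bulk `index` operation.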
type bulkIndexAction struct {
Index bulkEventMeta `json:"index" struct:"index"`
}
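// bulkCreateAction is the action line of a bulk `create` operation, used when
// an event carries its own document ID.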
type bulkCreateAction struct {
Create bulkEventMeta `json:"create" struct:"create"`
}
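// bulkEventMeta is the document meta data encoded into the action line that
// precedes each event in the bulk request body. An illustrative action line
// (the index name is an assumption):
//
//	{ "index": { "_index": "filebeat-2018.10.01", "_type": "doc" } }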
type bulkEventMeta struct {
Index string `json:"_index" struct:"_index"`
DocType string `json:"_type" struct:"_type"`
Pipeline string `json:"pipeline,omitempty" struct:"pipeline,omitempty"`
ID string `json:"_id,omitempty" struct:"_id,omitempty"`
}
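// bulkResultStats summarizes the per-item outcomes of a bulk response.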
type bulkResultStats struct {
acked int // number of events ACKed by Elasticsearch
duplicates int // number of events failed with `create` due to ID already being indexed
fails int // number of failed events (can be retried)
nonIndexable int // number of failed events (not indexable -> must be dropped)
}
var (
nameItems = []byte("items")
nameStatus = []byte("status")
nameError = []byte("error")
)
var (
errExpectedItemObject = errors.New("expected item response object")
errExpectedStatusCode = errors.New("expected item status code")
errUnexpectedEmptyObject = errors.New("empty object")
errExpectedObjectEnd = errors.New("expected end of object")
errTempBulkFailure = errors.New("temporary bulk send failure")
)
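// eventType is the Elasticsearch document type assigned to all published events.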
const (
eventType = "doc"
)
// NewClient instantiates a new client.
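//
// A minimal, illustrative call (the URL, timeout, and indexSelector are
// assumptions for the example, not defaults):
//
//	client, err := NewClient(ClientSettings{
//		URL:     "http://localhost:9200",
//		Index:   indexSelector, // an outil.Selector prepared by the caller
//		Timeout: 60 * time.Second,
//	}, nil)
//	if err != nil {
//		// handle error
//	}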
func NewClient(
s ClientSettings,
onConnect *callbacksRegistry,
) (*Client, error) {
proxy := http.ProxyFromEnvironment
if s.Proxy != nil {
proxy = http.ProxyURL(s.Proxy)
}
pipeline := s.Pipeline
if pipeline != nil && pipeline.IsEmpty() {
pipeline = nil
}
u, err := url.Parse(s.URL)
if err != nil {
return nil, fmt.Errorf("failed to parse elasticsearch URL: %v", err)
}
if u.User != nil {
s.Username = u.User.Username()
s.Password, _ = u.User.Password()
u.User = nil
// Re-write URL without credentials.
s.URL = u.String()
}
logp.Info("Elasticsearch url: %s", s.URL)
// TODO: add socks5 proxy support
var dialer, tlsDialer transport.Dialer
dialer = transport.NetDialer(s.Timeout)
tlsDialer, err = transport.TLSDialer(dialer, s.TLS, s.Timeout)
if err != nil {
return nil, err
}
if st := s.Observer; st != nil {
dialer = transport.StatsDialer(dialer, st)
tlsDialer = transport.StatsDialer(tlsDialer, st)
}
params := s.Parameters
bulkRequ, err := newBulkRequest(s.URL, "", "", params, nil)
if err != nil {
return nil, err
}
var encoder bodyEncoder
compression := s.CompressionLevel
if compression == 0 {
encoder = newJSONEncoder(nil, s.EscapeHTML)
} else {
encoder, err = newGzipEncoder(compression, nil, s.EscapeHTML)
if err != nil {
return nil, err
}
}
client := &Client{
Connection: Connection{
URL: s.URL,
Username: s.Username,
Password: s.Password,
Headers: s.Headers,
http: &http.Client{
Transport: &http.Transport{
Dial: dialer.Dial,
DialTLS: tlsDialer.Dial,
Proxy: proxy,
},
Timeout: s.Timeout,
},
encoder: encoder,
},
tlsConfig: s.TLS,
index: s.Index,
pipeline: pipeline,
params: params,
timeout: s.Timeout,
bulkRequ: bulkRequ,
compressionLevel: compression,
proxyURL: s.Proxy,
observer: s.Observer,
}
client.Connection.onConnectCallback = func() error {
if onConnect != nil {
onConnect.mutex.Lock()
defer onConnect.mutex.Unlock()
for _, callback := range onConnect.callbacks {
err := callback(client)
if err != nil {
return err
}
}
}
return nil
}
return client, nil
}
// Clone clones a client.
func (client *Client) Clone() *Client {
// When cloning, the connection callback and params are not copied. A
// client's clone is, for example, created for topology-map support. With params
// most likely containing the ingest node pipeline and the default callback trying
// to install a template, we don't want these to be included in the clone.
c, _ := NewClient(
ClientSettings{
URL: client.URL,
Index: client.index,
Pipeline: client.pipeline,
Proxy: client.proxyURL,
TLS: client.tlsConfig,
Username: client.Username,
Password: client.Password,
Parameters: nil, // XXX: do not pass params?
Headers: client.Headers,
Timeout: client.http.Timeout,
CompressionLevel: client.compressionLevel,
},
nil, // XXX: do not pass connection callback?
)
return c
}
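// Publish sends a batch of events to Elasticsearch. The batch is ACKed if all
// events were published or dropped; otherwise the remaining events are
// scheduled for retry.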
func (client *Client) Publish(batch publisher.Batch) error {
events := batch.Events()
rest, err := client.publishEvents(events)
if len(rest) == 0 {
batch.ACK()
} else {
batch.RetryEvents(rest)
}
return err
}
// publishEvents sends all events to Elasticsearch. On error, a slice with all
// events not published or not confirmed to be processed by Elasticsearch is
// returned. The input slice's backing memory is reused for the returned value.
func (client *Client) publishEvents(
data []publisher.Event,
) ([]publisher.Event, error) {
begin := time.Now()
st := client.observer
if st != nil {
st.NewBatch(len(data))
}
if len(data) == 0 {
return nil, nil
}
body := client.encoder
body.Reset()
// encode events into bulk request buffer, dropping failed elements from
// events slice
origCount := len(data)
data = bulkEncodePublishRequest(body, client.index, client.pipeline, data)
newCount := len(data)
if st != nil && origCount > newCount {
st.Dropped(origCount - newCount)
}
if newCount == 0 {
return nil, nil
}
requ := client.bulkRequ
requ.Reset(body)
status, result, sendErr := client.sendBulkRequest(requ)
if sendErr != nil {
logp.Err("Failed to perform any bulk index operations: %s", sendErr)
return data, sendErr
}
debugf("PublishEvents: %d events have been published to elasticsearch in %v.",
len(data),
time.Now().Sub(begin))
// check response for transient errors
var failedEvents []publisher.Event
var stats bulkResultStats
if status != 200 {
failedEvents = data
stats.fails = len(failedEvents)
} else {
client.json.init(result.raw)
failedEvents, stats = bulkCollectPublishFails(&client.json, data)
}
failed := len(failedEvents)
if st := client.observer; st != nil {
dropped := stats.nonIndexable
duplicates := stats.duplicates
acked := len(data) - failed - dropped - duplicates
st.Acked(acked)
st.Failed(failed)
st.Dropped(dropped)
st.Duplicate(duplicates)
}
if failed > 0 {
if sendErr == nil {
sendErr = errTempBulkFailure
}
return failedEvents, sendErr
}
return nil, nil
}
// bulkEncodePublishRequest encodes all events into the bulk request body and
// returns the slice of events successfully added to the request.
func bulkEncodePublishRequest(
body bulkWriter,
index outil.Selector,
pipeline *outil.Selector,
data []publisher.Event,
) []publisher.Event {
okEvents := data[:0]
for i := range data {
event := &data[i].Content
meta, err := createEventBulkMeta(index, pipeline, event)
if err != nil {
logp.Err("Failed to encode event meta data: %s", err)
continue
}
if err := body.Add(meta, event); err != nil {
logp.Err("Failed to encode event: %s", err)
continue
}
okEvents = append(okEvents, data[i])
}
return okEvents
}
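// createEventBulkMeta builds the bulk action meta data for a single event,
// selecting index, pipeline, and optional document ID from the event. Events
// carrying an ID use the `create` action, all others use `index`.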
func createEventBulkMeta(
indexSel outil.Selector,
pipelineSel *outil.Selector,
event *beat.Event,
) (interface{}, error) {
pipeline, err := getPipeline(event, pipelineSel)
if err != nil {
err := fmt.Errorf("failed to select pipeline: %v", err)
return nil, err
}
index, err := getIndex(event, indexSel)
if err != nil {
err := fmt.Errorf("failed to select event index: %v", err)
return nil, err
}
var id string
if m := event.Meta; m != nil {
if tmp := m["id"]; tmp != nil {
if s, ok := tmp.(string); ok {
id = s
} else {
logp.Err("Event ID '%v' is no string value", id)
}
}
}
meta := bulkEventMeta{
Index: index,
DocType: eventType,
Pipeline: pipeline,
ID: id,
}
if id != "" {
return bulkCreateAction{meta}, nil
}
return bulkIndexAction{meta}, nil
}
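// getPipeline returns the ingest node pipeline for an event. A pipeline set in
// the event meta data takes precedence over the configured pipeline selector.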
func getPipeline(event *beat.Event, pipelineSel *outil.Selector) (string, error) {
if event.Meta != nil {
if pipeline, exists := event.Meta["pipeline"]; exists {
if p, ok := pipeline.(string); ok {
return p, nil
}
return "", errors.New("pipeline metadata is no string")
}
}
if pipelineSel != nil {
return pipelineSel.Select(event)
}
return "", nil
}
// getIndex returns the full index name.
// The index is either defined in the output configuration or can be
// overridden by the event through its "index" metadata field.
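// For example, an event with Meta["index"] set to "foo" and a timestamp on
// 2018-10-01 is written to the daily index "foo-2018.10.01".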
func getIndex(event *beat.Event, index outil.Selector) (string, error) {
if event.Meta != nil {
if str, exists := event.Meta["index"]; exists {
idx, ok := str.(string)
if ok {
ts := event.Timestamp.UTC()
return fmt.Sprintf("%s-%d.%02d.%02d",
idx, ts.Year(), ts.Month(), ts.Day()), nil
}
}
}
return index.Select(event)
}
// bulkCollectPublishFails checks per-item errors and returns all events to be
// retried, based on the status code returned for each item. If indexing an
// event failed due to an error in the event itself (e.g. it does not respect
// the mapping), the event is dropped.
func bulkCollectPublishFails(
reader *jsonReader,
data []publisher.Event,
) ([]publisher.Event, bulkResultStats) {
if err := reader.expectDict(); err != nil {
logp.Err("Failed to parse bulk response: expected JSON object")
return nil, bulkResultStats{}
}
// find 'items' field in response
for {
kind, name, err := reader.nextFieldName()
if err != nil {
logp.Err("Failed to parse bulk response")
return nil, bulkResultStats{}
}
if kind == dictEnd {
logp.Err("Failed to parse bulk response: no 'items' field in response")
return nil, bulkResultStats{}
}
// found items array -> continue
if bytes.Equal(name, nameItems) {
break
}
reader.ignoreNext()
}
// check items field is an array
if err := reader.expectArray(); err != nil {
logp.Err("Failed to parse bulk response: expected items array")
return nil, bulkResultStats{}
}
count := len(data)
failed := data[:0]
stats := bulkResultStats{}
for i := 0; i < count; i++ {
status, msg, err := itemStatus(reader)
if err != nil {
return nil, bulkResultStats{}
}
if status < 300 {
stats.acked++
continue // ok value
}
if status == 409 {
// 409 is used to indicate an event with same ID already exists if
// `create` op_type is used.
stats.duplicates++
continue // ok
}
if status < 500 && status != 429 {
// hard failure, don't collect
logp.Warn("Cannot index event %#v (status=%v): %s", data[i], status, msg)
stats.nonIndexable++
continue
}
debugf("Bulk item insert failed (i=%v, status=%v): %s", i, status, msg)
stats.fails++
failed = append(failed, data[i])
}
return failed, stats
}
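// itemStatus parses a single item object of the bulk response and returns its
// status code and the raw `error` field, if present.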
func itemStatus(reader *jsonReader) (int, []byte, error) {
// skip outer dictionary
if err := reader.expectDict(); err != nil {
return 0, nil, errExpectedItemObject
}
// find first field in outer dictionary (e.g. 'create')
kind, _, err := reader.nextFieldName()
if err != nil {
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
if kind == dictEnd {
err = errUnexpectedEmptyObject
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
// parse actual item response code and error message
status, msg, err := itemStatusInner(reader)
if err != nil {
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
// close dictionary. Expect outer dictionary to have only one element
kind, _, err = reader.step()
if err != nil {
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
if kind != dictEnd {
err = errExpectedObjectEnd
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
return status, msg, nil
}
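// itemStatusInner parses the inner action object of a bulk response item
// (e.g. `create` or `index`), extracting its `status` and `error` fields.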
func itemStatusInner(reader *jsonReader) (int, []byte, error) {
if err := reader.expectDict(); err != nil {
return 0, nil, errExpectedItemObject
}
status := -1
var msg []byte
for {
kind, name, err := reader.nextFieldName()
if err != nil {
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
if kind == dictEnd {
break
}
switch {
case bytes.Equal(name, nameStatus): // name == "status"
status, err = reader.nextInt()
if err != nil {
logp.Err("Failed to parse bulk response item: %s", err)
return 0, nil, err
}
case bytes.Equal(name, nameError): // name == "error"
msg, err = reader.ignoreNext() // collect raw string for "error" field
if err != nil {
return 0, nil, err
}
default: // ignore unknown fields
_, err = reader.ignoreNext()
if err != nil {
return 0, nil, err
}
}
}
if status < 0 {
return 0, nil, errExpectedStatusCode
}
return status, msg, nil
}
// LoadJSON creates a PUT request based on a JSON document.
func (client *Client) LoadJSON(path string, json map[string]interface{}) ([]byte, error) {
status, body, err := client.Request("PUT", path, "", nil, json)
if err != nil {
return body, fmt.Errorf("couldn't load json. Error: %s", err)
}
if status > 300 {
return body, fmt.Errorf("couldn't load json. Status: %v", status)
}
return body, nil
}
// GetVersion returns the Elasticsearch version the client is connected to.
func (client *Client) GetVersion() string {
return client.Connection.version
}
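// Test runs a connectivity check (TCP, optional TLS, and an Elasticsearch
// ping) against the configured host, reporting results via the testing.Driver.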
func (client *Client) Test(d testing.Driver) {
d.Run("elasticsearch: "+client.URL, func(d testing.Driver) {
u, err := url.Parse(client.URL)
d.Fatal("parse url", err)
address := u.Hostname()
if u.Port() != "" {
address += ":" + u.Port()
}
d.Run("connection", func(d testing.Driver) {
netDialer := transport.TestNetDialer(d, client.timeout)
_, err = netDialer.Dial("tcp", address)
d.Fatal("dial up", err)
})
if u.Scheme != "https" {
d.Warn("TLS", "secure connection disabled")
} else {
d.Run("TLS", func(d testing.Driver) {
netDialer := transport.NetDialer(client.timeout)
tlsDialer, err := transport.TestTLSDialer(d, netDialer, client.tlsConfig, client.timeout)
_, err = tlsDialer.Dial("tcp", address)
d.Fatal("dial up", err)
})
}
err = client.Connect()
d.Fatal("talk to server", err)
d.Info("version", client.version)
})
}
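// String returns a descriptive name for the client, including the
// Elasticsearch URL it connects to.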
func (client *Client) String() string {
return "elasticsearch(" + client.Connection.URL + ")"
}
// Connect connects the client.
func (conn *Connection) Connect() error {
var err error
conn.version, err = conn.Ping()
if err != nil {
return err
}
err = conn.onConnectCallback()
if err != nil {
return fmt.Errorf("Connection marked as failed because the onConnect callback failed: %v", err)
}
return nil
}
// Ping sends a GET request to the Elasticsearch host and returns the version
// number reported in the response.
func (conn *Connection) Ping() (string, error) {
debugf("ES Ping(url=%v)", conn.URL)
status, body, err := conn.execRequest("GET", conn.URL, nil)
if err != nil {
debugf("Ping request failed with: %v", err)
return "", err
}
if status >= 300 {
return "", fmt.Errorf("Non 2xx response code: %d", status)
}
var response struct {
Version struct {
Number string
}
}
err = json.Unmarshal(body, &response)
if err != nil {
return "", fmt.Errorf("Failed to parse JSON response: %v", err)
}
debugf("Ping status code: %v", status)
logp.Info("Connected to Elasticsearch version %s", response.Version.Number)
return response.Version.Number, nil
}
// Close closes a connection.
func (conn *Connection) Close() error {
return nil
}
// Request sends a request via the connection.
func (conn *Connection) Request(
method, path string,
pipeline string,
params map[string]string,
body interface{},
) (int, []byte, error) {
url := addToURL(conn.URL, path, pipeline, params)
debugf("%s %s %s %v", method, url, pipeline, body)
return conn.RequestURL(method, url, body)
}
// RequestURL sends a request with the connection object to an alternative url
func (conn *Connection) RequestURL(
method, url string,
body interface{},
) (int, []byte, error) {
if body == nil {
return conn.execRequest(method, url, nil)
}
if err := conn.encoder.Marshal(body); err != nil {
logp.Warn("Failed to json encode body (%v): %#v", err, body)
return 0, nil, ErrJSONEncodeFailed
}
return conn.execRequest(method, url, conn.encoder.Reader())
}
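// execRequest builds an *http.Request for the given method, URL, and optional
// body, and executes it over the connection.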
func (conn *Connection) execRequest(
method, url string,
body io.Reader,
) (int, []byte, error) {
req, err := http.NewRequest(method, url, body)
if err != nil {
logp.Warn("Failed to create request", err)
return 0, nil, err
}
if body != nil {
conn.encoder.AddHeader(&req.Header)
}
return conn.execHTTPRequest(req)
}
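// execHTTPRequest adds authentication and the configured headers to the
// request, executes it, and returns the HTTP status code and response body.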
func (conn *Connection) execHTTPRequest(req *http.Request) (int, []byte, error) {
req.Header.Add("Accept", "application/json")
if conn.Username != "" || conn.Password != "" {
req.SetBasicAuth(conn.Username, conn.Password)
}
for name, value := range conn.Headers {
req.Header.Add(name, value)
}
// The stdlib will override the value in the header based on the configured `Host`
// field of the request, which defaults to the current machine.
//
// We use the normalized header key to retrieve the user-configured value and
// assign it to the request's Host field.
if host := req.Header.Get("Host"); host != "" {
req.Host = host
}
resp, err := conn.http.Do(req)
if err != nil {
return 0, nil, err
}
defer closing(resp.Body)
status := resp.StatusCode
obj, err := ioutil.ReadAll(resp.Body)
if err != nil {
return status, nil, err
}
if status >= 300 {
// include the Elasticsearch response body in the error returned for non-2xx responses
err = fmt.Errorf("%v: %s", resp.Status, obj)
}
return status, obj, err
}
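// GetVersion returns the Elasticsearch version reported by the last successful ping.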
func (conn *Connection) GetVersion() string {
return conn.version
}
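// closing closes the given Closer and logs a warning if closing fails.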
func closing(c io.Closer) {
err := c.Close()
if err != nil {
logp.Warn("Close failed with: %v", err)
}
}