From 0ea086dc4351e01a2b6a985562cc6661e7f011c0 Mon Sep 17 00:00:00 2001 From: Gabriel Augendre Date: Tue, 20 Nov 2018 09:35:31 +0100 Subject: [PATCH] Limit parallelism --- beater/youtubebeat.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/beater/youtubebeat.go b/beater/youtubebeat.go index 191cc94..3dae92b 100644 --- a/beater/youtubebeat.go +++ b/beater/youtubebeat.go @@ -44,6 +44,8 @@ func scrapeVideos(startId string, fieldsToSend chan common.MapStr, done chan boo colly.Async(true), colly.MaxDepth(10), ) + videoCollector.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 5}) + videoCollector.AllowURLRevisit = true videoCollector.OnHTML("body", func(e *colly.HTMLElement) { url := e.Request.URL.String() @@ -101,6 +103,8 @@ func (bt *Youtubebeat) Run(b *beat.Beat) error { ticker := time.NewTicker(bt.config.Period) for { select { + case <-done: + return nil case <-bt.done: return nil case <-ticker.C: