Limit parallelism
This commit is contained in:
parent
287424055e
commit
0ea086dc43
1 changed file with 4 additions and 0 deletions
|
@@ -44,6 +44,8 @@ func scrapeVideos(startId string, fieldsToSend chan common.MapStr, done chan boo
|
||||||
colly.Async(true),
|
colly.Async(true),
|
||||||
colly.MaxDepth(10),
|
colly.MaxDepth(10),
|
||||||
)
|
)
|
||||||
|
videoCollector.Limit(&colly.LimitRule{DomainGlob: "*", Parallelism: 5})
|
||||||
|
videoCollector.AllowURLRevisit = true
|
||||||
|
|
||||||
videoCollector.OnHTML("body", func(e *colly.HTMLElement) {
|
videoCollector.OnHTML("body", func(e *colly.HTMLElement) {
|
||||||
url := e.Request.URL.String()
|
url := e.Request.URL.String()
|
||||||
|
@@ -101,6 +103,8 @@ func (bt *Youtubebeat) Run(b *beat.Beat) error {
|
||||||
ticker := time.NewTicker(bt.config.Period)
|
ticker := time.NewTicker(bt.config.Period)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
case <-done:
|
||||||
|
return nil
|
||||||
case <-bt.done:
|
case <-bt.done:
|
||||||
return nil
|
return nil
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
|
|
Loading…
Reference in a new issue