Use a start url

This commit is contained in:
Gabriel Augendre 2018-11-20 14:48:22 +01:00
parent 27d49980ed
commit 797dd0eeaf
5 changed files with 8 additions and 3 deletions

View File

@ -6,6 +6,7 @@ youtubebeat:
# Defines how often an event is sent to the output
parallelism: 5
max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat"

View File

@ -92,7 +92,7 @@ func scrapeVideos(b *beat.Beat, bt *Youtubebeat, done chan bool) {
}
})
videoCollector.Visit(BaseUrl)
videoCollector.Visit(bt.config.StartUrl)
videoCollector.Wait()
logp.Info("Done parsing all videos")
done <- true

View File

@ -6,9 +6,11 @@ package config
// Config groups the youtubebeat settings. Each field is bound to its
// configuration key through the `config` struct tag.
type Config struct {
	// Parallelism is populated from the "parallelism" key.
	// NOTE(review): presumably caps concurrent crawl requests — confirm
	// against the collector setup.
	Parallelism int `config:"parallelism"`

	// MaxDepth is populated from the "max_depth" key.
	// NOTE(review): presumably the maximum link depth followed while
	// crawling — confirm against the collector setup.
	MaxDepth int `config:"max_depth"`

	// StartUrl is populated from the "start_url" key; it is the URL handed
	// to the video collector's first Visit call.
	StartUrl string `config:"start_url"`
}
// DefaultConfig carries the built-in values for every Config field:
// a parallelism of 5, a max depth of 10, and the YouTube home page as the
// start URL.
var DefaultConfig = Config{
	Parallelism: 5,
	MaxDepth:    10,
	StartUrl:    "https://www.youtube.com",
}

View File

@ -6,6 +6,7 @@ youtubebeat:
# Defines how often an event is sent to the output
parallelism: 5
max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat"

View File

@ -6,6 +6,7 @@ youtubebeat:
# Defines how often an event is sent to the output
parallelism: 5
max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat"