Browse Source

Use a start url

master
Gabriel Augendre 3 years ago
parent
commit
797dd0eeaf
  1. 1
      _meta/beat.yml
  2. 2
      beater/youtubebeat.go
  3. 6
      config/config.go
  4. 1
      youtubebeat.reference.yml
  5. 1
      youtubebeat.yml

1
_meta/beat.yml

@ -6,6 +6,7 @@ youtubebeat:
# Scraper settings: parallelism, maximum depth, and the URL the scraper visits first
parallelism: 5
max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat"

2
beater/youtubebeat.go

@ -92,7 +92,7 @@ func scrapeVideos(b *beat.Beat, bt *Youtubebeat, done chan bool) {
}
})
videoCollector.Visit(BaseUrl)
videoCollector.Visit(bt.config.StartUrl)
videoCollector.Wait()
logp.Info("Done parsing all videos")
done <- true

6
config/config.go

@ -4,11 +4,13 @@
package config
type Config struct {
Parallelism int `config:"parallelism"`
MaxDepth int `config:"max_depth"`
Parallelism int `config:"parallelism"`
MaxDepth int `config:"max_depth"`
StartUrl string `config:"start_url"`
}
var DefaultConfig = Config{
Parallelism: 5,
MaxDepth: 10,
StartUrl: "https://www.youtube.com",
}

1
youtubebeat.reference.yml

@ -6,6 +6,7 @@ youtubebeat:
# Scraper settings: parallelism, maximum depth, and the URL the scraper visits first
parallelism: 5
max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat"

1
youtubebeat.yml

@ -6,6 +6,7 @@ youtubebeat:
# Scraper settings: parallelism, maximum depth, and the URL the scraper visits first
parallelism: 5
max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat"

Loading…
Cancel
Save