Use a configurable start URL

This commit is contained in:
Gabriel Augendre 2018-11-20 14:48:22 +01:00
parent 27d49980ed
commit 797dd0eeaf
5 changed files with 8 additions and 3 deletions

View file

@ -6,6 +6,7 @@ youtubebeat:
# Defines how often an event is sent to the output # Defines how often an event is sent to the output
parallelism: 5 parallelism: 5
max_depth: 10 max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat" output.elasticsearch.index: "youtubebeat"

View file

@ -92,7 +92,7 @@ func scrapeVideos(b *beat.Beat, bt *Youtubebeat, done chan bool) {
} }
}) })
videoCollector.Visit(BaseUrl) videoCollector.Visit(bt.config.StartUrl)
videoCollector.Wait() videoCollector.Wait()
logp.Info("Done parsing all videos") logp.Info("Done parsing all videos")
done <- true done <- true

View file

@ -6,9 +6,11 @@ package config
type Config struct { type Config struct {
Parallelism int `config:"parallelism"` Parallelism int `config:"parallelism"`
MaxDepth int `config:"max_depth"` MaxDepth int `config:"max_depth"`
StartUrl string `config:"start_url"`
} }
var DefaultConfig = Config{ var DefaultConfig = Config{
Parallelism: 5, Parallelism: 5,
MaxDepth: 10, MaxDepth: 10,
StartUrl: "https://www.youtube.com",
} }

View file

@ -6,6 +6,7 @@ youtubebeat:
# Defines how often an event is sent to the output # Defines how often an event is sent to the output
parallelism: 5 parallelism: 5
max_depth: 10 max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat" output.elasticsearch.index: "youtubebeat"

View file

@ -6,6 +6,7 @@ youtubebeat:
# Defines how often an event is sent to the output # Defines how often an event is sent to the output
parallelism: 5 parallelism: 5
max_depth: 10 max_depth: 10
start_url: "https://www.youtube.com"
output.elasticsearch.index: "youtubebeat" output.elasticsearch.index: "youtubebeat"