Use a start url
This commit is contained in:
parent
27d49980ed
commit
797dd0eeaf
5 changed files with 8 additions and 3 deletions
|
@ -6,6 +6,7 @@ youtubebeat:
|
|||
# Scraper settings: parallelism, maximum depth, and the URL to start from
|
||||
parallelism: 5
|
||||
max_depth: 10
|
||||
start_url: "https://www.youtube.com"
|
||||
|
||||
output.elasticsearch.index: "youtubebeat"
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ func scrapeVideos(b *beat.Beat, bt *Youtubebeat, done chan bool) {
|
|||
}
|
||||
})
|
||||
|
||||
videoCollector.Visit(BaseUrl)
|
||||
videoCollector.Visit(bt.config.StartUrl)
|
||||
videoCollector.Wait()
|
||||
logp.Info("Done parsing all videos")
|
||||
done <- true
|
||||
|
|
|
@ -6,9 +6,11 @@ package config
|
|||
// Config holds the youtubebeat settings. Each field carries a `config` tag
// naming the configuration key it maps to.
type Config struct {
|
||||
// Parallelism maps to the "parallelism" key.
// NOTE(review): presumably the number of concurrent scraping requests — confirm against the collector setup.
Parallelism int `config:"parallelism"`
|
||||
// MaxDepth maps to the "max_depth" key.
// NOTE(review): presumably the maximum link depth the scraper follows — confirm against the collector setup.
MaxDepth int `config:"max_depth"`
|
||||
// StartUrl maps to the "start_url" key; scrapeVideos passes it to videoCollector.Visit.
StartUrl string `config:"start_url"`
|
||||
}
|
||||
|
||||
// DefaultConfig supplies a default value for every Config field. The values
// match those shown in the sample youtubebeat YAML configuration.
// NOTE(review): presumably used as the baseline before user configuration is applied — confirm against the beat constructor.
var DefaultConfig = Config{
|
||||
Parallelism: 5,
|
||||
MaxDepth: 10,
|
||||
// Default page handed to the collector when no start_url is configured elsewhere.
StartUrl: "https://www.youtube.com",
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ youtubebeat:
|
|||
# Scraper settings: parallelism, maximum depth, and the URL to start from
|
||||
parallelism: 5
|
||||
max_depth: 10
|
||||
start_url: "https://www.youtube.com"
|
||||
|
||||
output.elasticsearch.index: "youtubebeat"
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ youtubebeat:
|
|||
# Scraper settings: parallelism, maximum depth, and the URL to start from
|
||||
parallelism: 5
|
||||
max_depth: 10
|
||||
start_url: "https://www.youtube.com"
|
||||
|
||||
output.elasticsearch.index: "youtubebeat"
|
||||
|
||||
|
|
Loading…
Reference in a new issue