Use a configurable start URL
This commit is contained in:
parent
27d49980ed
commit
797dd0eeaf
5 changed files with 8 additions and 3 deletions
|
@ -6,6 +6,7 @@ youtubebeat:
|
||||||
# Defines how often an event is sent to the output
|
# Defines how often an event is sent to the output
|
||||||
parallelism: 5
|
parallelism: 5
|
||||||
max_depth: 10
|
max_depth: 10
|
||||||
|
start_url: "https://www.youtube.com"
|
||||||
|
|
||||||
output.elasticsearch.index: "youtubebeat"
|
output.elasticsearch.index: "youtubebeat"
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,7 @@ func scrapeVideos(b *beat.Beat, bt *Youtubebeat, done chan bool) {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
videoCollector.Visit(BaseUrl)
|
videoCollector.Visit(bt.config.StartUrl)
|
||||||
videoCollector.Wait()
|
videoCollector.Wait()
|
||||||
logp.Info("Done parsing all videos")
|
logp.Info("Done parsing all videos")
|
||||||
done <- true
|
done <- true
|
||||||
|
|
|
@ -4,11 +4,13 @@
|
||||||
package config
|
package config
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
Parallelism int `config:"parallelism"`
|
Parallelism int `config:"parallelism"`
|
||||||
MaxDepth int `config:"max_depth"`
|
MaxDepth int `config:"max_depth"`
|
||||||
|
StartUrl string `config:"start_url"`
|
||||||
}
|
}
|
||||||
|
|
||||||
var DefaultConfig = Config{
|
var DefaultConfig = Config{
|
||||||
Parallelism: 5,
|
Parallelism: 5,
|
||||||
MaxDepth: 10,
|
MaxDepth: 10,
|
||||||
|
StartUrl: "https://www.youtube.com",
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ youtubebeat:
|
||||||
# Defines how often an event is sent to the output
|
# Defines how often an event is sent to the output
|
||||||
parallelism: 5
|
parallelism: 5
|
||||||
max_depth: 10
|
max_depth: 10
|
||||||
|
start_url: "https://www.youtube.com"
|
||||||
|
|
||||||
output.elasticsearch.index: "youtubebeat"
|
output.elasticsearch.index: "youtubebeat"
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@ youtubebeat:
|
||||||
# Defines how often an event is sent to the output
|
# Defines how often an event is sent to the output
|
||||||
parallelism: 5
|
parallelism: 5
|
||||||
max_depth: 10
|
max_depth: 10
|
||||||
|
start_url: "https://www.youtube.com"
|
||||||
|
|
||||||
output.elasticsearch.index: "youtubebeat"
|
output.elasticsearch.index: "youtubebeat"
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue