// youtubebeat/vendor/github.com/temoto/robotstxt/google_test.go
package robotstxt

import (
	"fmt" // used by the ExampleFromString sketch at the bottom of this file
	"strings"
	"testing"
	"time"
)
const (
	// Raw string fixtures are intentionally left unindented: leading
	// whitespace inside a raw string literal is part of the robots.txt body.
	robotsCaseMatching = `user-agent: a
Disallow: /
user-agent: b
Disallow: /*
user-agent: c
Disallow: /fish
user-agent: d
Disallow: /fish*
user-agent: e
Disallow: /fish/
user-agent: f
Disallow: fish/
user-agent: g
Disallow: /*.php
user-agent: h
Disallow: /*.php$
user-agent: i
Disallow: /fish*.php`

	robotsCasePrecedence = `user-agent: a
Disallow: /
Allow: /p
user-agent: b
Disallow: /folder
Allow: /folder/
user-agent: c
Disallow: /*.htm
Allow: /page
user-agent: d
Disallow: /
Allow: /$
user-agent: e
Disallow: /
Allow: /$`
)
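
// TestGroupOrder verifies that each agent is matched against the most
// specific user-agent group: Googlebot-News picks the googlebot-news group,
// plain Googlebot picks the googlebot group, and unknown agents fall back to
// the * group (the /1, /2, /3 paths appear to track which of the three
// groups in the fixture should apply).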
func TestGroupOrder(t *testing.T) {
	const robotsCaseOrder = `user-agent: googlebot-news
Disallow: /
user-agent: *
Disallow: /
user-agent: googlebot
Disallow: /`
	agents := []string{"Googlebot-News (Googlebot)", "Googlebot", "Googlebot-Image (Googlebot)", "Otherbot (web)", "Otherbot (News)"}
	paths := []string{"/1", "/3", "/3", "/2", "/2"}
	if r, e := FromString(robotsCaseOrder); e != nil {
		t.Fatal(e)
	} else {
		for i, a := range agents {
			ExpectDisallow(t, r, paths[i], a)
		}
	}
}
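
// TestSitemaps verifies that Sitemap directives are collected into
// r.Sitemaps no matter where they appear, including inside user-agent
// groups.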
func TestSitemaps(t *testing.T) {
	const robotsCaseSitemaps = `sitemap: http://test.com/a
user-agent: a
disallow: /c
sitemap: http://test.com/b
user-agent: b
disallow: /d
user-agent: e
sitemap: http://test.com/c
user-agent: f
disallow: /g`
	if r, e := FromString(robotsCaseSitemaps); e != nil {
		t.Fatal(e)
	} else {
		if len(r.Sitemaps) != 3 {
			for i, s := range r.Sitemaps {
				t.Logf("Sitemap %d: %s", i, s)
			}
			t.Fatalf("Expected 3 sitemaps, got %d:\n%v", len(r.Sitemaps), r.Sitemaps)
		}
	}
}
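
// TestCrawlDelays verifies crawl-delay parsing: both the "crawldelay" and
// "crawl-delay" spellings are accepted, and fractional values such as 3.5
// are converted to a time.Duration on the matching group.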
func TestCrawlDelays(t *testing.T) {
	const robotsCaseDelays = `useragent: a
# some comment : with colon
disallow: /c
user-agent: b
crawldelay: 3.5
disallow: /d
user-agent: e
sitemap: http://test.com/c
user-agent: f
disallow: /g
crawl-delay: 5`
	if r, e := FromString(robotsCaseDelays); e != nil {
		t.Fatal(e)
	} else {
		if len(r.Sitemaps) != 1 {
			t.Fatalf("Expected 1 sitemap, got %d", len(r.Sitemaps))
		}
		if g := r.groups["b"]; g.CrawlDelay != time.Duration(3.5*float64(time.Second)) {
			t.Fatalf("Expected crawl delay of 3.5s for agent b, got %v", g.CrawlDelay)
		}
		if g := r.groups["f"]; g.CrawlDelay != 5*time.Second {
			t.Fatalf("Expected crawl delay of 5s for agent f, got %v", g.CrawlDelay)
		}
	}
}
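
// TestWildcards verifies that a "*" in a Disallow pattern compiles to the
// regular expression ".*" while a trailing "$" is kept as an end anchor.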
func TestWildcards(t *testing.T) {
	const robotsCaseWildcards = `user-agent: *
Disallow: /path*l$`
	if r, e := FromString(robotsCaseWildcards); e != nil {
		t.Fatal(e)
	} else {
		if s := r.groups["*"].rules[0].pattern.String(); s != "/path.*l$" {
			t.Fatalf("Expected pattern to be /path.*l$, got %s", s)
		}
	}
}
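
// TestURLMatching runs Google's path-matching examples against
// robotsCaseMatching. Paths prefixed with "^" are expected to be allowed for
// that agent; all other paths are expected to be disallowed.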
func TestURLMatching(t *testing.T) {
	var ok bool
	cases := map[string][]string{
		"a": []string{
			"/",
			"/test",
			"",
			"/path/to/whatever",
		},
		"b": []string{
			"/",
			"/test",
			"",
			"/path/to/whatever",
		},
		"c": []string{
			"/fish",
			"/fish.html",
			"/fish/salmon.html",
			"/fishheads",
			"/fishheads/yummy.html",
			"/fish.php?id=anything",
			"^/Fish.asp",
			"^/catfish",
			"^/?id=fish",
		},
		"d": []string{
			"/fish",
			"/fish.html",
			"/fish/salmon.html",
			"/fishheads",
			"/fishheads/yummy.html",
			"/fish.php?id=anything",
			"^/Fish.asp",
			"^/catfish",
			"^/?id=fish",
		},
		"e": []string{
			"/fish/",
			"/fish/?id=anything",
			"/fish/salmon.htm",
			"^/fish",
			"^/fish.html",
			"^/Fish/Salmon.asp",
		},
		"f": []string{
			"/fish/",
			"/fish/?id=anything",
			"/fish/salmon.htm",
			"^/fish",
			"^/fish.html",
			"^/Fish/Salmon.asp",
		},
		"g": []string{
			"/filename.php",
			"/folder/filename.php",
			"/folder/filename.php?parameters",
			"/folder/any.php.file.html",
			"/filename.php/",
			"^/",
			"^/windows.PHP",
		},
		"h": []string{
			"/filename.php",
			"/folder/filename.php",
			"^/filename.php?parameters",
			"^/filename.php/",
			"^/filename.php5",
			"^/windows.PHP",
		},
		"i": []string{
			"/fish.php",
			"/fishheads/catfish.php?parameters",
			"^/Fish.PHP",
		},
	}
	if r, e := FromString(robotsCaseMatching); e != nil {
		t.Fatal(e)
	} else {
		for k, ar := range cases {
			for _, p := range ar {
				ok = strings.HasPrefix(p, "^")
				if ok {
					p = p[1:]
				}
				if allow := r.TestAgent(p, k); allow != ok {
					t.Errorf("Agent %s, path %s, expected %v, got %v", k, p, ok, allow)
				}
			}
		}
	}
}
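
// TestURLPrecedence verifies Allow/Disallow precedence using
// robotsCasePrecedence. Here the "^" prefix marks paths expected to be
// disallowed; unprefixed paths should be allowed by a matching Allow rule.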
func TestURLPrecedence(t *testing.T) {
	var ok bool
	cases := map[string][]string{
		"a": []string{
			"/page",
			"^/test",
		},
		"b": []string{
			"/folder/page",
			"^/folder1",
			"^/folder.htm",
		},
		"c": []string{
			"^/page.htm",
			"/page1.asp",
		},
		"d": []string{
			"/",
			"^/index",
		},
		"e": []string{
			"^/page.htm",
			"/",
		},
	}
	if r, e := FromString(robotsCasePrecedence); e != nil {
		t.Fatal(e)
	} else {
		for k, ar := range cases {
			for _, p := range ar {
				ok = !strings.HasPrefix(p, "^")
				if !ok {
					p = p[1:]
				}
				if allow := r.TestAgent(p, k); allow != ok {
					t.Errorf("Agent %s, path %s, expected %v, got %v", k, p, ok, allow)
				}
			}
		}
	}
}
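
// getIndexInSlice returns the index of g in ar, or -1 if g is not present.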
func getIndexInSlice(ar []*Group, g *Group) int {
	for i, v := range ar {
		if v == g {
			return i
		}
	}
	return -1
}
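
// ExampleFromString is a minimal usage sketch built only on the API the
// tests above already exercise (FromString and TestAgent); the robots.txt
// content here is illustrative, not one of the fixtures.
func ExampleFromString() {
	r, err := FromString("user-agent: *\nDisallow: /private")
	if err != nil {
		panic(err)
	}
	// A disallowed prefix blocks every path beneath it...
	fmt.Println(r.TestAgent("/private/page", "SomeBot"))
	// ...while unrelated paths remain allowed.
	fmt.Println(r.TestAgent("/public/page", "SomeBot"))
	// Output:
	// false
	// true
}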