// Copyright 2018 Adam Tauber // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package colly_test import ( "github.com/antchfx/htmlquery" "github.com/gocolly/colly" "reflect" "strings" "testing" ) // Borrowed from http://infohost.nmt.edu/tcc/help/pubs/xhtml/example.html // Added attributes to the `
  • ` tags for testing purposes const htmlPage = ` Your page title here

    Your major heading here

    This is a regular text paragraph.

    ` func TestAttr(t *testing.T) { resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)} doc, _ := htmlquery.Parse(strings.NewReader(htmlPage)) xmlNode := htmlquery.FindOne(doc, "/html") xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode) if xmlElem.Attr("xmlns") != "http://www.w3.org/1999/xhtml" { t.Fatalf("failed xmlns attribute test: %v != http://www.w3.org/1999/xhtml", xmlElem.Attr("xmlns")) } if xmlElem.Attr("xml:lang") != "en" { t.Fatalf("failed lang attribute test: %v != en", xmlElem.Attr("lang")) } } func TestChildText(t *testing.T) { resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)} doc, _ := htmlquery.Parse(strings.NewReader(htmlPage)) xmlNode := htmlquery.FindOne(doc, "/html") xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode) if text := xmlElem.ChildText("//p"); text != "This is a regular text paragraph." { t.Fatalf("failed child tag test: %v != This is a regular text paragraph.", text) } if text := xmlElem.ChildText("//dl"); text != "" { t.Fatalf("failed child tag test: %v != \"\"", text) } } func TestChildTexts(t *testing.T) { resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)} doc, _ := htmlquery.Parse(strings.NewReader(htmlPage)) xmlNode := htmlquery.FindOne(doc, "/html") xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode) expected := []string{"First bullet of a bullet list.", "This is the second bullet."} if texts := xmlElem.ChildTexts("//li"); reflect.DeepEqual(texts, expected) == false { t.Fatalf("failed child tags test: %v != %v", texts, expected) } if texts := xmlElem.ChildTexts("//dl"); reflect.DeepEqual(texts, make([]string, 0)) == false { t.Fatalf("failed child tag test: %v != \"\"", texts) } } func TestChildAttr(t *testing.T) { resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)} doc, _ := htmlquery.Parse(strings.NewReader(htmlPage)) xmlNode := htmlquery.FindOne(doc, "/html") xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode) if attr := xmlElem.ChildAttr("/body/ul/li[1]", "class"); attr != "list-item-1" { t.Fatalf("failed child attribute test: %v != list-item-1", attr) } if attr := xmlElem.ChildAttr("/body/ul/li[2]", "class"); attr != "list-item-2" { t.Fatalf("failed child attribute test: %v != list-item-2", attr) } } func TestChildAttrs(t *testing.T) { resp := &colly.Response{StatusCode: 200, Body: []byte(htmlPage)} doc, _ := htmlquery.Parse(strings.NewReader(htmlPage)) xmlNode := htmlquery.FindOne(doc, "/html") xmlElem := colly.NewXMLElementFromHTMLNode(resp, xmlNode) attrs := xmlElem.ChildAttrs("/body/ul/li", "class") if len(attrs) != 2 { t.Fatalf("failed child attributes length test: %d != 2", len(attrs)) } for _, attr := range attrs { if !(attr == "list-item-1" || attr == "list-item-2") { t.Fatalf("failed child attributes values test: %s != list-item-(1 or 2)", attr) } } }