// Utility functions for working with text package sanitize import ( "testing" ) var Format = "\ninput: %q\nexpected: %q\noutput: %q" type Test struct { input string expected string } // NB the treatment of accents - they are removed and replaced with ascii transliterations var urls = []Test{ {"ReAd ME.md", `read-me.md`}, {"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`}, {"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`}, {"/../../4-icon.jpg", `/4-icon.jpg`}, {"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`}, {"../4 icon.*", `/4-icon.`}, {"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`}, {"../*", `/`}, } func TestPath(t *testing.T) { for _, test := range urls { output := Path(test.input) if output != test.expected { t.Fatalf(Format, test.input, test.expected, output) } } } func BenchmarkPath(b *testing.B) { for i := 0; i < b.N; i++ { for _, test := range urls { output := Path(test.input) if output != test.expected { b.Fatalf(Format, test.input, test.expected, output) } } } } var fileNames = []Test{ {"ReAd ME.md", `read-me.md`}, {"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`}, {"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`}, {"/../../4-icon.jpg", `4-icon.jpg`}, {"/Images/../4-icon.jpg", `4-icon.jpg`}, {"../4 icon.jpg", `4-icon.jpg`}, {"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`}, {"Überfluß an Döner macht schöner.JPEG", `ueberfluss-an-doener-macht-schoener.jpeg`}, {"Ä-_-Ü_:()_Ö-_-ä-_-ü-_-ö-_ß.webm", `ae-ue-oe-ae-ue-oe-ss.webm`}, } func TestName(t *testing.T) { for _, test := range fileNames { output := Name(test.input) if output != test.expected { t.Fatalf(Format, test.input, test.expected, output) } } } func BenchmarkName(b *testing.B) { for i := 0; i < b.N; i++ { for _, test := range fileNames { output := Name(test.input) if output != test.expected { b.Fatalf(Format, test.input, test.expected, output) } } } } var 
baseFileNames = []Test{ {"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`}, {"/../../4-iCoN.jpg", `-4-iCoN-jpg`}, {"And/Or", `And-Or`}, {"Sonic.EXE", `Sonic-EXE`}, {"012: #Fetch for Defaults", `012-Fetch-for-Defaults`}, } func TestBaseName(t *testing.T) { for _, test := range baseFileNames { output := BaseName(test.input) if output != test.expected { t.Fatalf(Format, test.input, test.expected, output) } } } // Test with some malformed or malicious html // NB because we remove all tokens after a < until the next > // and do not attempt to parse, we should be safe from invalid html, // but will sometimes completely empty the string if we have invalid input // Note we sometimes use " in order to keep things on one line and use the ` character var htmlTests = []Test{ {` `, " "}, {`&#x000D;`, `&#x000D;`}, {`

`, ``}, {"

Bold Not bold

\nAlso not bold.", "Bold Not bold\nAlso not bold."}, {`FOO ZOO`, "FOO\rZOO"}, {`">`, `alert("XSS")"`}, {``, ``}, {``, ``}, {`> & test <`, `> & test <`}, {``, ``}, {`“hello” it’s for ‘real’`, `"hello" it's for 'real'`}, {``, ``}, {`'';!--"=&{()}`, `'';!--"=&{()}`}, {"LINE 1
\nLINE 2", "LINE 1\nLINE 2"}, // Examples from https://githubengineering.com/githubs-post-csp-journey/ {` ... `, `...`}, {` -->
`, `Click -- `},
}

// TestHTML checks that HTML returns the expected string for every entry in
// htmlTests. Mismatches are reported with Errorf rather than Fatalf so that
// one failing case does not hide the remaining ones.
func TestHTML(t *testing.T) {
	for _, tc := range htmlTests {
		if got := HTML(tc.input); got != tc.expected {
			t.Errorf(Format, tc.input, tc.expected, got)
		}
	}
}

var htmlTestsAllowing = []Test{ {``, ``}, {`hello world`, `hello world`}, {`hello



rulers`, `hello



rulers`}, {`

Span

`, `

Span

`}, {`
Div

test

invalid

test

`, `
Div

test

invalid

test

`}, {`

Some text

`, `

Some text

`}, {`hello world`, `hello world`}, {`text

inside

too`, `text

inside

too`}, {`&#x000D;`, `&#x000D;`}, {`

`, `

`}, {"

Bold Not bold

\nAlso not bold.", "

Bold Not bold

\nAlso not bold."}, {"`FOO ZOO", "`FOO ZOO"}, {`">`, `">`}, {``, ``}, {`>> >> `}, {`> & test <`, `> & test <`}, {``, ``}, {``, ``}, {`