From 2a0853d21ac0a293310b346cddbdf8c30609b0ac Mon Sep 17 00:00:00 2001 From: Ralph Slooten Date: Thu, 5 Sep 2024 17:15:53 +1200 Subject: [PATCH] Fix: Relax URL detection in link check tool (#357) --- .github/workflows/tests.yml | 2 +- internal/linkcheck/linkcheck_test.go | 71 ++++++++++++++++++++++++++++ internal/linkcheck/main.go | 2 +- 3 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 internal/linkcheck/linkcheck_test.go diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f19c297c3..d81a34fc9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ jobs: key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} restore-keys: | ${{ runner.os }}-go- - - run: go test -p 1 ./internal/storage ./server ./server/pop3 ./internal/tools ./internal/html2text -v + - run: go test -p 1 ./internal/storage ./server ./server/pop3 ./internal/tools ./internal/html2text ./internal/linkcheck -v - run: go test -p 1 ./internal/storage ./internal/html2text -bench=. # build the assets diff --git a/internal/linkcheck/linkcheck_test.go b/internal/linkcheck/linkcheck_test.go new file mode 100644 index 000000000..bf27a8fbb --- /dev/null +++ b/internal/linkcheck/linkcheck_test.go @@ -0,0 +1,71 @@ +package linkcheck + +import ( + "reflect" + "testing" + + "github.com/axllent/mailpit/internal/storage" +) + +var ( + testHTML = ` + + + + + + +
+

HTTP link

+

HTTPS link

+

HTTPS link

+

Localhost link (ignored)

+

Localhost link (ignored)

+

Single quotes link (ignored)

+

+

This should be ignored

+

Link with spaces

+

URL-encoded characters

+
+ + ` + + expectedHTMLLinks = []string{ + "http://example.com", "https://example.com", "HTTPS://EXAMPLE.COM", "http://localhost", "https://localhost", "https://127.0.0.1", "http://link with spaces", "http://example.com/?blaah=yes&test=true", + "http://remote-host/style.css", // css + "https://example.com/image.jpg", // images + } + + testTextLinks = `This is a line with http://example.com https://example.com + HTTPS://EXAMPLE.COM + [http://localhost] + www.google.com < ignored + |||http://example.com/?some=query-string||| + ` + + expectedTextLinks = []string{ + "http://example.com", "https://example.com", "HTTPS://EXAMPLE.COM", "http://localhost", "http://example.com/?some=query-string", + } +) + +func TestLinkDetection(t *testing.T) { + + t.Log("Testing HTML link detection") + + m := storage.Message{} + + m.Text = testTextLinks + m.HTML = testHTML + + textLinks := extractTextLinks(&m) + + if !reflect.DeepEqual(textLinks, expectedTextLinks) { + t.Fatalf("Failed to detect text links correctly") + } + + htmlLinks := extractHTMLLinks(&m) + + if !reflect.DeepEqual(htmlLinks, expectedHTMLLinks) { + t.Fatalf("Failed to detect HTML links correctly") + } +} diff --git a/internal/linkcheck/main.go b/internal/linkcheck/main.go index 8a1f2ab25..eaba8baba 100644 --- a/internal/linkcheck/main.go +++ b/internal/linkcheck/main.go @@ -10,7 +10,7 @@ import ( "github.com/axllent/mailpit/internal/tools" ) -var linkRe = regexp.MustCompile(`(?m)\b(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:'!\/~+#-]*[\w@?^=%&\/~+#-])`) +var linkRe = regexp.MustCompile(`(?im)\b(http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+)`) // RunTests will run all tests on an HTML string func RunTests(msg *storage.Message, followRedirects bool) (Response, error) {