Skip to content

Commit

Permalink
Fix: Relax URL detection in link check tool (#357)
Browse files Browse the repository at this point in the history
  • Loading branch information
axllent committed Sep 5, 2024
1 parent dc1a16e commit 2a0853d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- run: go test -p 1 ./internal/storage ./server ./server/pop3 ./internal/tools ./internal/html2text -v
- run: go test -p 1 ./internal/storage ./server ./server/pop3 ./internal/tools ./internal/html2text ./internal/linkcheck -v
- run: go test -p 1 ./internal/storage ./internal/html2text -bench=.

# build the assets
Expand Down
71 changes: 71 additions & 0 deletions internal/linkcheck/linkcheck_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package linkcheck

import (
"reflect"
"testing"

"github.com/axllent/mailpit/internal/storage"
)

var (
testHTML = `
<html>
<head>
<link rel=stylesheet href="http://remote-host/style.css"></link>
<script async src="https://www.googletagmanager.com/gtag/js?id=ignored"></script>
</head>
<body>
<div>
<p><a href="http://example.com">HTTP link</a></p>
<p><a href="https://example.com">HTTPS link</a></p>
<p><a href="HTTPS://EXAMPLE.COM">HTTPS link</a></p>
<p><a href="http://localhost">Localhost link</a> (ignored)</p>
<p><a href="https://localhost">Localhost link</a> (ignored)</p>
<p><a href='https://127.0.0.1'>Single quotes link</a> (ignored)</p>
<p><img src=https://example.com/image.jpg></p>
<p href="http://invalid-link.com">This should be ignored</p>
<p><a href="http://link with spaces">Link with spaces</a></p>
<p><a href="http://example.com/?blaah=yes&amp;test=true">URL-encoded characters</a></p>
</div>
</body>
</html>`

expectedHTMLLinks = []string{
"http://example.com", "https://example.com", "HTTPS://EXAMPLE.COM", "http://localhost", "https://localhost", "https://127.0.0.1", "http://link with spaces", "http://example.com/?blaah=yes&test=true",
"http://remote-host/style.css", // css
"https://example.com/image.jpg", // images
}

testTextLinks = `This is a line with http://example.com https://example.com
HTTPS://EXAMPLE.COM
[http://localhost]
www.google.com < ignored
|||http://example.com/?some=query-string|||
`

expectedTextLinks = []string{
"http://example.com", "https://example.com", "HTTPS://EXAMPLE.COM", "http://localhost", "http://example.com/?some=query-string",
}
)

func TestLinkDetection(t *testing.T) {

t.Log("Testing HTML link detection")

m := storage.Message{}

m.Text = testTextLinks
m.HTML = testHTML

textLinks := extractTextLinks(&m)

if !reflect.DeepEqual(textLinks, expectedTextLinks) {
t.Fatalf("Failed to detect text links correctly")
}

htmlLinks := extractHTMLLinks(&m)

if !reflect.DeepEqual(htmlLinks, expectedHTMLLinks) {
t.Fatalf("Failed to detect HTML links correctly")
}
}
2 changes: 1 addition & 1 deletion internal/linkcheck/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/axllent/mailpit/internal/tools"
)

var linkRe = regexp.MustCompile(`(?m)\b(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:'!\/~+#-]*[\w@?^=%&\/~+#-])`)
var linkRe = regexp.MustCompile(`(?im)\b(http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+)`)

// RunTests will run all tests on an HTML string
func RunTests(msg *storage.Message, followRedirects bool) (Response, error) {
Expand Down

0 comments on commit 2a0853d

Please sign in to comment.