From ad0a513c1133a74394c480bffcb421d08e8b4ffd Mon Sep 17 00:00:00 2001 From: Tarun Koyalwar Date: Wed, 13 Mar 2024 02:59:00 +0530 Subject: [PATCH] parse absolute invalid urls --- url/parsers.go | 39 ++++++++++++++++++++++++++++++++++++--- url/url_test.go | 6 ++++-- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/url/parsers.go b/url/parsers.go index 711fbd9..05e1a68 100644 --- a/url/parsers.go +++ b/url/parsers.go @@ -166,14 +166,47 @@ func absoluteURLParser(u *URL) (*URL, error) { } copy(u.URL, urlparse) } else { + + // try parsing with fallback if it is an invalid URL escape error + // split and read until first / and then parse the url + parsed, err := url.Parse(HTTPS + SchemeSeparator + u.Original) + if err != nil { + if !strings.Contains(err.Error(), "invalid URL escape") { + // if it is not an invalid URL escape error then it is most likely a relative path + u.IsRelative = true + return u, nil + } + } else { + // successfully parsed absolute url + parsed.Scheme = "" // remove newly added scheme + copy(u.URL, parsed) + return u, nil + } + + // this is most likely a url of type scanme.sh/%2s/%invalid // if no prefix try to parse it with https // if failed we consider it as a relative path and not a full url - urlparse, parseErr := url.Parse(HTTPS + SchemeSeparator + u.Original) + pathIndex := strings.IndexRune(u.Original, '/') + if pathIndex == -1 { + // no path found, most likely a relative path or localhost path + urlparse, parseErr := url.Parse(HTTPS + SchemeSeparator + u.Original) + if parseErr != nil { + // most likely a relative url + u.IsRelative = true + } else { + urlparse.Scheme = "" // remove newly added scheme + copy(u.URL, urlparse) + } + return u, nil + } + // split until first / and then parse the url to handle invalid urls like + // scanme.sh/xyz/%u2s/%invalid + urlparse, parseErr := url.Parse(HTTPS + SchemeSeparator + u.Original[:pathIndex]) if parseErr != nil { - // most likely a relativeurl + // most likely a relative url
u.IsRelative = true - // TODO: investigate if prefix / should be added } else { + urlparse.Path = u.Original[pathIndex:] urlparse.Scheme = "" // remove newly added scheme copy(u.URL, urlparse) } diff --git a/url/url_test.go b/url/url_test.go index 99bbe63..a60a053 100644 --- a/url/url_test.go +++ b/url/url_test.go @@ -101,11 +101,13 @@ func TestInvalidURLs(t *testing.T) { "https://127.0.0.1:52272/%invalid", "http.s3.amazonaws.com", "https.s3.amazonaws.com", + "scanme.sh/xyz/invalid", + "scanme.sh/xyz/%u2s/%invalid", } for _, v := range testcases { - urlx, err := ParseURL(v, true) + urlx, err := ParseAbsoluteURL(v, true) require.Nilf(t, err, "got error for url %v", v) - require.Equal(t, urlx.String(), v) + require.Equal(t, v, urlx.String()) } }