From 8ac63584b82d640635e46c1d08860662391edc78 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:29:03 +0900 Subject: [PATCH 01/13] Fix --- checked_http_client.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 checked_http_client.go diff --git a/checked_http_client.go b/checked_http_client.go new file mode 100644 index 0000000..bc506f6 --- /dev/null +++ b/checked_http_client.go @@ -0,0 +1,31 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" +) + +type checkedHttpClient struct { + client httpClient + acceptedStatusCodes statusCodeSet +} + +func newCheckedHttpClient(c httpClient, acceptedStatusCodes statusCodeSet) httpClient { + return &checkedHttpClient{c, acceptedStatusCodes} +} + +func (c *checkedHttpClient) Get(u *url.URL, header http.Header) (httpResponse, error) { + r, err := c.client.Get(u, header) + if err != nil { + return nil, err + } + + code := r.StatusCode() + + if c.acceptedStatusCodes.Contains(code) { + return r, nil + } else { + return nil, fmt.Errorf("%v", code) + } +} From 6784bde5c51f14c5845035a24c0d09c56c1460ae Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:33:12 +0900 Subject: [PATCH 02/13] Refactor --- command.go | 34 ++++++++++++----------- redirect_http_client.go | 53 +++++++++++++++--------------------- redirect_http_client_test.go | 13 +-------- 3 files changed, 41 insertions(+), 59 deletions(-) diff --git a/command.go b/command.go index 0512b6e..bb5f8e6 100644 --- a/command.go +++ b/command.go @@ -44,24 +44,26 @@ func (c *command) runWithError(ss []string) (bool, error) { return true, nil } - client := newRedirectHttpClient( - newThrottledHttpClient( - c.httpClientFactory.Create( - httpClientOptions{ - MaxConnectionsPerHost: args.MaxConnectionsPerHost, - MaxResponseBodySize: args.MaxResponseBodySize, - BufferSize: args.BufferSize, - Proxy: args.Proxy, - SkipTLSVerification: args.SkipTLSVerification, - Timeout: time.Duration(args.Timeout) * time.Second, - Header: args.Header, - }, + client := newCheckedHttpClient( + newRedirectHttpClient( + newThrottledHttpClient( + c.httpClientFactory.Create( + httpClientOptions{ + MaxConnectionsPerHost: args.MaxConnectionsPerHost, + MaxResponseBodySize: args.MaxResponseBodySize, + BufferSize: args.BufferSize, + Proxy: args.Proxy, + SkipTLSVerification: args.SkipTLSVerification, + Timeout: time.Duration(args.Timeout) * time.Second, + Header: args.Header, + }, + ), + args.RateLimit, + args.MaxConnections, + args.MaxConnectionsPerHost, ), - args.RateLimit, - args.MaxConnections, - args.MaxConnectionsPerHost, + args.MaxRedirections, ), - args.MaxRedirections, args.AcceptedStatusCodes, ) diff --git a/redirect_http_client.go b/redirect_http_client.go index 89151f0..dba0bfc 100644 --- a/redirect_http_client.go +++ b/redirect_http_client.go @@ -9,13 +9,12 @@ import ( ) type redirectHttpClient struct { - client httpClient - maxRedirections int - acceptedStatusCodes statusCodeSet + client httpClient + maxRedirections int } -func newRedirectHttpClient(c httpClient, maxRedirections int, acceptedStatusCodes statusCodeSet) httpClient { - return &redirectHttpClient{c, maxRedirections, acceptedStatusCodes} +func newRedirectHttpClient(c httpClient, maxRedirections int) httpClient { + return &redirectHttpClient{c, maxRedirections} } func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, error) { @@ -37,46 +36,38 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, } r, err := c.client.Get(u, header) - if err != nil { - return nil, c.formatError(err, i, u) + if err != nil && i == 0 { + return nil, err + } else if err != nil { + return nil, fmt.Errorf("%w (following redirect %v)", err, u.String()) } code := r.StatusCode() - if c.acceptedStatusCodes.Contains(code) { + if code < 300 || code >= 400 { return r, nil - } else if code >= 300 && code <= 399 { - i++ + } - if i > c.maxRedirections { - return nil, errors.New("too many redirections") - } + i++ - s := r.Header("Location") + if i > c.maxRedirections { + return nil, errors.New("too many redirections") + } - if len(s) == 0 { - return nil, errors.New("location header not set") - } + s := r.Header("Location") - u, err = u.Parse(s) + if len(s) == 0 { + return nil, errors.New("location header not set") + } - if err != nil { - return nil, err - } + u, err = u.Parse(s) - cj.SetCookies(u, parseCookies(r.Header("set-cookie"))) - } else { - return nil, c.formatError(fmt.Errorf("%v", code), i, u) + if err != nil { + return nil, err } - } -} -func (*redirectHttpClient) formatError(err error, redirections int, u *url.URL) error { - if redirections == 0 { - return err + cj.SetCookies(u, parseCookies(r.Header("set-cookie"))) } - - return fmt.Errorf("%w (following redirect %v)", err, u.String()) } func parseCookies(s string) []*http.Cookie { diff --git a/redirect_http_client_test.go b/redirect_http_client_test.go index 2676759..cdf5dfc 100644 --- a/redirect_http_client_test.go +++ b/redirect_http_client_test.go @@ -10,10 +10,8 @@ import ( const testUrl = "http://foo.com" -var acceptedStatusCodes = statusCodeSet{{200, 300}: struct{}{}} - func TestNewRedirectHttpClient(t *testing.T) { - newRedirectHttpClient(newFakeHttpClient(nil), 42, acceptedStatusCodes) + newRedirectHttpClient(newFakeHttpClient(nil), 42) } func TestRedirectHttpClientGet(t *testing.T) { @@ -32,7 +30,6 @@ func TestRedirectHttpClientGet(t *testing.T) { }, ), 42, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, err) @@ -65,7 +62,6 @@ func TestRedirectHttpClientGetWithRedirect(t *testing.T) { }, ), 42, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, err) @@ -100,7 +96,6 @@ func TestRedirectHttpClientGetWithRedirects(t *testing.T) { }, ), maxRedirections, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, err) @@ -139,7 +134,6 @@ func TestRedirectHttpClientGetWithRelativeRedirect(t *testing.T) { }, ), maxRedirections, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, err) @@ -169,7 +163,6 @@ func TestRedirectHttpClientFailWithTooManyRedirects(t *testing.T) { }, ), maxRedirections, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, r) @@ -189,7 +182,6 @@ func TestRedirectHttpClientFailWithUnsetLocationHeader(t *testing.T) { }, ), 42, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, r) @@ -213,7 +205,6 @@ func TestRedirectHttpClientFailWithInvalidLocationURL(t *testing.T) { }, ), 42, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, r) @@ -232,7 +223,6 @@ func TestRedirectHttpClientFailWithInvalidStatusCode(t *testing.T) { }, ), 42, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, r) @@ -263,7 +253,6 @@ func TestRedirectHttpClientFailAfterRedirect(t *testing.T) { }, ), 42, - acceptedStatusCodes, ).Get(u, nil) assert.Nil(t, r) From fb246c39a265cdd2ba14424e8210ac59245530e6 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:38:52 +0900 Subject: [PATCH 03/13] Fix --- checked_http_client_test.go | 44 ++++++++++++++++++++++++++++++++++++ redirect_http_client_test.go | 20 +--------------- 2 files changed, 45 insertions(+), 19 deletions(-) create mode 100644 checked_http_client_test.go diff --git a/checked_http_client_test.go b/checked_http_client_test.go new file mode 100644 index 0000000..e3ba8d1 --- /dev/null +++ b/checked_http_client_test.go @@ -0,0 +1,44 @@ +package main + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCheckedHttpClientFailWithValidStatusCode(t *testing.T) { + u, err := url.Parse(testUrl) + + assert.Nil(t, err) + + r, err := newCheckedHttpClient( + newFakeHttpClient( + func(u *url.URL) (*fakeHttpResponse, error) { + return newFakeHttpResponse(200, testUrl, nil, nil), nil + }, + ), + statusCodeSet{{200, 201}: {}}, + ).Get(u, nil) + + assert.Nil(t, err) + assert.NotNil(t, r) +} + +func TestCheckedHttpClientFailWithInvalidStatusCode(t *testing.T) { + u, err := url.Parse(testUrl) + + assert.Nil(t, err) + + r, err := newCheckedHttpClient( + newFakeHttpClient( + func(u *url.URL) (*fakeHttpResponse, error) { + return newFakeHttpResponse(404, testUrl, nil, nil), nil + }, + ), + statusCodeSet{{200, 201}: {}}, + ).Get(u, nil) + + assert.Nil(t, r) + assert.Equal(t, err.Error(), "404") +} diff --git a/redirect_http_client_test.go b/redirect_http_client_test.go index cdf5dfc..1b637bc 100644 --- a/redirect_http_client_test.go +++ b/redirect_http_client_test.go @@ -211,24 +211,6 @@ func TestRedirectHttpClientFailWithInvalidLocationURL(t *testing.T) { assert.Contains(t, err.Error(), "parse") } -func TestRedirectHttpClientFailWithInvalidStatusCode(t *testing.T) { - u, err := url.Parse(testUrl) - - assert.Nil(t, err) - - r, err := newRedirectHttpClient( - newFakeHttpClient( - func(u *url.URL) (*fakeHttpResponse, error) { - return newFakeHttpResponse(404, testUrl, nil, nil), nil - }, - ), - 42, - ).Get(u, nil) - - assert.Nil(t, r) - assert.Equal(t, err.Error(), "404") -} - func TestRedirectHttpClientFailAfterRedirect(t *testing.T) { u, err := url.Parse(testUrl) @@ -249,7 +231,7 @@ func TestRedirectHttpClientFailAfterRedirect(t *testing.T) { ), nil } - return newFakeHttpResponse(404, "", nil, nil), nil + return nil, errors.New("foo") }, ), 42, From 51c321cd1d365ecc6ebce4a86ef92fc876525d0f Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:41:53 +0900 Subject: [PATCH 04/13] Fix --- redirect_http_client.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/redirect_http_client.go b/redirect_http_client.go index dba0bfc..9bc6034 100644 --- a/redirect_http_client.go +++ b/redirect_http_client.go @@ -42,9 +42,7 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, return nil, fmt.Errorf("%w (following redirect %v)", err, u.String()) } - code := r.StatusCode() - - if code < 300 || code >= 400 { + if c := r.StatusCode(); c < 300 || c >= 400 { return r, nil } From 9e2d9537189976d7814e95033f67a9136c84eb04 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:46:34 +0900 Subject: [PATCH 05/13] Fix --- go.mod | 2 +- go.sum | 1 + redirect_http_client.go | 13 +++---------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 933693a..b909836 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/raviqqe/muffet/v2 -go 1.19 +go 1.22 require ( github.com/andybalholm/brotli v1.1.0 diff --git a/go.sum b/go.sum index 3c921f6..3709a88 100644 --- a/go.sum +++ b/go.sum @@ -42,6 +42,7 @@ github.com/valyala/fasthttp v1.52.0/go.mod h1:hf5C4QnVMkNXMspnsUlfM3WitlgYflyhHY github.com/yhat/scrape v0.0.0-20161128144610-24b7890b0945 h1:6Ju8pZBYFTN9FaV/JvNBiIHcsgEmP4z4laciqjfjY8E= github.com/yhat/scrape v0.0.0-20161128144610-24b7890b0945/go.mod h1:4vRFPPNYllgCacoj+0FoKOjTW68rUhEfqPLiEJaK2w8= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/ratelimit v0.3.0 h1:IdZd9wqvFXnvLvSEBo0KPcGfkoBGNkpTHlrE3Rcjkjw= go.uber.org/ratelimit v0.3.0/go.mod h1:So5LG7CV1zWpY1sHe+DXTJqQvOx+FFPFaAs2SnoyBaI= golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= diff --git a/redirect_http_client.go b/redirect_http_client.go index 9bc6034..19584e0 100644 --- a/redirect_http_client.go +++ b/redirect_http_client.go @@ -23,14 +23,11 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, } cj, err := cookiejar.New(nil) - if err != nil { return nil, err } - i := 0 - - for { + for i := range c.maxRedirections + 1 { for _, c := range cj.Cookies(u) { header.Add("cookie", c.String()) } @@ -46,12 +43,6 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, return r, nil } - i++ - - if i > c.maxRedirections { - return nil, errors.New("too many redirections") - } - s := r.Header("Location") if len(s) == 0 { @@ -66,6 +57,8 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, cj.SetCookies(u, parseCookies(r.Header("set-cookie"))) } + + return nil, errors.New("too many redirections") } func parseCookies(s string) []*http.Cookie { From 0ab58250033e95d94c6a3bbbb117a5d8d3e2a6d8 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:47:41 +0900 Subject: [PATCH 06/13] Fix --- redirect_http_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/redirect_http_client.go b/redirect_http_client.go index 19584e0..2b4e36a 100644 --- a/redirect_http_client.go +++ b/redirect_http_client.go @@ -50,7 +50,6 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, } u, err = u.Parse(s) - if err != nil { return nil, err } From c84b6226ce7065dfbab64c456ace61b5beee77f5 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:52:44 +0900 Subject: [PATCH 07/13] Fix --- redirect_http_client_test.go | 71 ++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/redirect_http_client_test.go b/redirect_http_client_test.go index 1b637bc..487cf66 100644 --- a/redirect_http_client_test.go +++ b/redirect_http_client_test.go @@ -3,6 +3,7 @@ package main import ( "errors" "net/url" + "strconv" "testing" "github.com/stretchr/testify/assert" @@ -36,6 +37,28 @@ func TestRedirectHttpClientGet(t *testing.T) { assert.Equal(t, 200, r.StatusCode()) } +func TestRedirectHttpClientGetWithoutRedirect(t *testing.T) { + u, err := url.Parse(testUrl) + + assert.Nil(t, err) + + r, err := newRedirectHttpClient( + newFakeHttpClient( + func(u *url.URL) (*fakeHttpResponse, error) { + if u.String() != testUrl { + return nil, errors.New("") + } + + return newFakeHtmlResponse(testUrl, ""), nil + }, + ), + 0, + ).Get(u, nil) + + assert.Nil(t, err) + assert.Equal(t, 200, r.StatusCode()) +} + func TestRedirectHttpClientGetWithRedirect(t *testing.T) { u, err := url.Parse(testUrl) @@ -142,32 +165,34 @@ func TestRedirectHttpClientGetWithRelativeRedirect(t *testing.T) { } func TestRedirectHttpClientFailWithTooManyRedirects(t *testing.T) { - const maxRedirections = 42 - - u, err := url.Parse(testUrl) + for _, n := range []int{0, 1, 42} { + t.Run(strconv.Itoa(n), func(t *testing.T) { + u, err := url.Parse(testUrl) - assert.Nil(t, err) + assert.Nil(t, err) - i := 0 - r, err := newRedirectHttpClient( - newFakeHttpClient( - func(u *url.URL) (*fakeHttpResponse, error) { - i++ - - return newFakeHttpResponse( - 300, - testUrl, - nil, - map[string]string{"location": testUrl}, - ), nil - }, - ), - maxRedirections, - ).Get(u, nil) + i := 0 + r, err := newRedirectHttpClient( + newFakeHttpClient( + func(u *url.URL) (*fakeHttpResponse, error) { + i++ - assert.Nil(t, r) - assert.Equal(t, err.Error(), "too many redirections") - assert.Equal(t, maxRedirections+1, i) + return newFakeHttpResponse( + 300, + testUrl, + nil, + map[string]string{"location": testUrl}, + ), nil + }, + ), + n, + ).Get(u, nil) + + assert.Nil(t, r) + assert.Equal(t, err.Error(), "too many redirections") + assert.Equal(t, n+1, i) + }) + } } func TestRedirectHttpClientFailWithUnsetLocationHeader(t *testing.T) { From cb29558e6efb2f3c3ebb9b94557d570f0b91349b Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:53:45 +0900 Subject: [PATCH 08/13] Fix --- checked_http_client.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/checked_http_client.go b/checked_http_client.go index bc506f6..55b4e3c 100644 --- a/checked_http_client.go +++ b/checked_http_client.go @@ -23,9 +23,9 @@ func (c *checkedHttpClient) Get(u *url.URL, header http.Header) (httpResponse, e code := r.StatusCode() - if c.acceptedStatusCodes.Contains(code) { - return r, nil - } else { + if !c.acceptedStatusCodes.Contains(code) { return nil, fmt.Errorf("%v", code) } + + return r, nil } From a408bde4956d253caab9f11874c7347ece60d5cc Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:55:44 +0900 Subject: [PATCH 09/13] Fix --- checked_http_client.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/checked_http_client.go b/checked_http_client.go index 55b4e3c..ef4f86b 100644 --- a/checked_http_client.go +++ b/checked_http_client.go @@ -19,11 +19,7 @@ func (c *checkedHttpClient) Get(u *url.URL, header http.Header) (httpResponse, e r, err := c.client.Get(u, header) if err != nil { return nil, err - } - - code := r.StatusCode() - - if !c.acceptedStatusCodes.Contains(code) { + } else if code := r.StatusCode(); !c.acceptedStatusCodes.Contains(code) { return nil, fmt.Errorf("%v", code) } From 1e42d1fed0e500ee21c97bc436fc5dddbfe70506 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 19:58:08 +0900 Subject: [PATCH 10/13] Fix --- redirect_http_client.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/redirect_http_client.go b/redirect_http_client.go index 2b4e36a..5dbf8a2 100644 --- a/redirect_http_client.go +++ b/redirect_http_client.go @@ -37,9 +37,7 @@ func (c *redirectHttpClient) Get(u *url.URL, header http.Header) (httpResponse, return nil, err } else if err != nil { return nil, fmt.Errorf("%w (following redirect %v)", err, u.String()) - } - - if c := r.StatusCode(); c < 300 || c >= 400 { + } else if c := r.StatusCode(); c < 300 || c >= 400 { return r, nil } From 87b9cdb095d15f44f9f53c5e5f376a1d3772e191 Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 20:01:49 +0900 Subject: [PATCH 11/13] Fix --- .github/workflows/test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d622284..5607c81 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -9,6 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 - run: go build - uses: golangci/golangci-lint-action@v4 - run: go test -race -covermode atomic -coverprofile coverage.txt From ecd0236e70fcf7db4c1367b99e4d585636ac710f Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 20:03:18 +0900 Subject: [PATCH 12/13] Fix --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b909836..839b9ac 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/raviqqe/muffet/v2 -go 1.22 +go 1.22.0 require ( github.com/andybalholm/brotli v1.1.0 From 25e62adf4eabb9202d3258e4dc1376f4b3cd7e8f Mon Sep 17 00:00:00 2001 From: Yota Toyama Date: Wed, 28 Feb 2024 20:03:54 +0900 Subject: [PATCH 13/13] Fix --- .github/workflows/test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 5607c81..d622284 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -9,7 +9,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - run: go build - uses: golangci/golangci-lint-action@v4 - run: go test -race -covermode atomic -coverprofile coverage.txt