Skip to content

Commit

Permalink
Refactor (#277)
Browse files Browse the repository at this point in the history
- Rename option
- Update snapshots
- Add todo
  • Loading branch information
raviqqe authored Jan 29, 2023
1 parent be9e885 commit 079b922
Show file tree
Hide file tree
Showing 11 changed files with 147 additions and 156 deletions.
2 changes: 1 addition & 1 deletion .snapshots/TestHelp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Application Options:
--header=<header>... Custom headers
-f, --ignore-fragments Ignore URL fragments
--json Output results in JSON
--include-success-in-json Include successful results in JSON
--experimental-verbose-json Include successful results in JSON
--junit Output results as JUnit XML file
-r, --max-redirections=<count> Maximum number of redirections
(default: 64)
Expand Down
55 changes: 28 additions & 27 deletions arguments.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,34 @@ import (
)

type arguments struct {
BufferSize int `short:"b" long:"buffer-size" value-name:"<size>" default:"4096" description:"HTTP response buffer size in bytes"`
MaxConnections int `short:"c" long:"max-connections" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections"`
MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections per host"`
MaxResponseBodySize int `long:"max-response-body-size" value-name:"<size>" default:"10000000" description:"Maximum response body size to read"`
RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"<pattern>..." description:"Exclude URLs matched with given regular expressions"`
RawIncludedPatterns []string `short:"i" long:"include" value-name:"<pattern>..." description:"Include URLs matched with given regular expressions"`
FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages"`
FollowSitemapXML bool `long:"follow-sitemap-xml" description:"Scrape only pages listed in sitemap.xml"`
RawHeaders []string `long:"header" value-name:"<header>..." description:"Custom headers"`
IgnoreFragments bool `short:"f" long:"ignore-fragments" description:"Ignore URL fragments"`
JSONOutput bool `long:"json" description:"Output results in JSON"`
IncludeSuccessInJSONOutput bool `long:"include-success-in-json" description:"Include successful results in JSON"`
JUnitOutput bool `long:"junit" description:"Output results as JUnit XML file"`
MaxRedirections int `short:"r" long:"max-redirections" value-name:"<count>" default:"64" description:"Maximum number of redirections"`
RateLimit int `long:"rate-limit" value-name:"<rate>" description:"Max requests per second"`
Timeout int `short:"t" long:"timeout" value-name:"<seconds>" default:"10" description:"Timeout for HTTP requests in seconds"`
Verbose bool `short:"v" long:"verbose" description:"Show successful results too"`
Proxy string `long:"proxy" value-name:"<host>" description:"HTTP proxy host"`
SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification"`
OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL"`
Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto"`
Help bool `short:"h" long:"help" description:"Show this help"`
Version bool `long:"version" description:"Show version"`
URL string
ExcludedPatterns []*regexp.Regexp
IncludePatterns []*regexp.Regexp
Headers map[string]string
BufferSize int `short:"b" long:"buffer-size" value-name:"<size>" default:"4096" description:"HTTP response buffer size in bytes"`
MaxConnections int `short:"c" long:"max-connections" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections"`
MaxConnectionsPerHost int `long:"max-connections-per-host" value-name:"<count>" default:"512" description:"Maximum number of HTTP connections per host"`
MaxResponseBodySize int `long:"max-response-body-size" value-name:"<size>" default:"10000000" description:"Maximum response body size to read"`
RawExcludedPatterns []string `short:"e" long:"exclude" value-name:"<pattern>..." description:"Exclude URLs matched with given regular expressions"`
RawIncludedPatterns []string `short:"i" long:"include" value-name:"<pattern>..." description:"Include URLs matched with given regular expressions"`
FollowRobotsTxt bool `long:"follow-robots-txt" description:"Follow robots.txt when scraping pages"`
FollowSitemapXML bool `long:"follow-sitemap-xml" description:"Scrape only pages listed in sitemap.xml"`
RawHeaders []string `long:"header" value-name:"<header>..." description:"Custom headers"`
IgnoreFragments bool `short:"f" long:"ignore-fragments" description:"Ignore URL fragments"`
JSONOutput bool `long:"json" description:"Output results in JSON"`
// TODO Integrate this option into --verbose in v3.
VerboseJSON bool `long:"experimental-verbose-json" description:"Include successful results in JSON"`
JUnitOutput bool `long:"junit" description:"Output results as JUnit XML file"`
MaxRedirections int `short:"r" long:"max-redirections" value-name:"<count>" default:"64" description:"Maximum number of redirections"`
RateLimit int `long:"rate-limit" value-name:"<rate>" description:"Max requests per second"`
Timeout int `short:"t" long:"timeout" value-name:"<seconds>" default:"10" description:"Timeout for HTTP requests in seconds"`
Verbose bool `short:"v" long:"verbose" description:"Show successful results too"`
Proxy string `long:"proxy" value-name:"<host>" description:"HTTP proxy host"`
SkipTLSVerification bool `long:"skip-tls-verification" description:"Skip TLS certificate verification"`
OnePageOnly bool `long:"one-page-only" description:"Only check links found in the given URL"`
Color color `long:"color" description:"Color output" choice:"auto" choice:"always" choice:"never" default:"auto"`
Help bool `short:"h" long:"help" description:"Show this help"`
Version bool `long:"version" description:"Show version"`
URL string
ExcludedPatterns []*regexp.Regexp
IncludePatterns []*regexp.Regexp
Headers map[string]string
}

func getArguments(ss []string) (*arguments, error) {
Expand Down
58 changes: 27 additions & 31 deletions command.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,9 @@ func (c *command) runWithError(ss []string) (bool, error) {
go checker.Check(p)

if args.JSONOutput {
return c.printResultsInJSON(checker.Results(), args.IncludeSuccessInJSONOutput)
}

if args.JUnitOutput {
return c.printResultsAsJunitXML(checker.Results())
return c.printResultsInJSON(checker.Results(), args.VerboseJSON)
} else if args.JUnitOutput {
return c.printResultsInJUnitXML(checker.Results())
}

formatter := newPageResultFormatter(
Expand All @@ -135,28 +133,25 @@ func (c *command) runWithError(ss []string) (bool, error) {
c.print(formatter.Format(r))
}

if !r.OK() {
ok = false
}
ok = ok && r.OK()
}

return ok, nil
}

func (c *command) printResultsInJSON(rc <-chan *pageResult, includeSuccess bool) (bool, error) {
results := []interface{}{}
func (c *command) printResultsInJSON(rc <-chan *pageResult, verbose bool) (bool, error) {
rs := []interface{}{}
ok := true

for r := range rc {
if r.OK() && includeSuccess {
results = append(results, newJSONSuccessPageResult(r))
} else if !r.OK() {
results = append(results, newJSONErrorPageResult(r))
ok = false
if !r.OK() || verbose {
rs = append(rs, newJSONPageResult(r, verbose))
}

ok = ok && r.OK()
}

bs, err := json.Marshal(results)
bs, err := json.Marshal(rs)

if err != nil {
return false, err
Expand All @@ -167,33 +162,34 @@ func (c *command) printResultsInJSON(rc <-chan *pageResult, includeSuccess bool)
return ok, nil
}

func (c *command) printResultsAsJunitXML(rc <-chan *pageResult) (bool, error) {
func (c *command) printResultsInJUnitXML(rc <-chan *pageResult) (bool, error) {
rs := []*xmlPageResult{}
ok := true

for r := range rc {
rs = append(rs, newXMLPageResult(r))

if !r.OK() {
ok = false
}
ok = ok && r.OK()
}

results := &struct {
// spell-checker: disable-next-line
XMLName xml.Name `xml:"testsuites"`
// spell-checker: disable-next-line
PageResults []*xmlPageResult `xml:"testsuite"`
}{PageResults: rs}

data, err := xml.MarshalIndent(results, "", " ")
bs, err := xml.MarshalIndent(
struct {
// spell-checker: disable-next-line
XMLName xml.Name `xml:"testsuites"`
// spell-checker: disable-next-line
PageResults []*xmlPageResult `xml:"testsuite"`
}{
PageResults: rs,
},
"",
" ",
)

if err != nil {
return ok, err
return false, err
}

c.print(xml.Header)
c.print(string(data))
c.print(string(bs))

return ok, nil
}
Expand Down
2 changes: 1 addition & 1 deletion command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ func TestCommandIncludeSuccessfulPageInJSONOutputWhenRequested(t *testing.T) {
func(u *url.URL) (*fakeHttpResponse, error) {
return newFakeHtmlResponse("", ""), nil
},
).Run([]string{"--json", "--include-success-in-json", "http://foo.com"})
).Run([]string{"--json", "--experimental-verbose-json", "http://foo.com"})

assert.True(t, ok)
assert.Equal(t, strings.TrimSpace(b.String()), "[{\"url\":\"\",\"links\":[]}]")
Expand Down
18 changes: 14 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
module github.com/raviqqe/muffet/v2

go 1.12
go 1.19

require (
github.com/andybalholm/brotli v1.0.4
github.com/bradleyjkemp/cupaloy v2.3.0+incompatible
github.com/jessevdk/go-flags v1.5.0
github.com/klauspost/compress v1.15.14 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/logrusorgru/aurora/v3 v3.0.0
github.com/mattn/go-colorable v0.1.13
github.com/mattn/go-isatty v0.0.17
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/oxffaa/gopher-parse-sitemap v0.0.0-20191021113419-005d2eb1def4
github.com/stretchr/testify v1.8.1
github.com/temoto/robotstxt v1.1.2
github.com/valyala/fasthttp v1.44.0
github.com/yhat/scrape v0.0.0-20161128144610-24b7890b0945
go.uber.org/ratelimit v0.2.0
golang.org/x/net v0.5.0
)

require (
github.com/andres-erbsen/clock v0.0.0-20160526145045-9e14626cd129 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/klauspost/compress v1.15.14 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
golang.org/x/sys v0.4.0 // indirect
golang.org/x/text v0.6.0 // indirect
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
18 changes: 0 additions & 18 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -48,48 +48,30 @@ github.com/valyala/fasthttp v1.44.0/go.mod h1:f6VbjjoI3z1NDOZOv17o6RvtRSWxC77seB
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
github.com/yhat/scrape v0.0.0-20161128144610-24b7890b0945 h1:6Ju8pZBYFTN9FaV/JvNBiIHcsgEmP4z4laciqjfjY8E=
github.com/yhat/scrape v0.0.0-20161128144610-24b7890b0945/go.mod h1:4vRFPPNYllgCacoj+0FoKOjTW68rUhEfqPLiEJaK2w8=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/ratelimit v0.2.0 h1:UQE2Bgi7p2B85uP5dC2bbRtig0C+OeNRnNEafLjsLPA=
go.uber.org/ratelimit v0.2.0/go.mod h1:YYBV4e4naJvhpitQrWJu1vCpgB7CboMe0qhltKt6mUg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220214200702-86341886e292/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw=
golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.4.0 h1:Zr2JFtRQNX3BCZ8YtxRE9hNJYC8J6I1MVbMg6owUp18=
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k=
golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
43 changes: 20 additions & 23 deletions json_page_result.go
Original file line number Diff line number Diff line change
@@ -1,41 +1,38 @@
package main

type jsonErrorPageResult struct {
URL string `json:"url"`
Links []*jsonErrorLinkResult `json:"links"`
}

type jsonErrorLinkResult struct {
type jsonPageResult struct {
URL string `json:"url"`
Error string `json:"error"`
}

type jsonSuccessPageResult struct {
URL string `json:"url"`
Links []*jsonSuccessLinkResult `json:"links"`
Links []any `json:"links"`
}

// jsonSuccessLinkResult is the JSON form of a link that was checked
// successfully: the link's URL and the status code recorded for it.
type jsonSuccessLinkResult struct {
URL string `json:"url"`
Status int `json:"status"`
}

func newJSONErrorPageResult(r *pageResult) *jsonErrorPageResult {
ls := make([]*jsonErrorLinkResult, 0, len(r.ErrorLinkResults))
// jsonErrorLinkResult is the JSON form of a link whose check failed: the
// link's URL and the text of the error reported for it.
type jsonErrorLinkResult struct {
URL string `json:"url"`
Error string `json:"error"`
}

for _, r := range r.ErrorLinkResults {
ls = append(ls, &jsonErrorLinkResult{r.URL, r.Error.Error()})
func newJSONPageResult(r *pageResult, verbose bool) *jsonPageResult {
c := len(r.ErrorLinkResults)

if verbose {
c += len(r.SuccessLinkResults)
}

return &jsonErrorPageResult{r.URL, ls}
}
ls := make([]any, 0, c)

func newJSONSuccessPageResult(r *pageResult) *jsonSuccessPageResult {
ls := make([]*jsonSuccessLinkResult, 0, len(r.SuccessLinkResults))
if verbose {
for _, r := range r.SuccessLinkResults {
ls = append(ls, &jsonSuccessLinkResult{r.URL, r.StatusCode})
}
}

for _, r := range r.SuccessLinkResults {
ls = append(ls, &jsonSuccessLinkResult{r.URL, r.StatusCode})
for _, r := range r.ErrorLinkResults {
ls = append(ls, &jsonErrorLinkResult{r.URL, r.Error.Error()})
}

return &jsonSuccessPageResult{r.URL, ls}
return &jsonPageResult{r.URL, ls}
}
19 changes: 8 additions & 11 deletions json_page_result_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,35 @@ import (
"encoding/json"
"errors"
"testing"
"time"

"github.com/bradleyjkemp/cupaloy"
"github.com/stretchr/testify/assert"
)

func TestMarshalErrorJSONPageResult(t *testing.T) {
d, _ := time.ParseDuration("1s")
bs, err := json.Marshal(newJSONErrorPageResult(
bs, err := json.Marshal(newJSONPageResult(
&pageResult{
"http://foo.com",
[]*successLinkResult{},
[]*errorLinkResult{
{"http://foo.com/bar", errors.New("baz"), d},
{"http://foo.com/bar", errors.New("baz"), 0},
},
d,
}))
0,
}, false))
assert.Nil(t, err)
cupaloy.SnapshotT(t, bs)
}

func TestMarshalSuccessJSONPageResult(t *testing.T) {
d, _ := time.ParseDuration("1s")
bs, err := json.Marshal(newJSONSuccessPageResult(
bs, err := json.Marshal(newJSONPageResult(
&pageResult{
"http://foo.com",
[]*successLinkResult{
{"http://foo.com/foo", 200, d},
{"http://foo.com/foo", 200, 0},
},
[]*errorLinkResult{},
d,
}))
0,
}, true))
assert.Nil(t, err)
cupaloy.SnapshotT(t, bs)
}
Loading

0 comments on commit 079b922

Please sign in to comment.